mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-20 10:12:06 +00:00
Port linkerscript parser to Bison (#1266)
Notable side effects: * Use the standard-conformant MSVC preproc * Add test for linker script INCLUDE * Improve wording of placement conflict errors * Fix errors from not newline-terminated files * Teach checkdiff about the linker script doc * Call linker script "commands" "directives" instead --------- Co-authored-by: Rangi42 <remy.oukaour+rangi42@gmail.com>
This commit is contained in:
490
src/link/script.y
Normal file
490
src/link/script.y
Normal file
@@ -0,0 +1,490 @@
|
||||
%language "c++"
|
||||
%define api.value.type variant
|
||||
%define api.token.constructor
|
||||
|
||||
%code requires {
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
|
||||
#include "linkdefs.hpp"
|
||||
}
|
||||
%code {
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <assert.h>
|
||||
#include <bit>
|
||||
#include <cinttypes>
|
||||
#include <fstream>
|
||||
#include <locale>
|
||||
#include <stdio.h>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
#include "helpers.hpp"
|
||||
#include "itertools.hpp"
|
||||
#include "util.hpp"
|
||||
|
||||
#include "link/main.hpp"
|
||||
#include "link/section.hpp"
|
||||
|
||||
using namespace std::literals;
|
||||
|
||||
static void includeFile(std::string &&path);
|
||||
static void incLineNo(void);
|
||||
|
||||
static void setSectionType(SectionType type);
|
||||
static void setSectionType(SectionType type, uint32_t bank);
|
||||
static void setAddr(uint32_t addr);
|
||||
static void alignTo(uint32_t alignment, uint32_t offset);
|
||||
static void pad(uint32_t length);
|
||||
static void placeSection(std::string const &name);
|
||||
|
||||
static yy::parser::symbol_type yylex(void);
|
||||
|
||||
struct Keyword {
|
||||
std::string_view name;
|
||||
yy::parser::symbol_type (* tokenGen)(void);
|
||||
};
|
||||
}
|
||||
|
||||
%token YYEOF 0 "end of file"
|
||||
%token newline
|
||||
%token ORG "ORG"
|
||||
INCLUDE "INCLUDE"
|
||||
ALIGN "ALIGN"
|
||||
DS "DS"
|
||||
%code {
|
||||
static std::array keywords{
|
||||
Keyword{"ORG"sv, yy::parser::make_ORG},
|
||||
Keyword{"INCLUDE"sv, yy::parser::make_INCLUDE},
|
||||
Keyword{"ALIGN"sv, yy::parser::make_ALIGN},
|
||||
Keyword{"DS"sv, yy::parser::make_DS},
|
||||
};
|
||||
}
|
||||
%token <std::string> string;
|
||||
%token <uint32_t> number;
|
||||
%token <SectionType> section_type;
|
||||
|
||||
%%
|
||||
|
||||
// A linker script is a (possibly empty) sequence of lines.
// The rule is left-recursive so that each `line` is folded into `lines` as soon as it is parsed:
// this keeps the parser stack at a constant depth regardless of how long the script is.
lines: %empty
     | lines line
;
|
||||
|
||||
// One physical line of the script. Every alternative ends with `newline`, and every action
// is responsible for bumping the current line number exactly once.
line:
	  INCLUDE string newline {
		// Note: this additionally increments the line number!
		includeFile(std::move($2));
	  }
	| directive newline {
		incLineNo();
	  }
	| newline {
		incLineNo();
	  }
	| error newline {
		// Error recovery: resume parsing at the start of the next line.
		yyerrok;
		incLineNo();
	  }
;
|
||||
|
||||
// Everything that can appear on a line, apart from INCLUDE.
directive:
	  section_type {
		// Switch to a memory region, without giving a bank number.
		setSectionType($1);
	  }
	| section_type number {
		// Switch to a specific bank of a memory region.
		setSectionType($1, $2);
	  }
	| ORG number {
		setAddr($2);
	  }
	| ALIGN number {
		// The grammar has no syntax for an alignment offset, so it is always 0.
		alignTo($2, 0);
	  }
	| DS number {
		pad($2);
	  }
	| string {
		// A bare string names a section to place at the current address.
		placeSection($1);
	  }
;
|
||||
|
||||
%%
|
||||
|
||||
// Reports an error, prefixing the message with the script's path and current line number.
// `context` is the `LexerStackEntry` the error should be attributed to; it is parenthesized
// so that any expression (not just a plain identifier) can be passed.
// `fmt` must be a string literal, as it is concatenated with the prefix's format.
#define scriptError(context, fmt, ...) \
	::error(NULL, 0, "%s(%" PRIu32 "): " fmt, \
	        (context).path.c_str(), (context).lineNo __VA_OPT__(,) __VA_ARGS__)
|
||||
|
||||
// Lexer.
|
||||
|
||||
// One file being lexed: either the top-level linker script, or a file it INCLUDEs.
struct LexerStackEntry {
	std::filebuf file; // The file's contents; not opened by the constructor.
	std::string path; // Path the file is opened from, also used in diagnostics.
	uint32_t lineNo; // 1-based number of the line currently being processed.

	// Takes ownership of `path_` (moved from, not copied), and starts counting lines at 1.
	explicit LexerStackEntry(std::string &&path_) : file(), path(std::move(path_)), lineNo(1) {}
};
|
||||
// Stack of the files being lexed: the innermost (most recently INCLUDEd) one is at the back,
// and is the one tokens are read from.
static std::vector<LexerStackEntry> lexerStack;
|
||||
// Whether the lexer has already injected its synthetic end-of-file newline (see `yylex`).
static bool atEof;
|
||||
|
||||
void yy::parser::error(std::string const &msg) {
|
||||
auto const &script = lexerStack.back();
|
||||
scriptError(script, "%s", msg.c_str());
|
||||
}
|
||||
|
||||
static void includeFile(std::string &&path) {
|
||||
// `emplace_back` can invalidate references to the stack's elements!
|
||||
// This is why `newContext` must be gotten before `prevContext`.
|
||||
auto &newContext = lexerStack.emplace_back(std::move(path));
|
||||
auto &prevContext = lexerStack[lexerStack.size() - 2];
|
||||
|
||||
if (!newContext.file.open(newContext.path, std::ios_base::in)) {
|
||||
// The order is important: report the error, increment the line number, modify the stack!
|
||||
scriptError(prevContext, "Could not open included linker script \"%s\"",
|
||||
newContext.path.c_str());
|
||||
++prevContext.lineNo;
|
||||
lexerStack.pop_back();
|
||||
} else {
|
||||
// The lexer will use the new entry to lex the next token.
|
||||
++prevContext.lineNo;
|
||||
}
|
||||
}
|
||||
|
||||
// Moves the file currently being lexed on to its next line.
static void incLineNo(void) {
	auto &current = lexerStack.back();
	current.lineNo += 1;
}
|
||||
|
||||
// Returns whether `c` is blank space within a line (a space or a tab; newlines don't count).
static bool isWhiteSpace(int c) {
	switch (c) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
|
||||
|
||||
// Returns whether `c` is a line terminator character (CR or LF).
static bool isNewline(int c) {
	switch (c) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
|
||||
|
||||
// Returns whether `c` can be part of an identifier (an ASCII letter or digit).
static bool isIdentChar(int c) {
	bool const upper = 'A' <= c && c <= 'Z';
	bool const lower = 'a' <= c && c <= 'z';
	bool const digit = '0' <= c && c <= '9';
	return upper || lower || digit;
}
|
||||
|
||||
// Returns whether `c` is a decimal digit.
static bool isDecDigit(int c) {
	return !(c < '0' || c > '9');
}
|
||||
|
||||
// Returns whether `c` is a binary digit.
static bool isBinDigit(int c) {
	return c == '0' || c == '1';
}
|
||||
|
||||
// Returns whether `c` is a hexadecimal digit, in either letter case.
static bool isHexDigit(int c) {
	if ('0' <= c && c <= '9') {
		return true;
	}
	if ('A' <= c && c <= 'F') {
		return true;
	}
	return 'a' <= c && c <= 'f';
}
|
||||
|
||||
// Converts a hexadecimal digit character to its value (0-15).
// `c` must satisfy `isHexDigit`; any other input is treated as unreachable.
static uint8_t parseHexDigit(int c) {
	if ('0' <= c && c <= '9') {
		return c - '0';
	}
	if ('A' <= c && c <= 'F') {
		return c - 'A' + 10;
	}
	if ('a' <= c && c <= 'f') {
		return c - 'a' + 10;
	}
	unreachable_();
}
|
||||
|
||||
// Lexes and returns the next token, reading from the file at the top of `lexerStack`.
//
// Tokens produced: `newline`, `string` (double-quoted, with `\n` `\r` `\t` `\\` `\"` escapes),
// `number` (`$hex`, `%binary`, or decimal), `section_type`, the directive keywords, and `YYEOF`.
// Blank space and `;` comments are skipped. Lexical errors are reported via `scriptError`,
// after which lexing carries on so that as many errors as possible get reported in one run.
//
// At the end of every file, one `newline` is injected before moving on, so that a last line
// lacking a line terminator is still a complete line. Included files are then popped off the
// stack (lexing resumes in the including file); the top-level file yields `YYEOF`.
yy::parser::symbol_type yylex(void) {
try_again: // Can't use a `do {} while(0)` loop, otherwise compilers (wrongly) think it can end.
	auto &context = lexerStack.back();
	auto c = context.file.sbumpc();

	// First, skip leading whitespace.
	while (isWhiteSpace(c)) {
		c = context.file.sbumpc();
	}
	// Then, skip a comment if applicable.
	if (c == ';') {
		// The comment may be on the file's last line, in which case EOF ends it, not a newline.
		while (c != EOF && !isNewline(c)) {
			c = context.file.sbumpc();
		}
	}

	// Alright, what token should we return?
	if (c == EOF) {
		// Basically yywrap().
		if (!atEof) {
			// Inject a newline at EOF, to avoid errors for files that don't end with one.
			// This is also done for included files, otherwise their last line would be
			// joined with whatever follows the INCLUDE directive in the including file.
			atEof = true;
			return yy::parser::make_newline();
		}
		if (lexerStack.size() != 1) {
			lexerStack.pop_back();
			atEof = false; // The file we are returning to has not hit its own EOF yet.
			goto try_again;
		}
		return yy::parser::make_YYEOF();
	} else if (isNewline(c)) {
		// Handle CRLF.
		if (c == '\r' && context.file.sgetc() == '\n') {
			context.file.sbumpc();
		}
		return yy::parser::make_newline();
	} else if (c == '"') {
		std::string str;

		// Each character is peeked first, and only consumed once known to belong to the string:
		// this way, the newline (or EOF) ending an unterminated string is left for the next call.
		for (c = context.file.sgetc(); c != '"'; c = context.file.sgetc()) {
			if (c == EOF || isNewline(c)) {
				scriptError(context, "Unterminated string");
				break;
			}
			context.file.sbumpc();
			if (c == '\\') {
				c = context.file.sgetc();
				if (c == EOF || isNewline(c)) {
					scriptError(context, "Unterminated string");
					break;
				} else if (c == 'n') {
					c = '\n';
				} else if (c == 'r') {
					c = '\r';
				} else if (c == 't') {
					c = '\t';
				} else if (c != '\\' && c != '"') {
					// The character is kept as-is after reporting the error.
					scriptError(context, "Cannot escape character %s", printChar(c));
				}
				context.file.sbumpc();
			}
			str.push_back(c);
		}
		// Consume the closing quote, but only if there is one:
		// an unterminated string must not swallow the newline that ends its line.
		if (c == '"') {
			context.file.sbumpc();
		}

		return yy::parser::make_string(std::move(str));
	} else if (c == '$') {
		c = context.file.sgetc();
		if (!isHexDigit(c)) {
			scriptError(context, "No hexadecimal digits found after '$'");
			return yy::parser::make_number(0);
		}

		uint32_t number = parseHexDigit(c);
		context.file.sbumpc();
		for (c = context.file.sgetc(); isHexDigit(c); c = context.file.sgetc()) {
			number = number * 16 + parseHexDigit(c);
			context.file.sbumpc();
		}
		return yy::parser::make_number(number);
	} else if (c == '%') {
		c = context.file.sgetc();
		if (!isBinDigit(c)) {
			scriptError(context, "No binary digits found after '%%'");
			return yy::parser::make_number(0);
		}

		uint32_t number = c - '0';
		context.file.sbumpc();
		for (c = context.file.sgetc(); isBinDigit(c); c = context.file.sgetc()) {
			number = number * 2 + (c - '0');
			context.file.sbumpc();
		}
		return yy::parser::make_number(number);
	} else if (isDecDigit(c)) {
		uint32_t number = c - '0';
		// `snextc` consumes the digit that was just accumulated, then peeks at what follows it.
		// (Merely peeking with `sgetc` would never advance, and thus loop forever.)
		for (c = context.file.sgetc(); isDecDigit(c); c = context.file.snextc()) {
			number = number * 10 + (c - '0');
		}
		return yy::parser::make_number(number);
	} else if (isIdentChar(c)) { // Note that we match these *after* digit characters!
		std::string ident;
		// Case-insensitive comparison of an identifier's character against an upper-case one.
		auto strUpperCmp = [](char cmp, char ref) {
			// `locale::classic()` yields the "C" locale.
			assert(!std::use_facet<std::ctype<char>>(std::locale::classic())
				.is(std::ctype_base::lower, ref));
			return std::use_facet<std::ctype<char>>(std::locale::classic())
				.toupper(cmp) == ref;
		};

		ident.push_back(c);
		for (c = context.file.sgetc(); isIdentChar(c); c = context.file.snextc()) {
			ident.push_back(c);
		}

		for (SectionType type : EnumSeq(SECTTYPE_INVALID)) {
			if (std::equal(ident.begin(), ident.end(),
			               sectionTypeInfo[type].name.begin(), sectionTypeInfo[type].name.end(),
			               strUpperCmp)) {
				return yy::parser::make_section_type(type);
			}
		}

		for (Keyword const &keyword : keywords) {
			if (std::equal(ident.begin(), ident.end(),
			               keyword.name.begin(), keyword.name.end(),
			               strUpperCmp)) {
				return keyword.tokenGen();
			}
		}

		scriptError(context, "Unknown keyword \"%s\"", ident.c_str());
		goto try_again; // Try lexing another token.
	} else {
		scriptError(context, "Unexpected character '%s'", printChar(c));
		// Keep reading characters until the EOL, to avoid reporting too many errors.
		// The newline (or EOF) itself is only peeked at, so the next iteration returns it.
		for (c = context.file.sgetc(); c != EOF && !isNewline(c); c = context.file.snextc()) {
		}
		goto try_again;
	}
	// Not marking as unreachable; this will generate a warning if any codepath forgets to return.
}
|
||||
|
||||
// Semantic actions.
|
||||
|
||||
// Current placement address of each bank of each memory region: indexed by section type first,
// then by bank index. Each region's list is grown on demand by `setActiveTypeAndIdx`.
static std::array<std::vector<uint16_t>, SECTTYPE_INVALID> curAddr;
|
||||
// Memory region that directives currently apply to.
// `script_ProcessScript` resets it to `SECTTYPE_INVALID` before parsing a script.
static SectionType activeType; // Index into curAddr
|
||||
// Bank that directives currently apply to, counted from the region's first bank
// (i.e. this is the bank number minus the region's `firstBank`).
static uint32_t activeBankIdx; // Index into curAddr[activeType]
|
||||
|
||||
// Makes bank index `idx` of region `type` the active one.
// Banks that have not been seen before start out at the region's start address.
static void setActiveTypeAndIdx(SectionType type, uint32_t idx) {
	activeType = type;
	activeBankIdx = idx;

	auto &banks = curAddr[type];
	if (idx >= banks.size()) {
		banks.resize(idx + 1, sectionTypeInfo[type].startAddr);
	}
}
|
||||
|
||||
// Handles a region directive given without a bank number.
// This is an error if the region has more than one bank, but the first bank is selected
// regardless, so that the rest of the script can still be checked.
static void setSectionType(SectionType type) {
	bool const isBanked = nbbanks(type) != 1;

	if (isBanked) {
		scriptError(lexerStack.back(), "A bank number must be specified for %s",
		            sectionTypeInfo[type].name.c_str());
		// Keep going with a default value for the bank index.
	}

	// Either there is only a single bank anyway, or this is the default: use index 0.
	setActiveTypeAndIdx(type, 0);
}
|
||||
|
||||
// Handles a region directive given with a bank number.
// A bank number outside of the region's `firstBank`..`lastBank` range is reported as an error,
// and clamped to the nearest valid bank so that the rest of the script can still be checked.
static void setSectionType(SectionType type, uint32_t bank) {
	auto const &context = lexerStack.back();
	auto const &typeInfo = sectionTypeInfo[type];

	if (bank < typeInfo.firstBank) {
		scriptError(context, "%s bank %" PRIu32 " doesn't exist, the minimum is %" PRIu32,
		            typeInfo.name.c_str(), bank, typeInfo.firstBank);
		bank = typeInfo.firstBank;
	} else if (bank > typeInfo.lastBank) {
		scriptError(context, "%s bank %" PRIu32 " doesn't exist, the maximum is %" PRIu32,
		            typeInfo.name.c_str(), bank, typeInfo.lastBank);
		// The bank number sizes the region's address table, so it must not be left unchecked:
		// a bogus huge number in the script would otherwise trigger an equally huge allocation.
		bank = typeInfo.lastBank;
	}

	setActiveTypeAndIdx(type, bank - typeInfo.firstBank);
}
|
||||
|
||||
static void setAddr(uint32_t addr) {
|
||||
auto const &context = lexerStack.back();
|
||||
auto &pc = curAddr[activeType][activeBankIdx];
|
||||
auto const &typeInfo = sectionTypeInfo[activeType];
|
||||
|
||||
if (addr < pc) {
|
||||
scriptError(context, "ORG cannot be used to go backwards (from $%04x to $%04x)", pc, addr);
|
||||
} else if (addr > endaddr(activeType)) { // Allow "one past the end" sections.
|
||||
scriptError(context, "Cannot go to $%04" PRIx32 ": %s ends at $%04" PRIx16 "",
|
||||
addr, typeInfo.name.c_str(), endaddr(activeType));
|
||||
pc = endaddr(activeType);
|
||||
} else {
|
||||
pc = addr;
|
||||
}
|
||||
}
|
||||
|
||||
static void alignTo(uint32_t alignment, uint32_t alignOfs) {
|
||||
auto const &context = lexerStack.back();
|
||||
auto const &typeInfo = sectionTypeInfo[activeType];
|
||||
auto &pc = curAddr[activeType][activeBankIdx];
|
||||
|
||||
// TODO: maybe warn if truncating?
|
||||
alignOfs %= 1 << alignment;
|
||||
|
||||
assert(pc >= typeInfo.startAddr);
|
||||
uint16_t length = alignment < 16 ? (uint16_t)(alignOfs - pc) % (1u << alignment)
|
||||
: alignOfs - pc; // Let it wrap around, this'll trip the check.
|
||||
if (uint16_t offset = pc - typeInfo.startAddr; length > typeInfo.size - offset) {
|
||||
scriptError(context, "Cannot align: the next suitable address after $%04" PRIx16 " is $%04" PRIx16 ", past $%04" PRIx16,
|
||||
pc, (uint16_t)(pc + length), (uint16_t)(endaddr(activeType) + 1));
|
||||
} else {
|
||||
pc += length;
|
||||
}
|
||||
}
|
||||
|
||||
static void pad(uint32_t length) {
|
||||
auto const &context = lexerStack.back();
|
||||
auto const &typeInfo = sectionTypeInfo[activeType];
|
||||
auto &pc = curAddr[activeType][activeBankIdx];
|
||||
|
||||
assert(pc >= typeInfo.startAddr);
|
||||
if (uint16_t offset = pc - typeInfo.startAddr; length + offset > typeInfo.size) {
|
||||
scriptError(context, "Cannot pad by %u bytes: only %u bytes to $%04" PRIx16,
|
||||
length, typeInfo.size - offset, (uint16_t)(endaddr(activeType) + 1));
|
||||
} else {
|
||||
pc += length;
|
||||
}
|
||||
}
|
||||
|
||||
static void placeSection(std::string const &name) {
|
||||
auto const &context = lexerStack.back();
|
||||
auto const &typeInfo = sectionTypeInfo[activeType];
|
||||
|
||||
// A type *must* be active.
|
||||
if (activeType == SECTTYPE_INVALID) {
|
||||
scriptError(context, "No memory region has been specified to place section \"%s\" in",
|
||||
name.c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
auto *section = sect_GetSection(name.c_str());
|
||||
if (!section) {
|
||||
scriptError(context, "Unknown section \"%s\"", name.c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
assert(section->offset == 0);
|
||||
// Check that the linker script doesn't contradict what the code says.
|
||||
if (section->type == SECTTYPE_INVALID) {
|
||||
// SDCC areas don't have a type assigned yet, so the linker script is used to give them one.
|
||||
for (Section *fragment = section; fragment; fragment = fragment->nextu) {
|
||||
fragment->type = activeType;
|
||||
}
|
||||
} else if (section->type != activeType) {
|
||||
scriptError(context, "\"%s\" is specified to be a %s section, but it is already a %s section",
|
||||
name.c_str(), typeInfo.name.c_str(), sectionTypeInfo[section->type].name.c_str());
|
||||
}
|
||||
|
||||
uint32_t bank = activeBankIdx + typeInfo.firstBank;
|
||||
if (section->isBankFixed && bank != section->bank) {
|
||||
scriptError(context, "The linker script places section \"%s\" in %s bank %" PRIu32 ", but it was already defined in bank %" PRIu32,
|
||||
name.c_str(), sectionTypeInfo[section->type].name.c_str(), bank, section->bank);
|
||||
}
|
||||
section->isBankFixed = true;
|
||||
section->bank = bank;
|
||||
|
||||
uint16_t &org = curAddr[activeType][activeBankIdx];
|
||||
if (section->isAddressFixed && org != section->org) {
|
||||
scriptError(context, "The linker script assigns section \"%s\" to address $%04" PRIx16 ", but it was already at $%04" PRIx16,
|
||||
name.c_str(), org, section->org);
|
||||
} else if (section->isAlignFixed && (org & section->alignMask) != section->alignOfs) {
|
||||
uint8_t alignment = std::countr_one(section->alignMask);
|
||||
scriptError(context, "The linker script assigns section \"%s\" to address $%04" PRIx16 ", but that would be ALIGN[%" PRIu8 ", %" PRIu16 "] instead of the requested ALIGN[%" PRIu8 ", %" PRIu16 "]",
|
||||
name.c_str(), org, alignment, (uint16_t)(org & section->alignMask), alignment, section->alignOfs);
|
||||
}
|
||||
section->isAddressFixed = true;
|
||||
section->isAlignFixed = false; // This can't be set when the above is.
|
||||
section->org = org;
|
||||
|
||||
uint16_t curOfs = org - typeInfo.startAddr;
|
||||
if (section->size > typeInfo.size - curOfs) {
|
||||
scriptError(context, "The linker script assigns section \"%s\" to address $%04" PRIx16 ", but then it would overflow %s by %" PRIx16 " bytes",
|
||||
name.c_str(), org, typeInfo.name.c_str(),
|
||||
(uint16_t)(section->size - (typeInfo.size - curOfs)));
|
||||
// Fill as much as possible without going out of bounds.
|
||||
org = typeInfo.startAddr + typeInfo.size;
|
||||
} else {
|
||||
org += section->size;
|
||||
}
|
||||
}
|
||||
|
||||
// External API.
|
||||
|
||||
void script_ProcessScript(char const *path) {
|
||||
activeType = SECTTYPE_INVALID;
|
||||
|
||||
lexerStack.clear();
|
||||
atEof = false;
|
||||
auto &newContext = lexerStack.emplace_back(std::string(path));
|
||||
|
||||
if (!newContext.file.open(newContext.path, std::ios_base::in)) {
|
||||
error(NULL, 0, "Could not open linker script \"%s\"", newContext.path.c_str());
|
||||
lexerStack.clear();
|
||||
} else {
|
||||
yy::parser linkerScriptParser;
|
||||
// We don't care about the return value, as any error increments the global error count,
|
||||
// which is what `main` checks.
|
||||
(void)linkerScriptParser.parse();
|
||||
|
||||
// Free up working memory.
|
||||
for (auto ®ion : curAddr) {
|
||||
region.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user