Port linkerscript parser to Bison (#1266)

Notable side effects:
* Use the standard-conformant MSVC preproc
* Add test for linker script INCLUDE
* Improve wording of placement conflict errors
* Fix errors from not newline-terminated files
* Teach checkdiff about the linker script doc
* Call linker script "commands" "directives" instead

---------

Co-authored-by: Rangi42 <remy.oukaour+rangi42@gmail.com>
This commit is contained in:
Eldred Habert
2023-12-11 02:29:37 +01:00
committed by GitHub
parent ab30690854
commit fd78a9ae83
28 changed files with 573 additions and 656 deletions

490
src/link/script.y Normal file
View File

@@ -0,0 +1,490 @@
%language "c++"
%define api.value.type variant
%define api.token.constructor
%code requires {
#include <stdint.h>
#include <string>
#include "linkdefs.hpp"
}
%code {
#include <algorithm>
#include <array>
#include <assert.h>
#include <bit>
#include <cinttypes>
#include <fstream>
#include <locale>
#include <stdio.h>
#include <string_view>
#include <vector>
#include "helpers.hpp"
#include "itertools.hpp"
#include "util.hpp"
#include "link/main.hpp"
#include "link/section.hpp"
using namespace std::literals;
static void includeFile(std::string &&path);
static void incLineNo(void);
static void setSectionType(SectionType type);
static void setSectionType(SectionType type, uint32_t bank);
static void setAddr(uint32_t addr);
static void alignTo(uint32_t alignment, uint32_t offset);
static void pad(uint32_t length);
static void placeSection(std::string const &name);
static yy::parser::symbol_type yylex(void);
// Associates a directive keyword with the factory that builds its token.
struct Keyword {
	// Keyword spelling that identifiers are compared against by the lexer.
	std::string_view name;
	// Parameterless factory returning the parser symbol for this keyword.
	yy::parser::symbol_type (*tokenGen)();
};
}
%token YYEOF 0 "end of file"
%token newline
%token ORG "ORG"
INCLUDE "INCLUDE"
ALIGN "ALIGN"
DS "DS"
%code {
// Table of the directive keywords recognized by the lexer, each paired with the
// token constructor declared for it by the `%token` list above.
// Names are written in upper case; the lexer's comparison upper-cases the input side only.
static std::array keywords{
Keyword{"ORG"sv, yy::parser::make_ORG},
Keyword{"INCLUDE"sv, yy::parser::make_INCLUDE},
Keyword{"ALIGN"sv, yy::parser::make_ALIGN},
Keyword{"DS"sv, yy::parser::make_DS},
};
}
%token <std::string> string;
%token <uint32_t> number;
%token <SectionType> section_type;
%%
// A linker script is a (possibly empty) sequence of lines.
lines: %empty
| line lines
;
// Every line is terminated by a `newline` token; each alternative is responsible
// for advancing the line counter of the script being read.
line: INCLUDE string newline { includeFile(std::move($2)); } // Note: this additionally increments the line number!
| directive newline { incLineNo(); }
| newline { incLineNo(); }
| error newline { yyerrok; incLineNo(); } // Error recovery.
;
// A directive selects a memory region (optionally with a bank number), moves the
// current address (ORG / ALIGN / DS), or places a section named by a string.
directive: section_type { setSectionType($1); }
| section_type number { setSectionType($1, $2); }
| ORG number { setAddr($2); }
| ALIGN number { alignTo($2, 0); }
| DS number { pad($2); }
| string { placeSection($1); }
;
%%
// Reports an error through the global `error` function, prefixing the message with
// `<path>(<line>): ` taken from `context` (anything exposing `.path` and `.lineNo`).
// `fmt` must be a string literal, since it is concatenated with the prefix at compile time.
// Note that `context` is expanded twice, so it should not have side effects.
#define scriptError(context, fmt, ...) \
::error(NULL, 0, "%s(%" PRIu32 "): " fmt, \
context.path.c_str(), context.lineNo __VA_OPT__(,) __VA_ARGS__)
// Lexer.
// One entry of the lexer's include stack: a script file being read, plus the
// information needed to report where in that file an error occurred.
struct LexerStackEntry {
	std::filebuf file; // Not opened by the constructor; the creator opens it using `path`.
	std::string path; // Path of the script, as given to the constructor.
	uint32_t lineNo; // Current line number within the script, starting at 1.

	// Takes ownership of `path_`.
	// A named rvalue reference is an lvalue, so it must be moved explicitly;
	// otherwise the string would be copied instead.
	explicit LexerStackEntry(std::string &&path_)
	    : file(), path(std::move(path_)), lineNo(1) {}
};
// Stack of the scripts currently being read; the last entry is the one being lexed.
// Entries are pushed by `INCLUDE` and popped when the lexer reaches the end of a file.
static std::vector<LexerStackEntry> lexerStack;
// Set once the lexer has injected a newline token at end of file,
// so that the following call reports the end of input instead.
static bool atEof;
// Parser error hook: forwards the parser's message to the linker's error
// reporting, attributed to the script currently on top of the lexer stack.
void yy::parser::error(std::string const &msg) {
	scriptError(lexerStack.back(), "%s", msg.c_str());
}
// Handles an `INCLUDE` directive: pushes a new lexer stack entry for `path` and
// tries to open it. In every case, the including script's line number is advanced;
// if the file cannot be opened, an error is reported and the new entry is discarded.
static void includeFile(std::string &&path) {
	// Growing the vector may relocate its elements, so the reference to the
	// including script is only taken once the new entry has been added.
	auto &included = lexerStack.emplace_back(std::move(path));
	auto &includer = lexerStack[lexerStack.size() - 2];

	bool const opened = included.file.open(included.path, std::ios_base::in) != nullptr;
	if (!opened) {
		// Must be reported before the line number changes and before the entry is removed.
		scriptError(includer, "Could not open included linker script \"%s\"",
		            included.path.c_str());
	}

	++includer.lineNo;

	if (!opened) {
		lexerStack.pop_back();
	}
	// Otherwise, the lexer reads its next token from the newly pushed entry.
}
// Advances the line counter of the script currently being lexed.
static void incLineNo(void) {
	auto &current = lexerStack.back();
	current.lineNo += 1;
}
// Tells whether `c` is horizontal whitespace (a space or a tab).
static bool isWhiteSpace(int c) {
	switch (c) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
// Tells whether `c` is a line terminator character (CR or LF).
static bool isNewline(int c) {
	switch (c) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
// Tells whether `c` can be part of an identifier: an ASCII letter or decimal digit.
static bool isIdentChar(int c) {
	auto const within = [c](int low, int high) { return low <= c && c <= high; };

	if (within('A', 'Z')) {
		return true;
	}
	if (within('a', 'z')) {
		return true;
	}
	return within('0', '9');
}
// Tells whether `c` is an ASCII decimal digit.
static bool isDecDigit(int c) {
	if (c < '0') {
		return false;
	}
	return c <= '9';
}
// Tells whether `c` is a binary digit, i.e. '0' or '1'.
static bool isBinDigit(int c) {
	if (c < '0') {
		return false;
	}
	return c <= '1';
}
// Tells whether `c` is an ASCII hexadecimal digit, in either letter case.
static bool isHexDigit(int c) {
	auto const within = [c](int low, int high) { return low <= c && c <= high; };

	if (within('0', '9')) {
		return true;
	}
	if (within('A', 'F')) {
		return true;
	}
	return within('a', 'f');
}
// Converts a hexadecimal digit character (either letter case) to its value, 0 to 15.
// Callers must only pass characters accepted by `isHexDigit`;
// any other input reaches `unreachable_()`.
static uint8_t parseHexDigit(int c) {
	if (c >= '0' && c <= '9') {
		return c - '0';
	}
	if (c >= 'A' && c <= 'F') {
		return c - 'A' + 10;
	}
	if (c >= 'a' && c <= 'f') {
		return c - 'a' + 10;
	}
	unreachable_();
}
// Lexer entry point: reads characters from the script on top of `lexerStack`,
// and returns the next token for the parser.
//
// - Spaces and tabs are skipped, and so are comments (from ';' up to the end of the line).
// - Line terminators (LF, CR, or CRLF) yield `newline`.
// - Double-quoted strings yield `string`; `\n`, `\r`, `\t`, `\\` and `\"` escapes are recognized.
// - `$hex`, `%binary` and decimal literals yield `number`.
// - Identifiers are matched case-insensitively against section type names, then keywords.
// - At the end of each file, a `newline` is injected first, so that a file whose last line is
//   not newline-terminated still has that line terminated; then, an included file is popped
//   off the stack (resuming the including script), and the root script yields `YYEOF`.
//
// Lexing errors are reported via `scriptError`, and the lexer keeps going.
yy::parser::symbol_type yylex(void) {
try_again: // Can't use a `do {} while(0)` loop, otherwise compilers (wrongly) think it can end.
	auto &context = lexerStack.back();
	auto c = context.file.sbumpc();

	// First, skip leading whitespace.
	while (isWhiteSpace(c)) {
		c = context.file.sbumpc();
	}

	// Then, skip a comment if applicable.
	if (c == ';') {
		// The comment may be ended by the end of the file instead of a newline;
		// without checking for EOF, this loop would never terminate in that case.
		while (c != EOF && !isNewline(c)) {
			c = context.file.sbumpc();
		}
	}

	// Alright, what token should we return?
	if (c == EOF) {
		// Basically yywrap().
		if (!atEof) {
			// Inject a newline at EOF, to avoid errors for files that don't end with one.
			// This is done for included files as well, otherwise their last line would be
			// merged with the line following the `INCLUDE` directive.
			// NOTE(review): the line counter incremented for this extra newline is expected to
			// be this file's, which assumes that the parser runs the action before requesting
			// another token (the `INCLUDE` rule already makes that same assumption).
			atEof = true;
			return yy::parser::make_newline();
		} else if (lexerStack.size() != 1) {
			// Resume reading from the including script.
			lexerStack.pop_back();
			atEof = false;
			goto try_again;
		} else {
			return yy::parser::make_YYEOF();
		}
	}

	// An actual character was read, so whichever file is being lexed is not at EOF.
	atEof = false;

	if (isNewline(c)) {
		// Handle CRLF.
		if (c == '\r' && context.file.sgetc() == '\n') {
			context.file.sbumpc();
		}
		return yy::parser::make_newline();
	} else if (c == '"') {
		std::string str;

		// Characters are peeked first, and only consumed once known not to end the string.
		for (c = context.file.sgetc(); c != '"'; c = context.file.sgetc()) {
			if (c == EOF || isNewline(c)) {
				scriptError(context, "Unterminated string");
				break;
			}
			context.file.sbumpc();
			if (c == '\\') {
				c = context.file.sgetc();
				if (c == EOF || isNewline(c)) {
					scriptError(context, "Unterminated string");
					break;
				} else if (c == 'n') {
					c = '\n';
				} else if (c == 'r') {
					c = '\r';
				} else if (c == 't') {
					c = '\t';
				} else if (c != '\\' && c != '"') {
					scriptError(context, "Cannot escape character %s", printChar(c));
				}
				context.file.sbumpc();
			}
			str.push_back(c);
		}
		// Consume the closing quote, but only if there is one: if the string is unterminated,
		// the line terminator must be left in place to be lexed as the next token.
		if (c == '"') {
			context.file.sbumpc();
		}
		return yy::parser::make_string(std::move(str));
	} else if (c == '$') {
		c = context.file.sgetc();
		if (!isHexDigit(c)) {
			scriptError(context, "No hexadecimal digits found after '$'");
			return yy::parser::make_number(0);
		}

		uint32_t number = parseHexDigit(c);
		context.file.sbumpc();
		for (c = context.file.sgetc(); isHexDigit(c); c = context.file.sgetc()) {
			number = number * 16 + parseHexDigit(c);
			context.file.sbumpc();
		}
		return yy::parser::make_number(number);
	} else if (c == '%') {
		c = context.file.sgetc();
		if (!isBinDigit(c)) {
			scriptError(context, "No binary digits found after '%%'");
			return yy::parser::make_number(0);
		}

		uint32_t number = c - '0';
		context.file.sbumpc();
		for (c = context.file.sgetc(); isBinDigit(c); c = context.file.sgetc()) {
			number = number * 2 + (c - '0');
			context.file.sbumpc();
		}
		return yy::parser::make_number(number);
	} else if (isDecDigit(c)) {
		uint32_t number = c - '0';
		for (c = context.file.sgetc(); isDecDigit(c); c = context.file.sgetc()) {
			number = number * 10 + (c - '0');
			// Consume the digit that was just peeked; otherwise it would be read over and over.
			context.file.sbumpc();
		}
		return yy::parser::make_number(number);
	} else if (isIdentChar(c)) { // Note that we match these *after* digit characters!
		std::string ident;
		// Case-insensitive comparison; only `cmp` is upper-cased, so `ref` must not be lowercase.
		auto strUpperCmp = [](char cmp, char ref) {
			// `locale::classic()` yields the "C" locale.
			assert(!std::use_facet<std::ctype<char>>(std::locale::classic())
			            .is(std::ctype_base::lower, ref));
			return std::use_facet<std::ctype<char>>(std::locale::classic())
			            .toupper(cmp) == ref;
		};

		ident.push_back(c);
		// `snextc` consumes the character that was just stored, and peeks at the following one.
		for (c = context.file.sgetc(); isIdentChar(c); c = context.file.snextc()) {
			ident.push_back(c);
		}

		for (SectionType type : EnumSeq(SECTTYPE_INVALID)) {
			if (std::equal(ident.begin(), ident.end(),
			               sectionTypeInfo[type].name.begin(), sectionTypeInfo[type].name.end(),
			               strUpperCmp)) {
				return yy::parser::make_section_type(type);
			}
		}

		for (Keyword const &keyword : keywords) {
			if (std::equal(ident.begin(), ident.end(),
			               keyword.name.begin(), keyword.name.end(),
			               strUpperCmp)) {
				return keyword.tokenGen();
			}
		}

		scriptError(context, "Unknown keyword \"%s\"", ident.c_str());
		goto try_again; // Try lexing another token.
	} else {
		scriptError(context, "Unexpected character '%s'", printChar(c));
		// Keep reading characters until the EOL, to avoid reporting too many errors.
		// The line terminator itself is only peeked at, so that it gets lexed as a token.
		for (c = context.file.sgetc(); c != EOF && !isNewline(c); c = context.file.sgetc()) {
			context.file.sbumpc();
		}
		goto try_again;
	}
	// Not marking as unreachable; this will generate a warning if any codepath forgets to return.
}
// Semantic actions.
// For each section type, the current placement address of each of its banks;
// the per-type vectors are grown on demand when a bank is first selected.
static std::array<std::vector<uint16_t>, SECTTYPE_INVALID> curAddr;
static SectionType activeType; // Index into curAddr
static uint32_t activeBankIdx; // Index into curAddr[activeType]
// Makes (`type`, `idx`) the active memory region and bank index.
// If that bank has no address slot yet, the type's slots are extended up to it,
// each new slot being initialized to the type's start address.
static void setActiveTypeAndIdx(SectionType type, uint32_t idx) {
	activeType = type;
	activeBankIdx = idx;

	auto &banks = curAddr[type];
	if (idx >= banks.size()) {
		banks.resize(idx + 1, sectionTypeInfo[type].startAddr);
	}
}
// Handles a memory region directive that was given without a bank number.
// This is an error unless `nbbanks(type)` is 1; the first bank is selected regardless.
static void setSectionType(SectionType type) {
	if (nbbanks(type) != 1) {
		auto const &context = lexerStack.back();
		scriptError(context, "A bank number must be specified for %s",
		            sectionTypeInfo[type].name.c_str());
		// Processing continues, using the first bank as a default.
	}

	// Bank index 0 is the region's first bank.
	setActiveTypeAndIdx(type, 0);
}
// Handles a memory region directive that was given with a bank number.
// A bank number outside of the type's [firstBank, lastBank] range is reported as an error,
// and the nearest valid bank is used instead so that processing can continue.
static void setSectionType(SectionType type, uint32_t bank) {
	auto const &context = lexerStack.back();
	auto const &typeInfo = sectionTypeInfo[type];

	if (bank < typeInfo.firstBank) {
		scriptError(context, "%s bank %" PRIu32 " doesn't exist, the minimum is %" PRIu32,
		            typeInfo.name.c_str(), bank, typeInfo.firstBank);
		bank = typeInfo.firstBank;
	} else if (bank > typeInfo.lastBank) {
		scriptError(context, "%s bank %" PRIu32 " doesn't exist, the maximum is %" PRIu32,
		            typeInfo.name.c_str(), bank, typeInfo.lastBank);
		// Clamp like the other bound is; otherwise, an arbitrarily large bank number from the
		// script would make the per-bank address table be resized to that many entries.
		bank = typeInfo.lastBank;
	}

	setActiveTypeAndIdx(type, bank - typeInfo.firstBank);
}
static void setAddr(uint32_t addr) {
auto const &context = lexerStack.back();
auto &pc = curAddr[activeType][activeBankIdx];
auto const &typeInfo = sectionTypeInfo[activeType];
if (addr < pc) {
scriptError(context, "ORG cannot be used to go backwards (from $%04x to $%04x)", pc, addr);
} else if (addr > endaddr(activeType)) { // Allow "one past the end" sections.
scriptError(context, "Cannot go to $%04" PRIx32 ": %s ends at $%04" PRIx16 "",
addr, typeInfo.name.c_str(), endaddr(activeType));
pc = endaddr(activeType);
} else {
pc = addr;
}
}
static void alignTo(uint32_t alignment, uint32_t alignOfs) {
auto const &context = lexerStack.back();
auto const &typeInfo = sectionTypeInfo[activeType];
auto &pc = curAddr[activeType][activeBankIdx];
// TODO: maybe warn if truncating?
alignOfs %= 1 << alignment;
assert(pc >= typeInfo.startAddr);
uint16_t length = alignment < 16 ? (uint16_t)(alignOfs - pc) % (1u << alignment)
: alignOfs - pc; // Let it wrap around, this'll trip the check.
if (uint16_t offset = pc - typeInfo.startAddr; length > typeInfo.size - offset) {
scriptError(context, "Cannot align: the next suitable address after $%04" PRIx16 " is $%04" PRIx16 ", past $%04" PRIx16,
pc, (uint16_t)(pc + length), (uint16_t)(endaddr(activeType) + 1));
} else {
pc += length;
}
}
static void pad(uint32_t length) {
auto const &context = lexerStack.back();
auto const &typeInfo = sectionTypeInfo[activeType];
auto &pc = curAddr[activeType][activeBankIdx];
assert(pc >= typeInfo.startAddr);
if (uint16_t offset = pc - typeInfo.startAddr; length + offset > typeInfo.size) {
scriptError(context, "Cannot pad by %u bytes: only %u bytes to $%04" PRIx16,
length, typeInfo.size - offset, (uint16_t)(endaddr(activeType) + 1));
} else {
pc += length;
}
}
// Handles a section placement: fixes the section called `name` at the active bank's
// current address, then advances that address past the section.
//
// Errors are reported if no memory region is active, if no such section exists, or if the
// placement contradicts the section's existing type, bank, address or alignment.
// Except for the first two, the placement is applied regardless.
static void placeSection(std::string const &name) {
	auto const &context = lexerStack.back();

	// A type *must* be active.
	// This must be checked before anything is indexed using `activeType`.
	if (activeType == SECTTYPE_INVALID) {
		scriptError(context, "No memory region has been specified to place section \"%s\" in",
		            name.c_str());
		return;
	}
	auto const &typeInfo = sectionTypeInfo[activeType];

	auto *section = sect_GetSection(name.c_str());
	if (!section) {
		scriptError(context, "Unknown section \"%s\"", name.c_str());
		return;
	}

	assert(section->offset == 0);
	// Check that the linker script doesn't contradict what the code says.
	if (section->type == SECTTYPE_INVALID) {
		// SDCC areas don't have a type assigned yet, so the linker script is used to give them one.
		for (Section *fragment = section; fragment; fragment = fragment->nextu) {
			fragment->type = activeType;
		}
	} else if (section->type != activeType) {
		scriptError(context, "\"%s\" is specified to be a %s section, but it is already a %s section",
		            name.c_str(), typeInfo.name.c_str(), sectionTypeInfo[section->type].name.c_str());
	}

	uint32_t bank = activeBankIdx + typeInfo.firstBank;
	if (section->isBankFixed && bank != section->bank) {
		scriptError(context, "The linker script places section \"%s\" in %s bank %" PRIu32 ", but it was already defined in bank %" PRIu32,
		            name.c_str(), sectionTypeInfo[section->type].name.c_str(), bank, section->bank);
	}
	section->isBankFixed = true;
	section->bank = bank;

	uint16_t &org = curAddr[activeType][activeBankIdx];
	if (section->isAddressFixed && org != section->org) {
		scriptError(context, "The linker script assigns section \"%s\" to address $%04" PRIx16 ", but it was already at $%04" PRIx16,
		            name.c_str(), org, section->org);
	} else if (section->isAlignFixed && (org & section->alignMask) != section->alignOfs) {
		// The alignment is the number of low bits that are set in the mask.
		uint8_t alignment = std::countr_one(section->alignMask);
		scriptError(context, "The linker script assigns section \"%s\" to address $%04" PRIx16 ", but that would be ALIGN[%" PRIu8 ", %" PRIu16 "] instead of the requested ALIGN[%" PRIu8 ", %" PRIu16 "]",
		            name.c_str(), org, alignment, (uint16_t)(org & section->alignMask), alignment, section->alignOfs);
	}
	section->isAddressFixed = true;
	section->isAlignFixed = false; // This can't be set when the above is.
	section->org = org;

	uint16_t curOfs = org - typeInfo.startAddr;
	if (section->size > typeInfo.size - curOfs) {
		// NOTE(review): the overflow amount is printed in hexadecimal, without any prefix.
		scriptError(context, "The linker script assigns section \"%s\" to address $%04" PRIx16 ", but then it would overflow %s by %" PRIx16 " bytes",
		            name.c_str(), org, typeInfo.name.c_str(),
		            (uint16_t)(section->size - (typeInfo.size - curOfs)));
		// Fill as much as possible without going out of bounds.
		org = typeInfo.startAddr + typeInfo.size;
	} else {
		org += section->size;
	}
}
// External API.
// Reads the linker script at `path`, and applies its directives.
// Nothing is returned: all failures (including being unable to open the script) are
// reported through the error functions.
void script_ProcessScript(char const *path) {
	// Start from a clean state: no active memory region, and only the root script on the stack.
	activeType = SECTTYPE_INVALID;
	lexerStack.clear();
	atEof = false;

	auto &newContext = lexerStack.emplace_back(std::string(path));
	if (!newContext.file.open(newContext.path, std::ios_base::in)) {
		error(NULL, 0, "Could not open linker script \"%s\"", newContext.path.c_str());
		lexerStack.clear();
		return;
	}

	yy::parser linkerScriptParser;
	// We don't care about the return value, as any error increments the global error count,
	// which is what `main` checks.
	(void)linkerScriptParser.parse();

	// Free up working memory.
	for (auto &region : curAddr) {
		region.clear();
	}
	// The root script is still on the stack at this point; dropping it closes its file,
	// instead of keeping it open for the rest of the program's run.
	lexerStack.clear();
}