From 16e16cdf5112b594fc4460f970a3dd3d353f11d6 Mon Sep 17 00:00:00 2001 From: Rangi42 Date: Sun, 27 Jul 2025 13:03:28 -0400 Subject: [PATCH] Split up the linkerscript lexer and layout actions --- Makefile | 3 +- include/link/layout.hpp | 21 +- include/link/lexer.hpp | 17 ++ include/link/main.hpp | 14 +- include/link/warning.hpp | 5 +- src/CMakeLists.txt | 1 + src/asm/warning.cpp | 1 + src/link/layout.cpp | 419 ++-------------------------- src/link/lexer.cpp | 361 ++++++++++++++++++++++++ src/link/main.cpp | 18 +- src/link/script.y | 4 +- src/link/warning.cpp | 10 +- test/link/linkerscript-noexist.asm | 2 + test/link/linkerscript-noexist.link | 3 + test/link/linkerscript-noexist.out | 2 + 15 files changed, 448 insertions(+), 433 deletions(-) create mode 100644 include/link/lexer.hpp create mode 100644 src/link/lexer.cpp create mode 100644 test/link/linkerscript-noexist.asm create mode 100644 test/link/linkerscript-noexist.link create mode 100644 test/link/linkerscript-noexist.out diff --git a/Makefile b/Makefile index a55d0001..c1ce48af 100644 --- a/Makefile +++ b/Makefile @@ -80,6 +80,7 @@ src/asm/lexer.o src/asm/main.o: src/asm/parser.hpp rgblink_obj := \ ${common_obj} \ src/link/assign.o \ + src/link/lexer.o \ src/link/layout.o \ src/link/main.o \ src/link/object.o \ @@ -95,7 +96,7 @@ rgblink_obj := \ src/opmath.o \ src/util.o -src/link/layout.o src/link/main.o: src/link/script.hpp +src/link/lexer.o src/link/main.o: src/link/script.hpp rgbfix_obj := \ ${common_obj} \ diff --git a/include/link/layout.hpp b/include/link/layout.hpp index 88f38ce8..7e6d9af9 100644 --- a/include/link/layout.hpp +++ b/include/link/layout.hpp @@ -3,37 +3,20 @@ #ifndef RGBDS_LINK_LAYOUT_HPP #define RGBDS_LINK_LAYOUT_HPP -#include #include #include #include "linkdefs.hpp" -struct LexerStackEntry { - std::filebuf file; - std::string path; - uint32_t lineNo; - - explicit LexerStackEntry(std::string &&path_) : file(), path(path_), lineNo(1) {} -}; - -#define scriptError(context, fmt, ...) \ - ::error( \ - "%s(%" PRIu32 "): " fmt, context.path.c_str(), context.lineNo __VA_OPT__(, ) __VA_ARGS__ \ - ) - -LexerStackEntry &lexer_Context(); -void lexer_IncludeFile(std::string &&path); -void lexer_IncLineNo(); -bool lexer_Init(char const *linkerScriptName); - void layout_SetFloatingSectionType(SectionType type); void layout_SetSectionType(SectionType type); void layout_SetSectionType(SectionType type, uint32_t bank); + void layout_SetAddr(uint32_t addr); void layout_MakeAddrFloating(); void layout_AlignTo(uint32_t alignment, uint32_t offset); void layout_Pad(uint32_t length); + void layout_PlaceSection(std::string const &name, bool isOptional); #endif // RGBDS_LINK_LAYOUT_HPP diff --git a/include/link/lexer.hpp b/include/link/lexer.hpp new file mode 100644 index 00000000..5416110f --- /dev/null +++ b/include/link/lexer.hpp @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: MIT + +#ifndef RGBDS_LINK_LEXER_HPP +#define RGBDS_LINK_LEXER_HPP + +#include +#include + +[[gnu::format(printf, 1, 2)]] +void lexer_Error(char const *fmt, ...); + +void lexer_IncludeFile(std::string &&path); +void lexer_IncLineNo(); + +bool lexer_Init(char const *linkerScriptName); + +#endif // RGBDS_LINK_LEXER_HPP diff --git a/include/link/main.hpp b/include/link/main.hpp index 82406e59..71527adb 100644 --- a/include/link/main.hpp +++ b/include/link/main.hpp @@ -12,13 +12,13 @@ #include "linkdefs.hpp" struct Options { - bool isDmgMode; // -d - char const *mapFileName; // -m - bool noSymInMap; // -M - char const *symFileName; // -n - char const *overlayFileName; // -O - char const *outputFileName; // -o - uint8_t padValue; // -p + bool isDmgMode; // -d + char const *mapFileName; // -m + bool noSymInMap; // -M + char const *symFileName; // -n + char const *overlayFileName; // -O + char const *outputFileName; // -o + uint8_t padValue; // -p bool hasPadValue = false; // Setting these three to 0 disables the functionality uint16_t scrambleROMX; // -S diff --git a/include/link/warning.hpp b/include/link/warning.hpp index 2b659f84..35ecc669 100644 --- a/include/link/warning.hpp +++ b/include/link/warning.hpp @@ -3,6 +3,7 @@ #ifndef RGBDS_LINK_WARNING_HPP #define RGBDS_LINK_WARNING_HPP +#include #include #include "diagnostics.hpp" @@ -48,7 +49,9 @@ void error(char const *fmt, ...); [[gnu::format(printf, 1, 2)]] void errorNoDump(char const *fmt, ...); [[gnu::format(printf, 2, 3)]] -void argErr(char flag, char const *fmt, ...); +void argError(char flag, char const *fmt, ...); + +void scriptError(char const *name, uint32_t lineNo, char const *fmt, va_list args); [[gnu::format(printf, 3, 4), noreturn]] void fatal(FileStackNode const *src, uint32_t lineNo, char const *fmt, ...); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 19cc4021..eeaa1dad 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -57,6 +57,7 @@ set(rgbasm_src set(rgblink_src "${BISON_LINKER_SCRIPT_PARSER_OUTPUT_SOURCE}" "link/assign.cpp" + "link/lexer.cpp" "link/layout.cpp" "link/main.cpp" "link/object.cpp" diff --git a/src/asm/warning.cpp b/src/asm/warning.cpp index 05199224..3d3cf042 100644 --- a/src/asm/warning.cpp +++ b/src/asm/warning.cpp @@ -74,6 +74,7 @@ static void printDiag( } vfprintf(stderr, fmt, args); putc('\n', stderr); + lexer_DumpStringExpansions(); } diff --git a/src/link/layout.cpp b/src/link/layout.cpp index 770cd1c3..32fcee38 100644 --- a/src/link/layout.cpp +++ b/src/link/layout.cpp @@ -2,354 +2,17 @@ #include "link/layout.hpp" -#include #include #include -#include #include -#include -#include #include #include "helpers.hpp" -#include "itertools.hpp" #include "util.hpp" +#include "link/lexer.hpp" // lexer_Error #include "link/section.hpp" #include "link/warning.hpp" -// Include this last so it gets all type & constant definitions -#include "script.hpp" // For token definitions, generated from script.y - -/******************** Lexer ********************/ - -static std::vector lexerStack; -static bool atEof = false; - -LexerStackEntry &lexer_Context() { - return lexerStack.back(); -} - -void lexer_IncludeFile(std::string &&path) { - // `emplace_back` can invalidate references to the stack's elements! - // This is why `newContext` must be gotten before `prevContext`. - LexerStackEntry &newContext = lexerStack.emplace_back(std::move(path)); - LexerStackEntry &prevContext = lexerStack[lexerStack.size() - 2]; - - if (!newContext.file.open(newContext.path, std::ios_base::in)) { - // The order is important: report the error, increment the line number, modify the stack! - scriptError( - prevContext, "Failed to open included linker script \"%s\"", newContext.path.c_str() - ); - ++prevContext.lineNo; - lexerStack.pop_back(); - } else { - // The lexer will use the new entry to lex the next token. - ++prevContext.lineNo; - } -} - -void lexer_IncLineNo() { - ++lexerStack.back().lineNo; -} - -static bool isWhiteSpace(int c) { - return c == ' ' || c == '\t'; -} - -static bool isNewline(int c) { - return c == '\r' || c == '\n'; -} - -yy::parser::symbol_type yylex(); // Forward declaration for `yywrap` - -static yy::parser::symbol_type yywrap() { - if (lexerStack.size() != 1) { - if (!atEof) { - // Inject a newline at EOF to simplify parsing. - atEof = true; - return yy::parser::make_newline(); - } - lexerStack.pop_back(); - return yylex(); - } - if (!atEof) { - // Inject a newline at EOF to simplify parsing. - atEof = true; - return yy::parser::make_newline(); - } - return yy::parser::make_YYEOF(); -} - -static bool isIdentChar(int c) { - return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'); -} - -static std::string readIdent(int c) { - LexerStackEntry &context = lexerStack.back(); - std::string ident; - ident.push_back(c); - for (c = context.file.sgetc(); isIdentChar(c); c = context.file.snextc()) { - ident.push_back(c); - } - return ident; -} - -static bool isDecDigit(int c) { - return c >= '0' && c <= '9'; -} - -static yy::parser::symbol_type parseDecNumber(int c) { - LexerStackEntry &context = lexerStack.back(); - uint32_t number = c - '0'; - for (c = context.file.sgetc(); isDecDigit(c) || c == '_'; c = context.file.sgetc()) { - if (c != '_') { - number = number * 10 + (c - '0'); - } - context.file.sbumpc(); - } - return yy::parser::make_number(number); -} - -static bool isBinDigit(int c) { - return c >= '0' && c <= '1'; -} - -static yy::parser::symbol_type parseBinNumber(char const *prefix) { - LexerStackEntry &context = lexerStack.back(); - int c = context.file.sgetc(); - if (!isBinDigit(c)) { - scriptError(context, "No binary digits found after '%s'", prefix); - return yy::parser::make_number(0); - } - - uint32_t number = c - '0'; - context.file.sbumpc(); - for (c = context.file.sgetc(); isBinDigit(c) || c == '_'; c = context.file.sgetc()) { - if (c != '_') { - number = number * 2 + (c - '0'); - } - context.file.sbumpc(); - } - return yy::parser::make_number(number); -} - -static bool isOctDigit(int c) { - return c >= '0' && c <= '7'; -} - -static yy::parser::symbol_type parseOctNumber(char const *prefix) { - LexerStackEntry &context = lexerStack.back(); - int c = context.file.sgetc(); - if (!isOctDigit(c)) { - scriptError(context, "No octal digits found after '%s'", prefix); - return yy::parser::make_number(0); - } - - uint32_t number = c - '0'; - context.file.sbumpc(); - for (c = context.file.sgetc(); isOctDigit(c) || c == '_'; c = context.file.sgetc()) { - if (c != '_') { - number = number * 8 + (c - '0'); - } - context.file.sbumpc(); - } - return yy::parser::make_number(number); -} - -static bool isHexDigit(int c) { - return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); -} - -static uint8_t parseHexDigit(int c) { - if (c >= '0' && c <= '9') { - return c - '0'; - } else if (c >= 'A' && c <= 'F') { - return c - 'A' + 10; - } else if (c >= 'a' && c <= 'f') { - return c - 'a' + 10; - } else { - unreachable_(); // LCOV_EXCL_LINE - } -} - -static yy::parser::symbol_type parseHexNumber(char const *prefix) { - LexerStackEntry &context = lexerStack.back(); - int c = context.file.sgetc(); - if (!isHexDigit(c)) { - scriptError(context, "No hexadecimal digits found after '%s'", prefix); - return yy::parser::make_number(0); - } - - uint32_t number = parseHexDigit(c); - context.file.sbumpc(); - for (c = context.file.sgetc(); isHexDigit(c) || c == '_'; c = context.file.sgetc()) { - if (c != '_') { - number = number * 16 + parseHexDigit(c); - } - context.file.sbumpc(); - } - return yy::parser::make_number(number); -} - -static yy::parser::symbol_type parseNumber(int c) { - LexerStackEntry &context = lexerStack.back(); - if (c == '0') { - switch (context.file.sgetc()) { - case 'x': - context.file.sbumpc(); - return parseHexNumber("0x"); - case 'X': - context.file.sbumpc(); - return parseHexNumber("0X"); - case 'o': - context.file.sbumpc(); - return parseOctNumber("0o"); - case 'O': - context.file.sbumpc(); - return parseOctNumber("0O"); - case 'b': - context.file.sbumpc(); - return parseBinNumber("0b"); - case 'B': - context.file.sbumpc(); - return parseBinNumber("0B"); - } - } - return parseDecNumber(c); -} - -static yy::parser::symbol_type parseString() { - LexerStackEntry &context = lexerStack.back(); - int c = context.file.sgetc(); - std::string str; - for (; c != '"'; c = context.file.sgetc()) { - if (c == EOF || isNewline(c)) { - scriptError(context, "Unterminated string"); - break; - } - context.file.sbumpc(); - if (c == '\\') { - c = context.file.sgetc(); - if (c == EOF || isNewline(c)) { - scriptError(context, "Unterminated string"); - break; - } else if (c == 'n') { - c = '\n'; - } else if (c == 'r') { - c = '\r'; - } else if (c == 't') { - c = '\t'; - } else if (c == '0') { - c = '\0'; - } else if (c != '\\' && c != '"' && c != '\'') { - scriptError(context, "Cannot escape character %s", printChar(c)); - } - context.file.sbumpc(); - } - str.push_back(c); - } - if (c == '"') { - context.file.sbumpc(); - } - return yy::parser::make_string(std::move(str)); -} - -struct Keyword { - std::string_view name; - yy::parser::symbol_type (*tokenGen)(); -}; - -using namespace std::literals; - -static std::array keywords{ - Keyword{"ORG"sv, yy::parser::make_ORG }, - Keyword{"FLOATING"sv, yy::parser::make_FLOATING}, - Keyword{"INCLUDE"sv, yy::parser::make_INCLUDE }, - Keyword{"ALIGN"sv, yy::parser::make_ALIGN }, - Keyword{"DS"sv, yy::parser::make_DS }, - Keyword{"OPTIONAL"sv, yy::parser::make_OPTIONAL}, -}; - -yy::parser::symbol_type yylex() { - LexerStackEntry &context = lexerStack.back(); - int c = context.file.sbumpc(); - - // First, skip leading whitespace. - while (isWhiteSpace(c)) { - c = context.file.sbumpc(); - } - // Then, skip a comment if applicable. - if (c == ';') { - while (c != EOF && !isNewline(c)) { - c = context.file.sbumpc(); - } - } - - // Alright, what token should we return? - if (c == EOF) { - return yywrap(); - } else if (c == ',') { - return yy::parser::make_COMMA(); - } else if (isNewline(c)) { - // Handle CRLF. - if (c == '\r' && context.file.sgetc() == '\n') { - context.file.sbumpc(); - } - return yy::parser::make_newline(); - } else if (c == '"') { - return parseString(); - } else if (c == '$') { - return parseHexNumber("$"); - } else if (c == '%') { - return parseBinNumber("%"); - } else if (c == '&') { - return parseOctNumber("&"); - } else if (isDecDigit(c)) { - return parseNumber(c); - } else if (isIdentChar(c)) { // Note that we match these *after* digit characters! - std::string ident = readIdent(c); - - auto strUpperCmp = [](char cmp, char ref) { return toupper(cmp) == ref; }; - - for (SectionType type : EnumSeq(SECTTYPE_INVALID)) { - if (std::equal(RANGE(ident), RANGE(sectionTypeInfo[type].name), strUpperCmp)) { - return yy::parser::make_sect_type(type); - } - } - - for (Keyword const &keyword : keywords) { - if (std::equal(RANGE(ident), RANGE(keyword.name), strUpperCmp)) { - return keyword.tokenGen(); - } - } - - scriptError(context, "Unknown keyword \"%s\"", ident.c_str()); - return yylex(); - } else { - scriptError(context, "Unexpected character %s", printChar(c)); - // Keep reading characters until the EOL, to avoid reporting too many errors. - for (c = context.file.sgetc(); !isNewline(c); c = context.file.sgetc()) { - if (c == EOF) { - break; - } - context.file.sbumpc(); - } - return yylex(); - } - // Not marking as unreachable; this will generate a warning if any codepath forgets to return. -} - -bool lexer_Init(char const *linkerScriptName) { - if (LexerStackEntry &newContext = lexerStack.emplace_back(std::string(linkerScriptName)); - !newContext.file.open(newContext.path, std::ios_base::in)) { - error("Failed to open linker script \"%s\"", linkerScriptName); - lexerStack.clear(); - return false; - } - return true; -} - -/******************** Semantic actions ********************/ static std::array, SECTTYPE_INVALID> curAddr; static SectionType activeType = SECTTYPE_INVALID; // Index into curAddr @@ -383,12 +46,8 @@ void layout_SetFloatingSectionType(SectionType type) { } void layout_SetSectionType(SectionType type) { - LexerStackEntry const &context = lexerStack.back(); - if (nbbanks(type) != 1) { - scriptError( - context, "A bank number must be specified for %s", sectionTypeInfo[type].name.c_str() - ); + lexer_Error("A bank number must be specified for %s", sectionTypeInfo[type].name.c_str()); // Keep going with a default value for the bank index. } @@ -396,12 +55,10 @@ void layout_SetSectionType(SectionType type) { } void layout_SetSectionType(SectionType type, uint32_t bank) { - LexerStackEntry const &context = lexerStack.back(); SectionTypeInfo const &typeInfo = sectionTypeInfo[type]; if (bank < typeInfo.firstBank) { - scriptError( - context, + lexer_Error( "%s bank %" PRIu32 " doesn't exist (the minimum is %" PRIu32 ")", typeInfo.name.c_str(), bank, @@ -409,8 +66,7 @@ void layout_SetSectionType(SectionType type, uint32_t bank) { ); bank = typeInfo.firstBank; } else if (bank > typeInfo.lastBank) { - scriptError( - context, + lexer_Error( "%s bank %" PRIu32 " doesn't exist (the maximum is %" PRIu32 ")", typeInfo.name.c_str(), bank, @@ -422,13 +78,12 @@ void layout_SetSectionType(SectionType type, uint32_t bank) { } void layout_SetAddr(uint32_t addr) { - LexerStackEntry const &context = lexerStack.back(); if (activeType == SECTTYPE_INVALID) { - scriptError(context, "Cannot set the current address: no memory region is active"); + lexer_Error("Cannot set the current address: no memory region is active"); return; } if (activeBankIdx == UINT32_MAX) { - scriptError(context, "Cannot set the current address: the bank is floating"); + lexer_Error("Cannot set the current address: the bank is floating"); return; } @@ -436,11 +91,10 @@ void layout_SetAddr(uint32_t addr) { SectionTypeInfo const &typeInfo = sectionTypeInfo[activeType]; if (addr < pc) { - scriptError(context, "Cannot decrease the current address (from $%04x to $%04x)", pc, addr); + lexer_Error("Cannot decrease the current address (from $%04x to $%04x)", pc, addr); } else if (addr > endaddr(activeType)) { // Allow "one past the end" sections. - scriptError( - context, - "Cannot set the current address to $%04" PRIx32 ": %s ends at $%04" PRIx16 "", + lexer_Error( + "Cannot set the current address to $%04" PRIx32 ": %s ends at $%04" PRIx16, addr, typeInfo.name.c_str(), endaddr(activeType) @@ -453,11 +107,8 @@ void layout_SetAddr(uint32_t addr) { } void layout_MakeAddrFloating() { - LexerStackEntry const &context = lexerStack.back(); if (activeType == SECTTYPE_INVALID) { - scriptError( - context, "Cannot make the current address floating: no memory region is active" - ); + lexer_Error("Cannot make the current address floating: no memory region is active"); return; } @@ -467,9 +118,8 @@ void layout_MakeAddrFloating() { } void layout_AlignTo(uint32_t alignment, uint32_t alignOfs) { - LexerStackEntry const &context = lexerStack.back(); if (activeType == SECTTYPE_INVALID) { - scriptError(context, "Cannot align: no memory region is active"); + lexer_Error("Cannot align: no memory region is active"); return; } @@ -480,8 +130,7 @@ void layout_AlignTo(uint32_t alignment, uint32_t alignOfs) { uint32_t alignSize = 1u << alignment; if (alignOfs >= alignSize) { - scriptError( - context, + lexer_Error( "Cannot align: The alignment offset (%" PRIu32 ") must be less than alignment size (%" PRIu32 ")", alignOfs, @@ -500,9 +149,7 @@ void layout_AlignTo(uint32_t alignment, uint32_t alignOfs) { uint16_t &pc = curAddr[activeType][activeBankIdx]; if (alignment > 16) { - scriptError( - context, "Cannot align: The alignment (%" PRIu32 ") must be less than 16", alignment - ); + lexer_Error("Cannot align: The alignment (%" PRIu32 ") must be less than 16", alignment); return; } @@ -513,8 +160,7 @@ void layout_AlignTo(uint32_t alignment, uint32_t alignOfs) { uint32_t alignSize = 1u << alignment; if (alignOfs >= alignSize) { - scriptError( - context, + lexer_Error( "Cannot align: The alignment offset (%" PRIu32 ") must be less than alignment size (%" PRIu32 ")", alignOfs, @@ -528,8 +174,7 @@ void layout_AlignTo(uint32_t alignment, uint32_t alignOfs) { } if (uint16_t offset = pc - typeInfo.startAddr; length > typeInfo.size - offset) { - scriptError( - context, + lexer_Error( "Cannot align: the next suitable address after $%04" PRIx16 " is $%04" PRIx16 ", past $%04" PRIx16, pc, @@ -543,9 +188,8 @@ void layout_AlignTo(uint32_t alignment, uint32_t alignOfs) { } void layout_Pad(uint32_t length) { - LexerStackEntry const &context = lexerStack.back(); if (activeType == SECTTYPE_INVALID) { - scriptError(context, "Cannot increase the current address: no memory region is active"); + lexer_Error("Cannot increase the current address: no memory region is active"); return; } @@ -559,8 +203,7 @@ void layout_Pad(uint32_t length) { assume(pc >= typeInfo.startAddr); if (uint16_t offset = pc - typeInfo.startAddr; length + offset > typeInfo.size) { - scriptError( - context, + lexer_Error( "Cannot increase the current address by %u bytes: only %u bytes to $%04" PRIx16, length, typeInfo.size - offset, @@ -572,18 +215,15 @@ void layout_Pad(uint32_t length) { } void layout_PlaceSection(std::string const &name, bool isOptional) { - LexerStackEntry const &context = lexerStack.back(); if (activeType == SECTTYPE_INVALID) { - scriptError( - context, "No memory region has been specified to place section \"%s\" in", name.c_str() - ); + lexer_Error("No memory region has been specified to place section \"%s\" in", name.c_str()); return; } Section *section = sect_GetSection(name.c_str()); if (!section) { if (!isOptional) { - scriptError(context, "Unknown section \"%s\"", name.c_str()); + lexer_Error("Unknown section \"%s\"", name.c_str()); } return; } @@ -594,8 +234,7 @@ void layout_PlaceSection(std::string const &name, bool isOptional) { if (section->type == SECTTYPE_INVALID) { // A section that has data must get assigned a type that requires data. if (!sect_HasData(activeType) && !section->data.empty()) { - scriptError( - context, + lexer_Error( "\"%s\" is specified to be a %s section, but it contains data", name.c_str(), typeInfo.name.c_str() @@ -603,8 +242,7 @@ void layout_PlaceSection(std::string const &name, bool isOptional) { } else if (sect_HasData(activeType) && section->data.empty() && section->size != 0) { // A section that lacks data can only be assigned to a type that requires data // if it's empty. - scriptError( - context, + lexer_Error( "\"%s\" is specified to be a %s section, but it doesn't contain data", name.c_str(), typeInfo.name.c_str() @@ -617,8 +255,7 @@ void layout_PlaceSection(std::string const &name, bool isOptional) { } } } else if (section->type != activeType) { - scriptError( - context, + lexer_Error( "\"%s\" is specified to be a %s section, but it is already a %s section", name.c_str(), typeInfo.name.c_str(), @@ -631,8 +268,7 @@ void layout_PlaceSection(std::string const &name, bool isOptional) { } else { uint32_t bank = activeBankIdx + typeInfo.firstBank; if (section->isBankFixed && bank != section->bank) { - scriptError( - context, + lexer_Error( "The linker script places section \"%s\" in %s bank %" PRIu32 ", but it was already defined in bank %" PRIu32, name.c_str(), @@ -648,8 +284,7 @@ void layout_PlaceSection(std::string const &name, bool isOptional) { if (!isPcFloating) { uint16_t &org = curAddr[activeType][activeBankIdx]; if (section->isAddressFixed && org != section->org) { - scriptError( - context, + lexer_Error( "The linker script assigns section \"%s\" to address $%04" PRIx16 ", but it was already at $%04" PRIx16, name.c_str(), @@ -658,8 +293,7 @@ void layout_PlaceSection(std::string const &name, bool isOptional) { ); } else if (section->isAlignFixed && (org & section->alignMask) != section->alignOfs) { uint8_t alignment = std::countr_one(section->alignMask); - scriptError( - context, + lexer_Error( "The linker script assigns section \"%s\" to address $%04" PRIx16 ", but that would be ALIGN[%" PRIu8 ", %" PRIu16 "] instead of the requested ALIGN[%" PRIu8 ", %" PRIu16 "]", @@ -678,8 +312,7 @@ void layout_PlaceSection(std::string const &name, bool isOptional) { uint16_t curOfs = org - typeInfo.startAddr; if (section->size > typeInfo.size - curOfs) { uint16_t overflowSize = section->size - (typeInfo.size - curOfs); - scriptError( - context, + lexer_Error( "The linker script assigns section \"%s\" to address $%04" PRIx16 ", but then it would overflow %s by %" PRIu16 " byte%s", name.c_str(), diff --git a/src/link/lexer.cpp b/src/link/lexer.cpp new file mode 100644 index 00000000..382c0541 --- /dev/null +++ b/src/link/lexer.cpp @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: MIT + +#include "link/lexer.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "helpers.hpp" +#include "itertools.hpp" +#include "util.hpp" + +#include "link/warning.hpp" +// Include this last so it gets all type & constant definitions +#include "script.hpp" // For token definitions, generated from script.y + +struct LexerStackEntry { + std::filebuf file; + std::string path; + uint32_t lineNo; + + explicit LexerStackEntry(std::string &&path_) : file(), path(path_), lineNo(1) {} +}; + +static std::vector lexerStack; + +void lexer_Error(char const *fmt, ...) { + LexerStackEntry &context = lexerStack.back(); + va_list args; + va_start(args, fmt); + scriptError(context.path.c_str(), context.lineNo, fmt, args); + va_end(args); +} + +void lexer_IncludeFile(std::string &&path) { + // `.emplace_back` can invalidate references to the stack's elements! + // This is why `newContext` must be gotten before `prevContext`. + LexerStackEntry &newContext = lexerStack.emplace_back(std::move(path)); + LexerStackEntry &prevContext = lexerStack[lexerStack.size() - 2]; + + if (!newContext.file.open(newContext.path, std::ios_base::in)) { + // `.pop_back()` will invalidate `newContext`, which is why `path` must be moved first. + std::string badPath = std::move(newContext.path); + lexerStack.pop_back(); + // This error will occur in `prevContext`, *before* incrementing the line number! + lexer_Error( + "Failed to open included linker script \"%s\": %s", badPath.c_str(), strerror(errno) + ); + } + + // `.pop_back()` cannot invalidate an unpopped reference, so `prevContext` + // is still valid even if `.open()` failed. + ++prevContext.lineNo; +} + +void lexer_IncLineNo() { + ++lexerStack.back().lineNo; +} + +static bool isWhiteSpace(int c) { + return c == ' ' || c == '\t'; +} + +static bool isNewline(int c) { + return c == '\r' || c == '\n'; +} + +yy::parser::symbol_type yylex(); // Forward declaration for `yywrap` + +static yy::parser::symbol_type yywrap() { + static bool atEof = false; + if (lexerStack.size() != 1) { + if (!atEof) { + // Inject a newline at EOF to simplify parsing. + atEof = true; + return yy::parser::make_newline(); + } + lexerStack.pop_back(); + return yylex(); + } + if (!atEof) { + // Inject a newline at EOF to simplify parsing. + atEof = true; + return yy::parser::make_newline(); + } + return yy::parser::make_YYEOF(); +} + +static bool isIdentChar(int c) { + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'); +} + +static std::string readIdent(int c) { + LexerStackEntry &context = lexerStack.back(); + std::string ident; + ident.push_back(c); + for (c = context.file.sgetc(); isIdentChar(c); c = context.file.snextc()) { + ident.push_back(c); + } + return ident; +} + +static bool isDecDigit(int c) { + return c >= '0' && c <= '9'; +} + +static yy::parser::symbol_type parseDecNumber(int c) { + LexerStackEntry &context = lexerStack.back(); + uint32_t number = c - '0'; + for (c = context.file.sgetc(); isDecDigit(c) || c == '_'; c = context.file.sgetc()) { + if (c != '_') { + number = number * 10 + (c - '0'); + } + context.file.sbumpc(); + } + return yy::parser::make_number(number); +} + +static bool isBinDigit(int c) { + return c >= '0' && c <= '1'; +} + +static yy::parser::symbol_type parseBinNumber(char const *prefix) { + LexerStackEntry &context = lexerStack.back(); + int c = context.file.sgetc(); + if (!isBinDigit(c)) { + lexer_Error("No binary digits found after '%s'", prefix); + return yy::parser::make_number(0); + } + + uint32_t number = c - '0'; + context.file.sbumpc(); + for (c = context.file.sgetc(); isBinDigit(c) || c == '_'; c = context.file.sgetc()) { + if (c != '_') { + number = number * 2 + (c - '0'); + } + context.file.sbumpc(); + } + return yy::parser::make_number(number); +} + +static bool isOctDigit(int c) { + return c >= '0' && c <= '7'; +} + +static yy::parser::symbol_type parseOctNumber(char const *prefix) { + LexerStackEntry &context = lexerStack.back(); + int c = context.file.sgetc(); + if (!isOctDigit(c)) { + lexer_Error("No octal digits found after '%s'", prefix); + return yy::parser::make_number(0); + } + + uint32_t number = c - '0'; + context.file.sbumpc(); + for (c = context.file.sgetc(); isOctDigit(c) || c == '_'; c = context.file.sgetc()) { + if (c != '_') { + number = number * 8 + (c - '0'); + } + context.file.sbumpc(); + } + return yy::parser::make_number(number); +} + +static bool isHexDigit(int c) { + return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); +} + +static uint8_t parseHexDigit(int c) { + if (c >= '0' && c <= '9') { + return c - '0'; + } else if (c >= 'A' && c <= 'F') { + return c - 'A' + 10; + } else if (c >= 'a' && c <= 'f') { + return c - 'a' + 10; + } else { + unreachable_(); // LCOV_EXCL_LINE + } +} + +static yy::parser::symbol_type parseHexNumber(char const *prefix) { + LexerStackEntry &context = lexerStack.back(); + int c = context.file.sgetc(); + if (!isHexDigit(c)) { + lexer_Error("No hexadecimal digits found after '%s'", prefix); + return yy::parser::make_number(0); + } + + uint32_t number = parseHexDigit(c); + context.file.sbumpc(); + for (c = context.file.sgetc(); isHexDigit(c) || c == '_'; c = context.file.sgetc()) { + if (c != '_') { + number = number * 16 + parseHexDigit(c); + } + context.file.sbumpc(); + } + return yy::parser::make_number(number); +} + +static yy::parser::symbol_type parseNumber(int c) { + LexerStackEntry &context = lexerStack.back(); + if (c == '0') { + switch (context.file.sgetc()) { + case 'x': + context.file.sbumpc(); + return parseHexNumber("0x"); + case 'X': + context.file.sbumpc(); + return parseHexNumber("0X"); + case 'o': + context.file.sbumpc(); + return parseOctNumber("0o"); + case 'O': + context.file.sbumpc(); + return parseOctNumber("0O"); + case 'b': + context.file.sbumpc(); + return parseBinNumber("0b"); + case 'B': + context.file.sbumpc(); + return parseBinNumber("0B"); + } + } + return parseDecNumber(c); +} + +static yy::parser::symbol_type parseString() { + LexerStackEntry &context = lexerStack.back(); + int c = context.file.sgetc(); + std::string str; + for (; c != '"'; c = context.file.sgetc()) { + if (c == EOF || isNewline(c)) { + lexer_Error("Unterminated string"); + break; + } + context.file.sbumpc(); + if (c == '\\') { + c = context.file.sgetc(); + if (c == EOF || isNewline(c)) { + lexer_Error("Unterminated string"); + break; + } else if (c == 'n') { + c = '\n'; + } else if (c == 'r') { + c = '\r'; + } else if (c == 't') { + c = '\t'; + } else if (c == '0') { + c = '\0'; + } else if (c != '\\' && c != '"' && c != '\'') { + lexer_Error("Cannot escape character %s", printChar(c)); + } + context.file.sbumpc(); + } + str.push_back(c); + } + if (c == '"') { + context.file.sbumpc(); + } + return yy::parser::make_string(std::move(str)); +} + +struct Keyword { + std::string_view name; + yy::parser::symbol_type (*tokenGen)(); +}; + +using namespace std::literals; + +static std::array keywords{ + Keyword{"ORG"sv, yy::parser::make_ORG }, + Keyword{"FLOATING"sv, yy::parser::make_FLOATING}, + Keyword{"INCLUDE"sv, yy::parser::make_INCLUDE }, + Keyword{"ALIGN"sv, yy::parser::make_ALIGN }, + Keyword{"DS"sv, yy::parser::make_DS }, + Keyword{"OPTIONAL"sv, yy::parser::make_OPTIONAL}, +}; + +yy::parser::symbol_type yylex() { + LexerStackEntry &context = lexerStack.back(); + int c = context.file.sbumpc(); + + // First, skip leading whitespace. + while (isWhiteSpace(c)) { + c = context.file.sbumpc(); + } + // Then, skip a comment if applicable. + if (c == ';') { + while (c != EOF && !isNewline(c)) { + c = context.file.sbumpc(); + } + } + + // Alright, what token should we return? + if (c == EOF) { + return yywrap(); + } else if (c == ',') { + return yy::parser::make_COMMA(); + } else if (isNewline(c)) { + // Handle CRLF. + if (c == '\r' && context.file.sgetc() == '\n') { + context.file.sbumpc(); + } + return yy::parser::make_newline(); + } else if (c == '"') { + return parseString(); + } else if (c == '$') { + return parseHexNumber("$"); + } else if (c == '%') { + return parseBinNumber("%"); + } else if (c == '&') { + return parseOctNumber("&"); + } else if (isDecDigit(c)) { + return parseNumber(c); + } else if (isIdentChar(c)) { // Note that we match these *after* digit characters! + std::string ident = readIdent(c); + + auto strUpperCmp = [](char cmp, char ref) { return toupper(cmp) == ref; }; + + for (SectionType type : EnumSeq(SECTTYPE_INVALID)) { + if (std::equal(RANGE(ident), RANGE(sectionTypeInfo[type].name), strUpperCmp)) { + return yy::parser::make_sect_type(type); + } + } + + for (Keyword const &keyword : keywords) { + if (std::equal(RANGE(ident), RANGE(keyword.name), strUpperCmp)) { + return keyword.tokenGen(); + } + } + + lexer_Error("Unknown keyword \"%s\"", ident.c_str()); + return yylex(); + } else { + lexer_Error("Unexpected character %s", printChar(c)); + // Keep reading characters until the EOL, to avoid reporting too many errors. + for (c = context.file.sgetc(); !isNewline(c); c = context.file.sgetc()) { + if (c == EOF) { + break; + } + context.file.sbumpc(); + } + return yylex(); + } + // Not marking as unreachable; this will generate a warning if any codepath forgets to return. +} + +bool lexer_Init(char const *linkerScriptName) { + if (LexerStackEntry &newContext = lexerStack.emplace_back(std::string(linkerScriptName)); + !newContext.file.open(newContext.path, std::ios_base::in)) { + error("Failed to open linker script \"%s\"", linkerScriptName); + lexerStack.clear(); + return false; + } + return true; +} diff --git a/src/link/main.cpp b/src/link/main.cpp index ba282d65..52d52133 100644 --- a/src/link/main.cpp +++ b/src/link/main.cpp @@ -18,7 +18,7 @@ #include "version.hpp" #include "link/assign.hpp" -#include "link/layout.hpp" +#include "link/lexer.hpp" #include "link/object.hpp" #include "link/output.hpp" #include "link/patch.hpp" @@ -151,7 +151,7 @@ static void parseScrambleSpec(char const *spec) { // If this trips, `spec` must be pointing at a ',' or '=' (or NUL) due to the assumption if (regionNameLen == 0) { - argErr('S', "Missing region name"); + argError('S', "Missing region name"); if (*spec == '\0') { break; @@ -165,7 +165,7 @@ static void parseScrambleSpec(char const *spec) { // Find the next non-blank char after the region name's end spec += regionNameLen + strspn(&spec[regionNameLen], " \t"); if (*spec != '\0' && *spec != ',' && *spec != '=') { - argErr( + argError( 'S', "Unexpected '%c' after region name \"%.*s\"", *spec, @@ -188,7 +188,7 @@ static void parseScrambleSpec(char const *spec) { } if (region == SCRAMBLE_UNK) { - argErr('S', "Unknown region \"%.*s\"", regionNameFmtLen, regionName); + argError('S', "Unknown region \"%.*s\"", regionNameFmtLen, regionName); } if (*spec == '=') { @@ -197,13 +197,13 @@ static void parseScrambleSpec(char const *spec) { char *endptr; if (*spec == '\0' || *spec == ',') { - argErr('S', "Empty limit for region \"%.*s\"", regionNameFmtLen, regionName); + argError('S', "Empty limit for region \"%.*s\"", regionNameFmtLen, regionName); goto next; } limit = strtoul(spec, &endptr, 10); endptr += strspn(endptr, " \t"); if (*endptr != '\0' && *endptr != ',') { - argErr( + argError( 'S', "Invalid non-numeric limit for region \"%.*s\"", regionNameFmtLen, @@ -214,7 +214,7 @@ static void parseScrambleSpec(char const *spec) { spec = endptr; if (region != SCRAMBLE_UNK && limit > scrambleSpecs[region].max) { - argErr( + argError( 'S', "Limit for region \"%.*s\" may not exceed %" PRIu16, regionNameFmtLen, @@ -241,7 +241,7 @@ static void parseScrambleSpec(char const *spec) { // Only WRAMX can be implied, since ROMX and SRAM size may vary options.scrambleWRAMX = 7; } else { - argErr('S', "Cannot imply limit for region \"%.*s\"", regionNameFmtLen, regionName); + argError('S', "Cannot imply limit for region \"%.*s\"", regionNameFmtLen, regionName); } next: // Can't `continue` a `for` loop with this nontrivial iteration logic @@ -310,7 +310,7 @@ int main(int argc, char *argv[]) { unsigned long value = strtoul(musl_optarg, &endptr, 0); if (musl_optarg[0] == '\0' || *endptr != '\0' || value > 0xFF) { - argErr('p', "Argument for 'p' must be a byte (between 0 and 0xFF)"); + argError('p', "Argument for 'p' must be a byte (between 0 and 0xFF)"); value = 0xFF; } options.padValue = value; diff --git a/src/link/script.y b/src/link/script.y index 2a504c89..8f19e781 100644 --- a/src/link/script.y +++ b/src/link/script.y @@ -12,8 +12,8 @@ } %code { + #include "link/lexer.hpp" #include "link/layout.hpp" - #include "link/warning.hpp" yy::parser::symbol_type yylex(); // Provided by layout.cpp } @@ -110,5 +110,5 @@ optional: /******************** Error handler ********************/ void yy::parser::error(std::string const &msg) { - scriptError(lexer_Context(), "%s", msg.c_str()); + lexer_Error("%s", msg.c_str()); } diff --git a/src/link/warning.cpp b/src/link/warning.cpp index 3cf677cb..98b5b13d 100644 --- a/src/link/warning.cpp +++ b/src/link/warning.cpp @@ -103,7 +103,7 @@ void errorNoDump(char const *fmt, ...) { warnings.incrementErrors(); } -void argErr(char flag, char const *fmt, ...) { +void argError(char flag, char const *fmt, ...) { va_list args; fprintf(stderr, "error: Invalid argument for option '%c': ", flag); va_start(args, fmt); @@ -114,6 +114,14 @@ void argErr(char flag, char const *fmt, ...) { warnings.incrementErrors(); } +void scriptError(char const *name, uint32_t lineNo, char const *fmt, va_list args) { + fprintf(stderr, "error: %s(%" PRIu32 "): ", name, lineNo); + vfprintf(stderr, fmt, args); + putc('\n', stderr); + + warnings.incrementErrors(); +} + [[noreturn]] void fatal(FileStackNode const *src, uint32_t lineNo, char const *fmt, ...) { va_list args; diff --git a/test/link/linkerscript-noexist.asm b/test/link/linkerscript-noexist.asm new file mode 100644 index 00000000..58cd5c5d --- /dev/null +++ b/test/link/linkerscript-noexist.asm @@ -0,0 +1,2 @@ +SECTION "test", ROM0 +db 1, 2, 3 diff --git a/test/link/linkerscript-noexist.link b/test/link/linkerscript-noexist.link new file mode 100644 index 00000000..11cee6de --- /dev/null +++ b/test/link/linkerscript-noexist.link @@ -0,0 +1,3 @@ +INCLUDE "linkerscript-noexist.inc" +ROM0 + "test" diff --git a/test/link/linkerscript-noexist.out b/test/link/linkerscript-noexist.out new file mode 100644 index 00000000..d6bacadf --- /dev/null +++ b/test/link/linkerscript-noexist.out @@ -0,0 +1,2 @@ +error: linkerscript-noexist.link(1): Failed to open included linker script "linkerscript-noexist.inc": No such file or directory +Linking failed with 1 error