From fd78a9ae83d62e800fa9106e6bcd3d583cd61c67 Mon Sep 17 00:00:00 2001 From: Eldred Habert Date: Mon, 11 Dec 2023 02:29:37 +0100 Subject: [PATCH] Port linkerscript parser to Bison (#1266) Notable side effects: * Use the standard-conformant MSVC preproc * Add test for linker script INCLUDE * Improve wording of placement conflict errors * Fix errors from not newline-terminated files * Teach checkdiff about the linker script doc * Call linker script "commands" "directives" instead --------- Co-authored-by: Rangi42 --- CMakeLists.txt | 3 +- Makefile | 14 +- contrib/checkdiff.bash | 3 + include/link/script.hpp | 21 +- include/linkdefs.hpp | 3 +- include/{asm => }/util.hpp | 1 + man/rgblink.5 | 16 +- src/CMakeLists.txt | 15 +- src/asm/charmap.cpp | 2 +- src/asm/lexer.cpp | 2 +- src/asm/parser.y | 2 +- src/asm/section.cpp | 6 +- src/asm/symbol.cpp | 2 +- src/link/.gitignore | 3 + src/link/assign.cpp | 6 +- src/link/main.cpp | 43 +- src/link/output.cpp | 4 +- src/link/script.cpp | 550 ---------------------- src/link/script.y | 490 +++++++++++++++++++ src/link/section.cpp | 7 +- src/linkdefs.cpp | 18 +- src/{asm => }/util.cpp | 5 +- test/link/linkerscript-escapes-test.out | 3 +- test/link/linkerscript-include.asm | 2 + test/link/linkerscript-include.inc | 1 + test/link/linkerscript-include.link | 3 + test/link/linkerscript-include.out | 2 + test/link/section-attributes-mismatch.out | 2 +- 28 files changed, 573 insertions(+), 656 deletions(-) rename include/{asm => }/util.hpp (93%) create mode 100644 src/link/.gitignore delete mode 100644 src/link/script.cpp create mode 100644 src/link/script.y rename src/{asm => }/util.cpp (92%) create mode 100644 test/link/linkerscript-include.asm create mode 100644 test/link/linkerscript-include.inc create mode 100644 test/link/linkerscript-include.link create mode 100644 test/link/linkerscript-include.out diff --git a/CMakeLists.txt b/CMakeLists.txt index 0720d148..df1c7cb3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,8 
@@ option(MORE_WARNINGS "Turn on more warnings" OFF) # Ignored on MSVC if(MSVC) # MSVC's standard library triggers warning C5105, # "macro expansion producing 'defined' has undefined behavior" - add_compile_options(/MP /wd5105) + # We also opt into the C++20-conformant preprocessor. + add_compile_options(/MP /wd5105 /Zc:preprocessor) add_definitions(/D_CRT_SECURE_NO_WARNINGS) if(SANITIZERS) diff --git a/Makefile b/Makefile index 38c2248a..97052ad3 100644 --- a/Makefile +++ b/Makefile @@ -65,14 +65,14 @@ rgbasm_obj := \ src/asm/rpn.o \ src/asm/section.o \ src/asm/symbol.o \ - src/asm/util.o \ src/asm/warning.o \ src/extern/getopt.o \ src/extern/utf8decoder.o \ src/error.o \ src/hashmap.o \ src/linkdefs.o \ - src/opmath.o + src/opmath.o \ + src/util.o src/asm/lexer.o src/asm/main.o: src/asm/parser.hpp @@ -91,7 +91,8 @@ rgblink_obj := \ src/error.o \ src/hashmap.o \ src/linkdefs.o \ - src/opmath.o + src/opmath.o \ + src/util.o rgbfix_obj := \ src/fix/main.o \ @@ -133,13 +134,13 @@ test/gfx/rgbgfx_test: test/gfx/rgbgfx_test.cpp # We want the Bison invocation to pass through our rules, not default ones .y.o: +.y.cpp: + $Q${BISON} $@ $< + # Bison-generated C++ files have an accompanying header src/asm/parser.hpp: src/asm/parser.cpp $Qtouch $@ -src/asm/parser.cpp: src/asm/parser.y - $Q${BISON} $@ $< - # Only RGBGFX uses libpng (POSIX make doesn't support pattern rules to cover all these) src/gfx/main.o: src/gfx/main.cpp $Q${CXX} ${REALCXXFLAGS} ${PNGCFLAGS} -c -o $@ $< @@ -172,6 +173,7 @@ clean: $Qfind . -type f \( -name "*.gcno" -o -name "*.gcda" -o -name "*.gcov" \) -exec rm {} \; $Q${RM} rgbshim.sh $Q${RM} src/asm/parser.cpp src/asm/parser.hpp + $Q${RM} src/link/script.cpp src/link/script.hpp src/link/stack.hh $Q${RM} test/gfx/randtilegen test/gfx/rgbgfx_test # Target used to install the binaries and man pages. 
diff --git a/contrib/checkdiff.bash b/contrib/checkdiff.bash index 18abb1c9..434314f0 100755 --- a/contrib/checkdiff.bash +++ b/contrib/checkdiff.bash @@ -26,6 +26,9 @@ dependency include/linkdefs.hpp man/rgbds.5 \ dependency src/asm/parser.y man/rgbasm.5 \ "Was the rgbasm grammar changed?" +dependency src/link/script.y man/rgblink.5 \ + "Was the linker script grammar changed?" + dependency include/asm/warning.hpp man/rgbasm.1 \ "Were the rgbasm warnings changed?" diff --git a/include/link/script.hpp b/include/link/script.hpp index 6cb098c8..2cd9b32c 100644 --- a/include/link/script.hpp +++ b/include/link/script.hpp @@ -8,26 +8,9 @@ #include "linkdefs.hpp" -extern FILE * linkerScript; - -struct SectionPlacement { - struct Section *section; - enum SectionType type; - uint16_t org; - uint32_t bank; -}; - -extern uint64_t script_lineNo; - /* - * Parses the linker script to return the next section constraint - * @return A pointer to a struct, or NULL on EOF. The pointer shouldn't be freed + * Parses the linker script, and modifies the sections mentioned within appropriately. */ -struct SectionPlacement *script_NextSection(void); - -/* - * `free`s all assignment memory that was allocated. 
- */ -void script_Cleanup(void); +void script_ProcessScript(char const *path); #endif // RGBDS_LINK_SCRIPT_H diff --git a/include/linkdefs.hpp b/include/linkdefs.hpp index db2e150a..eabbbef9 100644 --- a/include/linkdefs.hpp +++ b/include/linkdefs.hpp @@ -6,6 +6,7 @@ #include #include #include +#include #define RGBDS_OBJECT_VERSION_STRING "RGB9" #define RGBDS_OBJECT_REV 9U @@ -83,7 +84,7 @@ enum FileStackNodeType { // Nont-`const` members may be patched in RGBLINK depending on CLI flags extern struct SectionTypeInfo { - char const *const name; + std::string const name; uint16_t const startAddr; uint16_t size; uint32_t const firstBank; diff --git a/include/asm/util.hpp b/include/util.hpp similarity index 93% rename from include/asm/util.hpp rename to include/util.hpp index f8d73eaf..d0020d7c 100644 --- a/include/asm/util.hpp +++ b/include/util.hpp @@ -3,6 +3,7 @@ #ifndef RGBDS_UTIL_H #define RGBDS_UTIL_H +#include #include char const *printChar(int c); diff --git a/man/rgblink.5 b/man/rgblink.5 index 80f8cce1..c839d0d5 100644 --- a/man/rgblink.5 +++ b/man/rgblink.5 @@ -9,8 +9,8 @@ .Sh DESCRIPTION The linker script is an external file that allows the user to specify the order of sections at link time and in a centralized manner. .Pp -A linker script consists of a series of bank declarations, each optionally followed by a list of section names (in double quotes) or commands. -All reserved keywords (bank types and command names) are case-insensitive; all section names are case-sensitive. +A linker script consists of a series of bank declarations, each optionally followed by a list of section names (in double quotes) or directives. +All reserved keywords (bank types and directive names) are case-insensitive; all section names are case-sensitive. .Pp Any line can contain a comment starting with .Ql \&; @@ -19,17 +19,17 @@ that ends at the end of the line. 
; This line is a comment ROMX $F ; start a bank "Some functions" ; a section name - ALIGN 8 ; a command + ALIGN 8 ; a directive "Some array" WRAMX 2 ; start another bank - org $d123 ; another command + org $d123 ; another directive "Some variables" .Ed .Pp Numbers can be in decimal or hexadecimal format .Pq the prefix is Ql $ . -It is an error if any section name or command is found before setting a bank. +It is an error if any section name or directive is found before setting a bank. .Pp Files can be included by using the .Ic INCLUDE @@ -43,7 +43,7 @@ Unless there is a single bank, which can occur with types .Cm ROMX , VRAM , SRAM and .Cm WRAMX , -it is needed to specify a bank number after the type. +it is mandatory to specify a bank number after the type. .Pp Section names in double quotes support the same character escape sequences as strings in .Xr rgbasm 5 , @@ -61,7 +61,7 @@ are only relevant to assembly code and do not apply in section names. When a new bank statement is found, sections found after it will be placed right from the beginning of that bank. If the linker script switches to a different bank and then comes back to a previous one, it will continue from the last address that was used. .Pp -The only three commands are +The only three directives are .Ic ORG , .Ic ALIGN , and @@ -74,7 +74,7 @@ It can not be lower than the current address. .It .Ic ALIGN will increase the address until it is aligned to the specified boundary -.Po it tries to set to 0 the number of bits specified after the command: +.Po it tries to set to 0 the number of bits specified after the directive: .Ql ALIGN 8 will align to $100 .Pc . 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9c1c6a12..f6dde0f1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,14 +22,20 @@ endif() set(BISON_FLAGS "${BISON_FLAGS} -Dparse.lac=full") set(BISON_FLAGS "${BISON_FLAGS} -Dlr.type=ielr") -BISON_TARGET(PARSER "asm/parser.y" +BISON_TARGET(ASM_PARSER "asm/parser.y" "${PROJECT_SOURCE_DIR}/src/asm/parser.cpp" COMPILE_FLAGS "${BISON_FLAGS}" DEFINES_FILE "${PROJECT_SOURCE_DIR}/src/asm/parser.hpp" ) +BISON_TARGET(LINKER_SCRIPT_PARSER "link/script.y" + "${PROJECT_SOURCE_DIR}/src/link/script.cpp" + COMPILE_FLAGS "${BISON_FLAGS}" + DEFINES_FILE "${PROJECT_SOURCE_DIR}/src/link/script.hpp" + ) + set(rgbasm_src - "${BISON_PARSER_OUTPUT_SOURCE}" + "${BISON_ASM_PARSER_OUTPUT_SOURCE}" "asm/charmap.cpp" "asm/fixpoint.cpp" "asm/format.cpp" @@ -42,12 +48,12 @@ set(rgbasm_src "asm/rpn.cpp" "asm/section.cpp" "asm/symbol.cpp" - "asm/util.cpp" "asm/warning.cpp" "extern/utf8decoder.cpp" "hashmap.cpp" "linkdefs.cpp" "opmath.cpp" + "util.cpp" ) set(rgbfix_src @@ -68,12 +74,12 @@ set(rgbgfx_src ) set(rgblink_src + "${BISON_LINKER_SCRIPT_PARSER_OUTPUT_SOURCE}" "link/assign.cpp" "link/main.cpp" "link/object.cpp" "link/output.cpp" "link/patch.cpp" - "link/script.cpp" "link/sdas_obj.cpp" "link/section.cpp" "link/symbol.cpp" @@ -81,6 +87,7 @@ set(rgblink_src "hashmap.cpp" "linkdefs.cpp" "opmath.cpp" + "util.cpp" ) foreach(PROG "asm" "fix" "gfx" "link") diff --git a/src/asm/charmap.cpp b/src/asm/charmap.cpp index d82b6282..f63efad2 100644 --- a/src/asm/charmap.cpp +++ b/src/asm/charmap.cpp @@ -10,10 +10,10 @@ #include "asm/charmap.hpp" #include "asm/main.hpp" #include "asm/output.hpp" -#include "asm/util.hpp" #include "asm/warning.hpp" #include "hashmap.hpp" +#include "util.hpp" // Charmaps are stored using a structure known as "trie". 
// Essentially a tree, where each nodes stores a single character's worth of info: diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index 5342afc4..9175160c 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -19,6 +19,7 @@ #endif #include "platform.hpp" // For `ssize_t` and `AT` +#include "util.hpp" #include "asm/lexer.hpp" #include "asm/fixpoint.hpp" @@ -28,7 +29,6 @@ #include "asm/main.hpp" #include "asm/rpn.hpp" #include "asm/symbol.hpp" -#include "asm/util.hpp" #include "asm/warning.hpp" // Include this last so it gets all type & constant definitions #include "parser.hpp" // For token definitions, generated from parser.y diff --git a/src/asm/parser.y b/src/asm/parser.y index a28bfb44..8b63035e 100644 --- a/src/asm/parser.y +++ b/src/asm/parser.y @@ -22,7 +22,7 @@ #include "asm/rpn.hpp" #include "asm/section.hpp" #include "asm/symbol.hpp" -#include "asm/util.hpp" +#include "util.hpp" #include "asm/warning.hpp" #include "extern/utf8decoder.hpp" diff --git a/src/asm/section.cpp b/src/asm/section.cpp index aaa3e848..1d9428ee 100644 --- a/src/asm/section.cpp +++ b/src/asm/section.cpp @@ -222,7 +222,7 @@ static void mergeSections(struct Section *sect, enum SectionType type, uint32_t unsigned int nbSectErrors = 0; if (type != sect->type) - fail("Section already exists but with type %s\n", sectionTypeInfo[sect->type].name); + fail("Section already exists but with type %s\n", sectionTypeInfo[sect->type].name.c_str()); if (sect->modifier != mod) { fail("Section already declared as %s section\n", sectionModNames[sect->modifier]); @@ -314,7 +314,7 @@ static struct Section *getSection(char const *name, enum SectionType type, uint3 error("BANK only allowed for ROMX, WRAMX, SRAM, or VRAM sections\n"); else if (bank < sectionTypeInfo[type].firstBank || bank > sectionTypeInfo[type].lastBank) error("%s bank value $%04" PRIx32 " out of range ($%04" PRIx32 " to $%04" - PRIx32 ")\n", sectionTypeInfo[type].name, bank, + PRIx32 ")\n", sectionTypeInfo[type].name.c_str(), 
bank, sectionTypeInfo[type].firstBank, sectionTypeInfo[type].lastBank); } else if (nbbanks(type) == 1) { // If the section type only has a single bank, implicitly force it @@ -349,7 +349,7 @@ static struct Section *getSection(char const *name, enum SectionType type, uint3 alignment = 0; // Ignore it if it's satisfied } else if (sectionTypeInfo[type].startAddr & mask) { error("Section \"%s\"'s alignment cannot be attained in %s\n", - name, sectionTypeInfo[type].name); + name, sectionTypeInfo[type].name.c_str()); alignment = 0; // Ignore it if it's unattainable org = 0; } else if (alignment == 16) { diff --git a/src/asm/symbol.cpp b/src/asm/symbol.cpp index 546ae5dd..2fe5c3c4 100644 --- a/src/asm/symbol.cpp +++ b/src/asm/symbol.cpp @@ -18,7 +18,7 @@ #include "asm/output.hpp" #include "asm/section.hpp" #include "asm/symbol.hpp" -#include "asm/util.hpp" +#include "util.hpp" #include "asm/warning.hpp" #include "error.hpp" diff --git a/src/link/.gitignore b/src/link/.gitignore new file mode 100644 index 00000000..dc5ed7c2 --- /dev/null +++ b/src/link/.gitignore @@ -0,0 +1,3 @@ +/script.cpp +/script.hpp +/stack.hh diff --git a/src/link/assign.cpp b/src/link/assign.cpp index d9657c6f..4c5f51c4 100644 --- a/src/link/assign.cpp +++ b/src/link/assign.cpp @@ -298,16 +298,16 @@ static void placeSection(struct Section *section) // If a section failed to go to several places, nothing we can report if (!section->isBankFixed || !section->isAddressFixed) errx("Unable to place \"%s\" (%s section) %s", - section->name, sectionTypeInfo[section->type].name, where); + section->name, sectionTypeInfo[section->type].name.c_str(), where); // If the section just can't fit the bank, report that else if (section->org + section->size > endaddr(section->type) + 1) errx("Unable to place \"%s\" (%s section) %s: section runs past end of region ($%04x > $%04x)", - section->name, sectionTypeInfo[section->type].name, where, + section->name, sectionTypeInfo[section->type].name.c_str(), where, 
section->org + section->size, endaddr(section->type) + 1); // Otherwise there is overlap with another section else errx("Unable to place \"%s\" (%s section) %s: section overlaps with \"%s\"", - section->name, sectionTypeInfo[section->type].name, where, + section->name, sectionTypeInfo[section->type].name.c_str(), where, out_OverlappingSection(section)->name); } diff --git a/src/link/main.cpp b/src/link/main.cpp index e487eeb6..519ad99a 100644 --- a/src/link/main.cpp +++ b/src/link/main.cpp @@ -29,7 +29,7 @@ #include "version.hpp" bool isDmgMode; // -d -char *linkerScriptName; // -l +char *linkerScriptName; // -l char const *mapFileName; // -m bool noSymInMap; // -M char const *symFileName; // -n @@ -45,6 +45,8 @@ bool beVerbose; // -v bool isWRA0Mode; // -w bool disablePadding; // -x +FILE *linkerScript; + static uint32_t nbErrors = 0; // Helper function to dump a file stack to stderr @@ -460,44 +462,7 @@ int main(int argc, char *argv[]) if (linkerScriptName) { verbosePrint("Reading linker script...\n"); - linkerScript = openFile(linkerScriptName, "r"); - - // Modify all sections according to the linker script - struct SectionPlacement *placement; - - while ((placement = script_NextSection())) { - struct Section *section = placement->section; - - assert(section->offset == 0); - // Check if this doesn't conflict with what the code says - if (section->type == SECTTYPE_INVALID) { - for (struct Section *sect = section; sect; sect = sect->nextu) - sect->type = placement->type; // SDCC "unknown" sections - } else if (section->type != placement->type) { - error(NULL, 0, "Linker script contradicts \"%s\"'s type", - section->name); - } - if (section->isBankFixed && placement->bank != section->bank) - error(NULL, 0, "Linker script contradicts \"%s\"'s bank placement", - section->name); - if (section->isAddressFixed && placement->org != section->org) - error(NULL, 0, "Linker script contradicts \"%s\"'s address placement", - section->name); - if (section->isAlignFixed - && 
(placement->org & section->alignMask) != 0) - error(NULL, 0, "Linker script contradicts \"%s\"'s alignment", - section->name); - - section->isAddressFixed = true; - section->org = placement->org; - section->isBankFixed = true; - section->bank = placement->bank; - section->isAlignFixed = false; // The alignment is satisfied - } - - fclose(linkerScript); - - script_Cleanup(); + script_ProcessScript(linkerScriptName); // If the linker script produced any errors, some sections may be in an invalid state if (nbErrors != 0) diff --git a/src/link/output.cpp b/src/link/output.cpp index ff30001e..d5450453 100644 --- a/src/link/output.cpp +++ b/src/link/output.cpp @@ -430,7 +430,7 @@ static void writeEmptySpace(uint16_t begin, uint16_t end) static void writeMapBank(struct SortedSections const *sectList, enum SectionType type, uint32_t bank) { - fprintf(mapFile, "\n%s bank #%" PRIu32 ":\n", sectionTypeInfo[type].name, + fprintf(mapFile, "\n%s bank #%" PRIu32 ":\n", sectionTypeInfo[type].name.c_str(), bank + sectionTypeInfo[type].firstBank); uint16_t used = 0; @@ -540,7 +540,7 @@ static void writeMapSummary(void) } fprintf(mapFile, "\t%s: %" PRId32 " byte%s used / %" PRId32 " free", - sectionTypeInfo[type].name, usedTotal, usedTotal == 1 ? "" : "s", + sectionTypeInfo[type].name.c_str(), usedTotal, usedTotal == 1 ? 
"" : "s", nbBanks * sectionTypeInfo[type].size - usedTotal); if (sectionTypeInfo[type].firstBank != sectionTypeInfo[type].lastBank || nbBanks > 1) diff --git a/src/link/script.cpp b/src/link/script.cpp deleted file mode 100644 index 77ab5e5d..00000000 --- a/src/link/script.cpp +++ /dev/null @@ -1,550 +0,0 @@ -/* SPDX-License-Identifier: MIT */ - -#include -#include -#include -#include -#include -#include - -#include "link/main.hpp" -#include "link/script.hpp" -#include "link/section.hpp" - -#include "error.hpp" -#include "itertools.hpp" -#include "linkdefs.hpp" -#include "platform.hpp" - -FILE *linkerScript; -char *includeFileName; - -static uint32_t lineNo; - -struct FileNode { - FILE *file; - uint32_t lineNo; - char *name; -}; - -static struct FileNode *fileStack; - -static uint32_t fileStackSize; -static uint32_t fileStackIndex; - -static void pushFile(char *newFileName) -{ - if (fileStackIndex == UINT32_MAX) - errx("%s(%" PRIu32 "): INCLUDE recursion limit reached", - linkerScriptName, lineNo); - - if (fileStackIndex == fileStackSize) { - if (!fileStackSize) // Init file stack - fileStackSize = 4; - fileStackSize *= 2; - fileStack = (struct FileNode *)realloc(fileStack, sizeof(*fileStack) * fileStackSize); - if (!fileStack) - err("%s(%" PRIu32 "): Internal INCLUDE error", - linkerScriptName, lineNo); - } - - fileStack[fileStackIndex].file = linkerScript; - fileStack[fileStackIndex].lineNo = lineNo; - fileStack[fileStackIndex].name = linkerScriptName; - fileStackIndex++; - - linkerScript = fopen(newFileName, "r"); - if (!linkerScript) - err("%s(%" PRIu32 "): Could not open \"%s\"", - linkerScriptName, lineNo, newFileName); - lineNo = 1; - linkerScriptName = newFileName; -} - -static bool popFile(void) -{ - if (!fileStackIndex) - return false; - - free(linkerScriptName); - - fileStackIndex--; - linkerScript = fileStack[fileStackIndex].file; - lineNo = fileStack[fileStackIndex].lineNo; - linkerScriptName = fileStack[fileStackIndex].name; - - return true; -} - 
-static bool isWhiteSpace(int c) -{ - return c == ' ' || c == '\t'; -} - -static bool isNewline(int c) -{ - return c == '\r' || c == '\n'; -} - -/* - * Try parsing a number, in base 16 if it begins with a dollar, - * in base 10 otherwise - * @param str The number to parse - * @param number A pointer where the number will be written to - * @return True if parsing was successful, false otherwise - */ -static bool tryParseNumber(char const *str, uint32_t *number) -{ - static char const digits[] = { - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', - 'A', 'B', 'C', 'D', 'E', 'F' - }; - uint8_t base = 10; - - if (*str == '$') { - str++; - base = 16; - } - - // An empty string is not a number - if (!*str) - return false; - - *number = 0; - do { - char chr = toupper(*str++); - uint8_t digit = 0; - - while (digit < base) { - if (chr == digits[digit]) - break; - digit++; - } - if (digit == base) - return false; - *number = *number * base + digit; - } while (*str); - - return true; -} - -enum LinkerScriptTokenType { - TOKEN_NEWLINE, - TOKEN_COMMAND, - TOKEN_BANK, - TOKEN_INCLUDE, - TOKEN_NUMBER, - TOKEN_STRING, - TOKEN_EOF, - - TOKEN_INVALID -}; - -char const *tokenTypes[TOKEN_INVALID] = { - AT(TOKEN_NEWLINE) "newline", - AT(TOKEN_COMMAND) "command", - AT(TOKEN_BANK) "bank command", - AT(TOKEN_INCLUDE) NULL, - AT(TOKEN_NUMBER) "number", - AT(TOKEN_STRING) "string", - AT(TOKEN_EOF) "end of file", -}; - -enum LinkerScriptCommand { - COMMAND_ORG, - COMMAND_ALIGN, - COMMAND_DS, - - COMMAND_INVALID -}; - -union LinkerScriptTokenAttr { - enum LinkerScriptCommand command; - enum SectionType secttype; - uint32_t number; - char *string; -}; - - -struct LinkerScriptToken { - enum LinkerScriptTokenType type; - union LinkerScriptTokenAttr attr; -}; - -static char const * const commands[COMMAND_INVALID] = { - AT(COMMAND_ORG) "ORG", - AT(COMMAND_ALIGN) "ALIGN", - AT(COMMAND_DS) "DS" -}; - -static int nextChar(void) -{ - int curchar = getc(linkerScript); - - if (curchar == EOF && 
ferror(linkerScript)) - err("%s(%" PRIu32 "): Unexpected error in %s", - linkerScriptName, lineNo, __func__); - return curchar; -} - -static struct LinkerScriptToken *nextToken(void) -{ - static struct LinkerScriptToken token; - int curchar; - - // If the token has a string, make sure to avoid leaking it - if (token.type == TOKEN_STRING) - free(token.attr.string); - - // Skip initial whitespace... - do - curchar = nextChar(); - while (isWhiteSpace(curchar)); - - // If this is a comment, skip to the end of the line - if (curchar == ';') { - do { - curchar = nextChar(); - } while (!isNewline(curchar) && curchar != EOF); - } - - if (curchar == EOF) { - token.type = TOKEN_EOF; - } else if (isNewline(curchar)) { - // If we have a newline char, this is a newline token - token.type = TOKEN_NEWLINE; - - if (curchar == '\r') { - // Handle CRLF - curchar = nextChar(); - if (curchar != '\n') - ungetc(curchar, linkerScript); - } - } else if (curchar == '"') { - // If we have a string start, this is a string - token.type = TOKEN_STRING; - token.attr.string = NULL; // Force initial alloc - - size_t size = 0; - size_t capacity = 16; // Half of the default capacity - - do { - curchar = nextChar(); - if (curchar == EOF || isNewline(curchar)) { - errx("%s(%" PRIu32 "): Unterminated string", - linkerScriptName, lineNo); - } else if (curchar == '"') { - // Quotes force a string termination - curchar = '\0'; - } else if (curchar == '\\') { - // Backslashes are escape sequences - curchar = nextChar(); - if (curchar == EOF || isNewline(curchar)) - errx("%s(%" PRIu32 "): Unterminated string", - linkerScriptName, lineNo); - else if (curchar == 'n') - curchar = '\n'; - else if (curchar == 'r') - curchar = '\r'; - else if (curchar == 't') - curchar = '\t'; - else if (curchar != '\\' && curchar != '"') - errx("%s(%" PRIu32 "): Illegal character escape", - linkerScriptName, lineNo); - } - - if (size >= capacity || token.attr.string == NULL) { - capacity *= 2; - token.attr.string = (char 
*)realloc(token.attr.string, capacity); - if (!token.attr.string) - err("%s: Failed to allocate memory for string", - __func__); - } - token.attr.string[size++] = curchar; - } while (curchar); - } else { - // This is either a number, command or bank, that is: a word - char *str = NULL; - size_t size = 0; - size_t capacity = 8; // Half of the default capacity - - for (;;) { - if (size >= capacity || str == NULL) { - capacity *= 2; - str = (char *)realloc(str, capacity); - if (!str) - err("%s: Failed to allocate memory for token", - __func__); - } - str[size] = toupper(curchar); - size++; - - if (!curchar) - break; - - curchar = nextChar(); - // Whitespace, a newline or a comment end the token - if (isWhiteSpace(curchar) || isNewline(curchar) || curchar == ';') { - ungetc(curchar, linkerScript); - curchar = '\0'; - } - } - - token.type = TOKEN_INVALID; - - // Try to match a command - for (enum LinkerScriptCommand i : EnumSeq(COMMAND_INVALID)) { - if (!strcmp(commands[i], str)) { - token.type = TOKEN_COMMAND; - token.attr.command = i; - break; - } - } - - if (token.type == TOKEN_INVALID) { - // Try to match a bank specifier - for (enum SectionType type : EnumSeq(SECTTYPE_INVALID)) { - if (!strcmp(sectionTypeInfo[type].name, str)) { - token.type = TOKEN_BANK; - token.attr.secttype = type; - break; - } - } - } - - if (token.type == TOKEN_INVALID) { - // Try to match an include token - if (!strcmp("INCLUDE", str)) - token.type = TOKEN_INCLUDE; - } - - if (token.type == TOKEN_INVALID) { - // None of the strings matched, do we have a number? 
- if (tryParseNumber(str, &token.attr.number)) - token.type = TOKEN_NUMBER; - else - errx("%s(%" PRIu32 "): Unknown token \"%s\"", - linkerScriptName, lineNo, str); - } - - free(str); - } - - return &token; -} - -static void processCommand(enum LinkerScriptCommand command, uint16_t arg, uint16_t *pc) -{ - switch (command) { - case COMMAND_INVALID: - unreachable_(); - - case COMMAND_ORG: - break; - - case COMMAND_ALIGN: - if (arg >= 16) { - arg = 0; - } else { - uint16_t mask = (1 << arg) - 1; - - arg = (*pc + mask) & ~mask; - } - break; - - case COMMAND_DS: - arg += *pc; - } - - if (arg < *pc) - errx("%s(%" PRIu32 "): `%s` cannot be used to go backwards (currently at $%x)", - linkerScriptName, lineNo, commands[command], *pc); - *pc = arg; -} - -enum LinkerScriptParserState { - PARSER_FIRSTTIME, - PARSER_LINESTART, - PARSER_INCLUDE, // After an INCLUDE token - PARSER_LINEEND -}; - -// Part of internal state, but has data that needs to be freed -static uint16_t *curaddr[SECTTYPE_INVALID]; - -// Put as global to ensure it's initialized only once -static enum LinkerScriptParserState parserState = PARSER_FIRSTTIME; - -struct SectionPlacement *script_NextSection(void) -{ - static struct SectionPlacement placement; - static uint32_t bank; - static uint32_t bankID; - - if (parserState == PARSER_FIRSTTIME) { - lineNo = 1; - - // Init PC for all banks - for (enum SectionType i : EnumSeq(SECTTYPE_INVALID)) { - curaddr[i] = (uint16_t *)malloc(sizeof(*curaddr[i]) * nbbanks(i)); - for (uint32_t b = 0; b < nbbanks(i); b++) - curaddr[i][b] = sectionTypeInfo[i].startAddr; - } - - placement.type = SECTTYPE_INVALID; - - parserState = PARSER_LINESTART; - } - - for (;;) { - struct LinkerScriptToken *token = nextToken(); - enum LinkerScriptTokenType tokType; - union LinkerScriptTokenAttr attr; - bool hasArg; - uint32_t arg; - - if (placement.type != SECTTYPE_INVALID) { - if (curaddr[placement.type][bankID] > endaddr(placement.type) + 1) - errx("%s(%" PRIu32 "): Sections would extend 
past the end of %s ($%04" PRIx16 " > $%04" PRIx16 ")", - linkerScriptName, lineNo, sectionTypeInfo[placement.type].name, - curaddr[placement.type][bankID], endaddr(placement.type)); - if (curaddr[placement.type][bankID] < sectionTypeInfo[placement.type].startAddr) - errx("%s(%" PRIu32 "): PC underflowed ($%04" PRIx16 " < $%04" PRIx16 ")", - linkerScriptName, lineNo, - curaddr[placement.type][bankID], sectionTypeInfo[placement.type].startAddr); - } - - switch (parserState) { - case PARSER_FIRSTTIME: - unreachable_(); - - case PARSER_LINESTART: - switch (token->type) { - case TOKEN_INVALID: - unreachable_(); - - case TOKEN_EOF: - if (!popFile()) - return NULL; - parserState = PARSER_LINEEND; - break; - - case TOKEN_NUMBER: - errx("%s(%" PRIu32 "): stray number \"%" PRIu32 "\"", - linkerScriptName, lineNo, - token->attr.number); - - case TOKEN_NEWLINE: - lineNo++; - break; - - // A stray string is a section name - case TOKEN_STRING: - parserState = PARSER_LINEEND; - - if (placement.type == SECTTYPE_INVALID) - errx("%s(%" PRIu32 "): Didn't specify a location before the section", - linkerScriptName, lineNo); - - placement.section = - sect_GetSection(token->attr.string); - if (!placement.section) - errx("%s(%" PRIu32 "): Unknown section \"%s\"", - linkerScriptName, lineNo, - token->attr.string); - placement.org = curaddr[placement.type][bankID]; - placement.bank = bank; - - curaddr[placement.type][bankID] += placement.section->size; - return &placement; - - case TOKEN_COMMAND: - case TOKEN_BANK: - tokType = token->type; - attr = token->attr; - - token = nextToken(); - hasArg = token->type == TOKEN_NUMBER; - // Leaving `arg` uninitialized when `!hasArg` causes GCC to warn - // about its use as an argument to `processCommand`. This cannot - // happen because `hasArg` has to be true, but silence the warning - // anyways. I dislike doing this because it could swallow actual - // errors, but I don't have a choice. - arg = hasArg ? 
token->attr.number : 0; - - if (tokType == TOKEN_COMMAND) { - if (placement.type == SECTTYPE_INVALID) - errx("%s(%" PRIu32 "): Didn't specify a location before the command", - linkerScriptName, lineNo); - if (!hasArg) - errx("%s(%" PRIu32 "): Command specified without an argument", - linkerScriptName, lineNo); - - processCommand(attr.command, arg, &curaddr[placement.type][bankID]); - } else { // TOKEN_BANK - placement.type = attr.secttype; - // If there's only one bank, - // specifying the number is optional. - if (!hasArg && nbbanks(placement.type) != 1) - errx("%s(%" PRIu32 "): Didn't specify a bank number", - linkerScriptName, lineNo); - else if (!hasArg) - arg = sectionTypeInfo[placement.type].firstBank; - else if (arg < sectionTypeInfo[placement.type].firstBank) - errx("%s(%" PRIu32 "): specified bank number is too low (%" PRIu32 " < %" PRIu32 ")", - linkerScriptName, lineNo, - arg, sectionTypeInfo[placement.type].firstBank); - else if (arg > sectionTypeInfo[placement.type].lastBank) - errx("%s(%" PRIu32 "): specified bank number is too high (%" PRIu32 " > %" PRIu32 ")", - linkerScriptName, lineNo, - arg, sectionTypeInfo[placement.type].lastBank); - bank = arg; - bankID = arg - sectionTypeInfo[placement.type].firstBank; - } - - // If we read a token we shouldn't have... 
- if (token->type != TOKEN_NUMBER) - goto lineend; - break; - - case TOKEN_INCLUDE: - parserState = PARSER_INCLUDE; - break; - } - break; - - case PARSER_INCLUDE: - if (token->type != TOKEN_STRING) - errx("%s(%" PRIu32 "): Expected a file name after INCLUDE", - linkerScriptName, lineNo); - - // Switch to that file - pushFile(token->attr.string); - // The file stack took ownership of the string - token->attr.string = NULL; - - parserState = PARSER_LINESTART; - break; - - case PARSER_LINEEND: -lineend: - lineNo++; - parserState = PARSER_LINESTART; - if (token->type == TOKEN_EOF) { - if (!popFile()) - return NULL; - parserState = PARSER_LINEEND; - } else if (token->type != TOKEN_NEWLINE) - errx("%s(%" PRIu32 "): Unexpected %s at the end of the line", - linkerScriptName, lineNo, - tokenTypes[token->type]); - break; - } - } -} - -void script_Cleanup(void) -{ - for (enum SectionType type : EnumSeq(SECTTYPE_INVALID)) - free(curaddr[type]); -} diff --git a/src/link/script.y b/src/link/script.y new file mode 100644 index 00000000..fc5adeca --- /dev/null +++ b/src/link/script.y @@ -0,0 +1,490 @@ +%language "c++" +%define api.value.type variant +%define api.token.constructor + +%code requires { + #include + #include + + #include "linkdefs.hpp" +} +%code { + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + + #include "helpers.hpp" + #include "itertools.hpp" + #include "util.hpp" + + #include "link/main.hpp" + #include "link/section.hpp" + + using namespace std::literals; + + static void includeFile(std::string &&path); + static void incLineNo(void); + + static void setSectionType(SectionType type); + static void setSectionType(SectionType type, uint32_t bank); + static void setAddr(uint32_t addr); + static void alignTo(uint32_t alignment, uint32_t offset); + static void pad(uint32_t length); + static void placeSection(std::string const &name); + + static yy::parser::symbol_type yylex(void); + + struct Keyword { + 
// One entry of the lexer's file stack: a linker script being read, together with the
// information needed to report errors about it (`path` and the current `lineNo`).
struct LexerStackEntry {
	std::filebuf file; // Left closed by the constructor; the creator is expected to `open` it.
	std::string path;  // Path the script was requested under, used in diagnostics.
	uint32_t lineNo;   // 1-based number of the line currently being processed.

	// Takes ownership of `path_`.
	// `path_` is a *named* rvalue reference, hence an lvalue: it must be explicitly moved from,
	// otherwise the string is silently copied.
	explicit LexerStackEntry(std::string &&path_) : file(), path(std::move(path_)), lineNo(1) {}
};
+ auto &newContext = lexerStack.emplace_back(std::move(path)); + auto &prevContext = lexerStack[lexerStack.size() - 2]; + + if (!newContext.file.open(newContext.path, std::ios_base::in)) { + // The order is important: report the error, increment the line number, modify the stack! + scriptError(prevContext, "Could not open included linker script \"%s\"", + newContext.path.c_str()); + ++prevContext.lineNo; + lexerStack.pop_back(); + } else { + // The lexer will use the new entry to lex the next token. + ++prevContext.lineNo; + } +} + +static void incLineNo(void) { + ++lexerStack.back().lineNo; +} + +static bool isWhiteSpace(int c) { + return c == ' ' || c == '\t'; +} + +static bool isNewline(int c) { + return c == '\r' || c == '\n'; +} + +static bool isIdentChar(int c) { + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'); +} + +static bool isDecDigit(int c) { + return c >= '0' && c <= '9'; +} + +static bool isBinDigit(int c) { + return c >= '0' && c <= '1'; +} + +static bool isHexDigit(int c) { + return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); +} + +static uint8_t parseHexDigit(int c) { + if (c >= '0' && c <= '9') { + return c - '0'; + } else if (c >= 'A' && c <= 'F') { + return c - 'A' + 10; + } else if (c >= 'a' && c <= 'f') { + return c - 'a' + 10; + } else { + unreachable_(); + } +} + +yy::parser::symbol_type yylex(void) { +try_again: // Can't use a `do {} while(0)` loop, otherwise compilers (wrongly) think it can end. + auto &context = lexerStack.back(); + auto c = context.file.sbumpc(); + + // First, skip leading whitespace. + while (isWhiteSpace(c)) { + c = context.file.sbumpc(); + } + // Then, skip a comment if applicable. + if (c == ';') { + while (!isNewline(c)) { + c = context.file.sbumpc(); + } + } + + // Alright, what token should we return? + if (c == EOF) { + // Basically yywrap(). 
+ if (lexerStack.size() != 1) { + lexerStack.pop_back(); + goto try_again; + } else if (!atEof) { + // Inject a newline at EOF, to avoid errors for files that don't end with one. + atEof = true; + return yy::parser::make_newline(); + } else { + return yy::parser::make_YYEOF(); + } + } else if (isNewline(c)) { + // Handle CRLF. + if (c == '\r' && context.file.sgetc() == '\n') { + context.file.sbumpc(); + } + return yy::parser::make_newline(); + } else if (c == '"') { + std::string str; + + for (c = context.file.sgetc(); c != '"'; c = context.file.sgetc()) { + if (c == EOF || isNewline(c)) { + scriptError(context, "Unterminated string"); + break; + } + context.file.sbumpc(); + if (c == '\\') { + c = context.file.sgetc(); + if (c == EOF || isNewline(c)) { + scriptError(context, "Unterminated string"); + break; + } else if (c == 'n') { + c = '\n'; + } else if (c == 'r') { + c = '\r'; + } else if (c == 't') { + c = '\t'; + } else if (c != '\\' && c != '"') { + scriptError(context, "Cannot escape character %s", printChar(c)); + } + context.file.sbumpc(); + } + str.push_back(c); + } + context.file.sbumpc(); // Consume the closing quote. 
+ + return yy::parser::make_string(std::move(str)); + } else if (c == '$') { + c = context.file.sgetc(); + if (!isHexDigit(c)) { + scriptError(context, "No hexadecimal digits found after '$'"); + return yy::parser::make_number(0); + } + + uint32_t number = parseHexDigit(c); + context.file.sbumpc(); + for (c = context.file.sgetc(); isHexDigit(c); c = context.file.sgetc()) { + number = number * 16 + parseHexDigit(c); + context.file.sbumpc(); + } + return yy::parser::make_number(number); + } else if (c == '%') { + c = context.file.sgetc(); + if (!isBinDigit(c)) { + scriptError(context, "No binary digits found after '%%'"); + return yy::parser::make_number(0); + } + + uint32_t number = c - '0'; + context.file.sbumpc(); + for (c = context.file.sgetc(); isBinDigit(c); c = context.file.sgetc()) { + number = number * 2 + (c - '0'); + context.file.sbumpc(); + } + return yy::parser::make_number(number); + } else if (isDecDigit(c)) { + uint32_t number = c - '0'; + for (c = context.file.sgetc(); isDecDigit(c); c = context.file.sgetc()) { + number = number * 10 + (c - '0'); + } + return yy::parser::make_number(number); + } else if (isIdentChar(c)) { // Note that we match these *after* digit characters! + std::string ident; + auto strUpperCmp = [](char cmp, char ref) { + // `locale::classic()` yields the "C" locale. 
+ assert(!std::use_facet>(std::locale::classic()) + .is(std::ctype_base::lower, ref)); + return std::use_facet>(std::locale::classic()) + .toupper(cmp) == ref; + }; + + ident.push_back(c); + for (c = context.file.sgetc(); isIdentChar(c); c = context.file.snextc()) { + ident.push_back(c); + } + + for (SectionType type : EnumSeq(SECTTYPE_INVALID)) { + if (std::equal(ident.begin(), ident.end(), + sectionTypeInfo[type].name.begin(), sectionTypeInfo[type].name.end(), + strUpperCmp)) { + return yy::parser::make_section_type(type); + } + } + + for (Keyword const &keyword : keywords) { + if (std::equal(ident.begin(), ident.end(), + keyword.name.begin(), keyword.name.end(), + strUpperCmp)) { + return keyword.tokenGen(); + } + } + + scriptError(context, "Unknown keyword \"%s\"", ident.c_str()); + goto try_again; // Try lexing another token. + } else { + scriptError(context, "Unexpected character '%s'", printChar(c)); + // Keep reading characters until the EOL, to avoid reporting too many errors. + for (c = context.file.sgetc(); !isNewline(c); c = context.file.sgetc()) { + if (c == EOF) { + break; + } + } + goto try_again; + } + // Not marking as unreachable; this will generate a warning if any codepath forgets to return. +} + +// Semantic actions. + +static std::array, SECTTYPE_INVALID> curAddr; +static SectionType activeType; // Index into curAddr +static uint32_t activeBankIdx; // Index into curAddr[activeType] + +static void setActiveTypeAndIdx(SectionType type, uint32_t idx) { + activeType = type; + activeBankIdx = idx; + if (curAddr[activeType].size() <= activeBankIdx) { + curAddr[activeType].resize(activeBankIdx + 1, sectionTypeInfo[type].startAddr); + } +} + +static void setSectionType(SectionType type) { + auto const &context = lexerStack.back(); + + if (nbbanks(type) != 1) { + scriptError(context, "A bank number must be specified for %s", + sectionTypeInfo[type].name.c_str()); + // Keep going with a default value for the bank index. 
+ } + + setActiveTypeAndIdx(type, 0); // There is only a single bank anyway, so just set the index to 0. +} + +static void setSectionType(SectionType type, uint32_t bank) { + auto const &context = lexerStack.back(); + auto const &typeInfo = sectionTypeInfo[type]; + + if (bank < typeInfo.firstBank) { + scriptError(context, "%s bank %" PRIu32 " doesn't exist, the minimum is %" PRIu32, + typeInfo.name.c_str(), bank, typeInfo.firstBank); + bank = typeInfo.firstBank; + } else if (bank > typeInfo.lastBank) { + scriptError(context, "%s bank %" PRIu32 " doesn't exist, the maximum is %" PRIu32, + typeInfo.name.c_str(), bank, typeInfo.lastBank); + } + + setActiveTypeAndIdx(type, bank - typeInfo.firstBank); +} + +static void setAddr(uint32_t addr) { + auto const &context = lexerStack.back(); + auto &pc = curAddr[activeType][activeBankIdx]; + auto const &typeInfo = sectionTypeInfo[activeType]; + + if (addr < pc) { + scriptError(context, "ORG cannot be used to go backwards (from $%04x to $%04x)", pc, addr); + } else if (addr > endaddr(activeType)) { // Allow "one past the end" sections. + scriptError(context, "Cannot go to $%04" PRIx32 ": %s ends at $%04" PRIx16 "", + addr, typeInfo.name.c_str(), endaddr(activeType)); + pc = endaddr(activeType); + } else { + pc = addr; + } +} + +static void alignTo(uint32_t alignment, uint32_t alignOfs) { + auto const &context = lexerStack.back(); + auto const &typeInfo = sectionTypeInfo[activeType]; + auto &pc = curAddr[activeType][activeBankIdx]; + + // TODO: maybe warn if truncating? + alignOfs %= 1 << alignment; + + assert(pc >= typeInfo.startAddr); + uint16_t length = alignment < 16 ? (uint16_t)(alignOfs - pc) % (1u << alignment) + : alignOfs - pc; // Let it wrap around, this'll trip the check. 
+ if (uint16_t offset = pc - typeInfo.startAddr; length > typeInfo.size - offset) { + scriptError(context, "Cannot align: the next suitable address after $%04" PRIx16 " is $%04" PRIx16 ", past $%04" PRIx16, + pc, (uint16_t)(pc + length), (uint16_t)(endaddr(activeType) + 1)); + } else { + pc += length; + } +} + +static void pad(uint32_t length) { + auto const &context = lexerStack.back(); + auto const &typeInfo = sectionTypeInfo[activeType]; + auto &pc = curAddr[activeType][activeBankIdx]; + + assert(pc >= typeInfo.startAddr); + if (uint16_t offset = pc - typeInfo.startAddr; length + offset > typeInfo.size) { + scriptError(context, "Cannot pad by %u bytes: only %u bytes to $%04" PRIx16, + length, typeInfo.size - offset, (uint16_t)(endaddr(activeType) + 1)); + } else { + pc += length; + } +} + +static void placeSection(std::string const &name) { + auto const &context = lexerStack.back(); + auto const &typeInfo = sectionTypeInfo[activeType]; + + // A type *must* be active. + if (activeType == SECTTYPE_INVALID) { + scriptError(context, "No memory region has been specified to place section \"%s\" in", + name.c_str()); + return; + } + + auto *section = sect_GetSection(name.c_str()); + if (!section) { + scriptError(context, "Unknown section \"%s\"", name.c_str()); + return; + } + + assert(section->offset == 0); + // Check that the linker script doesn't contradict what the code says. + if (section->type == SECTTYPE_INVALID) { + // SDCC areas don't have a type assigned yet, so the linker script is used to give them one. 
+ for (Section *fragment = section; fragment; fragment = fragment->nextu) { + fragment->type = activeType; + } + } else if (section->type != activeType) { + scriptError(context, "\"%s\" is specified to be a %s section, but it is already a %s section", + name.c_str(), typeInfo.name.c_str(), sectionTypeInfo[section->type].name.c_str()); + } + + uint32_t bank = activeBankIdx + typeInfo.firstBank; + if (section->isBankFixed && bank != section->bank) { + scriptError(context, "The linker script places section \"%s\" in %s bank %" PRIu32 ", but it was already defined in bank %" PRIu32, + name.c_str(), sectionTypeInfo[section->type].name.c_str(), bank, section->bank); + } + section->isBankFixed = true; + section->bank = bank; + + uint16_t &org = curAddr[activeType][activeBankIdx]; + if (section->isAddressFixed && org != section->org) { + scriptError(context, "The linker script assigns section \"%s\" to address $%04" PRIx16 ", but it was already at $%04" PRIx16, + name.c_str(), org, section->org); + } else if (section->isAlignFixed && (org & section->alignMask) != section->alignOfs) { + uint8_t alignment = std::countr_one(section->alignMask); + scriptError(context, "The linker script assigns section \"%s\" to address $%04" PRIx16 ", but that would be ALIGN[%" PRIu8 ", %" PRIu16 "] instead of the requested ALIGN[%" PRIu8 ", %" PRIu16 "]", + name.c_str(), org, alignment, (uint16_t)(org & section->alignMask), alignment, section->alignOfs); + } + section->isAddressFixed = true; + section->isAlignFixed = false; // This can't be set when the above is. + section->org = org; + + uint16_t curOfs = org - typeInfo.startAddr; + if (section->size > typeInfo.size - curOfs) { + scriptError(context, "The linker script assigns section \"%s\" to address $%04" PRIx16 ", but then it would overflow %s by %" PRIx16 " bytes", + name.c_str(), org, typeInfo.name.c_str(), + (uint16_t)(section->size - (typeInfo.size - curOfs))); + // Fill as much as possible without going out of bounds. 
+ org = typeInfo.startAddr + typeInfo.size; + } else { + org += section->size; + } +} + +// External API. + +void script_ProcessScript(char const *path) { + activeType = SECTTYPE_INVALID; + + lexerStack.clear(); + atEof = false; + auto &newContext = lexerStack.emplace_back(std::string(path)); + + if (!newContext.file.open(newContext.path, std::ios_base::in)) { + error(NULL, 0, "Could not open linker script \"%s\"", newContext.path.c_str()); + lexerStack.clear(); + } else { + yy::parser linkerScriptParser; + // We don't care about the return value, as any error increments the global error count, + // which is what `main` checks. + (void)linkerScriptParser.parse(); + + // Free up working memory. + for (auto ®ion : curAddr) { + region.clear(); + } + } +} diff --git a/src/link/section.cpp b/src/link/section.cpp index 3308f9b0..4c5565e4 100644 --- a/src/link/section.cpp +++ b/src/link/section.cpp @@ -128,7 +128,8 @@ static void mergeSections(struct Section *target, struct Section *other, enum Se if (target->type != other->type) errx("Section \"%s\" is defined with conflicting types %s and %s", - other->name, sectionTypeInfo[target->type].name, sectionTypeInfo[other->type].name); + other->name, sectionTypeInfo[target->type].name.c_str(), + sectionTypeInfo[other->type].name.c_str()); if (other->isBankFixed) { if (!target->isBankFixed) { @@ -202,7 +203,7 @@ void sect_AddSection(struct Section *section) mergeSections(other, section, section->modifier); } else if (section->modifier == SECTION_UNION && sect_HasData(section->type)) { errx("Section \"%s\" is of type %s, which cannot be unionized", - section->name, sectionTypeInfo[section->type].name); + section->name, sectionTypeInfo[section->type].name.c_str()); } else { // If not, add it hash_AddElement(sections, section->name, section); @@ -254,7 +255,7 @@ static void doSanityChecks(struct Section *section, void *) // Too large an alignment may not be satisfiable if (section->isAlignFixed && (section->alignMask & 
sectionTypeInfo[section->type].startAddr)) error(NULL, 0, "%s: %s sections cannot be aligned to $%04x bytes", - section->name, sectionTypeInfo[section->type].name, section->alignMask + 1); + section->name, sectionTypeInfo[section->type].name.c_str(), section->alignMask + 1); uint32_t minbank = sectionTypeInfo[section->type].firstBank, maxbank = sectionTypeInfo[section->type].lastBank; diff --git a/src/linkdefs.cpp b/src/linkdefs.cpp index 7f1510f6..24bb99e5 100644 --- a/src/linkdefs.cpp +++ b/src/linkdefs.cpp @@ -3,60 +3,62 @@ #include "linkdefs.hpp" #include "platform.hpp" +using namespace std::literals; + // The default values are the most lax, as they are used as-is by RGBASM; only RGBLINK has the full info, // so RGBASM's job is only to catch unconditional errors earlier. struct SectionTypeInfo sectionTypeInfo[SECTTYPE_INVALID] = { AT(SECTTYPE_WRAM0) { - .name = "WRAM0", + .name = "WRAM0"s, .startAddr = 0xC000, .size = 0x2000, // Patched to 0x1000 if !isWRA0Mode .firstBank = 0, .lastBank = 0, }, AT(SECTTYPE_VRAM) { - .name = "VRAM", + .name = "VRAM"s, .startAddr = 0x8000, .size = 0x2000, .firstBank = 0, .lastBank = 1, // Patched to 0 if isDmgMode }, AT(SECTTYPE_ROMX) { - .name = "ROMX", + .name = "ROMX"s, .startAddr = 0x4000, .size = 0x4000, .firstBank = 1, .lastBank = 65535, }, AT(SECTTYPE_ROM0) { - .name = "ROM0", + .name = "ROM0"s, .startAddr = 0x0000, .size = 0x8000, // Patched to 0x4000 if !is32kMode .firstBank = 0, .lastBank = 0, }, AT(SECTTYPE_HRAM) { - .name = "HRAM", + .name = "HRAM"s, .startAddr = 0xFF80, .size = 0x007F, .firstBank = 0, .lastBank = 0, }, AT(SECTTYPE_WRAMX) { - .name = "WRAMX", + .name = "WRAMX"s, .startAddr = 0xD000, .size = 0x1000, .firstBank = 1, .lastBank = 7, }, AT(SECTTYPE_SRAM) { - .name = "SRAM", + .name = "SRAM"s, .startAddr = 0xA000, .size = 0x2000, .firstBank = 0, .lastBank = 255, }, AT(SECTTYPE_OAM) { - .name = "OAM", + .name = "OAM"s, .startAddr = 0xFE00, .size = 0x00A0, .firstBank = 0, diff --git a/src/asm/util.cpp 
b/src/util.cpp similarity index 92% rename from src/asm/util.cpp rename to src/util.cpp index 58fea49a..f7a192ac 100644 --- a/src/asm/util.cpp +++ b/src/util.cpp @@ -2,10 +2,9 @@ #include #include +#include -#include "asm/main.hpp" -#include "asm/util.hpp" -#include "asm/warning.hpp" +#include "util.hpp" #include "extern/utf8decoder.hpp" diff --git a/test/link/linkerscript-escapes-test.out b/test/link/linkerscript-escapes-test.out index 832f3d4c..a3aebf50 100644 --- a/test/link/linkerscript-escapes-test.out +++ b/test/link/linkerscript-escapes-test.out @@ -1 +1,2 @@ -error: ./linkerscript-escapes-test.link(4): Illegal character escape +error: ./linkerscript-escapes-test.link(4): Cannot escape character '{' +Linking failed with 1 error diff --git a/test/link/linkerscript-include.asm b/test/link/linkerscript-include.asm new file mode 100644 index 00000000..fa30dd97 --- /dev/null +++ b/test/link/linkerscript-include.asm @@ -0,0 +1,2 @@ +SECTION "test", ROM0[42] +DB 1, 2, 3, 4, 5 diff --git a/test/link/linkerscript-include.inc b/test/link/linkerscript-include.inc new file mode 100644 index 00000000..8b8441b9 --- /dev/null +++ b/test/link/linkerscript-include.inc @@ -0,0 +1 @@ +"test" diff --git a/test/link/linkerscript-include.link b/test/link/linkerscript-include.link new file mode 100644 index 00000000..068137b4 --- /dev/null +++ b/test/link/linkerscript-include.link @@ -0,0 +1,3 @@ +; This has no newline at the end of the file +ROM0 +INCLUDE "linkerscript-include.inc" \ No newline at end of file diff --git a/test/link/linkerscript-include.out b/test/link/linkerscript-include.out new file mode 100644 index 00000000..b61a1fc0 --- /dev/null +++ b/test/link/linkerscript-include.out @@ -0,0 +1,2 @@ +error: linkerscript-include.inc(1): The linker script assigns section "test" to address $0000, but it was already at $002a +Linking failed with 1 error diff --git a/test/link/section-attributes-mismatch.out b/test/link/section-attributes-mismatch.out index e49983cc..5c2f4fe8 
100644 --- a/test/link/section-attributes-mismatch.out +++ b/test/link/section-attributes-mismatch.out @@ -1,2 +1,2 @@ -error: Linker script contradicts "sec"'s alignment +error: ./section-attributes-mismatch.link(3): The linker script assigns section "sec" to address $0018, but that would be ALIGN[4, 8] instead of the requested ALIGN[4, 0] Linking failed with 1 error