Port linkerscript parser to Bison (#1266)

Notable side effects:
* Use the standard-conformant MSVC preproc
* Add test for linker script INCLUDE
* Improve wording of placement conflict errors
* Fix errors from files that are not newline-terminated
* Teach checkdiff about the linker script doc
* Call linker script "commands" "directives" instead

---------

Co-authored-by: Rangi42 <remy.oukaour+rangi42@gmail.com>
This commit is contained in:
Eldred Habert
2023-12-11 02:29:37 +01:00
committed by GitHub
parent ab30690854
commit fd78a9ae83
28 changed files with 573 additions and 656 deletions

View File

@@ -22,14 +22,20 @@ endif()
set(BISON_FLAGS "${BISON_FLAGS} -Dparse.lac=full")
set(BISON_FLAGS "${BISON_FLAGS} -Dlr.type=ielr")
BISON_TARGET(PARSER "asm/parser.y"
BISON_TARGET(ASM_PARSER "asm/parser.y"
"${PROJECT_SOURCE_DIR}/src/asm/parser.cpp"
COMPILE_FLAGS "${BISON_FLAGS}"
DEFINES_FILE "${PROJECT_SOURCE_DIR}/src/asm/parser.hpp"
)
BISON_TARGET(LINKER_SCRIPT_PARSER "link/script.y"
"${PROJECT_SOURCE_DIR}/src/link/script.cpp"
COMPILE_FLAGS "${BISON_FLAGS}"
DEFINES_FILE "${PROJECT_SOURCE_DIR}/src/link/script.hpp"
)
set(rgbasm_src
"${BISON_PARSER_OUTPUT_SOURCE}"
"${BISON_ASM_PARSER_OUTPUT_SOURCE}"
"asm/charmap.cpp"
"asm/fixpoint.cpp"
"asm/format.cpp"
@@ -42,12 +48,12 @@ set(rgbasm_src
"asm/rpn.cpp"
"asm/section.cpp"
"asm/symbol.cpp"
"asm/util.cpp"
"asm/warning.cpp"
"extern/utf8decoder.cpp"
"hashmap.cpp"
"linkdefs.cpp"
"opmath.cpp"
"util.cpp"
)
set(rgbfix_src
@@ -68,12 +74,12 @@ set(rgbgfx_src
)
set(rgblink_src
"${BISON_LINKER_SCRIPT_PARSER_OUTPUT_SOURCE}"
"link/assign.cpp"
"link/main.cpp"
"link/object.cpp"
"link/output.cpp"
"link/patch.cpp"
"link/script.cpp"
"link/sdas_obj.cpp"
"link/section.cpp"
"link/symbol.cpp"
@@ -81,6 +87,7 @@ set(rgblink_src
"hashmap.cpp"
"linkdefs.cpp"
"opmath.cpp"
"util.cpp"
)
foreach(PROG "asm" "fix" "gfx" "link")

View File

@@ -10,10 +10,10 @@
#include "asm/charmap.hpp"
#include "asm/main.hpp"
#include "asm/output.hpp"
#include "asm/util.hpp"
#include "asm/warning.hpp"
#include "hashmap.hpp"
#include "util.hpp"
// Charmaps are stored using a structure known as "trie".
// Essentially a tree, where each nodes stores a single character's worth of info:

View File

@@ -19,6 +19,7 @@
#endif
#include "platform.hpp" // For `ssize_t` and `AT`
#include "util.hpp"
#include "asm/lexer.hpp"
#include "asm/fixpoint.hpp"
@@ -28,7 +29,6 @@
#include "asm/main.hpp"
#include "asm/rpn.hpp"
#include "asm/symbol.hpp"
#include "asm/util.hpp"
#include "asm/warning.hpp"
// Include this last so it gets all type & constant definitions
#include "parser.hpp" // For token definitions, generated from parser.y

View File

@@ -22,7 +22,7 @@
#include "asm/rpn.hpp"
#include "asm/section.hpp"
#include "asm/symbol.hpp"
#include "asm/util.hpp"
#include "util.hpp"
#include "asm/warning.hpp"
#include "extern/utf8decoder.hpp"

View File

@@ -222,7 +222,7 @@ static void mergeSections(struct Section *sect, enum SectionType type, uint32_t
unsigned int nbSectErrors = 0;
if (type != sect->type)
fail("Section already exists but with type %s\n", sectionTypeInfo[sect->type].name);
fail("Section already exists but with type %s\n", sectionTypeInfo[sect->type].name.c_str());
if (sect->modifier != mod) {
fail("Section already declared as %s section\n", sectionModNames[sect->modifier]);
@@ -314,7 +314,7 @@ static struct Section *getSection(char const *name, enum SectionType type, uint3
error("BANK only allowed for ROMX, WRAMX, SRAM, or VRAM sections\n");
else if (bank < sectionTypeInfo[type].firstBank || bank > sectionTypeInfo[type].lastBank)
error("%s bank value $%04" PRIx32 " out of range ($%04" PRIx32 " to $%04"
PRIx32 ")\n", sectionTypeInfo[type].name, bank,
PRIx32 ")\n", sectionTypeInfo[type].name.c_str(), bank,
sectionTypeInfo[type].firstBank, sectionTypeInfo[type].lastBank);
} else if (nbbanks(type) == 1) {
// If the section type only has a single bank, implicitly force it
@@ -349,7 +349,7 @@ static struct Section *getSection(char const *name, enum SectionType type, uint3
alignment = 0; // Ignore it if it's satisfied
} else if (sectionTypeInfo[type].startAddr & mask) {
error("Section \"%s\"'s alignment cannot be attained in %s\n",
name, sectionTypeInfo[type].name);
name, sectionTypeInfo[type].name.c_str());
alignment = 0; // Ignore it if it's unattainable
org = 0;
} else if (alignment == 16) {

View File

@@ -18,7 +18,7 @@
#include "asm/output.hpp"
#include "asm/section.hpp"
#include "asm/symbol.hpp"
#include "asm/util.hpp"
#include "util.hpp"
#include "asm/warning.hpp"
#include "error.hpp"

3
src/link/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
/script.cpp
/script.hpp
/stack.hh

View File

@@ -298,16 +298,16 @@ static void placeSection(struct Section *section)
// If a section failed to go to several places, nothing we can report
if (!section->isBankFixed || !section->isAddressFixed)
errx("Unable to place \"%s\" (%s section) %s",
section->name, sectionTypeInfo[section->type].name, where);
section->name, sectionTypeInfo[section->type].name.c_str(), where);
// If the section just can't fit the bank, report that
else if (section->org + section->size > endaddr(section->type) + 1)
errx("Unable to place \"%s\" (%s section) %s: section runs past end of region ($%04x > $%04x)",
section->name, sectionTypeInfo[section->type].name, where,
section->name, sectionTypeInfo[section->type].name.c_str(), where,
section->org + section->size, endaddr(section->type) + 1);
// Otherwise there is overlap with another section
else
errx("Unable to place \"%s\" (%s section) %s: section overlaps with \"%s\"",
section->name, sectionTypeInfo[section->type].name, where,
section->name, sectionTypeInfo[section->type].name.c_str(), where,
out_OverlappingSection(section)->name);
}

View File

@@ -29,7 +29,7 @@
#include "version.hpp"
bool isDmgMode; // -d
char *linkerScriptName; // -l
char *linkerScriptName; // -l
char const *mapFileName; // -m
bool noSymInMap; // -M
char const *symFileName; // -n
@@ -45,6 +45,8 @@ bool beVerbose; // -v
bool isWRA0Mode; // -w
bool disablePadding; // -x
FILE *linkerScript;
static uint32_t nbErrors = 0;
// Helper function to dump a file stack to stderr
@@ -460,44 +462,7 @@ int main(int argc, char *argv[])
if (linkerScriptName) {
verbosePrint("Reading linker script...\n");
linkerScript = openFile(linkerScriptName, "r");
// Modify all sections according to the linker script
struct SectionPlacement *placement;
while ((placement = script_NextSection())) {
struct Section *section = placement->section;
assert(section->offset == 0);
// Check if this doesn't conflict with what the code says
if (section->type == SECTTYPE_INVALID) {
for (struct Section *sect = section; sect; sect = sect->nextu)
sect->type = placement->type; // SDCC "unknown" sections
} else if (section->type != placement->type) {
error(NULL, 0, "Linker script contradicts \"%s\"'s type",
section->name);
}
if (section->isBankFixed && placement->bank != section->bank)
error(NULL, 0, "Linker script contradicts \"%s\"'s bank placement",
section->name);
if (section->isAddressFixed && placement->org != section->org)
error(NULL, 0, "Linker script contradicts \"%s\"'s address placement",
section->name);
if (section->isAlignFixed
&& (placement->org & section->alignMask) != 0)
error(NULL, 0, "Linker script contradicts \"%s\"'s alignment",
section->name);
section->isAddressFixed = true;
section->org = placement->org;
section->isBankFixed = true;
section->bank = placement->bank;
section->isAlignFixed = false; // The alignment is satisfied
}
fclose(linkerScript);
script_Cleanup();
script_ProcessScript(linkerScriptName);
// If the linker script produced any errors, some sections may be in an invalid state
if (nbErrors != 0)

View File

@@ -430,7 +430,7 @@ static void writeEmptySpace(uint16_t begin, uint16_t end)
static void writeMapBank(struct SortedSections const *sectList, enum SectionType type,
uint32_t bank)
{
fprintf(mapFile, "\n%s bank #%" PRIu32 ":\n", sectionTypeInfo[type].name,
fprintf(mapFile, "\n%s bank #%" PRIu32 ":\n", sectionTypeInfo[type].name.c_str(),
bank + sectionTypeInfo[type].firstBank);
uint16_t used = 0;
@@ -540,7 +540,7 @@ static void writeMapSummary(void)
}
fprintf(mapFile, "\t%s: %" PRId32 " byte%s used / %" PRId32 " free",
sectionTypeInfo[type].name, usedTotal, usedTotal == 1 ? "" : "s",
sectionTypeInfo[type].name.c_str(), usedTotal, usedTotal == 1 ? "" : "s",
nbBanks * sectionTypeInfo[type].size - usedTotal);
if (sectionTypeInfo[type].firstBank != sectionTypeInfo[type].lastBank
|| nbBanks > 1)

View File

@@ -1,550 +0,0 @@
/* SPDX-License-Identifier: MIT */
#include <ctype.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "link/main.hpp"
#include "link/script.hpp"
#include "link/section.hpp"
#include "error.hpp"
#include "itertools.hpp"
#include "linkdefs.hpp"
#include "platform.hpp"
FILE *linkerScript;
char *includeFileName;
static uint32_t lineNo;
// State of one suspended linker script, as saved by `pushFile` and restored by `popFile`
struct FileNode {
FILE *file; // Stream of the suspended script
uint32_t lineNo; // Line number that was current when the script got suspended
char *name; // Name of the suspended script
};
static struct FileNode *fileStack;
static uint32_t fileStackSize;
static uint32_t fileStackIndex;
static void pushFile(char *newFileName)
{
if (fileStackIndex == UINT32_MAX)
errx("%s(%" PRIu32 "): INCLUDE recursion limit reached",
linkerScriptName, lineNo);
if (fileStackIndex == fileStackSize) {
if (!fileStackSize) // Init file stack
fileStackSize = 4;
fileStackSize *= 2;
fileStack = (struct FileNode *)realloc(fileStack, sizeof(*fileStack) * fileStackSize);
if (!fileStack)
err("%s(%" PRIu32 "): Internal INCLUDE error",
linkerScriptName, lineNo);
}
fileStack[fileStackIndex].file = linkerScript;
fileStack[fileStackIndex].lineNo = lineNo;
fileStack[fileStackIndex].name = linkerScriptName;
fileStackIndex++;
linkerScript = fopen(newFileName, "r");
if (!linkerScript)
err("%s(%" PRIu32 "): Could not open \"%s\"",
linkerScriptName, lineNo, newFileName);
lineNo = 1;
linkerScriptName = newFileName;
}
// Leaves the linker script that just ended, and resumes the one that was suspended
// by the matching `pushFile`.
// The ended script's stream is closed and its name is freed before the saved
// stream, line number and name are restored.
// @return false if the file stack is empty (nothing is changed in that case),
//         true if a suspended script was resumed
static bool popFile(void)
{
    if (!fileStackIndex)
        return false;

    // `pushFile` opened this stream, and nothing else closes it: do it here to avoid leaking it
    fclose(linkerScript);
    free(linkerScriptName);

    fileStackIndex--;
    linkerScript = fileStack[fileStackIndex].file;
    lineNo = fileStack[fileStackIndex].lineNo;
    linkerScriptName = fileStack[fileStackIndex].name;

    return true;
}
// Tells whether `c` is horizontal whitespace, i.e. a space or a tab
static bool isWhiteSpace(int c)
{
    switch (c) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
// Tells whether `c` is one of the line terminator characters (CR or LF)
static bool isNewline(int c)
{
    switch (c) {
    case '\r':
    case '\n':
        return true;
    default:
        return false;
    }
}
/*
 * Try parsing a number, in base 16 if it begins with a dollar,
 * in base 10 otherwise
 * Letters are accepted in either case. Numbers that do not fit in 32 bits are
 * rejected instead of silently wrapping around.
 * @param str The number to parse
 * @param number A pointer where the number will be written to
 *               (only written to if parsing was successful)
 * @return True if parsing was successful, false otherwise
 */
static bool tryParseNumber(char const *str, uint32_t *number)
{
    uint32_t base = 10;

    if (*str == '$') {
        str++;
        base = 16;
    }

    // An empty string is not a number
    if (!*str)
        return false;

    uint32_t value = 0;

    for (; *str; str++) {
        // `toupper` requires its argument to be representable as `unsigned char`
        int chr = toupper((unsigned char)*str);
        uint32_t digit;

        if (chr >= '0' && chr <= '9')
            digit = chr - '0';
        else if (chr >= 'A' && chr <= 'F')
            digit = chr - 'A' + 10;
        else
            return false;

        // Hex letters are not valid decimal digits
        if (digit >= base)
            return false;

        // Refuse numbers for which `value * base + digit` would overflow
        if (value > (UINT32_MAX - digit) / base)
            return false;

        value = value * base + digit;
    }

    *number = value;
    return true;
}
// Kinds of tokens produced by `nextToken`
enum LinkerScriptTokenType {
TOKEN_NEWLINE, // End of a line (LF, CR, or CRLF)
TOKEN_COMMAND, // One of the words listed in `commands`
TOKEN_BANK, // The name of a section type
TOKEN_INCLUDE, // The `INCLUDE` word
TOKEN_NUMBER, // A decimal or `$`-prefixed hexadecimal number
TOKEN_STRING, // A double-quoted string
TOKEN_EOF, // End of the current file
TOKEN_INVALID // Not a real token; also used as the count of valid token types
};
// Human-readable names of the token types, used when reporting unexpected tokens.
// Every valid token type must have a non-NULL name, since these are printed using `%s`.
char const *tokenTypes[TOKEN_INVALID] = {
    AT(TOKEN_NEWLINE) "newline",
    AT(TOKEN_COMMAND) "command",
    AT(TOKEN_BANK) "bank command",
    // An `INCLUDE` after a section name reaches the "Unexpected %s" error like any other token
    AT(TOKEN_INCLUDE) "INCLUDE command",
    AT(TOKEN_NUMBER) "number",
    AT(TOKEN_STRING) "string",
    AT(TOKEN_EOF) "end of file",
};
// Commands that move the current address; see `processCommand` for their exact effect
enum LinkerScriptCommand {
COMMAND_ORG, // Jump to the given address
COMMAND_ALIGN, // Round the address up to a multiple of 2^arg
COMMAND_DS, // Skip the given amount of bytes
COMMAND_INVALID // Not a real command; also used as the count of valid commands
};
// Payload of a token; which member is meaningful depends on the token's type
union LinkerScriptTokenAttr {
enum LinkerScriptCommand command; // For TOKEN_COMMAND
enum SectionType secttype; // For TOKEN_BANK
uint32_t number; // For TOKEN_NUMBER
char *string; // For TOKEN_STRING; heap-allocated by `nextToken`
};
// A token, as returned by `nextToken`
struct LinkerScriptToken {
enum LinkerScriptTokenType type;
union LinkerScriptTokenAttr attr; // Only meaningful for the token types that carry a payload
};
// Spelling of each command, in upper case since `nextToken` upper-cases words before matching.
// Also used to name the command in error messages.
static char const * const commands[COMMAND_INVALID] = {
AT(COMMAND_ORG) "ORG",
AT(COMMAND_ALIGN) "ALIGN",
AT(COMMAND_DS) "DS"
};
// Reads one character from the current linker script.
// @return The character read, or EOF at the end of the file; read errors are reported via `err`
static int nextChar(void)
{
    int const c = getc(linkerScript);

    // Anything but EOF, or an EOF that isn't due to an error, is passed on as-is
    if (c != EOF || !ferror(linkerScript))
        return c;

    err("%s(%" PRIu32 "): Unexpected error in %s",
        linkerScriptName, lineNo, __func__);
    return c;
}
// Reads the next token from the current linker script.
// Leading spaces/tabs and `;` comments are skipped. The returned token is one of:
// end of file, newline (LF, CR or CRLF), double-quoted string (with `\n`, `\r`, `\t`, `\\`
// and `\"` escapes), or a case-insensitive word (command, section type name, `INCLUDE`,
// or number). Malformed input (unterminated string, bad escape, unknown word) is fatal.
// @return A pointer to a static token, which is overwritten by the next call. A string token's
//         buffer is freed by the next call as well, unless the caller has set it to NULL.
static struct LinkerScriptToken *nextToken(void)
{
    static struct LinkerScriptToken token;
    int curchar;

    // If the token has a string, make sure to avoid leaking it
    if (token.type == TOKEN_STRING)
        free(token.attr.string);

    // Skip initial whitespace...
    do
        curchar = nextChar();
    while (isWhiteSpace(curchar));

    // If this is a comment, skip to the end of the line
    if (curchar == ';') {
        do {
            curchar = nextChar();
        } while (!isNewline(curchar) && curchar != EOF);
    }

    if (curchar == EOF) {
        token.type = TOKEN_EOF;
    } else if (isNewline(curchar)) {
        // If we have a newline char, this is a newline token
        token.type = TOKEN_NEWLINE;

        if (curchar == '\r') {
            // Handle CRLF
            curchar = nextChar();
            if (curchar != '\n')
                ungetc(curchar, linkerScript);
        }
    } else if (curchar == '"') {
        // If we have a string start, this is a string
        token.type = TOKEN_STRING;
        token.attr.string = NULL; // Force initial alloc

        size_t size = 0;
        size_t capacity = 16; // Half of the default capacity

        do {
            curchar = nextChar();
            if (curchar == EOF || isNewline(curchar)) {
                errx("%s(%" PRIu32 "): Unterminated string",
                     linkerScriptName, lineNo);
            } else if (curchar == '"') {
                // Quotes force a string termination
                curchar = '\0';
            } else if (curchar == '\\') {
                // Backslashes are escape sequences
                curchar = nextChar();
                if (curchar == EOF || isNewline(curchar))
                    errx("%s(%" PRIu32 "): Unterminated string",
                         linkerScriptName, lineNo);
                else if (curchar == 'n')
                    curchar = '\n';
                else if (curchar == 'r')
                    curchar = '\r';
                else if (curchar == 't')
                    curchar = '\t';
                else if (curchar != '\\' && curchar != '"')
                    errx("%s(%" PRIu32 "): Illegal character escape",
                         linkerScriptName, lineNo);
            }

            if (size >= capacity || token.attr.string == NULL) {
                capacity *= 2;
                token.attr.string = (char *)realloc(token.attr.string, capacity);
                if (!token.attr.string)
                    err("%s: Failed to allocate memory for string",
                        __func__);
            }
            token.attr.string[size++] = curchar;
        } while (curchar);
    } else {
        // This is either a number, command or bank, that is: a word
        char *str = NULL;
        size_t size = 0;
        size_t capacity = 8; // Half of the default capacity

        for (;;) {
            if (size >= capacity || str == NULL) {
                capacity *= 2;
                str = (char *)realloc(str, capacity);
                if (!str)
                    err("%s: Failed to allocate memory for token",
                        __func__);
            }
            // The terminating NUL goes through here as well, which is what ends the loop
            str[size] = toupper(curchar);
            size++;
            if (!curchar)
                break;

            curchar = nextChar();
            if (curchar == EOF) {
                // The end of the file ends the token too; without this, a file whose last
                // line is not newline-terminated would have EOF appended to the word forever.
                // Nothing to put back: the next read will report EOF again.
                curchar = '\0';
            } else if (isWhiteSpace(curchar) || isNewline(curchar) || curchar == ';') {
                // Whitespace, a newline or a comment end the token
                ungetc(curchar, linkerScript);
                curchar = '\0';
            }
        }

        token.type = TOKEN_INVALID;

        // Try to match a command
        for (enum LinkerScriptCommand i : EnumSeq(COMMAND_INVALID)) {
            if (!strcmp(commands[i], str)) {
                token.type = TOKEN_COMMAND;
                token.attr.command = i;
                break;
            }
        }

        if (token.type == TOKEN_INVALID) {
            // Try to match a bank specifier
            for (enum SectionType type : EnumSeq(SECTTYPE_INVALID)) {
                if (!strcmp(sectionTypeInfo[type].name, str)) {
                    token.type = TOKEN_BANK;
                    token.attr.secttype = type;
                    break;
                }
            }
        }

        if (token.type == TOKEN_INVALID) {
            // Try to match an include token
            if (!strcmp("INCLUDE", str))
                token.type = TOKEN_INCLUDE;
        }

        if (token.type == TOKEN_INVALID) {
            // None of the strings matched, do we have a number?
            if (tryParseNumber(str, &token.attr.number))
                token.type = TOKEN_NUMBER;
            else
                errx("%s(%" PRIu32 "): Unknown token \"%s\"",
                     linkerScriptName, lineNo, str);
        }

        free(str);
    }

    return &token;
}
// Applies an address-moving command to `*pc`.
// `ORG` jumps to `arg`, `ALIGN` rounds `*pc` up to a multiple of 2^`arg` (an `arg` of 16
// or more targets address 0), and `DS` advances by `arg` bytes (wrapping on 16 bits).
// Moving to an address lower than the current one is a fatal error.
static void processCommand(enum LinkerScriptCommand command, uint16_t arg, uint16_t *pc)
{
    uint16_t const current = *pc;
    uint16_t target = arg; // What `ORG` asks for; the other commands compute their own

    switch (command) {
    case COMMAND_INVALID:
        unreachable_();

    case COMMAND_ORG:
        break;

    case COMMAND_ALIGN:
        if (arg < 16) {
            uint16_t const mask = (1 << arg) - 1;

            target = (current + mask) & ~mask;
        } else {
            target = 0;
        }
        break;

    case COMMAND_DS:
        target = arg + current;
        break;
    }

    if (target < current)
        errx("%s(%" PRIu32 "): `%s` cannot be used to go backwards (currently at $%x)",
             linkerScriptName, lineNo, commands[command], *pc);
    *pc = target;
}
// States of the state machine implemented by `script_NextSection`
enum LinkerScriptParserState {
PARSER_FIRSTTIME, // Nothing has been initialized yet
PARSER_LINESTART, // Expecting a command, bank, section name, INCLUDE, or an empty line
PARSER_INCLUDE, // After an INCLUDE token
PARSER_LINEEND // Expecting nothing but the end of the line (or of the file)
};
// Part of internal state, but has data that needs to be freed
static uint16_t *curaddr[SECTTYPE_INVALID];
// Put as global to ensure it's initialized only once
static enum LinkerScriptParserState parserState = PARSER_FIRSTTIME;
// Parses the linker script up to and including the next section name, and reports where that
// section is placed by the script.
// The first call initializes the per-bank addresses to the start address of each section type.
// All syntax and placement errors are fatal (reported through `errx`).
// @return A pointer to a static placement (overwritten by the next call) describing the
//         section, its type, bank and address; or NULL once the outermost script has ended
struct SectionPlacement *script_NextSection(void)
{
// All of these are static because they must persist across calls
static struct SectionPlacement placement;
static uint32_t bank; // Bank number, as written in the script
static uint32_t bankID; // Index of that bank into `curaddr[placement.type]`
if (parserState == PARSER_FIRSTTIME) {
lineNo = 1;
// Init PC for all banks
for (enum SectionType i : EnumSeq(SECTTYPE_INVALID)) {
curaddr[i] = (uint16_t *)malloc(sizeof(*curaddr[i]) * nbbanks(i));
for (uint32_t b = 0; b < nbbanks(i); b++)
curaddr[i][b] = sectionTypeInfo[i].startAddr;
}
// No location is active until the script specifies one
placement.type = SECTTYPE_INVALID;
parserState = PARSER_LINESTART;
}
for (;;) {
struct LinkerScriptToken *token = nextToken();
enum LinkerScriptTokenType tokType;
union LinkerScriptTokenAttr attr;
bool hasArg;
uint32_t arg;
// Check that the previous iteration (or call) left the active bank's address within bounds
if (placement.type != SECTTYPE_INVALID) {
if (curaddr[placement.type][bankID] > endaddr(placement.type) + 1)
errx("%s(%" PRIu32 "): Sections would extend past the end of %s ($%04" PRIx16 " > $%04" PRIx16 ")",
linkerScriptName, lineNo, sectionTypeInfo[placement.type].name,
curaddr[placement.type][bankID], endaddr(placement.type));
if (curaddr[placement.type][bankID] < sectionTypeInfo[placement.type].startAddr)
errx("%s(%" PRIu32 "): PC underflowed ($%04" PRIx16 " < $%04" PRIx16 ")",
linkerScriptName, lineNo,
curaddr[placement.type][bankID], sectionTypeInfo[placement.type].startAddr);
}
switch (parserState) {
case PARSER_FIRSTTIME:
unreachable_();
case PARSER_LINESTART:
switch (token->type) {
case TOKEN_INVALID:
unreachable_();
case TOKEN_EOF:
// Resume the including file if any, otherwise the whole script is done
if (!popFile())
return NULL;
parserState = PARSER_LINEEND;
break;
case TOKEN_NUMBER:
errx("%s(%" PRIu32 "): stray number \"%" PRIu32 "\"",
linkerScriptName, lineNo,
token->attr.number);
case TOKEN_NEWLINE:
lineNo++;
break;
// A stray string is a section name
case TOKEN_STRING:
parserState = PARSER_LINEEND;
if (placement.type == SECTTYPE_INVALID)
errx("%s(%" PRIu32 "): Didn't specify a location before the section",
linkerScriptName, lineNo);
placement.section =
sect_GetSection(token->attr.string);
if (!placement.section)
errx("%s(%" PRIu32 "): Unknown section \"%s\"",
linkerScriptName, lineNo,
token->attr.string);
// The section goes at the current address, which then moves past it
placement.org = curaddr[placement.type][bankID];
placement.bank = bank;
curaddr[placement.type][bankID] += placement.section->size;
return &placement;
case TOKEN_COMMAND:
case TOKEN_BANK:
// Save the token before reading the next one, since `nextToken` reuses its storage
tokType = token->type;
attr = token->attr;
token = nextToken();
hasArg = token->type == TOKEN_NUMBER;
// Leaving `arg` uninitialized when `!hasArg` causes GCC to warn
// about its use as an argument to `processCommand`. This cannot
// happen because `hasArg` has to be true, but silence the warning
// anyways. I dislike doing this because it could swallow actual
// errors, but I don't have a choice.
arg = hasArg ? token->attr.number : 0;
if (tokType == TOKEN_COMMAND) {
if (placement.type == SECTTYPE_INVALID)
errx("%s(%" PRIu32 "): Didn't specify a location before the command",
linkerScriptName, lineNo);
if (!hasArg)
errx("%s(%" PRIu32 "): Command specified without an argument",
linkerScriptName, lineNo);
processCommand(attr.command, arg, &curaddr[placement.type][bankID]);
} else { // TOKEN_BANK
placement.type = attr.secttype;
// If there's only one bank,
// specifying the number is optional.
if (!hasArg && nbbanks(placement.type) != 1)
errx("%s(%" PRIu32 "): Didn't specify a bank number",
linkerScriptName, lineNo);
else if (!hasArg)
arg = sectionTypeInfo[placement.type].firstBank;
else if (arg < sectionTypeInfo[placement.type].firstBank)
errx("%s(%" PRIu32 "): specified bank number is too low (%" PRIu32 " < %" PRIu32 ")",
linkerScriptName, lineNo,
arg, sectionTypeInfo[placement.type].firstBank);
else if (arg > sectionTypeInfo[placement.type].lastBank)
errx("%s(%" PRIu32 "): specified bank number is too high (%" PRIu32 " > %" PRIu32 ")",
linkerScriptName, lineNo,
arg, sectionTypeInfo[placement.type].lastBank);
bank = arg;
bankID = arg - sectionTypeInfo[placement.type].firstBank;
}
// If we read a token we shouldn't have...
// (...then it is processed right away as the token that ends the line)
if (token->type != TOKEN_NUMBER)
goto lineend;
break;
case TOKEN_INCLUDE:
parserState = PARSER_INCLUDE;
break;
}
break;
case PARSER_INCLUDE:
if (token->type != TOKEN_STRING)
errx("%s(%" PRIu32 "): Expected a file name after INCLUDE",
linkerScriptName, lineNo);
// Switch to that file
pushFile(token->attr.string);
// The file stack took ownership of the string
token->attr.string = NULL;
parserState = PARSER_LINESTART;
break;
case PARSER_LINEEND:
lineend:
lineNo++;
parserState = PARSER_LINESTART;
if (token->type == TOKEN_EOF) {
if (!popFile())
return NULL;
// Back in the including file, whose INCLUDE line still has to be ended
parserState = PARSER_LINEEND;
} else if (token->type != TOKEN_NEWLINE)
errx("%s(%" PRIu32 "): Unexpected %s at the end of the line",
linkerScriptName, lineNo,
tokenTypes[token->type]);
break;
}
}
}
// Releases the per-bank address arrays of every section type
void script_Cleanup(void)
{
    for (enum SectionType sectType : EnumSeq(SECTTYPE_INVALID)) {
        uint16_t *bankAddrs = curaddr[sectType];

        free(bankAddrs);
    }
}

490
src/link/script.y Normal file
View File

@@ -0,0 +1,490 @@
%language "c++"
%define api.value.type variant
%define api.token.constructor
%code requires {
#include <stdint.h>
#include <string>
#include "linkdefs.hpp"
}
%code {
#include <algorithm>
#include <array>
#include <assert.h>
#include <bit>
#include <cinttypes>
#include <fstream>
#include <locale>
#include <stdio.h>
#include <string_view>
#include <vector>
#include "helpers.hpp"
#include "itertools.hpp"
#include "util.hpp"
#include "link/main.hpp"
#include "link/section.hpp"
using namespace std::literals;
static void includeFile(std::string &&path);
static void incLineNo(void);
static void setSectionType(SectionType type);
static void setSectionType(SectionType type, uint32_t bank);
static void setAddr(uint32_t addr);
static void alignTo(uint32_t alignment, uint32_t offset);
static void pad(uint32_t length);
static void placeSection(std::string const &name);
static yy::parser::symbol_type yylex(void);
// Associates a keyword's spelling with the function that builds the corresponding token.
struct Keyword {
std::string_view name; // Spelling of the keyword; the lexer expects it to contain no lowercase letters.
yy::parser::symbol_type (* tokenGen)(void); // Generates the keyword's token.
};
}
%token YYEOF 0 "end of file"
%token newline
%token ORG "ORG"
INCLUDE "INCLUDE"
ALIGN "ALIGN"
DS "DS"
%code {
// All keywords recognised by the lexer (section type names are matched separately).
// This must be defined after the `%token` declarations, which is what makes Bison generate the `make_*` functions.
static std::array keywords{
Keyword{"ORG"sv, yy::parser::make_ORG},
Keyword{"INCLUDE"sv, yy::parser::make_INCLUDE},
Keyword{"ALIGN"sv, yy::parser::make_ALIGN},
Keyword{"DS"sv, yy::parser::make_DS},
};
}
%token <std::string> string;
%token <uint32_t> number;
%token <SectionType> section_type;
%%
// A linker script is a (possibly empty) sequence of lines.
lines: %empty
| line lines
;
// Every line ends with a `newline` token, and is responsible for incrementing the line number.
line: INCLUDE string newline { includeFile(std::move($2)); } // Note: this additionally increments the line number!
| directive newline { incLineNo(); }
| newline { incLineNo(); }
| error newline { yyerrok; incLineNo(); } // Error recovery.
;
// A directive either selects the active memory region, moves the current address within it, or places a section there.
directive: section_type { setSectionType($1); }
| section_type number { setSectionType($1, $2); }
| ORG number { setAddr($2); }
| ALIGN number { alignTo($2, 0); }
| DS number { pad($2); }
| string { placeSection($1); }
;
%%
// Reports an error through the global `error` function, prefixed with the location
// ("path(line): ") taken from `context`, which must have `path` and `lineNo` members.
// Note that `context` is evaluated twice, and that `fmt` must be a string literal.
#define scriptError(context, fmt, ...) \
::error(NULL, 0, "%s(%" PRIu32 "): " fmt, \
context.path.c_str(), context.lineNo __VA_OPT__(,) __VA_ARGS__)
// Lexer.
// One linker script being lexed; the outermost script and every active INCLUDE get one each.
struct LexerStackEntry {
    std::filebuf file; // Left closed by the constructor; it is up to the creator to open it.
    std::string path; // Path of the script, used when reporting errors.
    uint32_t lineNo; // Number of the line being read, starting at 1.

    // Takes ownership of `path_`.
    // A named rvalue reference is an lvalue, so it must be moved explicitly to avoid a copy.
    explicit LexerStackEntry(std::string &&path_) : file(), path(std::move(path_)), lineNo(1) {}
};
static std::vector<LexerStackEntry> lexerStack;
static bool atEof;
// Called by the generated parser to report a syntax error; the message is attributed
// to the current line of the script currently being lexed.
void yy::parser::error(std::string const &msg) {
    scriptError(lexerStack.back(), "%s", msg.c_str());
}
// Starts lexing the linker script at `path`, on top of the one currently being lexed.
// The including script's line number is incremented in all cases; if the file cannot be opened,
// an error is reported and lexing simply goes on in the including script.
static void includeFile(std::string &&path) {
    // `emplace_back` can invalidate references to the stack's elements!
    // This is why the included script's entry must be created before looking up the includer's.
    auto &included = lexerStack.emplace_back(std::move(path));
    auto &includer = lexerStack[lexerStack.size() - 2];

    bool const opened = included.file.open(included.path, std::ios_base::in) != nullptr;

    // The order is important: report the error, increment the line number, modify the stack!
    if (!opened) {
        scriptError(includer, "Could not open included linker script \"%s\"",
                    included.path.c_str());
    }
    ++includer.lineNo;
    if (!opened) {
        lexerStack.pop_back();
    }
    // Otherwise, the lexer will use the new entry to lex the next token.
}
// Moves on to the next line of the script currently being lexed.
static void incLineNo(void) {
    auto &current = lexerStack.back();

    current.lineNo++;
}
// Tells whether `c` is horizontal whitespace (a space or a tab).
static bool isWhiteSpace(int c) {
    switch (c) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
// Tells whether `c` is a line terminator character (CR or LF).
static bool isNewline(int c) {
    switch (c) {
    case '\r':
    case '\n':
        return true;
    default:
        return false;
    }
}
// Tells whether `c` can be part of an identifier: an ASCII letter or decimal digit.
static bool isIdentChar(int c) {
    if (c >= '0' && c <= '9') {
        return true;
    }
    if (c >= 'a' && c <= 'z') {
        return true;
    }
    return c >= 'A' && c <= 'Z';
}
// Tells whether `c` is a decimal digit.
static bool isDecDigit(int c) {
    bool const outOfRange = c < '0' || c > '9';

    return !outOfRange;
}
// Tells whether `c` is a binary digit.
static bool isBinDigit(int c) {
    bool const outOfRange = c < '0' || c > '1';

    return !outOfRange;
}
// Tells whether `c` is a hexadecimal digit, with letters accepted in either case.
static bool isHexDigit(int c) {
    if (c >= '0' && c <= '9') {
        return true;
    }
    if (c >= 'a' && c <= 'f') {
        return true;
    }
    return c >= 'A' && c <= 'F';
}
// Converts a hexadecimal digit (in either case) to its value.
// `c` must be a hexadecimal digit; anything else hits `unreachable_()`.
static uint8_t parseHexDigit(int c) {
    if ('0' <= c && c <= '9') {
        return c - '0';
    }
    if ('A' <= c && c <= 'F') {
        return c - 'A' + 10;
    }
    if ('a' <= c && c <= 'f') {
        return c - 'a' + 10;
    }
    unreachable_();
}
// Reads the next token from the script on top of the lexer stack.
// Leading spaces/tabs and `;` comments are skipped. When an included script ends, lexing resumes
// in the including one; when the outermost script ends, one extra `newline` token is produced
// before the end-of-file token.
// Tokens are: newlines (LF, CR, CRLF), double-quoted strings (with `\n`, `\r`, `\t`, `\\`, `\"`
// escapes), numbers (decimal, `$` hexadecimal, `%` binary), section type names and keywords
// (both case-insensitive). Lexing errors are reported via `scriptError`, and lexing goes on.
yy::parser::symbol_type yylex(void) {
try_again: // Can't use a `do {} while(0)` loop, otherwise compilers (wrongly) think it can end.
    auto &context = lexerStack.back();
    auto c = context.file.sbumpc();

    // First, skip leading whitespace.
    while (isWhiteSpace(c)) {
        c = context.file.sbumpc();
    }
    // Then, skip a comment if applicable.
    if (c == ';') {
        // The comment may be ended by the end of the file as well (if the last line is not
        // newline-terminated); not checking for it would loop forever.
        while (!isNewline(c) && c != EOF) {
            c = context.file.sbumpc();
        }
    }

    // Alright, what token should we return?
    if (c == EOF) {
        // Basically yywrap().
        if (lexerStack.size() != 1) {
            lexerStack.pop_back();
            goto try_again;
        } else if (!atEof) {
            // Inject a newline at EOF, to avoid errors for files that don't end with one.
            atEof = true;
            return yy::parser::make_newline();
        } else {
            return yy::parser::make_YYEOF();
        }
    } else if (isNewline(c)) {
        // Handle CRLF.
        if (c == '\r' && context.file.sgetc() == '\n') {
            context.file.sbumpc();
        }
        return yy::parser::make_newline();
    } else if (c == '"') {
        std::string str;

        // Characters are peeked at first, so that whatever ends an unterminated string is left
        // in the stream.
        for (c = context.file.sgetc(); c != '"'; c = context.file.sgetc()) {
            if (c == EOF || isNewline(c)) {
                scriptError(context, "Unterminated string");
                break;
            }
            context.file.sbumpc();
            if (c == '\\') {
                c = context.file.sgetc();
                if (c == EOF || isNewline(c)) {
                    scriptError(context, "Unterminated string");
                    break;
                } else if (c == 'n') {
                    c = '\n';
                } else if (c == 'r') {
                    c = '\r';
                } else if (c == 't') {
                    c = '\t';
                } else if (c != '\\' && c != '"') {
                    scriptError(context, "Cannot escape character %s", printChar(c));
                }
                context.file.sbumpc();
            }
            str.push_back(c);
        }
        // Consume the closing quote... but only if there is one! If the string is unterminated,
        // the newline must remain available to be lexed as the token that ends the line.
        if (c == '"') {
            context.file.sbumpc();
        }

        return yy::parser::make_string(std::move(str));
    } else if (c == '$') {
        c = context.file.sgetc();
        if (!isHexDigit(c)) {
            scriptError(context, "No hexadecimal digits found after '$'");
            return yy::parser::make_number(0);
        }

        uint32_t number = parseHexDigit(c);
        context.file.sbumpc();
        for (c = context.file.sgetc(); isHexDigit(c); c = context.file.sgetc()) {
            number = number * 16 + parseHexDigit(c);
            context.file.sbumpc();
        }
        return yy::parser::make_number(number);
    } else if (c == '%') {
        c = context.file.sgetc();
        if (!isBinDigit(c)) {
            scriptError(context, "No binary digits found after '%%'");
            return yy::parser::make_number(0);
        }

        uint32_t number = c - '0';
        context.file.sbumpc();
        for (c = context.file.sgetc(); isBinDigit(c); c = context.file.sgetc()) {
            number = number * 2 + (c - '0');
            context.file.sbumpc();
        }
        return yy::parser::make_number(number);
    } else if (isDecDigit(c)) {
        uint32_t number = c - '0';

        // `snextc` consumes the digit that was just accumulated, then peeks at what follows it.
        // (Only peeking would process the same digit over and over.)
        for (c = context.file.sgetc(); isDecDigit(c); c = context.file.snextc()) {
            number = number * 10 + (c - '0');
        }
        return yy::parser::make_number(number);
    } else if (isIdentChar(c)) { // Note that we match these *after* digit characters!
        std::string ident;
        // Compares a character of the identifier against one of a reference name,
        // which is required not to contain any lowercase letters.
        auto strUpperCmp = [](char cmp, char ref) {
            // `locale::classic()` yields the "C" locale.
            assert(!std::use_facet<std::ctype<char>>(std::locale::classic())
                   .is(std::ctype_base::lower, ref));
            return std::use_facet<std::ctype<char>>(std::locale::classic())
                   .toupper(cmp) == ref;
        };

        ident.push_back(c);
        for (c = context.file.sgetc(); isIdentChar(c); c = context.file.snextc()) {
            ident.push_back(c);
        }

        for (SectionType type : EnumSeq(SECTTYPE_INVALID)) {
            if (std::equal(ident.begin(), ident.end(),
                           sectionTypeInfo[type].name.begin(), sectionTypeInfo[type].name.end(),
                           strUpperCmp)) {
                return yy::parser::make_section_type(type);
            }
        }

        for (Keyword const &keyword : keywords) {
            if (std::equal(ident.begin(), ident.end(),
                           keyword.name.begin(), keyword.name.end(),
                           strUpperCmp)) {
                return keyword.tokenGen();
            }
        }

        scriptError(context, "Unknown keyword \"%s\"", ident.c_str());
        goto try_again; // Try lexing another token.
    } else {
        scriptError(context, "Unexpected character '%s'", printChar(c));
        // Keep reading characters until the EOL, to avoid reporting too many errors.
        // The newline itself (or EOF) is left in the stream, to be lexed normally afterwards.
        for (c = context.file.sgetc(); !isNewline(c); c = context.file.snextc()) {
            if (c == EOF) {
                break;
            }
        }
        goto try_again;
    }
    // Not marking as unreachable; this will generate a warning if any codepath forgets to return.
}
// Semantic actions.
static std::array<std::vector<uint16_t>, SECTTYPE_INVALID> curAddr;
static SectionType activeType; // Index into curAddr
static uint32_t activeBankIdx; // Index into curAddr[activeType]
// Makes bank index `idx` of section type `type` the active location.
// The type's list of per-bank addresses is grown if it doesn't reach that bank yet; banks added
// this way begin at the type's start address.
static void setActiveTypeAndIdx(SectionType type, uint32_t idx) {
    activeType = type;
    activeBankIdx = idx;

    auto &bankAddrs = curAddr[type];

    if (idx >= bankAddrs.size()) {
        bankAddrs.resize(idx + 1, sectionTypeInfo[type].startAddr);
    }
}
// Handles a section type given without a bank number.
// This is only valid for types that have a single bank; for the others, an error is reported,
// but the type is made active anyway, using its first bank.
static void setSectionType(SectionType type) {
    bool const hasSingleBank = nbbanks(type) == 1;

    if (!hasSingleBank) {
        auto const &context = lexerStack.back();

        scriptError(context, "A bank number must be specified for %s",
                    sectionTypeInfo[type].name.c_str());
        // Keep going with a default value for the bank index.
    }

    // If there is only a single bank, then its index can only be 0.
    setActiveTypeAndIdx(type, 0);
}
// Handles a section type given together with a bank number.
// A bank number outside of the type's range is reported as an error, then clamped to the
// nearest valid bank so that processing can go on.
static void setSectionType(SectionType type, uint32_t bank) {
    auto const &context = lexerStack.back();
    auto const &typeInfo = sectionTypeInfo[type];

    if (bank < typeInfo.firstBank) {
        scriptError(context, "%s bank %" PRIu32 " doesn't exist, the minimum is %" PRIu32,
                    typeInfo.name.c_str(), bank, typeInfo.firstBank);
        bank = typeInfo.firstBank;
    } else if (bank > typeInfo.lastBank) {
        scriptError(context, "%s bank %" PRIu32 " doesn't exist, the maximum is %" PRIu32,
                    typeInfo.name.c_str(), bank, typeInfo.lastBank);
        // Clamp here too: the bank index is used to size the list of per-bank addresses,
        // so an arbitrarily large bank number would cause an arbitrarily large allocation.
        bank = typeInfo.lastBank;
    }

    setActiveTypeAndIdx(type, bank - typeInfo.firstBank);
}
static void setAddr(uint32_t addr) {
auto const &context = lexerStack.back();
auto &pc = curAddr[activeType][activeBankIdx];
auto const &typeInfo = sectionTypeInfo[activeType];
if (addr < pc) {
scriptError(context, "ORG cannot be used to go backwards (from $%04x to $%04x)", pc, addr);
} else if (addr > endaddr(activeType)) { // Allow "one past the end" sections.
scriptError(context, "Cannot go to $%04" PRIx32 ": %s ends at $%04" PRIx16 "",
addr, typeInfo.name.c_str(), endaddr(activeType));
pc = endaddr(activeType);
} else {
pc = addr;
}
}
static void alignTo(uint32_t alignment, uint32_t alignOfs) {
auto const &context = lexerStack.back();
auto const &typeInfo = sectionTypeInfo[activeType];
auto &pc = curAddr[activeType][activeBankIdx];
// TODO: maybe warn if truncating?
alignOfs %= 1 << alignment;
assert(pc >= typeInfo.startAddr);
uint16_t length = alignment < 16 ? (uint16_t)(alignOfs - pc) % (1u << alignment)
: alignOfs - pc; // Let it wrap around, this'll trip the check.
if (uint16_t offset = pc - typeInfo.startAddr; length > typeInfo.size - offset) {
scriptError(context, "Cannot align: the next suitable address after $%04" PRIx16 " is $%04" PRIx16 ", past $%04" PRIx16,
pc, (uint16_t)(pc + length), (uint16_t)(endaddr(activeType) + 1));
} else {
pc += length;
}
}
static void pad(uint32_t length) {
auto const &context = lexerStack.back();
auto const &typeInfo = sectionTypeInfo[activeType];
auto &pc = curAddr[activeType][activeBankIdx];
assert(pc >= typeInfo.startAddr);
if (uint16_t offset = pc - typeInfo.startAddr; length + offset > typeInfo.size) {
scriptError(context, "Cannot pad by %u bytes: only %u bytes to $%04" PRIx16,
length, typeInfo.size - offset, (uint16_t)(endaddr(activeType) + 1));
} else {
pc += length;
}
}
// Places the section called `name` at the current address of the active bank, then advances that
// address past the section.
//
// The section's type, bank, and address become fixed to the active ones. Any contradiction with
// constraints the section already carries (type, bank, address, alignment) is reported through
// `scriptError`, as is a section that does not fit in the rest of the region; in the latter case,
// the current address is moved to the end of the region.
// Nothing is modified if no memory region is active, or if no section is called `name`.
static void placeSection(std::string const &name) {
	auto const &context = lexerStack.back();

	// A type *must* be active.
	// This is checked before `activeType` is used as an index, as `SECTTYPE_INVALID` is not a valid one.
	if (activeType == SECTTYPE_INVALID) {
		scriptError(context, "No memory region has been specified to place section \"%s\" in",
		            name.c_str());
		return;
	}
	auto const &typeInfo = sectionTypeInfo[activeType];

	auto *section = sect_GetSection(name.c_str());
	if (!section) {
		scriptError(context, "Unknown section \"%s\"", name.c_str());
		return;
	}

	assert(section->offset == 0);
	// Check that the linker script doesn't contradict what the code says.
	if (section->type == SECTTYPE_INVALID) {
		// SDCC areas don't have a type assigned yet, so the linker script is used to give them one.
		for (Section *fragment = section; fragment; fragment = fragment->nextu) {
			fragment->type = activeType;
		}
	} else if (section->type != activeType) {
		scriptError(context, "\"%s\" is specified to be a %s section, but it is already a %s section",
		            name.c_str(), typeInfo.name.c_str(), sectionTypeInfo[section->type].name.c_str());
	}

	// Banks are tracked as indices relative to the region's first bank; convert back to a bank number.
	uint32_t bank = activeBankIdx + typeInfo.firstBank;
	if (section->isBankFixed && bank != section->bank) {
		scriptError(context, "The linker script places section \"%s\" in %s bank %" PRIu32 ", but it was already defined in bank %" PRIu32,
		            name.c_str(), sectionTypeInfo[section->type].name.c_str(), bank, section->bank);
	}
	section->isBankFixed = true;
	section->bank = bank;

	uint16_t &org = curAddr[activeType][activeBankIdx];
	if (section->isAddressFixed && org != section->org) {
		scriptError(context, "The linker script assigns section \"%s\" to address $%04" PRIx16 ", but it was already at $%04" PRIx16,
		            name.c_str(), org, section->org);
	} else if (section->isAlignFixed && (org & section->alignMask) != section->alignOfs) {
		// The number of trailing one bits in the mask is reported as the alignment.
		uint8_t alignment = std::countr_one(section->alignMask);
		scriptError(context, "The linker script assigns section \"%s\" to address $%04" PRIx16 ", but that would be ALIGN[%" PRIu8 ", %" PRIu16 "] instead of the requested ALIGN[%" PRIu8 ", %" PRIu16 "]",
		            name.c_str(), org, alignment, (uint16_t)(org & section->alignMask), alignment, section->alignOfs);
	}
	section->isAddressFixed = true;
	section->isAlignFixed = false; // This can't be set when the above is.
	section->org = org;

	uint16_t curOfs = org - typeInfo.startAddr;
	if (section->size > typeInfo.size - curOfs) {
		// The excess is a byte count, not an address, so it is printed in decimal.
		scriptError(context, "The linker script assigns section \"%s\" to address $%04" PRIx16 ", but then it would overflow %s by %" PRIu16 " bytes",
		            name.c_str(), org, typeInfo.name.c_str(),
		            (uint16_t)(section->size - (typeInfo.size - curOfs)));
		// Fill as much as possible without going out of bounds.
		org = typeInfo.startAddr + typeInfo.size;
	} else {
		org += section->size;
	}
}
// External API.
void script_ProcessScript(char const *path) {
activeType = SECTTYPE_INVALID;
lexerStack.clear();
atEof = false;
auto &newContext = lexerStack.emplace_back(std::string(path));
if (!newContext.file.open(newContext.path, std::ios_base::in)) {
error(NULL, 0, "Could not open linker script \"%s\"", newContext.path.c_str());
lexerStack.clear();
} else {
yy::parser linkerScriptParser;
// We don't care about the return value, as any error increments the global error count,
// which is what `main` checks.
(void)linkerScriptParser.parse();
// Free up working memory.
for (auto &region : curAddr) {
region.clear();
}
}
}

View File

@@ -128,7 +128,8 @@ static void mergeSections(struct Section *target, struct Section *other, enum Se
if (target->type != other->type)
errx("Section \"%s\" is defined with conflicting types %s and %s",
other->name, sectionTypeInfo[target->type].name, sectionTypeInfo[other->type].name);
other->name, sectionTypeInfo[target->type].name.c_str(),
sectionTypeInfo[other->type].name.c_str());
if (other->isBankFixed) {
if (!target->isBankFixed) {
@@ -202,7 +203,7 @@ void sect_AddSection(struct Section *section)
mergeSections(other, section, section->modifier);
} else if (section->modifier == SECTION_UNION && sect_HasData(section->type)) {
errx("Section \"%s\" is of type %s, which cannot be unionized",
section->name, sectionTypeInfo[section->type].name);
section->name, sectionTypeInfo[section->type].name.c_str());
} else {
// If not, add it
hash_AddElement(sections, section->name, section);
@@ -254,7 +255,7 @@ static void doSanityChecks(struct Section *section, void *)
// Too large an alignment may not be satisfiable
if (section->isAlignFixed && (section->alignMask & sectionTypeInfo[section->type].startAddr))
error(NULL, 0, "%s: %s sections cannot be aligned to $%04x bytes",
section->name, sectionTypeInfo[section->type].name, section->alignMask + 1);
section->name, sectionTypeInfo[section->type].name.c_str(), section->alignMask + 1);
uint32_t minbank = sectionTypeInfo[section->type].firstBank, maxbank = sectionTypeInfo[section->type].lastBank;

View File

@@ -3,60 +3,62 @@
#include "linkdefs.hpp"
#include "platform.hpp"
using namespace std::literals;
// The default values are the most lax, as they are used as-is by RGBASM; only RGBLINK has the full info,
// so RGBASM's job is only to catch unconditional errors earlier.
struct SectionTypeInfo sectionTypeInfo[SECTTYPE_INVALID] = {
AT(SECTTYPE_WRAM0) {
.name = "WRAM0",
.name = "WRAM0"s,
.startAddr = 0xC000,
.size = 0x2000, // Patched to 0x1000 if !isWRA0Mode
.firstBank = 0,
.lastBank = 0,
},
AT(SECTTYPE_VRAM) {
.name = "VRAM",
.name = "VRAM"s,
.startAddr = 0x8000,
.size = 0x2000,
.firstBank = 0,
.lastBank = 1, // Patched to 0 if isDmgMode
},
AT(SECTTYPE_ROMX) {
.name = "ROMX",
.name = "ROMX"s,
.startAddr = 0x4000,
.size = 0x4000,
.firstBank = 1,
.lastBank = 65535,
},
AT(SECTTYPE_ROM0) {
.name = "ROM0",
.name = "ROM0"s,
.startAddr = 0x0000,
.size = 0x8000, // Patched to 0x4000 if !is32kMode
.firstBank = 0,
.lastBank = 0,
},
AT(SECTTYPE_HRAM) {
.name = "HRAM",
.name = "HRAM"s,
.startAddr = 0xFF80,
.size = 0x007F,
.firstBank = 0,
.lastBank = 0,
},
AT(SECTTYPE_WRAMX) {
.name = "WRAMX",
.name = "WRAMX"s,
.startAddr = 0xD000,
.size = 0x1000,
.firstBank = 1,
.lastBank = 7,
},
AT(SECTTYPE_SRAM) {
.name = "SRAM",
.name = "SRAM"s,
.startAddr = 0xA000,
.size = 0x2000,
.firstBank = 0,
.lastBank = 255,
},
AT(SECTTYPE_OAM) {
.name = "OAM",
.name = "OAM"s,
.startAddr = 0xFE00,
.size = 0x00A0,
.firstBank = 0,

View File

@@ -2,10 +2,9 @@
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include "asm/main.hpp"
#include "asm/util.hpp"
#include "asm/warning.hpp"
#include "util.hpp"
#include "extern/utf8decoder.hpp"