Split up the linkerscript lexer and layout actions

This commit is contained in:
Rangi42
2025-07-27 13:03:28 -04:00
parent a353637a90
commit 16e16cdf51
15 changed files with 448 additions and 433 deletions

View File

@@ -57,6 +57,7 @@ set(rgbasm_src
set(rgblink_src
"${BISON_LINKER_SCRIPT_PARSER_OUTPUT_SOURCE}"
"link/assign.cpp"
"link/lexer.cpp"
"link/layout.cpp"
"link/main.cpp"
"link/object.cpp"

View File

@@ -74,6 +74,7 @@ static void printDiag(
}
vfprintf(stderr, fmt, args);
putc('\n', stderr);
lexer_DumpStringExpansions();
}

View File

@@ -2,354 +2,17 @@
#include "link/layout.hpp"
#include <algorithm>
#include <array>
#include <bit>
#include <ctype.h>
#include <inttypes.h>
#include <stdio.h>
#include <string_view>
#include <vector>
#include "helpers.hpp"
#include "itertools.hpp"
#include "util.hpp"
#include "link/lexer.hpp" // lexer_Error
#include "link/section.hpp"
#include "link/warning.hpp"
// Include this last so it gets all type & constant definitions
#include "script.hpp" // For token definitions, generated from script.y
/******************** Lexer ********************/
// Stack of linker scripts being lexed: `lexer_Init` pushes the first entry, `lexer_IncludeFile`
// pushes one per included file, and the entry at the back is the one tokens are read from.
static std::vector<LexerStackEntry> lexerStack;
// Set by `yywrap` once it has injected a newline token at an end of file.
static bool atEof = false;
// Returns the entry on top of the lexer stack, i.e. the file currently being lexed.
// The stack must not be empty.
LexerStackEntry &lexer_Context() {
	LexerStackEntry &top = lexerStack.back();
	return top;
}
// Pushes `path` onto the lexer stack so that the next tokens are read from it.
// If the file cannot be opened, an error is reported against the including file and the new
// entry is discarded. Either way, the including file's line number is incremented.
void lexer_IncludeFile(std::string &&path) {
	// Growing the vector may reallocate it, so the reference to the previous entry can only
	// be taken *after* the new entry has been added.
	LexerStackEntry &newContext = lexerStack.emplace_back(std::move(path));
	LexerStackEntry &prevContext = lexerStack[lexerStack.size() - 2];

	if (newContext.file.open(newContext.path, std::ios_base::in)) {
		// The lexer will use the new entry to lex the next token.
		++prevContext.lineNo;
		return;
	}

	// The order matters here: the error must show the line of the `INCLUDE` itself, and
	// `newContext.path` is only usable until the entry gets popped.
	scriptError(
	    prevContext, "Failed to open included linker script \"%s\"", newContext.path.c_str()
	);
	++prevContext.lineNo;
	lexerStack.pop_back();
}
void lexer_IncLineNo() {
++lexerStack.back().lineNo;
}
// Tells whether `c` is horizontal blank space (a space or a tab); newlines do not count.
static bool isWhiteSpace(int c) {
	switch (c) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
// Tells whether `c` is one of the line terminator characters (CR or LF).
static bool isNewline(int c) {
	switch (c) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
yy::parser::symbol_type yylex(); // Forward declaration for `yywrap`

// Called by `yylex` upon reaching the end of the file currently being lexed.
// The first call for a given file yields a newline token, and the next one either resumes
// lexing the including file, or yields the end-of-input token if there is none.
static yy::parser::symbol_type yywrap() {
	if (!atEof) {
		// Inject a newline at EOF to simplify parsing.
		atEof = true;
		return yy::parser::make_newline();
	}

	if (lexerStack.size() != 1) {
		lexerStack.pop_back();
		// The file being returned to has not reached its own EOF yet; without this reset,
		// only the first file to end would ever get its newline injected.
		atEof = false;
		return yylex();
	}

	return yy::parser::make_YYEOF();
}
// Tells whether `c` can be part of an identifier: an ASCII letter or a decimal digit.
static bool isIdentChar(int c) {
	if ('0' <= c && c <= '9') {
		return true;
	}
	if ('A' <= c && c <= 'Z') {
		return true;
	}
	return 'a' <= c && c <= 'z';
}
// Reads the rest of an identifier whose first character `c` has already been consumed.
// The character that ends the identifier is left unread.
static std::string readIdent(int c) {
	std::filebuf &file = lexerStack.back().file;

	std::string ident(1, static_cast<char>(c));
	// Peek first, and only consume the character once it's known to belong to the identifier.
	while (isIdentChar(c = file.sgetc())) {
		ident.push_back(c);
		file.sbumpc();
	}
	return ident;
}
// Tells whether `c` is a decimal digit ('0' through '9').
static bool isDecDigit(int c) {
	return !(c < '0' || c > '9');
}
// Reads the rest of a decimal number whose first digit `c` has already been consumed, and
// returns it as a number token. Underscores between digits are skipped.
static yy::parser::symbol_type parseDecNumber(int c) {
	std::filebuf &file = lexerStack.back().file;

	uint32_t value = c - '0';
	for (;;) {
		int next = file.sgetc();
		if (next == '_') {
			// Digit separator: consume it, but it does not contribute to the value.
			file.sbumpc();
			continue;
		}
		if (!isDecDigit(next)) {
			break; // Leave the terminating character unread.
		}
		value = value * 10 + (next - '0');
		file.sbumpc();
	}
	return yy::parser::make_number(value);
}
// Tells whether `c` is a binary digit ('0' or '1').
static bool isBinDigit(int c) {
	return c == '0' || c == '1';
}
// Reads a binary number whose `prefix` has already been consumed, and returns it as a number
// token. Underscores after the first digit are skipped. If no digit follows the prefix, an
// error is reported and the number 0 is returned.
static yy::parser::symbol_type parseBinNumber(char const *prefix) {
	LexerStackEntry &context = lexerStack.back();
	std::filebuf &file = context.file;

	int c = file.sgetc();
	if (!isBinDigit(c)) {
		scriptError(context, "No binary digits found after '%s'", prefix);
		return yy::parser::make_number(0);
	}

	uint32_t value = 0;
	do {
		// `c` has only been peeked at this point, and is either a digit or a separator.
		if (c != '_') {
			value = value * 2 + (c - '0');
		}
		file.sbumpc();
		c = file.sgetc();
	} while (isBinDigit(c) || c == '_');
	return yy::parser::make_number(value);
}
// Tells whether `c` is an octal digit ('0' through '7').
static bool isOctDigit(int c) {
	return !(c < '0' || c > '7');
}
// Reads an octal number whose `prefix` has already been consumed, and returns it as a number
// token. Underscores after the first digit are skipped. If no digit follows the prefix, an
// error is reported and the number 0 is returned.
static yy::parser::symbol_type parseOctNumber(char const *prefix) {
	LexerStackEntry &context = lexerStack.back();
	std::filebuf &file = context.file;

	int c = file.sgetc();
	if (!isOctDigit(c)) {
		scriptError(context, "No octal digits found after '%s'", prefix);
		return yy::parser::make_number(0);
	}

	uint32_t value = 0;
	do {
		// `c` has only been peeked at this point, and is either a digit or a separator.
		if (c != '_') {
			value = value * 8 + (c - '0');
		}
		file.sbumpc();
		c = file.sgetc();
	} while (isOctDigit(c) || c == '_');
	return yy::parser::make_number(value);
}
// Tells whether `c` is a hexadecimal digit, in either letter case.
static bool isHexDigit(int c) {
	if ('0' <= c && c <= '9') {
		return true;
	}
	if ('A' <= c && c <= 'F') {
		return true;
	}
	return 'a' <= c && c <= 'f';
}
// Converts a hexadecimal digit character to its value (0 to 15).
// `c` must be a hexadecimal digit; any other input reaches `unreachable_()`.
static uint8_t parseHexDigit(int c) {
	if ('0' <= c && c <= '9') {
		return c - '0';
	}
	if ('A' <= c && c <= 'F') {
		return c - 'A' + 10;
	}
	if ('a' <= c && c <= 'f') {
		return c - 'a' + 10;
	}
	unreachable_(); // LCOV_EXCL_LINE
}
// Reads a hexadecimal number whose `prefix` has already been consumed, and returns it as a
// number token. Underscores after the first digit are skipped. If no digit follows the prefix,
// an error is reported and the number 0 is returned.
static yy::parser::symbol_type parseHexNumber(char const *prefix) {
	LexerStackEntry &context = lexerStack.back();
	std::filebuf &file = context.file;

	int c = file.sgetc();
	if (!isHexDigit(c)) {
		scriptError(context, "No hexadecimal digits found after '%s'", prefix);
		return yy::parser::make_number(0);
	}

	uint32_t value = 0;
	do {
		// `c` has only been peeked at this point, and is either a digit or a separator.
		if (c != '_') {
			value = value * 16 + parseHexDigit(c);
		}
		file.sbumpc();
		c = file.sgetc();
	} while (isHexDigit(c) || c == '_');
	return yy::parser::make_number(value);
}
// Reads a number whose first digit `c` has already been consumed, and returns it as a number
// token. A leading '0' followed by 'x', 'o' or 'b' (in either case) selects hexadecimal, octal
// or binary respectively; anything else is read as decimal.
static yy::parser::symbol_type parseNumber(int c) {
	if (c == '0') {
		std::filebuf &file = lexerStack.back().file;
		// The radix character is only consumed if it is recognized.
		int radix = file.sgetc();
		if (radix == 'x' || radix == 'X') {
			file.sbumpc();
			return parseHexNumber(radix == 'x' ? "0x" : "0X");
		}
		if (radix == 'o' || radix == 'O') {
			file.sbumpc();
			return parseOctNumber(radix == 'o' ? "0o" : "0O");
		}
		if (radix == 'b' || radix == 'B') {
			file.sbumpc();
			return parseBinNumber(radix == 'b' ? "0b" : "0B");
		}
	}
	return parseDecNumber(c);
}
// Reads a string literal whose opening quote has already been consumed, and returns it as a
// string token. A string cannot span lines: if the line or file ends before the closing quote,
// an error is reported and what was read so far is returned, leaving the newline unread.
static yy::parser::symbol_type parseString() {
	LexerStackEntry &context = lexerStack.back();
	std::filebuf &file = context.file;

	std::string str;
	for (;;) {
		int c = file.sgetc();
		if (c == '"') {
			// Consume the closing quote.
			file.sbumpc();
			break;
		}
		if (c == EOF || isNewline(c)) {
			scriptError(context, "Unterminated string");
			break;
		}
		file.sbumpc();

		if (c == '\\') {
			c = file.sgetc();
			if (c == EOF || isNewline(c)) {
				scriptError(context, "Unterminated string");
				break;
			}
			switch (c) {
			case 'n':
				c = '\n';
				break;
			case 'r':
				c = '\r';
				break;
			case 't':
				c = '\t';
				break;
			case '0':
				c = '\0';
				break;
			case '\\':
			case '"':
			case '\'':
				// These escape to themselves.
				break;
			default:
				// After reporting it, the character is kept as-is.
				scriptError(context, "Cannot escape character %s", printChar(c));
				break;
			}
			file.sbumpc();
		}
		str.push_back(c);
	}
	return yy::parser::make_string(std::move(str));
}
struct Keyword {
std::string_view name;
yy::parser::symbol_type (*tokenGen)();
};
using namespace std::literals;
static std::array keywords{
Keyword{"ORG"sv, yy::parser::make_ORG },
Keyword{"FLOATING"sv, yy::parser::make_FLOATING},
Keyword{"INCLUDE"sv, yy::parser::make_INCLUDE },
Keyword{"ALIGN"sv, yy::parser::make_ALIGN },
Keyword{"DS"sv, yy::parser::make_DS },
Keyword{"OPTIONAL"sv, yy::parser::make_OPTIONAL},
};
// Reads and returns the next token from the file on top of the lexer stack.
// Lexing errors are reported, and lexing then resumes in order to return a token anyway.
yy::parser::symbol_type yylex() {
	LexerStackEntry &context = lexerStack.back();
	std::filebuf &input = context.file;

	// Fetch the first character that isn't blank.
	int c;
	do {
		c = input.sbumpc();
	} while (isWhiteSpace(c));

	// A comment runs until the end of the line (or of the file), which ends up in `c`.
	if (c == ';') {
		do {
			c = input.sbumpc();
		} while (c != EOF && !isNewline(c));
	}

	if (c == EOF) {
		return yywrap();
	}
	if (isNewline(c)) {
		// Treat CRLF as a single newline.
		if (c == '\r' && input.sgetc() == '\n') {
			input.sbumpc();
		}
		return yy::parser::make_newline();
	}

	switch (c) {
	case ',':
		return yy::parser::make_COMMA();
	case '"':
		return parseString();
	case '$':
		return parseHexNumber("$");
	case '%':
		return parseBinNumber("%");
	case '&':
		return parseOctNumber("&");
	default:
		break;
	}

	// Digits are identifier characters too, so numbers have to be checked for first!
	if (isDecDigit(c)) {
		return parseNumber(c);
	}

	if (!isIdentChar(c)) {
		scriptError(context, "Unexpected character %s", printChar(c));
		// Discard the rest of the line (but not its newline), to avoid reporting too many errors.
		for (c = input.sgetc(); c != EOF && !isNewline(c); c = input.sgetc()) {
			input.sbumpc();
		}
		return yylex();
	}

	std::string ident = readIdent(c);
	// Each character of the identifier is uppercased before being compared to the candidate name.
	auto strUpperCmp = [](char cmp, char ref) { return toupper(cmp) == ref; };

	// Section type names are looked up before keywords.
	for (SectionType type : EnumSeq(SECTTYPE_INVALID)) {
		if (std::equal(RANGE(ident), RANGE(sectionTypeInfo[type].name), strUpperCmp)) {
			return yy::parser::make_sect_type(type);
		}
	}
	for (Keyword const &keyword : keywords) {
		if (std::equal(RANGE(ident), RANGE(keyword.name), strUpperCmp)) {
			return keyword.tokenGen();
		}
	}

	// Skip the unknown identifier, and return whichever token comes after it.
	scriptError(context, "Unknown keyword \"%s\"", ident.c_str());
	return yylex();
}
bool lexer_Init(char const *linkerScriptName) {
if (LexerStackEntry &newContext = lexerStack.emplace_back(std::string(linkerScriptName));
!newContext.file.open(newContext.path, std::ios_base::in)) {
error("Failed to open linker script \"%s\"", linkerScriptName);
lexerStack.clear();
return false;
}
return true;
}
/******************** Semantic actions ********************/
// Current address within each bank: the outer index is a section type, and the inner one is
// a bank index (`curAddr[activeType][activeBankIdx]`).
static std::array<std::vector<uint16_t>, SECTTYPE_INVALID> curAddr;
// Section type of the active memory region, or `SECTTYPE_INVALID` while no region is active.
static SectionType activeType = SECTTYPE_INVALID; // Index into curAddr
@@ -383,12 +46,8 @@ void layout_SetFloatingSectionType(SectionType type) {
}
void layout_SetSectionType(SectionType type) {
LexerStackEntry const &context = lexerStack.back();
if (nbbanks(type) != 1) {
scriptError(
context, "A bank number must be specified for %s", sectionTypeInfo[type].name.c_str()
);
lexer_Error("A bank number must be specified for %s", sectionTypeInfo[type].name.c_str());
// Keep going with a default value for the bank index.
}
@@ -396,12 +55,10 @@ void layout_SetSectionType(SectionType type) {
}
void layout_SetSectionType(SectionType type, uint32_t bank) {
LexerStackEntry const &context = lexerStack.back();
SectionTypeInfo const &typeInfo = sectionTypeInfo[type];
if (bank < typeInfo.firstBank) {
scriptError(
context,
lexer_Error(
"%s bank %" PRIu32 " doesn't exist (the minimum is %" PRIu32 ")",
typeInfo.name.c_str(),
bank,
@@ -409,8 +66,7 @@ void layout_SetSectionType(SectionType type, uint32_t bank) {
);
bank = typeInfo.firstBank;
} else if (bank > typeInfo.lastBank) {
scriptError(
context,
lexer_Error(
"%s bank %" PRIu32 " doesn't exist (the maximum is %" PRIu32 ")",
typeInfo.name.c_str(),
bank,
@@ -422,13 +78,12 @@ void layout_SetSectionType(SectionType type, uint32_t bank) {
}
void layout_SetAddr(uint32_t addr) {
LexerStackEntry const &context = lexerStack.back();
if (activeType == SECTTYPE_INVALID) {
scriptError(context, "Cannot set the current address: no memory region is active");
lexer_Error("Cannot set the current address: no memory region is active");
return;
}
if (activeBankIdx == UINT32_MAX) {
scriptError(context, "Cannot set the current address: the bank is floating");
lexer_Error("Cannot set the current address: the bank is floating");
return;
}
@@ -436,11 +91,10 @@ void layout_SetAddr(uint32_t addr) {
SectionTypeInfo const &typeInfo = sectionTypeInfo[activeType];
if (addr < pc) {
scriptError(context, "Cannot decrease the current address (from $%04x to $%04x)", pc, addr);
lexer_Error("Cannot decrease the current address (from $%04x to $%04x)", pc, addr);
} else if (addr > endaddr(activeType)) { // Allow "one past the end" sections.
scriptError(
context,
"Cannot set the current address to $%04" PRIx32 ": %s ends at $%04" PRIx16 "",
lexer_Error(
"Cannot set the current address to $%04" PRIx32 ": %s ends at $%04" PRIx16,
addr,
typeInfo.name.c_str(),
endaddr(activeType)
@@ -453,11 +107,8 @@ void layout_SetAddr(uint32_t addr) {
}
void layout_MakeAddrFloating() {
LexerStackEntry const &context = lexerStack.back();
if (activeType == SECTTYPE_INVALID) {
scriptError(
context, "Cannot make the current address floating: no memory region is active"
);
lexer_Error("Cannot make the current address floating: no memory region is active");
return;
}
@@ -467,9 +118,8 @@ void layout_MakeAddrFloating() {
}
void layout_AlignTo(uint32_t alignment, uint32_t alignOfs) {
LexerStackEntry const &context = lexerStack.back();
if (activeType == SECTTYPE_INVALID) {
scriptError(context, "Cannot align: no memory region is active");
lexer_Error("Cannot align: no memory region is active");
return;
}
@@ -480,8 +130,7 @@ void layout_AlignTo(uint32_t alignment, uint32_t alignOfs) {
uint32_t alignSize = 1u << alignment;
if (alignOfs >= alignSize) {
scriptError(
context,
lexer_Error(
"Cannot align: The alignment offset (%" PRIu32
") must be less than alignment size (%" PRIu32 ")",
alignOfs,
@@ -500,9 +149,7 @@ void layout_AlignTo(uint32_t alignment, uint32_t alignOfs) {
uint16_t &pc = curAddr[activeType][activeBankIdx];
if (alignment > 16) {
scriptError(
context, "Cannot align: The alignment (%" PRIu32 ") must be less than 16", alignment
);
lexer_Error("Cannot align: The alignment (%" PRIu32 ") must be less than 16", alignment);
return;
}
@@ -513,8 +160,7 @@ void layout_AlignTo(uint32_t alignment, uint32_t alignOfs) {
uint32_t alignSize = 1u << alignment;
if (alignOfs >= alignSize) {
scriptError(
context,
lexer_Error(
"Cannot align: The alignment offset (%" PRIu32
") must be less than alignment size (%" PRIu32 ")",
alignOfs,
@@ -528,8 +174,7 @@ void layout_AlignTo(uint32_t alignment, uint32_t alignOfs) {
}
if (uint16_t offset = pc - typeInfo.startAddr; length > typeInfo.size - offset) {
scriptError(
context,
lexer_Error(
"Cannot align: the next suitable address after $%04" PRIx16 " is $%04" PRIx16
", past $%04" PRIx16,
pc,
@@ -543,9 +188,8 @@ void layout_AlignTo(uint32_t alignment, uint32_t alignOfs) {
}
void layout_Pad(uint32_t length) {
LexerStackEntry const &context = lexerStack.back();
if (activeType == SECTTYPE_INVALID) {
scriptError(context, "Cannot increase the current address: no memory region is active");
lexer_Error("Cannot increase the current address: no memory region is active");
return;
}
@@ -559,8 +203,7 @@ void layout_Pad(uint32_t length) {
assume(pc >= typeInfo.startAddr);
if (uint16_t offset = pc - typeInfo.startAddr; length + offset > typeInfo.size) {
scriptError(
context,
lexer_Error(
"Cannot increase the current address by %u bytes: only %u bytes to $%04" PRIx16,
length,
typeInfo.size - offset,
@@ -572,18 +215,15 @@ void layout_Pad(uint32_t length) {
}
void layout_PlaceSection(std::string const &name, bool isOptional) {
LexerStackEntry const &context = lexerStack.back();
if (activeType == SECTTYPE_INVALID) {
scriptError(
context, "No memory region has been specified to place section \"%s\" in", name.c_str()
);
lexer_Error("No memory region has been specified to place section \"%s\" in", name.c_str());
return;
}
Section *section = sect_GetSection(name.c_str());
if (!section) {
if (!isOptional) {
scriptError(context, "Unknown section \"%s\"", name.c_str());
lexer_Error("Unknown section \"%s\"", name.c_str());
}
return;
}
@@ -594,8 +234,7 @@ void layout_PlaceSection(std::string const &name, bool isOptional) {
if (section->type == SECTTYPE_INVALID) {
// A section that has data must get assigned a type that requires data.
if (!sect_HasData(activeType) && !section->data.empty()) {
scriptError(
context,
lexer_Error(
"\"%s\" is specified to be a %s section, but it contains data",
name.c_str(),
typeInfo.name.c_str()
@@ -603,8 +242,7 @@ void layout_PlaceSection(std::string const &name, bool isOptional) {
} else if (sect_HasData(activeType) && section->data.empty() && section->size != 0) {
// A section that lacks data can only be assigned to a type that requires data
// if it's empty.
scriptError(
context,
lexer_Error(
"\"%s\" is specified to be a %s section, but it doesn't contain data",
name.c_str(),
typeInfo.name.c_str()
@@ -617,8 +255,7 @@ void layout_PlaceSection(std::string const &name, bool isOptional) {
}
}
} else if (section->type != activeType) {
scriptError(
context,
lexer_Error(
"\"%s\" is specified to be a %s section, but it is already a %s section",
name.c_str(),
typeInfo.name.c_str(),
@@ -631,8 +268,7 @@ void layout_PlaceSection(std::string const &name, bool isOptional) {
} else {
uint32_t bank = activeBankIdx + typeInfo.firstBank;
if (section->isBankFixed && bank != section->bank) {
scriptError(
context,
lexer_Error(
"The linker script places section \"%s\" in %s bank %" PRIu32
", but it was already defined in bank %" PRIu32,
name.c_str(),
@@ -648,8 +284,7 @@ void layout_PlaceSection(std::string const &name, bool isOptional) {
if (!isPcFloating) {
uint16_t &org = curAddr[activeType][activeBankIdx];
if (section->isAddressFixed && org != section->org) {
scriptError(
context,
lexer_Error(
"The linker script assigns section \"%s\" to address $%04" PRIx16
", but it was already at $%04" PRIx16,
name.c_str(),
@@ -658,8 +293,7 @@ void layout_PlaceSection(std::string const &name, bool isOptional) {
);
} else if (section->isAlignFixed && (org & section->alignMask) != section->alignOfs) {
uint8_t alignment = std::countr_one(section->alignMask);
scriptError(
context,
lexer_Error(
"The linker script assigns section \"%s\" to address $%04" PRIx16
", but that would be ALIGN[%" PRIu8 ", %" PRIu16
"] instead of the requested ALIGN[%" PRIu8 ", %" PRIu16 "]",
@@ -678,8 +312,7 @@ void layout_PlaceSection(std::string const &name, bool isOptional) {
uint16_t curOfs = org - typeInfo.startAddr;
if (section->size > typeInfo.size - curOfs) {
uint16_t overflowSize = section->size - (typeInfo.size - curOfs);
scriptError(
context,
lexer_Error(
"The linker script assigns section \"%s\" to address $%04" PRIx16
", but then it would overflow %s by %" PRIu16 " byte%s",
name.c_str(),

361
src/link/lexer.cpp Normal file
View File

@@ -0,0 +1,361 @@
// SPDX-License-Identifier: MIT
#include "link/lexer.hpp"
#include <algorithm>
#include <array>
#include <ctype.h>
#include <errno.h>
#include <fstream>
#include <inttypes.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "helpers.hpp"
#include "itertools.hpp"
#include "util.hpp"
#include "link/warning.hpp"
// Include this last so it gets all type & constant definitions
#include "script.hpp" // For token definitions, generated from script.y
// One entry of the lexer's stack: a linker script being read, and the position within it.
struct LexerStackEntry {
	std::filebuf file; // Buffer that the script's characters are read from
	std::string path;  // Path the script is opened from; also shown in error messages
	uint32_t lineNo;   // Line number reported in error messages, starting at 1

	// Takes ownership of `path_`'s contents. Since a named rvalue reference is itself an lvalue,
	// it has to be moved explicitly, or else the string would be copied.
	explicit LexerStackEntry(std::string &&path_) : file(), path(std::move(path_)), lineNo(1) {}
};
// Stack of linker scripts being lexed: `lexer_Init` pushes the first entry, `lexer_IncludeFile`
// pushes one per included file, and the entry at the back is the one tokens are read from.
static std::vector<LexerStackEntry> lexerStack;
void lexer_Error(char const *fmt, ...) {
LexerStackEntry &context = lexerStack.back();
va_list args;
va_start(args, fmt);
scriptError(context.path.c_str(), context.lineNo, fmt, args);
va_end(args);
}
// Pushes `path` onto the lexer stack so that the next tokens are read from it.
// If the file cannot be opened, the new entry is discarded and an error is reported against
// the including file. Either way, the including file's line number is incremented.
// The stack must not be empty when this is called.
void lexer_IncludeFile(std::string &&path) {
	// `.emplace_back` can invalidate references to the stack's elements!
	// This is why `newContext` must be gotten before `prevContext`.
	LexerStackEntry &newContext = lexerStack.emplace_back(std::move(path));
	LexerStackEntry &prevContext = lexerStack[lexerStack.size() - 2];

	if (!newContext.file.open(newContext.path, std::ios_base::in)) {
		// Save `errno` right away, so that it still describes the failed `.open()` regardless
		// of what the cleanup below (which runs destructors) does.
		int openErrno = errno;

		// `.pop_back()` will invalidate `newContext`, which is why `path` must be moved first.
		std::string badPath = std::move(newContext.path);
		lexerStack.pop_back();

		// This error will occur in `prevContext`, *before* incrementing the line number!
		lexer_Error(
		    "Failed to open included linker script \"%s\": %s", badPath.c_str(), strerror(openErrno)
		);
	}

	// `.pop_back()` cannot invalidate an unpopped reference, so `prevContext`
	// is still valid even if `.open()` failed.
	++prevContext.lineNo;
}
void lexer_IncLineNo() {
++lexerStack.back().lineNo;
}
// Tells whether `c` is horizontal blank space (a space or a tab); newlines do not count.
static bool isWhiteSpace(int c) {
	switch (c) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
// Tells whether `c` is one of the line terminator characters (CR or LF).
static bool isNewline(int c) {
	switch (c) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
yy::parser::symbol_type yylex(); // Forward declaration for `yywrap`

// Called by `yylex` upon reaching the end of the file currently being lexed.
// The first call for a given file yields a newline token, and the next one either resumes
// lexing the including file, or yields the end-of-input token if there is none.
static yy::parser::symbol_type yywrap() {
	// Whether the newline has already been injected for the file on top of the stack.
	static bool atEof = false;

	if (!atEof) {
		// Inject a newline at EOF to simplify parsing.
		atEof = true;
		return yy::parser::make_newline();
	}

	if (lexerStack.size() != 1) {
		lexerStack.pop_back();
		// The file being returned to has not reached its own EOF yet; without this reset,
		// only the first file to end would ever get its newline injected.
		atEof = false;
		return yylex();
	}

	return yy::parser::make_YYEOF();
}
// Tells whether `c` can be part of an identifier: an ASCII letter or a decimal digit.
static bool isIdentChar(int c) {
	if ('0' <= c && c <= '9') {
		return true;
	}
	if ('A' <= c && c <= 'Z') {
		return true;
	}
	return 'a' <= c && c <= 'z';
}
// Reads the rest of an identifier whose first character `c` has already been consumed.
// The character that ends the identifier is left unread.
static std::string readIdent(int c) {
	std::filebuf &file = lexerStack.back().file;

	std::string ident(1, static_cast<char>(c));
	// Peek first, and only consume the character once it's known to belong to the identifier.
	while (isIdentChar(c = file.sgetc())) {
		ident.push_back(c);
		file.sbumpc();
	}
	return ident;
}
// Tells whether `c` is a decimal digit ('0' through '9').
static bool isDecDigit(int c) {
	return !(c < '0' || c > '9');
}
// Reads the rest of a decimal number whose first digit `c` has already been consumed, and
// returns it as a number token. Underscores between digits are skipped.
static yy::parser::symbol_type parseDecNumber(int c) {
	std::filebuf &file = lexerStack.back().file;

	uint32_t value = c - '0';
	for (;;) {
		int next = file.sgetc();
		if (next == '_') {
			// Digit separator: consume it, but it does not contribute to the value.
			file.sbumpc();
			continue;
		}
		if (!isDecDigit(next)) {
			break; // Leave the terminating character unread.
		}
		value = value * 10 + (next - '0');
		file.sbumpc();
	}
	return yy::parser::make_number(value);
}
// Tells whether `c` is a binary digit ('0' or '1').
static bool isBinDigit(int c) {
	return c == '0' || c == '1';
}
// Reads a binary number whose `prefix` has already been consumed, and returns it as a number
// token. Underscores after the first digit are skipped. If no digit follows the prefix, an
// error is reported and the number 0 is returned.
static yy::parser::symbol_type parseBinNumber(char const *prefix) {
	std::filebuf &file = lexerStack.back().file;

	int c = file.sgetc();
	if (!isBinDigit(c)) {
		lexer_Error("No binary digits found after '%s'", prefix);
		return yy::parser::make_number(0);
	}

	uint32_t value = 0;
	do {
		// `c` has only been peeked at this point, and is either a digit or a separator.
		if (c != '_') {
			value = value * 2 + (c - '0');
		}
		file.sbumpc();
		c = file.sgetc();
	} while (isBinDigit(c) || c == '_');
	return yy::parser::make_number(value);
}
// Tells whether `c` is an octal digit ('0' through '7').
static bool isOctDigit(int c) {
	return !(c < '0' || c > '7');
}
// Reads an octal number whose `prefix` has already been consumed, and returns it as a number
// token. Underscores after the first digit are skipped. If no digit follows the prefix, an
// error is reported and the number 0 is returned.
static yy::parser::symbol_type parseOctNumber(char const *prefix) {
	std::filebuf &file = lexerStack.back().file;

	int c = file.sgetc();
	if (!isOctDigit(c)) {
		lexer_Error("No octal digits found after '%s'", prefix);
		return yy::parser::make_number(0);
	}

	uint32_t value = 0;
	do {
		// `c` has only been peeked at this point, and is either a digit or a separator.
		if (c != '_') {
			value = value * 8 + (c - '0');
		}
		file.sbumpc();
		c = file.sgetc();
	} while (isOctDigit(c) || c == '_');
	return yy::parser::make_number(value);
}
// Tells whether `c` is a hexadecimal digit, in either letter case.
static bool isHexDigit(int c) {
	if ('0' <= c && c <= '9') {
		return true;
	}
	if ('A' <= c && c <= 'F') {
		return true;
	}
	return 'a' <= c && c <= 'f';
}
// Converts a hexadecimal digit character to its value (0 to 15).
// `c` must be a hexadecimal digit; any other input reaches `unreachable_()`.
static uint8_t parseHexDigit(int c) {
	if ('0' <= c && c <= '9') {
		return c - '0';
	}
	if ('A' <= c && c <= 'F') {
		return c - 'A' + 10;
	}
	if ('a' <= c && c <= 'f') {
		return c - 'a' + 10;
	}
	unreachable_(); // LCOV_EXCL_LINE
}
// Reads a hexadecimal number whose `prefix` has already been consumed, and returns it as a
// number token. Underscores after the first digit are skipped. If no digit follows the prefix,
// an error is reported and the number 0 is returned.
static yy::parser::symbol_type parseHexNumber(char const *prefix) {
	std::filebuf &file = lexerStack.back().file;

	int c = file.sgetc();
	if (!isHexDigit(c)) {
		lexer_Error("No hexadecimal digits found after '%s'", prefix);
		return yy::parser::make_number(0);
	}

	uint32_t value = 0;
	do {
		// `c` has only been peeked at this point, and is either a digit or a separator.
		if (c != '_') {
			value = value * 16 + parseHexDigit(c);
		}
		file.sbumpc();
		c = file.sgetc();
	} while (isHexDigit(c) || c == '_');
	return yy::parser::make_number(value);
}
// Reads a number whose first digit `c` has already been consumed, and returns it as a number
// token. A leading '0' followed by 'x', 'o' or 'b' (in either case) selects hexadecimal, octal
// or binary respectively; anything else is read as decimal.
static yy::parser::symbol_type parseNumber(int c) {
	if (c == '0') {
		std::filebuf &file = lexerStack.back().file;
		// The radix character is only consumed if it is recognized.
		int radix = file.sgetc();
		if (radix == 'x' || radix == 'X') {
			file.sbumpc();
			return parseHexNumber(radix == 'x' ? "0x" : "0X");
		}
		if (radix == 'o' || radix == 'O') {
			file.sbumpc();
			return parseOctNumber(radix == 'o' ? "0o" : "0O");
		}
		if (radix == 'b' || radix == 'B') {
			file.sbumpc();
			return parseBinNumber(radix == 'b' ? "0b" : "0B");
		}
	}
	return parseDecNumber(c);
}
// Reads a string literal whose opening quote has already been consumed, and returns it as a
// string token. A string cannot span lines: if the line or file ends before the closing quote,
// an error is reported and what was read so far is returned, leaving the newline unread.
static yy::parser::symbol_type parseString() {
	std::filebuf &file = lexerStack.back().file;

	std::string str;
	for (;;) {
		int c = file.sgetc();
		if (c == '"') {
			// Consume the closing quote.
			file.sbumpc();
			break;
		}
		if (c == EOF || isNewline(c)) {
			lexer_Error("Unterminated string");
			break;
		}
		file.sbumpc();

		if (c == '\\') {
			c = file.sgetc();
			if (c == EOF || isNewline(c)) {
				lexer_Error("Unterminated string");
				break;
			}
			switch (c) {
			case 'n':
				c = '\n';
				break;
			case 'r':
				c = '\r';
				break;
			case 't':
				c = '\t';
				break;
			case '0':
				c = '\0';
				break;
			case '\\':
			case '"':
			case '\'':
				// These escape to themselves.
				break;
			default:
				// After reporting it, the character is kept as-is.
				lexer_Error("Cannot escape character %s", printChar(c));
				break;
			}
			file.sbumpc();
		}
		str.push_back(c);
	}
	return yy::parser::make_string(std::move(str));
}
struct Keyword {
std::string_view name;
yy::parser::symbol_type (*tokenGen)();
};
using namespace std::literals;
static std::array keywords{
Keyword{"ORG"sv, yy::parser::make_ORG },
Keyword{"FLOATING"sv, yy::parser::make_FLOATING},
Keyword{"INCLUDE"sv, yy::parser::make_INCLUDE },
Keyword{"ALIGN"sv, yy::parser::make_ALIGN },
Keyword{"DS"sv, yy::parser::make_DS },
Keyword{"OPTIONAL"sv, yy::parser::make_OPTIONAL},
};
// Reads and returns the next token from the file on top of the lexer stack.
// Lexing errors are reported, and lexing then resumes in order to return a token anyway.
yy::parser::symbol_type yylex() {
	std::filebuf &input = lexerStack.back().file;

	// Fetch the first character that isn't blank.
	int c;
	do {
		c = input.sbumpc();
	} while (isWhiteSpace(c));

	// A comment runs until the end of the line (or of the file), which ends up in `c`.
	if (c == ';') {
		do {
			c = input.sbumpc();
		} while (c != EOF && !isNewline(c));
	}

	if (c == EOF) {
		return yywrap();
	}
	if (isNewline(c)) {
		// Treat CRLF as a single newline.
		if (c == '\r' && input.sgetc() == '\n') {
			input.sbumpc();
		}
		return yy::parser::make_newline();
	}

	switch (c) {
	case ',':
		return yy::parser::make_COMMA();
	case '"':
		return parseString();
	case '$':
		return parseHexNumber("$");
	case '%':
		return parseBinNumber("%");
	case '&':
		return parseOctNumber("&");
	default:
		break;
	}

	// Digits are identifier characters too, so numbers have to be checked for first!
	if (isDecDigit(c)) {
		return parseNumber(c);
	}

	if (!isIdentChar(c)) {
		lexer_Error("Unexpected character %s", printChar(c));
		// Discard the rest of the line (but not its newline), to avoid reporting too many errors.
		for (c = input.sgetc(); c != EOF && !isNewline(c); c = input.sgetc()) {
			input.sbumpc();
		}
		return yylex();
	}

	std::string ident = readIdent(c);
	// Each character of the identifier is uppercased before being compared to the candidate name.
	auto strUpperCmp = [](char cmp, char ref) { return toupper(cmp) == ref; };

	// Section type names are looked up before keywords.
	for (SectionType type : EnumSeq(SECTTYPE_INVALID)) {
		if (std::equal(RANGE(ident), RANGE(sectionTypeInfo[type].name), strUpperCmp)) {
			return yy::parser::make_sect_type(type);
		}
	}
	for (Keyword const &keyword : keywords) {
		if (std::equal(RANGE(ident), RANGE(keyword.name), strUpperCmp)) {
			return keyword.tokenGen();
		}
	}

	// Skip the unknown identifier, and return whichever token comes after it.
	lexer_Error("Unknown keyword \"%s\"", ident.c_str());
	return yylex();
}
bool lexer_Init(char const *linkerScriptName) {
if (LexerStackEntry &newContext = lexerStack.emplace_back(std::string(linkerScriptName));
!newContext.file.open(newContext.path, std::ios_base::in)) {
error("Failed to open linker script \"%s\"", linkerScriptName);
lexerStack.clear();
return false;
}
return true;
}

View File

@@ -18,7 +18,7 @@
#include "version.hpp"
#include "link/assign.hpp"
#include "link/layout.hpp"
#include "link/lexer.hpp"
#include "link/object.hpp"
#include "link/output.hpp"
#include "link/patch.hpp"
@@ -151,7 +151,7 @@ static void parseScrambleSpec(char const *spec) {
// If this trips, `spec` must be pointing at a ',' or '=' (or NUL) due to the assumption
if (regionNameLen == 0) {
argErr('S', "Missing region name");
argError('S', "Missing region name");
if (*spec == '\0') {
break;
@@ -165,7 +165,7 @@ static void parseScrambleSpec(char const *spec) {
// Find the next non-blank char after the region name's end
spec += regionNameLen + strspn(&spec[regionNameLen], " \t");
if (*spec != '\0' && *spec != ',' && *spec != '=') {
argErr(
argError(
'S',
"Unexpected '%c' after region name \"%.*s\"",
*spec,
@@ -188,7 +188,7 @@ static void parseScrambleSpec(char const *spec) {
}
if (region == SCRAMBLE_UNK) {
argErr('S', "Unknown region \"%.*s\"", regionNameFmtLen, regionName);
argError('S', "Unknown region \"%.*s\"", regionNameFmtLen, regionName);
}
if (*spec == '=') {
@@ -197,13 +197,13 @@ static void parseScrambleSpec(char const *spec) {
char *endptr;
if (*spec == '\0' || *spec == ',') {
argErr('S', "Empty limit for region \"%.*s\"", regionNameFmtLen, regionName);
argError('S', "Empty limit for region \"%.*s\"", regionNameFmtLen, regionName);
goto next;
}
limit = strtoul(spec, &endptr, 10);
endptr += strspn(endptr, " \t");
if (*endptr != '\0' && *endptr != ',') {
argErr(
argError(
'S',
"Invalid non-numeric limit for region \"%.*s\"",
regionNameFmtLen,
@@ -214,7 +214,7 @@ static void parseScrambleSpec(char const *spec) {
spec = endptr;
if (region != SCRAMBLE_UNK && limit > scrambleSpecs[region].max) {
argErr(
argError(
'S',
"Limit for region \"%.*s\" may not exceed %" PRIu16,
regionNameFmtLen,
@@ -241,7 +241,7 @@ static void parseScrambleSpec(char const *spec) {
// Only WRAMX can be implied, since ROMX and SRAM size may vary
options.scrambleWRAMX = 7;
} else {
argErr('S', "Cannot imply limit for region \"%.*s\"", regionNameFmtLen, regionName);
argError('S', "Cannot imply limit for region \"%.*s\"", regionNameFmtLen, regionName);
}
next: // Can't `continue` a `for` loop with this nontrivial iteration logic
@@ -310,7 +310,7 @@ int main(int argc, char *argv[]) {
unsigned long value = strtoul(musl_optarg, &endptr, 0);
if (musl_optarg[0] == '\0' || *endptr != '\0' || value > 0xFF) {
argErr('p', "Argument for 'p' must be a byte (between 0 and 0xFF)");
argError('p', "Argument for 'p' must be a byte (between 0 and 0xFF)");
value = 0xFF;
}
options.padValue = value;

View File

@@ -12,8 +12,8 @@
}
%code {
#include "link/lexer.hpp"
#include "link/layout.hpp"
#include "link/warning.hpp"
yy::parser::symbol_type yylex(); // Provided by lexer.cpp
}
@@ -110,5 +110,5 @@ optional:
/******************** Error handler ********************/
void yy::parser::error(std::string const &msg) {
scriptError(lexer_Context(), "%s", msg.c_str());
lexer_Error("%s", msg.c_str());
}

View File

@@ -103,7 +103,7 @@ void errorNoDump(char const *fmt, ...) {
warnings.incrementErrors();
}
void argErr(char flag, char const *fmt, ...) {
void argError(char flag, char const *fmt, ...) {
va_list args;
fprintf(stderr, "error: Invalid argument for option '%c': ", flag);
va_start(args, fmt);
@@ -114,6 +114,14 @@ void argErr(char flag, char const *fmt, ...) {
warnings.incrementErrors();
}
// Prints an error message to stderr, prefixed with the given file name and line number and
// followed by a newline, then counts it as an error.
// `fmt` and `args` are a printf-style format and its arguments; the caller remains responsible
// for calling `va_end` on `args`.
void scriptError(char const *name, uint32_t lineNo, char const *fmt, va_list args) {
	// Location prefix first...
	fprintf(stderr, "error: %s(%" PRIu32 "): ", name, lineNo);
	// ...then the message itself, on the same line.
	vfprintf(stderr, fmt, args);
	fputc('\n', stderr);

	warnings.incrementErrors();
}
[[noreturn]]
void fatal(FileStackNode const *src, uint32_t lineNo, char const *fmt, ...) {
va_list args;