Split up the linkerscript lexer and layout actions

2026-06-25 18:48:04 +00:00 · 2025-07-27 13:03:28 -04:00
parent a353637a90
commit 16e16cdf51
15 changed files with 448 additions and 433 deletions
@@ -57,6 +57,7 @@ set(rgbasm_src
 set(rgblink_src
    "${BISON_LINKER_SCRIPT_PARSER_OUTPUT_SOURCE}"
    "link/assign.cpp"
+    "link/lexer.cpp"
    "link/layout.cpp"
    "link/main.cpp"
    "link/object.cpp"
@@ -74,6 +74,7 @@ static void printDiag(
 	}
 	vfprintf(stderr, fmt, args);
 	putc('\n', stderr);
+
 	lexer_DumpStringExpansions();
 }

@@ -2,354 +2,17 @@

 #include "link/layout.hpp"

-#include <algorithm>
 #include <array>
 #include <bit>
-#include <ctype.h>
 #include <inttypes.h>
-#include <stdio.h>
-#include <string_view>
 #include <vector>

 #include "helpers.hpp"
-#include "itertools.hpp"
 #include "util.hpp"

+#include "link/lexer.hpp" // lexer_Error
 #include "link/section.hpp"
 #include "link/warning.hpp"
-// Include this last so it gets all type & constant definitions
-#include "script.hpp" // For token definitions, generated from script.y
-
-/******************** Lexer ********************/
-
-static std::vector<LexerStackEntry> lexerStack;
-static bool atEof = false;
-
-LexerStackEntry &lexer_Context() {
-	return lexerStack.back();
-}
-
-void lexer_IncludeFile(std::string &&path) {
-	// `emplace_back` can invalidate references to the stack's elements!
-	// This is why `newContext` must be gotten before `prevContext`.
-	LexerStackEntry &newContext = lexerStack.emplace_back(std::move(path));
-	LexerStackEntry &prevContext = lexerStack[lexerStack.size() - 2];
-
-	if (!newContext.file.open(newContext.path, std::ios_base::in)) {
-		// The order is important: report the error, increment the line number, modify the stack!
-		scriptError(
-		    prevContext, "Failed to open included linker script \"%s\"", newContext.path.c_str()
-		);
-		++prevContext.lineNo;
-		lexerStack.pop_back();
-	} else {
-		// The lexer will use the new entry to lex the next token.
-		++prevContext.lineNo;
-	}
-}
-
-void lexer_IncLineNo() {
-	++lexerStack.back().lineNo;
-}
-
-static bool isWhiteSpace(int c) {
-	return c == ' ' || c == '\t';
-}
-
-static bool isNewline(int c) {
-	return c == '\r' || c == '\n';
-}
-
-yy::parser::symbol_type yylex(); // Forward declaration for `yywrap`
-
-static yy::parser::symbol_type yywrap() {
-	if (lexerStack.size() != 1) {
-		if (!atEof) {
-			// Inject a newline at EOF to simplify parsing.
-			atEof = true;
-			return yy::parser::make_newline();
-		}
-		lexerStack.pop_back();
-		return yylex();
-	}
-	if (!atEof) {
-		// Inject a newline at EOF to simplify parsing.
-		atEof = true;
-		return yy::parser::make_newline();
-	}
-	return yy::parser::make_YYEOF();
-}
-
-static bool isIdentChar(int c) {
-	return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9');
-}
-
-static std::string readIdent(int c) {
-	LexerStackEntry &context = lexerStack.back();
-	std::string ident;
-	ident.push_back(c);
-	for (c = context.file.sgetc(); isIdentChar(c); c = context.file.snextc()) {
-		ident.push_back(c);
-	}
-	return ident;
-}
-
-static bool isDecDigit(int c) {
-	return c >= '0' && c <= '9';
-}
-
-static yy::parser::symbol_type parseDecNumber(int c) {
-	LexerStackEntry &context = lexerStack.back();
-	uint32_t number = c - '0';
-	for (c = context.file.sgetc(); isDecDigit(c) || c == '_'; c = context.file.sgetc()) {
-		if (c != '_') {
-			number = number * 10 + (c - '0');
-		}
-		context.file.sbumpc();
-	}
-	return yy::parser::make_number(number);
-}
-
-static bool isBinDigit(int c) {
-	return c >= '0' && c <= '1';
-}
-
-static yy::parser::symbol_type parseBinNumber(char const *prefix) {
-	LexerStackEntry &context = lexerStack.back();
-	int c = context.file.sgetc();
-	if (!isBinDigit(c)) {
-		scriptError(context, "No binary digits found after '%s'", prefix);
-		return yy::parser::make_number(0);
-	}
-
-	uint32_t number = c - '0';
-	context.file.sbumpc();
-	for (c = context.file.sgetc(); isBinDigit(c) || c == '_'; c = context.file.sgetc()) {
-		if (c != '_') {
-			number = number * 2 + (c - '0');
-		}
-		context.file.sbumpc();
-	}
-	return yy::parser::make_number(number);
-}
-
-static bool isOctDigit(int c) {
-	return c >= '0' && c <= '7';
-}
-
-static yy::parser::symbol_type parseOctNumber(char const *prefix) {
-	LexerStackEntry &context = lexerStack.back();
-	int c = context.file.sgetc();
-	if (!isOctDigit(c)) {
-		scriptError(context, "No octal digits found after '%s'", prefix);
-		return yy::parser::make_number(0);
-	}
-
-	uint32_t number = c - '0';
-	context.file.sbumpc();
-	for (c = context.file.sgetc(); isOctDigit(c) || c == '_'; c = context.file.sgetc()) {
-		if (c != '_') {
-			number = number * 8 + (c - '0');
-		}
-		context.file.sbumpc();
-	}
-	return yy::parser::make_number(number);
-}
-
-static bool isHexDigit(int c) {
-	return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
-}
-
-static uint8_t parseHexDigit(int c) {
-	if (c >= '0' && c <= '9') {
-		return c - '0';
-	} else if (c >= 'A' && c <= 'F') {
-		return c - 'A' + 10;
-	} else if (c >= 'a' && c <= 'f') {
-		return c - 'a' + 10;
-	} else {
-		unreachable_(); // LCOV_EXCL_LINE
-	}
-}
-
-static yy::parser::symbol_type parseHexNumber(char const *prefix) {
-	LexerStackEntry &context = lexerStack.back();
-	int c = context.file.sgetc();
-	if (!isHexDigit(c)) {
-		scriptError(context, "No hexadecimal digits found after '%s'", prefix);
-		return yy::parser::make_number(0);
-	}
-
-	uint32_t number = parseHexDigit(c);
-	context.file.sbumpc();
-	for (c = context.file.sgetc(); isHexDigit(c) || c == '_'; c = context.file.sgetc()) {
-		if (c != '_') {
-			number = number * 16 + parseHexDigit(c);
-		}
-		context.file.sbumpc();
-	}
-	return yy::parser::make_number(number);
-}
-
-static yy::parser::symbol_type parseNumber(int c) {
-	LexerStackEntry &context = lexerStack.back();
-	if (c == '0') {
-		switch (context.file.sgetc()) {
-		case 'x':
-			context.file.sbumpc();
-			return parseHexNumber("0x");
-		case 'X':
-			context.file.sbumpc();
-			return parseHexNumber("0X");
-		case 'o':
-			context.file.sbumpc();
-			return parseOctNumber("0o");
-		case 'O':
-			context.file.sbumpc();
-			return parseOctNumber("0O");
-		case 'b':
-			context.file.sbumpc();
-			return parseBinNumber("0b");
-		case 'B':
-			context.file.sbumpc();
-			return parseBinNumber("0B");
-		}
-	}
-	return parseDecNumber(c);
-}
-
-static yy::parser::symbol_type parseString() {
-	LexerStackEntry &context = lexerStack.back();
-	int c = context.file.sgetc();
-	std::string str;
-	for (; c != '"'; c = context.file.sgetc()) {
-		if (c == EOF || isNewline(c)) {
-			scriptError(context, "Unterminated string");
-			break;
-		}
-		context.file.sbumpc();
-		if (c == '\\') {
-			c = context.file.sgetc();
-			if (c == EOF || isNewline(c)) {
-				scriptError(context, "Unterminated string");
-				break;
-			} else if (c == 'n') {
-				c = '\n';
-			} else if (c == 'r') {
-				c = '\r';
-			} else if (c == 't') {
-				c = '\t';
-			} else if (c == '0') {
-				c = '\0';
-			} else if (c != '\\' && c != '"' && c != '\'') {
-				scriptError(context, "Cannot escape character %s", printChar(c));
-			}
-			context.file.sbumpc();
-		}
-		str.push_back(c);
-	}
-	if (c == '"') {
-		context.file.sbumpc();
-	}
-	return yy::parser::make_string(std::move(str));
-}
-
-struct Keyword {
-	std::string_view name;
-	yy::parser::symbol_type (*tokenGen)();
-};
-
-using namespace std::literals;
-
-static std::array keywords{
-    Keyword{"ORG"sv,      yy::parser::make_ORG     },
-    Keyword{"FLOATING"sv, yy::parser::make_FLOATING},
-    Keyword{"INCLUDE"sv,  yy::parser::make_INCLUDE },
-    Keyword{"ALIGN"sv,    yy::parser::make_ALIGN   },
-    Keyword{"DS"sv,       yy::parser::make_DS      },
-    Keyword{"OPTIONAL"sv, yy::parser::make_OPTIONAL},
-};
-
-yy::parser::symbol_type yylex() {
-	LexerStackEntry &context = lexerStack.back();
-	int c = context.file.sbumpc();
-
-	// First, skip leading whitespace.
-	while (isWhiteSpace(c)) {
-		c = context.file.sbumpc();
-	}
-	// Then, skip a comment if applicable.
-	if (c == ';') {
-		while (c != EOF && !isNewline(c)) {
-			c = context.file.sbumpc();
-		}
-	}
-
-	// Alright, what token should we return?
-	if (c == EOF) {
-		return yywrap();
-	} else if (c == ',') {
-		return yy::parser::make_COMMA();
-	} else if (isNewline(c)) {
-		// Handle CRLF.
-		if (c == '\r' && context.file.sgetc() == '\n') {
-			context.file.sbumpc();
-		}
-		return yy::parser::make_newline();
-	} else if (c == '"') {
-		return parseString();
-	} else if (c == '$') {
-		return parseHexNumber("$");
-	} else if (c == '%') {
-		return parseBinNumber("%");
-	} else if (c == '&') {
-		return parseOctNumber("&");
-	} else if (isDecDigit(c)) {
-		return parseNumber(c);
-	} else if (isIdentChar(c)) { // Note that we match these *after* digit characters!
-		std::string ident = readIdent(c);
-
-		auto strUpperCmp = [](char cmp, char ref) { return toupper(cmp) == ref; };
-
-		for (SectionType type : EnumSeq(SECTTYPE_INVALID)) {
-			if (std::equal(RANGE(ident), RANGE(sectionTypeInfo[type].name), strUpperCmp)) {
-				return yy::parser::make_sect_type(type);
-			}
-		}
-
-		for (Keyword const &keyword : keywords) {
-			if (std::equal(RANGE(ident), RANGE(keyword.name), strUpperCmp)) {
-				return keyword.tokenGen();
-			}
-		}
-
-		scriptError(context, "Unknown keyword \"%s\"", ident.c_str());
-		return yylex();
-	} else {
-		scriptError(context, "Unexpected character %s", printChar(c));
-		// Keep reading characters until the EOL, to avoid reporting too many errors.
-		for (c = context.file.sgetc(); !isNewline(c); c = context.file.sgetc()) {
-			if (c == EOF) {
-				break;
-			}
-			context.file.sbumpc();
-		}
-		return yylex();
-	}
-	// Not marking as unreachable; this will generate a warning if any codepath forgets to return.
-}
-
-bool lexer_Init(char const *linkerScriptName) {
-	if (LexerStackEntry &newContext = lexerStack.emplace_back(std::string(linkerScriptName));
-	    !newContext.file.open(newContext.path, std::ios_base::in)) {
-		error("Failed to open linker script \"%s\"", linkerScriptName);
-		lexerStack.clear();
-		return false;
-	}
-	return true;
-}
-
-/******************** Semantic actions ********************/

 static std::array<std::vector<uint16_t>, SECTTYPE_INVALID> curAddr;
 static SectionType activeType = SECTTYPE_INVALID; // Index into curAddr
@@ -383,12 +46,8 @@ void layout_SetFloatingSectionType(SectionType type) {
 }

 void layout_SetSectionType(SectionType type) {
-	LexerStackEntry const &context = lexerStack.back();
-
 	if (nbbanks(type) != 1) {
-		scriptError(
-		    context, "A bank number must be specified for %s", sectionTypeInfo[type].name.c_str()
-		);
+		lexer_Error("A bank number must be specified for %s", sectionTypeInfo[type].name.c_str());
 		// Keep going with a default value for the bank index.
 	}

@@ -396,12 +55,10 @@ void layout_SetSectionType(SectionType type) {
 }

 void layout_SetSectionType(SectionType type, uint32_t bank) {
-	LexerStackEntry const &context = lexerStack.back();
 	SectionTypeInfo const &typeInfo = sectionTypeInfo[type];

 	if (bank < typeInfo.firstBank) {
-		scriptError(
-		    context,
+		lexer_Error(
 		    "%s bank %" PRIu32 " doesn't exist (the minimum is %" PRIu32 ")",
 		    typeInfo.name.c_str(),
 		    bank,
@@ -409,8 +66,7 @@ void layout_SetSectionType(SectionType type, uint32_t bank) {
 		);
 		bank = typeInfo.firstBank;
 	} else if (bank > typeInfo.lastBank) {
-		scriptError(
-		    context,
+		lexer_Error(
 		    "%s bank %" PRIu32 " doesn't exist (the maximum is %" PRIu32 ")",
 		    typeInfo.name.c_str(),
 		    bank,
@@ -422,13 +78,12 @@ void layout_SetSectionType(SectionType type, uint32_t bank) {
 }

 void layout_SetAddr(uint32_t addr) {
-	LexerStackEntry const &context = lexerStack.back();
 	if (activeType == SECTTYPE_INVALID) {
-		scriptError(context, "Cannot set the current address: no memory region is active");
+		lexer_Error("Cannot set the current address: no memory region is active");
 		return;
 	}
 	if (activeBankIdx == UINT32_MAX) {
-		scriptError(context, "Cannot set the current address: the bank is floating");
+		lexer_Error("Cannot set the current address: the bank is floating");
 		return;
 	}

@@ -436,11 +91,10 @@ void layout_SetAddr(uint32_t addr) {
 	SectionTypeInfo const &typeInfo = sectionTypeInfo[activeType];

 	if (addr < pc) {
-		scriptError(context, "Cannot decrease the current address (from $%04x to $%04x)", pc, addr);
+		lexer_Error("Cannot decrease the current address (from $%04x to $%04x)", pc, addr);
 	} else if (addr > endaddr(activeType)) { // Allow "one past the end" sections.
-		scriptError(
-		    context,
-		    "Cannot set the current address to $%04" PRIx32 ": %s ends at $%04" PRIx16 "",
+		lexer_Error(
+		    "Cannot set the current address to $%04" PRIx32 ": %s ends at $%04" PRIx16,
 		    addr,
 		    typeInfo.name.c_str(),
 		    endaddr(activeType)
@@ -453,11 +107,8 @@ void layout_SetAddr(uint32_t addr) {
 }

 void layout_MakeAddrFloating() {
-	LexerStackEntry const &context = lexerStack.back();
 	if (activeType == SECTTYPE_INVALID) {
-		scriptError(
-		    context, "Cannot make the current address floating: no memory region is active"
-		);
+		lexer_Error("Cannot make the current address floating: no memory region is active");
 		return;
 	}

@@ -467,9 +118,8 @@ void layout_MakeAddrFloating() {
 }

 void layout_AlignTo(uint32_t alignment, uint32_t alignOfs) {
-	LexerStackEntry const &context = lexerStack.back();
 	if (activeType == SECTTYPE_INVALID) {
-		scriptError(context, "Cannot align: no memory region is active");
+		lexer_Error("Cannot align: no memory region is active");
 		return;
 	}

@@ -480,8 +130,7 @@ void layout_AlignTo(uint32_t alignment, uint32_t alignOfs) {
 			uint32_t alignSize = 1u << alignment;

 			if (alignOfs >= alignSize) {
-				scriptError(
-				    context,
+				lexer_Error(
 				    "Cannot align: The alignment offset (%" PRIu32
 				    ") must be less than alignment size (%" PRIu32 ")",
 				    alignOfs,
@@ -500,9 +149,7 @@ void layout_AlignTo(uint32_t alignment, uint32_t alignOfs) {
 	uint16_t &pc = curAddr[activeType][activeBankIdx];

 	if (alignment > 16) {
-		scriptError(
-		    context, "Cannot align: The alignment (%" PRIu32 ") must be less than 16", alignment
-		);
+		lexer_Error("Cannot align: The alignment (%" PRIu32 ") must be less than 16", alignment);
 		return;
 	}

@@ -513,8 +160,7 @@ void layout_AlignTo(uint32_t alignment, uint32_t alignOfs) {
 		uint32_t alignSize = 1u << alignment;

 		if (alignOfs >= alignSize) {
-			scriptError(
-			    context,
+			lexer_Error(
 			    "Cannot align: The alignment offset (%" PRIu32
 			    ") must be less than alignment size (%" PRIu32 ")",
 			    alignOfs,
@@ -528,8 +174,7 @@ void layout_AlignTo(uint32_t alignment, uint32_t alignOfs) {
 	}

 	if (uint16_t offset = pc - typeInfo.startAddr; length > typeInfo.size - offset) {
-		scriptError(
-		    context,
+		lexer_Error(
 		    "Cannot align: the next suitable address after $%04" PRIx16 " is $%04" PRIx16
 		    ", past $%04" PRIx16,
 		    pc,
@@ -543,9 +188,8 @@ void layout_AlignTo(uint32_t alignment, uint32_t alignOfs) {
 }

 void layout_Pad(uint32_t length) {
-	LexerStackEntry const &context = lexerStack.back();
 	if (activeType == SECTTYPE_INVALID) {
-		scriptError(context, "Cannot increase the current address: no memory region is active");
+		lexer_Error("Cannot increase the current address: no memory region is active");
 		return;
 	}

@@ -559,8 +203,7 @@ void layout_Pad(uint32_t length) {

 	assume(pc >= typeInfo.startAddr);
 	if (uint16_t offset = pc - typeInfo.startAddr; length + offset > typeInfo.size) {
-		scriptError(
-		    context,
+		lexer_Error(
 		    "Cannot increase the current address by %u bytes: only %u bytes to $%04" PRIx16,
 		    length,
 		    typeInfo.size - offset,
@@ -572,18 +215,15 @@ void layout_Pad(uint32_t length) {
 }

 void layout_PlaceSection(std::string const &name, bool isOptional) {
-	LexerStackEntry const &context = lexerStack.back();
 	if (activeType == SECTTYPE_INVALID) {
-		scriptError(
-		    context, "No memory region has been specified to place section \"%s\" in", name.c_str()
-		);
+		lexer_Error("No memory region has been specified to place section \"%s\" in", name.c_str());
 		return;
 	}

 	Section *section = sect_GetSection(name.c_str());
 	if (!section) {
 		if (!isOptional) {
-			scriptError(context, "Unknown section \"%s\"", name.c_str());
+			lexer_Error("Unknown section \"%s\"", name.c_str());
 		}
 		return;
 	}
@@ -594,8 +234,7 @@ void layout_PlaceSection(std::string const &name, bool isOptional) {
 	if (section->type == SECTTYPE_INVALID) {
 		// A section that has data must get assigned a type that requires data.
 		if (!sect_HasData(activeType) && !section->data.empty()) {
-			scriptError(
-			    context,
+			lexer_Error(
 			    "\"%s\" is specified to be a %s section, but it contains data",
 			    name.c_str(),
 			    typeInfo.name.c_str()
@@ -603,8 +242,7 @@ void layout_PlaceSection(std::string const &name, bool isOptional) {
 		} else if (sect_HasData(activeType) && section->data.empty() && section->size != 0) {
 			// A section that lacks data can only be assigned to a type that requires data
 			// if it's empty.
-			scriptError(
-			    context,
+			lexer_Error(
 			    "\"%s\" is specified to be a %s section, but it doesn't contain data",
 			    name.c_str(),
 			    typeInfo.name.c_str()
@@ -617,8 +255,7 @@ void layout_PlaceSection(std::string const &name, bool isOptional) {
 			}
 		}
 	} else if (section->type != activeType) {
-		scriptError(
-		    context,
+		lexer_Error(
 		    "\"%s\" is specified to be a %s section, but it is already a %s section",
 		    name.c_str(),
 		    typeInfo.name.c_str(),
@@ -631,8 +268,7 @@ void layout_PlaceSection(std::string const &name, bool isOptional) {
 	} else {
 		uint32_t bank = activeBankIdx + typeInfo.firstBank;
 		if (section->isBankFixed && bank != section->bank) {
-			scriptError(
-			    context,
+			lexer_Error(
 			    "The linker script places section \"%s\" in %s bank %" PRIu32
 			    ", but it was already defined in bank %" PRIu32,
 			    name.c_str(),
@@ -648,8 +284,7 @@ void layout_PlaceSection(std::string const &name, bool isOptional) {
 	if (!isPcFloating) {
 		uint16_t &org = curAddr[activeType][activeBankIdx];
 		if (section->isAddressFixed && org != section->org) {
-			scriptError(
-			    context,
+			lexer_Error(
 			    "The linker script assigns section \"%s\" to address $%04" PRIx16
 			    ", but it was already at $%04" PRIx16,
 			    name.c_str(),
@@ -658,8 +293,7 @@ void layout_PlaceSection(std::string const &name, bool isOptional) {
 			);
 		} else if (section->isAlignFixed && (org & section->alignMask) != section->alignOfs) {
 			uint8_t alignment = std::countr_one(section->alignMask);
-			scriptError(
-			    context,
+			lexer_Error(
 			    "The linker script assigns section \"%s\" to address $%04" PRIx16
 			    ", but that would be ALIGN[%" PRIu8 ", %" PRIu16
 			    "] instead of the requested ALIGN[%" PRIu8 ", %" PRIu16 "]",
@@ -678,8 +312,7 @@ void layout_PlaceSection(std::string const &name, bool isOptional) {
 		uint16_t curOfs = org - typeInfo.startAddr;
 		if (section->size > typeInfo.size - curOfs) {
 			uint16_t overflowSize = section->size - (typeInfo.size - curOfs);
-			scriptError(
-			    context,
+			lexer_Error(
 			    "The linker script assigns section \"%s\" to address $%04" PRIx16
 			    ", but then it would overflow %s by %" PRIu16 " byte%s",
 			    name.c_str(),
@@ -0,0 +1,361 @@
+// SPDX-License-Identifier: MIT
+
+#include "link/lexer.hpp"
+
+#include <algorithm>
+#include <array>
+#include <ctype.h>
+#include <errno.h>
+#include <fstream>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string>
+#include <string.h>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "helpers.hpp"
+#include "itertools.hpp"
+#include "util.hpp"
+
+#include "link/warning.hpp"
+// Include this last so it gets all type & constant definitions
+#include "script.hpp" // For token definitions, generated from script.y
+
+// One entry of the lexer's stack: a linker script file, and the position being read in it.
+struct LexerStackEntry {
+	std::filebuf file; // Stream buffer that characters are read from
+	std::string path;  // Path used to open `file`, and to locate errors
+	uint32_t lineNo;   // Line number currently being read; starts at 1
+
+	// `path_` is declared as an rvalue reference, but a *named* reference is an lvalue:
+	// it has to be moved explicitly, otherwise the string is copied instead.
+	explicit LexerStackEntry(std::string &&path_) : file(), path(std::move(path_)), lineNo(1) {}
+};
+
+// Stack of the linker scripts being lexed: tokens are read from the back entry,
+// and `lexer_IncludeFile` pushes a new entry on top of the file that included it.
+static std::vector<LexerStackEntry> lexerStack;
+
+// Reports a printf-style error, located at the path and line number of the entry
+// on top of the lexer stack. The stack must not be empty when this is called.
+void lexer_Error(char const *fmt, ...) {
+	LexerStackEntry const &current = lexerStack.back();
+	va_list ap;
+	va_start(ap, fmt);
+	scriptError(current.path.c_str(), current.lineNo, fmt, ap);
+	va_end(ap);
+}
+
+// Pushes the linker script at `path` on top of the lexer stack, so that the next token
+// gets read from it. The stack must already contain the including file.
+// If the file cannot be opened, an error is reported against the including file,
+// and the stack is left as it was. Either way, the including file's line number is incremented.
+void lexer_IncludeFile(std::string &&path) {
+	// `.emplace_back` can invalidate references to the stack's elements!
+	// This is why `newContext` must be gotten before `prevContext`.
+	LexerStackEntry &newContext = lexerStack.emplace_back(std::move(path));
+	LexerStackEntry &prevContext = lexerStack[lexerStack.size() - 2];
+
+	if (!newContext.file.open(newContext.path, std::ios_base::in)) {
+		// Save the failure reason right away, before any other call can clobber `errno`.
+		int err = errno;
+		// `.pop_back()` will invalidate `newContext`, which is why `path` must be moved first.
+		std::string badPath = std::move(newContext.path);
+		lexerStack.pop_back();
+		// This error will occur in `prevContext`, *before* incrementing the line number!
+		lexer_Error(
+		    "Failed to open included linker script \"%s\": %s", badPath.c_str(), strerror(err)
+		);
+	}
+
+	// `.pop_back()` cannot invalidate an unpopped reference, so `prevContext`
+	// is still valid even if `.open()` failed.
+	++prevContext.lineNo;
+}
+
+// Moves the file on top of the lexer stack on to its next line.
+void lexer_IncLineNo() {
+	LexerStackEntry &current = lexerStack.back();
+	current.lineNo += 1;
+}
+
+// Whether `c` is a blank within a line, i.e. a space or a tab.
+static bool isWhiteSpace(int c) {
+	switch (c) {
+	case ' ':
+	case '\t':
+		return true;
+	default:
+		return false;
+	}
+}
+
+// Whether `c` is a line terminator character, i.e. a carriage return or a line feed.
+static bool isNewline(int c) {
+	switch (c) {
+	case '\r':
+	case '\n':
+		return true;
+	default:
+		return false;
+	}
+}
+
+yy::parser::symbol_type yylex(); // Forward declaration for `yywrap`
+
+// Called by `yylex` when the file on top of the lexer stack has no characters left.
+// A newline token is injected once at the end of each file, so that a last line lacking
+// a line terminator still gets parsed like the others.
+// Once that is done, an included file is popped and lexing resumes in the file below it;
+// when only the bottom file remains, the end-of-input token is returned instead.
+static yy::parser::symbol_type yywrap() {
+	// Whether the newline has already been injected for the file that is currently ending.
+	static bool atEof = false;
+	if (!atEof) {
+		// Inject a newline at EOF to simplify parsing.
+		atEof = true;
+		return yy::parser::make_newline();
+	}
+	if (lexerStack.size() != 1) {
+		lexerStack.pop_back();
+		// The file being returned to has not reached its own EOF yet, so re-arm the injection;
+		// otherwise, only the first file to end would ever get its final newline.
+		atEof = false;
+		return yylex();
+	}
+	return yy::parser::make_YYEOF();
+}
+
+// Whether `c` can be part of an identifier, i.e. is an ASCII letter or decimal digit.
+static bool isIdentChar(int c) {
+	bool isUpper = c >= 'A' && c <= 'Z';
+	bool isLower = c >= 'a' && c <= 'z';
+	bool isDigit = c >= '0' && c <= '9';
+	return isUpper || isLower || isDigit;
+}
+
+// Reads an identifier whose first character, `c`, has already been consumed.
+// The first character that is not part of the identifier is left unread.
+static std::string readIdent(int c) {
+	std::filebuf &file = lexerStack.back().file;
+	std::string ident;
+	ident.push_back(c);
+
+	int next = file.sgetc();
+	while (isIdentChar(next)) {
+		ident.push_back(next);
+		next = file.snextc();
+	}
+	return ident;
+}
+
+// Whether `c` is a decimal digit.
+static bool isDecDigit(int c) {
+	return !(c < '0' || c > '9');
+}
+
+// Parses a decimal number whose first digit, `c`, has already been consumed.
+// Underscores following it are accepted and ignored.
+// The value is accumulated in 32 bits without any overflow check.
+static yy::parser::symbol_type parseDecNumber(int c) {
+	std::filebuf &file = lexerStack.back().file;
+	uint32_t value = c - '0';
+
+	int next = file.sgetc();
+	while (isDecDigit(next) || next == '_') {
+		if (next != '_') {
+			value = value * 10 + (next - '0');
+		}
+		file.sbumpc();
+		next = file.sgetc();
+	}
+	return yy::parser::make_number(value);
+}
+
+// Whether `c` is a binary digit.
+static bool isBinDigit(int c) {
+	return c == '0' || c == '1';
+}
+
+// Parses the digits of a binary number; `prefix` is only used in the error message.
+// Underscores are accepted after the first digit, and ignored.
+// If the next character is not a binary digit, an error is reported and the number 0 is returned.
+static yy::parser::symbol_type parseBinNumber(char const *prefix) {
+	std::filebuf &file = lexerStack.back().file;
+	int next = file.sgetc();
+	if (!isBinDigit(next)) {
+		lexer_Error("No binary digits found after '%s'", prefix);
+		return yy::parser::make_number(0);
+	}
+
+	// The first character is known to be a digit, so it can go through the same loop as the rest.
+	uint32_t value = 0;
+	while (isBinDigit(next) || next == '_') {
+		if (next != '_') {
+			value = value * 2 + (next - '0');
+		}
+		file.sbumpc();
+		next = file.sgetc();
+	}
+	return yy::parser::make_number(value);
+}
+
+// Whether `c` is an octal digit.
+static bool isOctDigit(int c) {
+	return !(c < '0' || c > '7');
+}
+
+// Parses the digits of an octal number; `prefix` is only used in the error message.
+// Underscores are accepted after the first digit, and ignored.
+// If the next character is not an octal digit, an error is reported and the number 0 is returned.
+static yy::parser::symbol_type parseOctNumber(char const *prefix) {
+	std::filebuf &file = lexerStack.back().file;
+	int next = file.sgetc();
+	if (!isOctDigit(next)) {
+		lexer_Error("No octal digits found after '%s'", prefix);
+		return yy::parser::make_number(0);
+	}
+
+	// The first character is known to be a digit, so it can go through the same loop as the rest.
+	uint32_t value = 0;
+	while (isOctDigit(next) || next == '_') {
+		if (next != '_') {
+			value = value * 8 + (next - '0');
+		}
+		file.sbumpc();
+		next = file.sgetc();
+	}
+	return yy::parser::make_number(value);
+}
+
+// Whether `c` is a hexadecimal digit, in either case.
+static bool isHexDigit(int c) {
+	bool isDecimal = c >= '0' && c <= '9';
+	bool isUpper = c >= 'A' && c <= 'F';
+	bool isLower = c >= 'a' && c <= 'f';
+	return isDecimal || isUpper || isLower;
+}
+
+// Returns the value (0 to 15) of hexadecimal digit `c`.
+// `c` must be a hexadecimal digit; callers check this with `isHexDigit` beforehand.
+static uint8_t parseHexDigit(int c) {
+	if (c >= '0' && c <= '9') {
+		return c - '0';
+	}
+	if (c >= 'A' && c <= 'F') {
+		return c - 'A' + 10;
+	}
+	if (c >= 'a' && c <= 'f') {
+		return c - 'a' + 10;
+	}
+	unreachable_(); // LCOV_EXCL_LINE
+}
+
+// Parses the digits of a hexadecimal number; `prefix` is only used in the error message.
+// Underscores are accepted after the first digit, and ignored.
+// If the next character is not a hex digit, an error is reported and the number 0 is returned.
+static yy::parser::symbol_type parseHexNumber(char const *prefix) {
+	std::filebuf &file = lexerStack.back().file;
+	int next = file.sgetc();
+	if (!isHexDigit(next)) {
+		lexer_Error("No hexadecimal digits found after '%s'", prefix);
+		return yy::parser::make_number(0);
+	}
+
+	// The first character is known to be a digit, so it can go through the same loop as the rest.
+	uint32_t value = 0;
+	while (isHexDigit(next) || next == '_') {
+		if (next != '_') {
+			value = value * 16 + parseHexDigit(next);
+		}
+		file.sbumpc();
+		next = file.sgetc();
+	}
+	return yy::parser::make_number(value);
+}
+
+// Parses a number whose first character, the decimal digit `c`, has already been consumed.
+// A `0` followed by `x`, `o` or `b` (in either case) introduces a hexadecimal, octal or binary
+// number respectively; anything else is parsed as a decimal number.
+static yy::parser::symbol_type parseNumber(int c) {
+	if (c != '0') {
+		return parseDecNumber(c);
+	}
+
+	std::filebuf &file = lexerStack.back().file;
+	// Only peek at the character: it must stay unread if it is not a radix letter.
+	int radixChar = file.sgetc();
+	switch (radixChar) {
+	case 'x':
+	case 'X':
+		file.sbumpc();
+		return parseHexNumber(radixChar == 'x' ? "0x" : "0X");
+	case 'o':
+	case 'O':
+		file.sbumpc();
+		return parseOctNumber(radixChar == 'o' ? "0o" : "0O");
+	case 'b':
+	case 'B':
+		file.sbumpc();
+		return parseBinNumber(radixChar == 'b' ? "0b" : "0B");
+	default:
+		return parseDecNumber(c);
+	}
+}
+
+// Parses a string literal and returns it as a string token; `yylex` calls this right after
+// consuming the opening double quote.
+// The escapes `\n`, `\r`, `\t`, `\0`, `\\`, `\"` and `\'` are recognized.
+// If the string is not closed before the end of the line or of the file, an error is reported,
+// and the characters read so far are returned.
+static yy::parser::symbol_type parseString() {
+	LexerStackEntry &context = lexerStack.back();
+	int c = context.file.sgetc();
+	std::string str;
+	// Characters are peeked at first, and only consumed once they are known to belong
+	// to the string; this leaves a line terminator unread for `yylex` to process.
+	for (; c != '"'; c = context.file.sgetc()) {
+		if (c == EOF || isNewline(c)) {
+			lexer_Error("Unterminated string");
+			break;
+		}
+		context.file.sbumpc();
+		if (c == '\\') {
+			// The backslash has been consumed; now look at the escaped character.
+			c = context.file.sgetc();
+			if (c == EOF || isNewline(c)) {
+				lexer_Error("Unterminated string");
+				break;
+			} else if (c == 'n') {
+				c = '\n';
+			} else if (c == 'r') {
+				c = '\r';
+			} else if (c == 't') {
+				c = '\t';
+			} else if (c == '0') {
+				c = '\0';
+			} else if (c != '\\' && c != '"' && c != '\'') {
+				// After the error, the character is still added to the string as-is.
+				lexer_Error("Cannot escape character %s", printChar(c));
+			}
+			context.file.sbumpc();
+		}
+		str.push_back(c);
+	}
+	// Consume the closing quote, unless the loop was exited because of an error.
+	if (c == '"') {
+		context.file.sbumpc();
+	}
+	return yy::parser::make_string(std::move(str));
+}
+
+// Pairs a keyword's spelling with the function that creates the corresponding token.
+struct Keyword {
+	std::string_view name; // Spelling that identifiers are compared against
+	yy::parser::symbol_type (*tokenGen)(); // Called to create the keyword's token
+};
+
+using namespace std::literals;
+
+// Keywords that `yylex` looks identifiers up in, after trying section type names.
+// The names are spelled in uppercase, because `yylex` uppercases each character of the
+// identifier before comparing it.
+static std::array keywords{
+    Keyword{"ORG"sv,      yy::parser::make_ORG     },
+    Keyword{"FLOATING"sv, yy::parser::make_FLOATING},
+    Keyword{"INCLUDE"sv,  yy::parser::make_INCLUDE },
+    Keyword{"ALIGN"sv,    yy::parser::make_ALIGN   },
+    Keyword{"DS"sv,       yy::parser::make_DS      },
+    Keyword{"OPTIONAL"sv, yy::parser::make_OPTIONAL},
+};
+
+// Reads the next token from the file on top of the lexer stack, and returns it.
+// Lexing errors are reported through `lexer_Error`; the offending input is then skipped,
+// and the token that follows it is returned instead.
+yy::parser::symbol_type yylex() {
+	LexerStackEntry &context = lexerStack.back();
+	int c = context.file.sbumpc();
+
+	// First, skip leading whitespace.
+	while (isWhiteSpace(c)) {
+		c = context.file.sbumpc();
+	}
+	// Then, skip a comment if applicable.
+	if (c == ';') {
+		// This leaves `c` holding what ended the comment (a line terminator or EOF),
+		// so that it gets turned into a token below.
+		while (c != EOF && !isNewline(c)) {
+			c = context.file.sbumpc();
+		}
+	}
+
+	// Alright, what token should we return?
+	if (c == EOF) {
+		return yywrap();
+	} else if (c == ',') {
+		return yy::parser::make_COMMA();
+	} else if (isNewline(c)) {
+		// Handle CRLF.
+		if (c == '\r' && context.file.sgetc() == '\n') {
+			context.file.sbumpc();
+		}
+		return yy::parser::make_newline();
+	} else if (c == '"') {
+		return parseString();
+	} else if (c == '$') {
+		return parseHexNumber("$");
+	} else if (c == '%') {
+		return parseBinNumber("%");
+	} else if (c == '&') {
+		return parseOctNumber("&");
+	} else if (isDecDigit(c)) {
+		return parseNumber(c);
+	} else if (isIdentChar(c)) { // Note that we match these *after* digit characters!
+		std::string ident = readIdent(c);
+
+		// Only the identifier's characters are uppercased, which makes the match
+		// case-insensitive as long as the reference names are themselves uppercase.
+		auto strUpperCmp = [](char cmp, char ref) { return toupper(cmp) == ref; };
+
+		// Section type names are tried first, then keywords.
+		for (SectionType type : EnumSeq(SECTTYPE_INVALID)) {
+			if (std::equal(RANGE(ident), RANGE(sectionTypeInfo[type].name), strUpperCmp)) {
+				return yy::parser::make_sect_type(type);
+			}
+		}
+
+		for (Keyword const &keyword : keywords) {
+			if (std::equal(RANGE(ident), RANGE(keyword.name), strUpperCmp)) {
+				return keyword.tokenGen();
+			}
+		}
+
+		// The unknown identifier has been consumed already, so simply lex what follows it.
+		lexer_Error("Unknown keyword \"%s\"", ident.c_str());
+		return yylex();
+	} else {
+		lexer_Error("Unexpected character %s", printChar(c));
+		// Keep reading characters until the EOL, to avoid reporting too many errors.
+		// The line terminator itself is left unread, so the recursive call returns it as a token.
+		for (c = context.file.sgetc(); !isNewline(c); c = context.file.sgetc()) {
+			if (c == EOF) {
+				break;
+			}
+			context.file.sbumpc();
+		}
+		return yylex();
+	}
+	// Not marking as unreachable; this will generate a warning if any codepath forgets to return.
+}
+
+// Pushes the linker script `linkerScriptName` onto the lexer stack, and opens it for reading.
+// Returns whether the file could be opened; if it could not, an error is reported,
+// and the lexer stack is emptied.
+bool lexer_Init(char const *linkerScriptName) {
+	LexerStackEntry &rootContext = lexerStack.emplace_back(std::string(linkerScriptName));
+	if (rootContext.file.open(rootContext.path, std::ios_base::in)) {
+		return true;
+	}
+
+	error("Failed to open linker script \"%s\"", linkerScriptName);
+	lexerStack.clear();
+	return false;
+}
@@ -18,7 +18,7 @@
 #include "version.hpp"

 #include "link/assign.hpp"
-#include "link/layout.hpp"
+#include "link/lexer.hpp"
 #include "link/object.hpp"
 #include "link/output.hpp"
 #include "link/patch.hpp"
@@ -151,7 +151,7 @@ static void parseScrambleSpec(char const *spec) {

 		// If this trips, `spec` must be pointing at a ',' or '=' (or NUL) due to the assumption
 		if (regionNameLen == 0) {
-			argErr('S', "Missing region name");
+			argError('S', "Missing region name");

 			if (*spec == '\0') {
 				break;
@@ -165,7 +165,7 @@ static void parseScrambleSpec(char const *spec) {
 		// Find the next non-blank char after the region name's end
 		spec += regionNameLen + strspn(&spec[regionNameLen], " \t");
 		if (*spec != '\0' && *spec != ',' && *spec != '=') {
-			argErr(
+			argError(
 			    'S',
 			    "Unexpected '%c' after region name \"%.*s\"",
 			    *spec,
@@ -188,7 +188,7 @@ static void parseScrambleSpec(char const *spec) {
 		}

 		if (region == SCRAMBLE_UNK) {
-			argErr('S', "Unknown region \"%.*s\"", regionNameFmtLen, regionName);
+			argError('S', "Unknown region \"%.*s\"", regionNameFmtLen, regionName);
 		}

 		if (*spec == '=') {
@@ -197,13 +197,13 @@ static void parseScrambleSpec(char const *spec) {
 			char *endptr;

 			if (*spec == '\0' || *spec == ',') {
-				argErr('S', "Empty limit for region \"%.*s\"", regionNameFmtLen, regionName);
+				argError('S', "Empty limit for region \"%.*s\"", regionNameFmtLen, regionName);
 				goto next;
 			}
 			limit = strtoul(spec, &endptr, 10);
 			endptr += strspn(endptr, " \t");
 			if (*endptr != '\0' && *endptr != ',') {
-				argErr(
+				argError(
 				    'S',
 				    "Invalid non-numeric limit for region \"%.*s\"",
 				    regionNameFmtLen,
@@ -214,7 +214,7 @@ static void parseScrambleSpec(char const *spec) {
 			spec = endptr;

 			if (region != SCRAMBLE_UNK && limit > scrambleSpecs[region].max) {
-				argErr(
+				argError(
 				    'S',
 				    "Limit for region \"%.*s\" may not exceed %" PRIu16,
 				    regionNameFmtLen,
@@ -241,7 +241,7 @@ static void parseScrambleSpec(char const *spec) {
 			// Only WRAMX can be implied, since ROMX and SRAM size may vary
 			options.scrambleWRAMX = 7;
 		} else {
-			argErr('S', "Cannot imply limit for region \"%.*s\"", regionNameFmtLen, regionName);
+			argError('S', "Cannot imply limit for region \"%.*s\"", regionNameFmtLen, regionName);
 		}

 next: // Can't `continue` a `for` loop with this nontrivial iteration logic
@@ -310,7 +310,7 @@ int main(int argc, char *argv[]) {
 			unsigned long value = strtoul(musl_optarg, &endptr, 0);

 			if (musl_optarg[0] == '\0' || *endptr != '\0' || value > 0xFF) {
-				argErr('p', "Argument for 'p' must be a byte (between 0 and 0xFF)");
+				argError('p', "Argument for 'p' must be a byte (between 0 and 0xFF)");
 				value = 0xFF;
 			}
 			options.padValue = value;
@@ -12,8 +12,8 @@
 }

 %code {
+	#include "link/lexer.hpp"
 	#include "link/layout.hpp"
-	#include "link/warning.hpp"

-	yy::parser::symbol_type yylex(); // Provided by layout.cpp
+	yy::parser::symbol_type yylex(); // Provided by lexer.cpp
 }
@@ -110,5 +110,5 @@ optional:
 /******************** Error handler ********************/

 void yy::parser::error(std::string const &msg) {
-	scriptError(lexer_Context(), "%s", msg.c_str());
+	lexer_Error("%s", msg.c_str());
 }
@@ -103,7 +103,7 @@ void errorNoDump(char const *fmt, ...) {
 	warnings.incrementErrors();
 }

-void argErr(char flag, char const *fmt, ...) {
+void argError(char flag, char const *fmt, ...) {
 	va_list args;
 	fprintf(stderr, "error: Invalid argument for option '%c': ", flag);
 	va_start(args, fmt);
@@ -114,6 +114,14 @@ void argErr(char flag, char const *fmt, ...) {
 	warnings.incrementErrors();
 }

+// Prints an error message located at line `lineNo` of the linker script called `name`,
+// then increments the error count.
+// The message is given printf-style, by `fmt` and the already-started argument list `args`.
+void scriptError(char const *name, uint32_t lineNo, char const *fmt, va_list args) {
+	fprintf(stderr, "error: %s(%" PRIu32 "): ", name, lineNo);
+	vfprintf(stderr, fmt, args);
+	fputc('\n', stderr);
+
+	warnings.incrementErrors();
+}
+
 [[noreturn]]
 void fatal(FileStackNode const *src, uint32_t lineNo, char const *fmt, ...) {
 	va_list args;