mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-20 10:12:06 +00:00
362 lines
9.4 KiB
C++
362 lines
9.4 KiB
C++
// SPDX-License-Identifier: MIT
|
|
|
|
#include "link/lexer.hpp"
|
|
|
|
#include <array>
|
|
#include <ctype.h>
|
|
#include <errno.h>
|
|
#include <fstream>
|
|
#include <inttypes.h>
|
|
#include <stdio.h>
|
|
#include <string_view>
|
|
#include <vector>
|
|
|
|
#include "helpers.hpp"
|
|
#include "itertools.hpp"
|
|
#include "util.hpp"
|
|
|
|
#include "link/warning.hpp"
|
|
// Include this last so it gets all type & constant definitions
|
|
#include "script.hpp" // For token definitions, generated from script.y
|
|
|
|
// One linker script file being lexed, together with the position reached in it.
struct LexerStackEntry {
	std::filebuf file; // Stream buffer the characters are read from; opened by the caller
	std::string path;  // Path the file is opened from; also passed to `scriptError` for diagnostics
	uint32_t lineNo;   // Line number reported in diagnostics; starts at 1

	// Takes ownership of `path_`.
	// A named rvalue reference is an lvalue, so `std::move` is required here;
	// without it the string would be copied instead of moved.
	explicit LexerStackEntry(std::string &&path_) : file(), path(std::move(path_)), lineNo(1) {}
};
|
|
|
|
// Stack of linker script files being lexed; the last entry is the file currently being read.
static std::vector<LexerStackEntry> lexerStack;
|
|
|
|
void lexer_Error(char const *fmt, ...) {
|
|
LexerStackEntry &context = lexerStack.back();
|
|
va_list args;
|
|
va_start(args, fmt);
|
|
scriptError(context.path.c_str(), context.lineNo, fmt, args);
|
|
va_end(args);
|
|
}
|
|
|
|
void lexer_IncludeFile(std::string &&path) {
|
|
// `.emplace_back` can invalidate references to the stack's elements!
|
|
// This is why `newContext` must be gotten before `prevContext`.
|
|
LexerStackEntry &newContext = lexerStack.emplace_back(std::move(path));
|
|
LexerStackEntry &prevContext = lexerStack[lexerStack.size() - 2];
|
|
|
|
if (!newContext.file.open(newContext.path, std::ios_base::in)) {
|
|
// `.pop_back()` will invalidate `newContext`, which is why `path` must be moved first.
|
|
std::string badPath = std::move(newContext.path);
|
|
lexerStack.pop_back();
|
|
// This error will occur in `prevContext`, *before* incrementing the line number!
|
|
lexer_Error(
|
|
"Failed to open included linker script \"%s\": %s", badPath.c_str(), strerror(errno)
|
|
);
|
|
}
|
|
|
|
// `.pop_back()` cannot invalidate an unpopped reference, so `prevContext`
|
|
// is still valid even if `.open()` failed.
|
|
++prevContext.lineNo;
|
|
}
|
|
|
|
void lexer_IncLineNo() {
|
|
++lexerStack.back().lineNo;
|
|
}
|
|
|
|
// Tells whether `c` is a blank (space or horizontal tab); newlines do not count.
static bool isWhiteSpace(int c) {
	switch (c) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
|
|
|
|
// Tells whether `c` is a line terminator character (carriage return or line feed).
static bool isNewline(int c) {
	switch (c) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
|
|
|
|
yy::parser::symbol_type yylex(); // Forward declaration for `yywrap`

// Produces the token to return when the file on top of the lexer stack has run out of characters.
// The first call for a given file injects a newline token. The following call either resumes
// lexing the including file (if there is one), or reports the end of input.
static yy::parser::symbol_type yywrap() {
	// Whether the newline has already been injected for the file that is currently at EOF.
	static bool atEof = false;

	if (!atEof) {
		// Inject a newline at EOF to simplify parsing.
		atEof = true;
		return yy::parser::make_newline();
	}

	if (lexerStack.size() != 1) {
		// Re-arm the flag before resuming the including file: every file must get its own
		// injected newline, not just the first one that reaches EOF.
		atEof = false;
		lexerStack.pop_back();
		return yylex();
	}

	// The root linker script is exhausted; `atEof` stays set so that any further call
	// keeps reporting the end of input.
	return yy::parser::make_YYEOF();
}
|
|
|
|
// Tells whether `c` may appear in an identifier, i.e. is an ASCII letter or decimal digit.
static bool isIdentChar(int c) {
	bool const isUpper = 'A' <= c && c <= 'Z';
	bool const isLower = 'a' <= c && c <= 'z';
	bool const isDigit = '0' <= c && c <= '9';
	return isUpper || isLower || isDigit;
}
|
|
|
|
static std::string readIdent(int c) {
|
|
LexerStackEntry &context = lexerStack.back();
|
|
std::string ident;
|
|
ident.push_back(c);
|
|
for (c = context.file.sgetc(); isIdentChar(c); c = context.file.snextc()) {
|
|
ident.push_back(c);
|
|
}
|
|
return ident;
|
|
}
|
|
|
|
// Tells whether `c` is a decimal digit ('0' through '9').
static bool isDecDigit(int c) {
	return !(c < '0' || c > '9');
}
|
|
|
|
// Lexes a decimal number whose first digit `c` has already been consumed.
// Underscores between digits are skipped. The value is accumulated in a `uint32_t`.
static yy::parser::symbol_type parseDecNumber(int c) {
	LexerStackEntry &current = lexerStack.back();

	uint32_t value = c - '0';
	for (;;) {
		int const next = current.file.sgetc();
		if (isDecDigit(next)) {
			value = value * 10 + (next - '0');
		} else if (next != '_') {
			// Neither a digit nor a separator: leave it unread for the next token.
			break;
		}
		current.file.sbumpc();
	}
	return yy::parser::make_number(value);
}
|
|
|
|
// Tells whether `c` is a binary digit ('0' or '1').
static bool isBinDigit(int c) {
	return c == '0' || c == '1';
}
|
|
|
|
// Lexes the digits of a binary number whose prefix has already been consumed.
// `prefix` is only used in the error reported when no digit follows; zero is returned then.
// Underscores are skipped, except that the very first character must be a digit.
static yy::parser::symbol_type parseBinNumber(char const *prefix) {
	LexerStackEntry &current = lexerStack.back();

	if (!isBinDigit(current.file.sgetc())) {
		lexer_Error("No binary digits found after '%s'", prefix);
		return yy::parser::make_number(0);
	}

	// The first digit is still unread, so the loop below starts with it.
	uint32_t value = 0;
	for (;;) {
		int const next = current.file.sgetc();
		if (isBinDigit(next)) {
			value = value * 2 + (next - '0');
		} else if (next != '_') {
			break;
		}
		current.file.sbumpc();
	}
	return yy::parser::make_number(value);
}
|
|
|
|
// Tells whether `c` is an octal digit ('0' through '7').
static bool isOctDigit(int c) {
	return !(c < '0' || c > '7');
}
|
|
|
|
// Lexes the digits of an octal number whose prefix has already been consumed.
// `prefix` is only used in the error reported when no digit follows; zero is returned then.
// Underscores are skipped, except that the very first character must be a digit.
static yy::parser::symbol_type parseOctNumber(char const *prefix) {
	LexerStackEntry &current = lexerStack.back();

	if (!isOctDigit(current.file.sgetc())) {
		lexer_Error("No octal digits found after '%s'", prefix);
		return yy::parser::make_number(0);
	}

	// The first digit is still unread, so the loop below starts with it.
	uint32_t value = 0;
	for (;;) {
		int const next = current.file.sgetc();
		if (isOctDigit(next)) {
			value = value * 8 + (next - '0');
		} else if (next != '_') {
			break;
		}
		current.file.sbumpc();
	}
	return yy::parser::make_number(value);
}
|
|
|
|
// Tells whether `c` is a hexadecimal digit: '0'-'9', 'A'-'F' or 'a'-'f'.
static bool isHexDigit(int c) {
	bool const isDecimal = '0' <= c && c <= '9';
	bool const isUpperHex = 'A' <= c && c <= 'F';
	bool const isLowerHex = 'a' <= c && c <= 'f';
	return isDecimal || isUpperHex || isLowerHex;
}
|
|
|
|
// Converts a hexadecimal digit character to its numeric value (0 to 15).
// `c` must be a hexadecimal digit; any other value hits the unreachable marker.
static uint8_t parseHexDigit(int c) {
	if ('0' <= c && c <= '9') {
		return c - '0';
	}
	if ('A' <= c && c <= 'F') {
		return c - 'A' + 10;
	}
	if ('a' <= c && c <= 'f') {
		return c - 'a' + 10;
	}
	unreachable_(); // LCOV_EXCL_LINE
}
|
|
|
|
// Lexes the digits of a hexadecimal number whose prefix has already been consumed.
// `prefix` is only used in the error reported when no digit follows; zero is returned then.
// Underscores are skipped, except that the very first character must be a digit.
static yy::parser::symbol_type parseHexNumber(char const *prefix) {
	LexerStackEntry &current = lexerStack.back();

	if (!isHexDigit(current.file.sgetc())) {
		lexer_Error("No hexadecimal digits found after '%s'", prefix);
		return yy::parser::make_number(0);
	}

	// The first digit is still unread, so the loop below starts with it.
	uint32_t value = 0;
	for (;;) {
		int const next = current.file.sgetc();
		if (isHexDigit(next)) {
			value = value * 16 + parseHexDigit(next);
		} else if (next != '_') {
			break;
		}
		current.file.sbumpc();
	}
	return yy::parser::make_number(value);
}
|
|
|
|
// Lexes a number whose first digit `c` has already been consumed.
// A leading '0' followed by 'x', 'o' or 'b' (in either case) selects hexadecimal, octal or
// binary respectively; anything else is lexed as a decimal number.
static yy::parser::symbol_type parseNumber(int c) {
	if (c != '0') {
		return parseDecNumber(c);
	}

	LexerStackEntry &current = lexerStack.back();
	// Consumes the radix letter that was just peeked, then lexes the digits that follow it.
	auto afterRadixLetter = [&current](
	                            yy::parser::symbol_type (*parseDigits)(char const *),
	                            char const *prefix
	                        ) {
		current.file.sbumpc();
		return parseDigits(prefix);
	};

	switch (current.file.sgetc()) {
	case 'x':
		return afterRadixLetter(parseHexNumber, "0x");
	case 'X':
		return afterRadixLetter(parseHexNumber, "0X");
	case 'o':
		return afterRadixLetter(parseOctNumber, "0o");
	case 'O':
		return afterRadixLetter(parseOctNumber, "0O");
	case 'b':
		return afterRadixLetter(parseBinNumber, "0b");
	case 'B':
		return afterRadixLetter(parseBinNumber, "0B");
	default:
		// No radix letter: the '0' is simply the first digit of a decimal number.
		return parseDecNumber(c);
	}
}
|
|
|
|
// Lexes a string literal whose opening double quote has already been consumed.
// Supported escapes are \n, \r, \t, \0, \\, \" and \'. Any other escaped character is reported
// as an error and kept as-is. A string hitting a newline or the end of the file is reported as
// unterminated; the newline is left unread, and what was read so far is returned.
static yy::parser::symbol_type parseString() {
	LexerStackEntry &current = lexerStack.back();

	std::string contents;
	for (;;) {
		int ch = current.file.sgetc();
		if (ch == '"') {
			// Closing quote: consume it, the string is complete.
			current.file.sbumpc();
			break;
		}
		if (ch == EOF || isNewline(ch)) {
			lexer_Error("Unterminated string");
			break;
		}
		current.file.sbumpc();

		if (ch == '\\') {
			ch = current.file.sgetc();
			if (ch == EOF || isNewline(ch)) {
				lexer_Error("Unterminated string");
				break;
			}
			switch (ch) {
			case 'n':
				ch = '\n';
				break;
			case 'r':
				ch = '\r';
				break;
			case 't':
				ch = '\t';
				break;
			case '0':
				ch = '\0';
				break;
			case '\\':
			case '"':
			case '\'':
				// These escape to themselves.
				break;
			default:
				lexer_Error("Cannot escape character %s", printChar(ch));
				break;
			}
			current.file.sbumpc();
		}

		contents.push_back(ch);
	}
	return yy::parser::make_string(std::move(contents));
}
|
|
|
|
struct Keyword {
|
|
std::string_view name;
|
|
yy::parser::symbol_type (*tokenGen)();
|
|
};
|
|
|
|
using namespace std::literals;
|
|
|
|
static std::array keywords{
|
|
Keyword{"ORG"sv, yy::parser::make_ORG },
|
|
Keyword{"FLOATING"sv, yy::parser::make_FLOATING},
|
|
Keyword{"INCLUDE"sv, yy::parser::make_INCLUDE },
|
|
Keyword{"ALIGN"sv, yy::parser::make_ALIGN },
|
|
Keyword{"DS"sv, yy::parser::make_DS },
|
|
Keyword{"OPTIONAL"sv, yy::parser::make_OPTIONAL},
|
|
};
|
|
|
|
// Reads the next token from the file on top of the lexer stack.
// Blanks and `;` comments are skipped. Unknown keywords and unexpected characters are reported
// as errors, after which lexing simply carries on with whatever follows.
yy::parser::symbol_type yylex() {
	LexerStackEntry &current = lexerStack.back();

	// Consume characters up to and including the first one that is not a blank.
	int ch;
	do {
		ch = current.file.sbumpc();
	} while (isWhiteSpace(ch));

	// A ';' starts a comment, which extends up to the end of the line (or of the file).
	if (ch == ';') {
		do {
			ch = current.file.sbumpc();
		} while (ch != EOF && !isNewline(ch));
	}

	// Tokens that are identified by a single, specific character.
	switch (ch) {
	case EOF:
		return yywrap();
	case ',':
		return yy::parser::make_COMMA();
	case '\r':
		// Handle CRLF: the LF belongs to the same line terminator.
		if (current.file.sgetc() == '\n') {
			current.file.sbumpc();
		}
		[[fallthrough]];
	case '\n':
		return yy::parser::make_newline();
	case '"':
		return parseString();
	case '$':
		return parseHexNumber("$");
	case '%':
		return parseBinNumber("%");
	case '&':
		return parseOctNumber("&");
	default:
		break;
	}

	// Digits must be checked *before* identifier characters, since they are identifier
	// characters too.
	if (isDecDigit(ch)) {
		return parseNumber(ch);
	}

	if (!isIdentChar(ch)) {
		lexer_Error("Unexpected character %s", printChar(ch));
		// Keep reading characters until the EOL, to avoid reporting too many errors.
		// The newline itself (or EOF) is left for the next call to handle.
		for (;;) {
			int const next = current.file.sgetc();
			if (isNewline(next) || next == EOF) {
				break;
			}
			current.file.sbumpc();
		}
		return yylex();
	}

	std::string ident = readIdent(ch);

	// Case-insensitive comparison against a name that is spelled in upper case.
	auto strUpperCmp = [](char cmp, char ref) { return toupper(cmp) == ref; };
	auto identIs = [&ident, &strUpperCmp](auto const &name) {
		return std::equal(RANGE(ident), RANGE(name), strUpperCmp);
	};

	// Section type names take precedence over keywords.
	for (SectionType type : EnumSeq(SECTTYPE_INVALID)) {
		if (identIs(sectionTypeInfo[type].name)) {
			return yy::parser::make_sect_type(type);
		}
	}

	for (Keyword const &keyword : keywords) {
		if (identIs(keyword.name)) {
			return keyword.tokenGen();
		}
	}

	lexer_Error("Unknown keyword \"%s\"", ident.c_str());
	return yylex();
}
|
|
|
|
bool lexer_Init(char const *linkerScriptName) {
|
|
if (LexerStackEntry &newContext = lexerStack.emplace_back(std::string(linkerScriptName));
|
|
!newContext.file.open(newContext.path, std::ios_base::in)) {
|
|
error("Failed to open linker script \"%s\"", linkerScriptName);
|
|
lexerStack.clear();
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|