Support more syntax in linkerscripts (#1752)

* No need to use `locale`s

* Implement octal numbers, `_` digit separators, and `0x/0b/0o` prefixes in linkerscripts

* Refactor some functions out of `yylex`

* Support `\0` in linkerscripts
This commit is contained in:
Rangi
2025-07-16 15:00:02 -04:00
committed by GitHub
parent cf6e5fec63
commit 7f24d46d44
7 changed files with 209 additions and 109 deletions

View File

@@ -24,18 +24,20 @@ They are simply ignored.
.Pp
Keywords are composed of letters and digits (but they can't start with a digit); they are all case-insensitive.
.Pp
Numbers can be written in decimal format, or in binary using the
.Ql %
prefix, or in hexadecimal using the
.Ql $
prefix (hexadecimal digits are case-insensitive).
Note that unlike
.Xr rgbasm 5 ,
an octal
.Ql &
prefix is not supported, nor are
.Ql _
digit separators.
Numbers can be written in several formats.
.Bl -column -offset indent "Hexadecimal" "Possible prefixes"
.It Sy Format type Ta Sy Possible prefixes Ta Sy Accepted characters
.It Decimal Ta none Ta 0123456789
.It Hexadecimal Ta Li $ , 0x , 0X Ta 0123456789ABCDEF
.It Octal Ta Li & , 0o , 0O Ta 01234567
.It Binary Ta Li % , 0b , 0B Ta 01
.El
.Pp
Underscores are also accepted in numbers, except at the beginning of one.
This can be useful for grouping digits, like
.Ql 1_234
or
.Ql $ff_80 .
.Pp
Strings begin with a double quote, and end at the next (non-escaped) double quote.
Strings must not contain literal newline characters.
@@ -46,8 +48,9 @@ are supported, specifically
.Ql \e" ,
.Ql \en ,
.Ql \er ,
.Ql \et ,
and
.Ql \et .
.Ql \e0 .
Other backslash escape sequences in
.Xr rgbasm 5
are only relevant to assembly code and do not apply in linker scripts.

View File

@@ -17,9 +17,9 @@
#include <algorithm>
#include <array>
#include <bit>
#include <ctype.h>
#include <fstream>
#include <inttypes.h>
#include <locale>
#include <stdio.h>
#include <string_view>
#include <vector>
@@ -209,18 +209,100 @@ static bool isNewline(int c) {
return c == '\r' || c == '\n';
}
// Produces the token to return once the current context's input has run out.
// The first call after hitting the end yields a synthetic newline; later calls
// either fall back to the enclosing context or report the real end of input.
static yy::parser::symbol_type yywrap() {
	// Inject a newline at EOF to simplify parsing.
	if (!atEof) {
		atEof = true;
		return yy::parser::make_newline();
	}

	// The newline has already been emitted. If this is not the outermost context,
	// drop it and keep lexing: `yylex` reads from whatever is now on top of the stack.
	// NOTE(review): `atEof` is not cleared here; presumably it is reset elsewhere -- confirm.
	bool const isOutermost = lexerStack.size() == 1;
	if (!isOutermost) {
		lexerStack.pop_back();
		return yylex();
	}

	return yy::parser::make_YYEOF();
}
// Returns whether `c` may appear in an identifier: an unaccented Latin letter
// of either case, or a decimal digit. Only character literals are compared,
// so the result does not depend on the current locale.
static bool isIdentChar(int c) {
	bool const isUpper = 'A' <= c && c <= 'Z';
	bool const isLower = 'a' <= c && c <= 'z';
	bool const isDigit = '0' <= c && c <= '9';
	return isUpper || isLower || isDigit;
}
// Collects an identifier from the current lexer context.
// `c` is the identifier's first character, which the caller has already read;
// every following character accepted by `isIdentChar` is appended to it.
// The first character that does not belong to the identifier is left unread.
static std::string readIdent(int c) {
	std::string ident(1, static_cast<char>(c));

	auto &context = lexerStack.back();
	int next = context.file.sgetc();
	while (isIdentChar(next)) {
		ident.push_back(next);
		// `snextc` moves past the character just stored and reports the one after it.
		next = context.file.snextc();
	}

	return ident;
}
// Returns whether `c` is one of the decimal digits '0' through '9'.
static bool isDecDigit(int c) {
	return !(c < '0' || c > '9');
}
// Lexes a decimal number whose first digit `c` has already been read.
// Further digits are accumulated into a 32-bit unsigned value; `_` characters
// between or after digits are consumed but contribute nothing.
// Reading stops at the first character that is neither, which is left unread.
static yy::parser::symbol_type parseDecNumber(int c) {
	auto &context = lexerStack.back();
	uint32_t value = c - '0';

	for (;;) {
		int const next = context.file.sgetc();
		if (next != '_') {
			if (!isDecDigit(next)) {
				break;
			}
			value = value * 10 + (next - '0');
		}
		// Digit or separator: either way, it is part of the number, so consume it.
		context.file.sbumpc();
	}

	return yy::parser::make_number(value);
}
// Returns whether `c` is a binary digit, i.e. '0' or '1'.
static bool isBinDigit(int c) {
	return c == '0' || c == '1';
}
// Lexes the digits of a binary number; the prefix itself has already been read.
// `prefix` is only used to word the error message when no digit follows it,
// in which case the number 0 is returned so that parsing can go on.
// After the mandatory first digit, `_` characters are consumed and ignored.
static yy::parser::symbol_type parseBinNumber(char const *prefix) {
	auto &context = lexerStack.back();

	auto digit = context.file.sgetc();
	if (!isBinDigit(digit)) {
		scriptError(context, "No binary digits found after '%s'", prefix);
		return yy::parser::make_number(0);
	}

	uint32_t value = 0;
	do {
		// The first pass always sees a real digit, thanks to the check above.
		if (digit != '_') {
			value = value * 2 + (digit - '0');
		}
		context.file.sbumpc();
		digit = context.file.sgetc();
	} while (isBinDigit(digit) || digit == '_');

	return yy::parser::make_number(value);
}
// Returns whether `c` is one of the octal digits '0' through '7'.
static bool isOctDigit(int c) {
	return '0' <= c && c <= '7';
}
// Lexes the digits of an octal number; the prefix itself has already been read.
// `prefix` is only used to word the error message when no digit follows it,
// in which case the number 0 is returned so that parsing can go on.
// After the mandatory first digit, `_` characters are consumed and ignored.
static yy::parser::symbol_type parseOctNumber(char const *prefix) {
	auto &context = lexerStack.back();

	auto digit = context.file.sgetc();
	if (!isOctDigit(digit)) {
		scriptError(context, "No octal digits found after '%s'", prefix);
		return yy::parser::make_number(0);
	}

	uint32_t value = 0;
	do {
		// The first pass always sees a real digit, thanks to the check above.
		if (digit != '_') {
			value = value * 8 + (digit - '0');
		}
		context.file.sbumpc();
		digit = context.file.sgetc();
	} while (isOctDigit(digit) || digit == '_');

	return yy::parser::make_number(value);
}
// Returns whether `c` is a hexadecimal digit: '0'-'9', or a letter from
// 'A' to 'F' in either case.
static bool isHexDigit(int c) {
	bool const isDecimal = '0' <= c && c <= '9';
	bool const isUpperLetter = 'A' <= c && c <= 'F';
	bool const isLowerLetter = 'a' <= c && c <= 'f';
	return isDecimal || isUpperLetter || isLowerLetter;
}
@@ -237,6 +319,88 @@ static uint8_t parseHexDigit(int c) {
}
}
// Lexes the digits of a hexadecimal number; the prefix itself has already been read.
// `prefix` is only used to word the error message when no digit follows it,
// in which case the number 0 is returned so that parsing can go on.
// After the mandatory first digit, `_` characters are consumed and ignored.
static yy::parser::symbol_type parseHexNumber(char const *prefix) {
	auto &context = lexerStack.back();

	auto digit = context.file.sgetc();
	if (!isHexDigit(digit)) {
		scriptError(context, "No hexadecimal digits found after '%s'", prefix);
		return yy::parser::make_number(0);
	}

	uint32_t value = 0;
	do {
		// The first pass always sees a real digit, thanks to the check above.
		if (digit != '_') {
			value = value * 16 + parseHexDigit(digit);
		}
		context.file.sbumpc();
		digit = context.file.sgetc();
	} while (isHexDigit(digit) || digit == '_');

	return yy::parser::make_number(value);
}
// Lexes a number that starts with the decimal digit `c`, which has already been read.
// A leading '0' directly followed by x/X, o/O or b/B selects hexadecimal, octal
// or binary respectively; anything else is lexed as a plain decimal number.
static yy::parser::symbol_type parseNumber(int c) {
	auto &context = lexerStack.back();

	// Only a leading zero can introduce a radix prefix.
	if (c != '0') {
		return parseDecNumber(c);
	}

	// The prefix string handed to each parser mirrors the letter's case,
	// so that error messages quote what was actually written.
	auto const radixLetter = context.file.sgetc();
	switch (radixLetter) {
	case 'x':
	case 'X':
		context.file.sbumpc();
		return parseHexNumber(radixLetter == 'x' ? "0x" : "0X");
	case 'o':
	case 'O':
		context.file.sbumpc();
		return parseOctNumber(radixLetter == 'o' ? "0o" : "0O");
	case 'b':
	case 'B':
		context.file.sbumpc();
		return parseBinNumber(radixLetter == 'b' ? "0b" : "0B");
	default:
		// Not a prefix after all; the peeked character is left for the decimal lexer.
		return parseDecNumber(c);
	}
}
// Lexes the contents of a string literal; the opening double quote has already been read.
// Characters are collected up to the closing double quote, which is consumed.
// Reaching a newline or the end of input first reports "Unterminated string",
// and whatever was collected so far is still returned; the offending character is left unread.
// Supported escapes are \n, \r, \t, \0, \\, \" and \'; any other escaped character
// is reported as an error and then kept as-is.
static yy::parser::symbol_type parseString() {
	auto &context = lexerStack.back();
	std::string contents;

	for (;;) {
		auto ch = context.file.sgetc();
		if (ch == '"') {
			// Closing quote: consume it and stop.
			context.file.sbumpc();
			break;
		}
		if (ch == EOF || isNewline(ch)) {
			scriptError(context, "Unterminated string");
			break;
		}
		context.file.sbumpc();

		if (ch == '\\') {
			// The backslash is gone; now look at the character it escapes.
			ch = context.file.sgetc();
			if (ch == EOF || isNewline(ch)) {
				scriptError(context, "Unterminated string");
				break;
			}
			switch (ch) {
			case 'n':
				ch = '\n';
				break;
			case 'r':
				ch = '\r';
				break;
			case 't':
				ch = '\t';
				break;
			case '0':
				ch = '\0';
				break;
			case '\\':
			case '"':
			case '\'':
				// These escape to themselves.
				break;
			default:
				scriptError(context, "Cannot escape character %s", printChar(ch));
				break;
			}
			context.file.sbumpc();
		}

		contents.push_back(ch);
	}

	return yy::parser::make_string(std::move(contents));
}
yy::parser::symbol_type yylex() {
auto &context = lexerStack.back();
auto c = context.file.sbumpc();
@@ -254,23 +418,7 @@ yy::parser::symbol_type yylex() {
// Alright, what token should we return?
if (c == EOF) {
// Basically yywrap().
if (lexerStack.size() != 1) {
if (!atEof) {
// Inject a newline at EOF to simplify parsing.
atEof = true;
return yy::parser::make_newline();
} else {
lexerStack.pop_back();
return yylex();
}
} else if (!atEof) {
// Inject a newline at EOF to simplify parsing.
atEof = true;
return yy::parser::make_newline();
} else {
return yy::parser::make_YYEOF();
}
return yywrap();
} else if (c == ',') {
return yy::parser::make_COMMA();
} else if (isNewline(c)) {
@@ -280,85 +428,21 @@ yy::parser::symbol_type yylex() {
}
return yy::parser::make_newline();
} else if (c == '"') {
std::string str;
for (c = context.file.sgetc(); c != '"'; c = context.file.sgetc()) {
if (c == EOF || isNewline(c)) {
scriptError(context, "Unterminated string");
break;
}
context.file.sbumpc();
if (c == '\\') {
c = context.file.sgetc();
if (c == EOF || isNewline(c)) {
scriptError(context, "Unterminated string");
break;
} else if (c == 'n') {
c = '\n';
} else if (c == 'r') {
c = '\r';
} else if (c == 't') {
c = '\t';
} else if (c != '\\' && c != '"' && c != '\'') {
scriptError(context, "Cannot escape character %s", printChar(c));
}
context.file.sbumpc();
}
str.push_back(c);
}
if (c == '"') {
context.file.sbumpc();
}
return yy::parser::make_string(std::move(str));
return parseString();
} else if (c == '$') {
c = context.file.sgetc();
if (!isHexDigit(c)) {
scriptError(context, "No hexadecimal digits found after '$'");
return yy::parser::make_number(0);
}
uint32_t number = parseHexDigit(c);
context.file.sbumpc();
for (c = context.file.sgetc(); isHexDigit(c); c = context.file.sgetc()) {
number = number * 16 + parseHexDigit(c);
context.file.sbumpc();
}
return yy::parser::make_number(number);
return parseHexNumber("$");
} else if (c == '%') {
c = context.file.sgetc();
if (!isBinDigit(c)) {
scriptError(context, "No binary digits found after '%%'");
return yy::parser::make_number(0);
}
uint32_t number = c - '0';
context.file.sbumpc();
for (c = context.file.sgetc(); isBinDigit(c); c = context.file.sgetc()) {
number = number * 2 + (c - '0');
context.file.sbumpc();
}
return yy::parser::make_number(number);
return parseBinNumber("%");
} else if (c == '&') {
return parseOctNumber("&");
} else if (isDecDigit(c)) {
uint32_t number = c - '0';
for (c = context.file.sgetc(); isDecDigit(c); c = context.file.sgetc()) {
number = number * 10 + (c - '0');
context.file.sbumpc();
}
return yy::parser::make_number(number);
return parseNumber(c);
} else if (isIdentChar(c)) { // Note that we match these *after* digit characters!
std::string ident;
auto strUpperCmp = [](char cmp, char ref) {
// `locale::classic()` yields the "C" locale.
assume(!std::use_facet<std::ctype<char>>(std::locale::classic())
.is(std::ctype_base::lower, ref));
return std::use_facet<std::ctype<char>>(std::locale::classic()).toupper(cmp) == ref;
};
std::string ident = readIdent(c);
ident.push_back(c);
for (c = context.file.sgetc(); isIdentChar(c); c = context.file.snextc()) {
ident.push_back(c);
}
auto strUpperCmp = [](char cmp, char ref) {
return toupper(cmp) == ref;
};
for (SectionType type : EnumSeq(SECTTYPE_INVALID)) {
if (std::equal(RANGE(ident), RANGE(sectionTypeInfo[type].name), strUpperCmp)) {

View File

@@ -1,5 +1,9 @@
ROM0
org 42
org %101010
org $2A
org 4_2
org %10_10_10
org &52_
org $2A_
org 0b101_010
org 0o5_2
org 0x2_A
org 41 ; Error!

View File

@@ -1,2 +1,2 @@
error: script-num-fmt.link(5): Cannot decrease the current address (from $002a to $0029)
error: script-num-fmt.link(9): Cannot decrease the current address (from $002a to $0029)
Linking failed with 1 error

View File

@@ -0,0 +1,8 @@
ROM0
"ROM0"
"\\\"\'\n\r\t\0"
ROMX 1
"ROM1"
ROMX 2
"ROM2 1K"
"ROM2 1"

View File

View File

@@ -7,3 +7,4 @@ SECTION "ROM2 1K", ROMX,BANK[2]
ds $1000
SECTION "ROM2 1", ROMX,BANK[2]
ds 1
SECTION "\\\"\'\n\r\t\0", ROM0