From b438c83bda55fc5505e50f2531c52e695ce86e32 Mon Sep 17 00:00:00 2001 From: Sylvie <35663410+Rangi42@users.noreply.github.com> Date: Wed, 21 Aug 2024 13:31:44 -0400 Subject: [PATCH] Implement a '#' prefix for raw identifiers that may alias keywords (#1480) * Implement a '#' prefix for raw identifiers that may alias keywords * Review comments * Disallow hashless raw identifiers in interpolations * Run clang-format --- man/rgbasm.5 | 15 ++++++++- src/asm/lexer.cpp | 52 +++++++++++++++++++++++------- test/asm/empty-raw-identifier.asm | 5 +++ test/asm/empty-raw-identifier.err | 5 +++ test/asm/empty-raw-identifier.out | 2 ++ test/asm/interpolation.asm | 3 ++ test/asm/interpolation.err | 4 ++- test/asm/interpolation.out | 1 + test/asm/raw-identifiers.asm | 36 +++++++++++++++++++++ test/asm/raw-identifiers.out | 4 +++ test/asm/raw-identifiers.out.bin | Bin 0 -> 4 bytes 11 files changed, 114 insertions(+), 13 deletions(-) create mode 100644 test/asm/empty-raw-identifier.asm create mode 100644 test/asm/empty-raw-identifier.err create mode 100644 test/asm/empty-raw-identifier.out create mode 100644 test/asm/raw-identifiers.asm create mode 100644 test/asm/raw-identifiers.out create mode 100644 test/asm/raw-identifiers.out.bin diff --git a/man/rgbasm.5 b/man/rgbasm.5 index 2755d14f..02678262 100644 --- a/man/rgbasm.5 +++ b/man/rgbasm.5 @@ -1068,7 +1068,20 @@ Additionally, label names can contain up to a single dot .Ql \&. , which may not be the first character. .Pp -A symbol cannot have the same name as a reserved keyword. +A symbol cannot have the same name as a reserved keyword, unless it is prefixed by a hash +.Sq # . +For example, +.Ql #load +denotes a symbol named +.Ql load , +and +.Ql #LOAD +denotes a different symbol named +.Ql LOAD ; +in both cases the +.Sq # +prevents them from being treated as the keyword +.Ic LOAD . .Ss Labels One of the assembler's main tasks is to keep track of addresses for you, so you can work with meaningful names instead of .Dq magic diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index e1be3a7e..1f6bd20a 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -596,7 +596,16 @@ static uint32_t readBracketedMacroArgNum() { if (c >= '0' && c <= '9') { num = readNumber(10, 0); - } else if (startsIdentifier(c)) { + } else if (startsIdentifier(c) || c == '#') { + if (c == '#') { + shiftChar(); + c = peek(); + if (!startsIdentifier(c)) { + error("Empty raw symbol in bracketed macro argument\n"); + return 0; + } + } + std::string symName; for (; continuesIdentifier(c); c = peek()) { @@ -1138,8 +1147,7 @@ static bool continuesIdentifier(int c) { return startsIdentifier(c) || (c <= '9' && c >= '0') || c == '#' || c == '@'; } -static Token readIdentifier(char firstChar) { - // Lex while checking for a keyword +static Token readIdentifier(char firstChar, bool raw) { std::string identifier(1, firstChar); int tokenType = firstChar == '.' ? T_(LOCAL_ID) : T_(ID); @@ -1155,9 +1163,13 @@ static Token readIdentifier(char firstChar) { tokenType = T_(LOCAL_ID); } - // Attempt to check for a keyword - auto search = keywordDict.find(identifier.c_str()); - return search != keywordDict.end() ? Token(search->second) : Token(tokenType, identifier); + // Attempt to check for a keyword if the identifier is not raw + if (!raw) { + if (auto search = keywordDict.find(identifier.c_str()); search != keywordDict.end()) + return Token(search->second); + } + + return Token(tokenType, identifier); } // Functions to read strings @@ -1207,6 +1219,19 @@ static std::shared_ptr readInterpolation(size_t depth) { // Don't return before `lexerState->disableInterpolation` is reset! lexerState->disableInterpolation = disableInterpolation; + if (fmtBuf.starts_with('#')) { + // Skip a '#' raw identifier prefix, but after expanding any nested interpolations. + fmtBuf.erase(0, 1); + } else if (keywordDict.find(fmtBuf.c_str()) != keywordDict.end()) { + // Don't allow symbols that alias keywords without a '#' prefix. + error( + "Interpolated symbol \"%s\" is a reserved keyword; add a '#' prefix to use it as a raw " + "symbol\n", + fmtBuf.c_str() + ); + return nullptr; + } + Symbol const *sym = sym_FindScopedValidSymbol(fmtBuf); if (!sym || !sym->isDefined()) { @@ -1781,8 +1806,13 @@ static Token yylex_NORMAL() { // Handle identifiers... or report garbage characters default: + bool raw = c == '#'; + if (raw && startsIdentifier(peek())) { + c = nextChar(); + } + if (startsIdentifier(c)) { - Token token = readIdentifier(c); + Token token = readIdentifier(c, raw); // An ELIF after a taken IF needs to not evaluate its condition if (token.type == T_(POP_ELIF) && lexerState->lastToken == T_(NEWLINE) @@ -2017,7 +2047,7 @@ static Token skipIfBlock(bool toEndc) { if (startsIdentifier(c)) { shiftChar(); - switch (Token token = readIdentifier(c); token.type) { + switch (Token token = readIdentifier(c, false); token.type) { case T_(POP_IF): lexer_IncIFDepth(); break; @@ -2103,7 +2133,7 @@ static Token yylex_SKIP_TO_ENDR() { if (startsIdentifier(c)) { shiftChar(); - switch (readIdentifier(c).type) { + switch (readIdentifier(c, false).type) { case T_(POP_FOR): case T_(POP_REPT): depth++; @@ -2250,7 +2280,7 @@ Capture lexer_CaptureRept() { } while (isWhitespace(c)); // Now, try to match `REPT`, `FOR` or `ENDR` as a **whole** identifier if (startsIdentifier(c)) { - switch (readIdentifier(c).type) { + switch (readIdentifier(c, false).type) { case T_(POP_REPT): case T_(POP_FOR): depth++; @@ -2303,7 +2333,7 @@ Capture lexer_CaptureMacro() { } while (isWhitespace(c)); // Now, try to match `ENDM` as a **whole** identifier if (startsIdentifier(c)) { - switch (readIdentifier(c).type) { + switch (readIdentifier(c, false).type) { case T_(POP_ENDM): endCapture(capture); // The ENDM has been captured, but we don't want it! diff --git a/test/asm/empty-raw-identifier.asm b/test/asm/empty-raw-identifier.asm new file mode 100644 index 00000000..b1998924 --- /dev/null +++ b/test/asm/empty-raw-identifier.asm @@ -0,0 +1,5 @@ +MACRO #macro + println "all args: \#" + println "bad args: \, \<#>" +ENDM + #macro a, #b, c, 1, #2, 3 diff --git a/test/asm/empty-raw-identifier.err b/test/asm/empty-raw-identifier.err new file mode 100644 index 00000000..ad10640c --- /dev/null +++ b/test/asm/empty-raw-identifier.err @@ -0,0 +1,5 @@ +error: empty-raw-identifier.asm(5) -> empty-raw-identifier.asm::macro(3): + Invalid character in bracketed macro argument '?' +error: empty-raw-identifier.asm(5) -> empty-raw-identifier.asm::macro(3): + Empty raw symbol in bracketed macro argument +error: Assembly aborted (2 errors)! diff --git a/test/asm/empty-raw-identifier.out b/test/asm/empty-raw-identifier.out new file mode 100644 index 00000000..bd8297be --- /dev/null +++ b/test/asm/empty-raw-identifier.out @@ -0,0 +1,2 @@ +all args: a,#b,c,1,#2,3 +bad args: >, > diff --git a/test/asm/interpolation.asm b/test/asm/interpolation.asm index 90cda992..fc21db07 100644 --- a/test/asm/interpolation.asm +++ b/test/asm/interpolation.asm @@ -27,3 +27,6 @@ PRINTLN "label {label}" MACRO foo ENDM PRINTLN "foo {foo}" + +; hashless keyword +PRINTLN "xor {xor}" diff --git a/test/asm/interpolation.err b/test/asm/interpolation.err index 6622a426..06ce79ba 100644 --- a/test/asm/interpolation.err +++ b/test/asm/interpolation.err @@ -4,4 +4,6 @@ error: interpolation.asm(22): Interpolated symbol "label" does not exist error: interpolation.asm(29): Interpolated symbol "foo" is not a numeric or string symbol -error: Assembly aborted (3 errors)! +error: interpolation.asm(32): + Interpolated symbol "xor" is a reserved keyword; add a '#' prefix to use it as a raw symbol +error: Assembly aborted (4 errors)! diff --git a/test/asm/interpolation.out b/test/asm/interpolation.out index 32890c61..c744f68d 100644 --- a/test/asm/interpolation.out +++ b/test/asm/interpolation.out @@ -3,3 +3,4 @@ undef label label $7E foo +xor diff --git a/test/asm/raw-identifiers.asm b/test/asm/raw-identifiers.asm new file mode 100644 index 00000000..017fc561 --- /dev/null +++ b/test/asm/raw-identifiers.asm @@ -0,0 +1,36 @@ +def #DEF equ 1 +def #def equ 2 +def #ghi equ 3 +export #def, #ghi + +def #align = 0 +def #rb rb #def + +MACRO #macro + println "\<#def> is not \<#DEF>" +ENDM + #macro first, second + purge #macro + assert !def(#macro) + +section "section", rom0 +#section:: + dw #section +#.rom0: + db BANK(#section.rom0) +#section.romx: + println "section.romx is in ", SECTION(.romx) + +def #sub equs "def" +{#sub} #add equs "#" + +for #for, {{#add}{#sub}} + println "for == ", #for +endr + assert #for == 2 + assert !{#sub}(#FOR) + + newcharmap #charmap, #main + charmap "#", $42 + setcharmap #charmap + db "#" diff --git a/test/asm/raw-identifiers.out b/test/asm/raw-identifiers.out new file mode 100644 index 00000000..420e54d3 --- /dev/null +++ b/test/asm/raw-identifiers.out @@ -0,0 +1,4 @@ +second is not first +section.romx is in section +for == $0 +for == $1 diff --git a/test/asm/raw-identifiers.out.bin b/test/asm/raw-identifiers.out.bin new file mode 100644 index 0000000000000000000000000000000000000000..041f2e07ab59216242655117d1507d41fba46b10 GIT binary patch literal 4 LcmZQzU~mEe07d{q literal 0 HcmV?d00001