From b2e865ee2a804a6fd5334b36abd484dbb844d3a4 Mon Sep 17 00:00:00 2001 From: Rangi <35663410+Rangi42@users.noreply.github.com> Date: Sat, 15 Feb 2025 10:44:51 +0100 Subject: [PATCH] Disable `EQUS` expansion for raw symbols (by parsing them as strings) (#1648) --- man/rgbasm.5 | 10 ++- src/asm/lexer.cpp | 4 +- src/asm/parser.y | 111 +++++++++++++++++++++++--- test/asm/raw-string-symbol-errors.asm | 29 +++++++ test/asm/raw-string-symbol-errors.err | 27 +++++++ test/asm/raw-string-symbols.asm | 34 ++++++++ test/asm/raw-string-symbols.out | 8 ++ test/asm/raw-string-symbols.out.bin | Bin 0 -> 19 bytes 8 files changed, 206 insertions(+), 17 deletions(-) create mode 100644 test/asm/raw-string-symbol-errors.asm create mode 100644 test/asm/raw-string-symbol-errors.err create mode 100644 test/asm/raw-string-symbols.asm create mode 100644 test/asm/raw-string-symbols.out create mode 100644 test/asm/raw-string-symbols.out.bin diff --git a/man/rgbasm.5 b/man/rgbasm.5 index b025011e..9be4296f 100644 --- a/man/rgbasm.5 +++ b/man/rgbasm.5 @@ -1125,7 +1125,9 @@ Additionally, label names can contain up to a single dot .Ql \&. , which may not be the first character. .Pp -A symbol cannot have the same name as a reserved keyword, unless it is prefixed by a hash +A symbol cannot have the same name as a reserved keyword, unless its name is a +.Dq raw identifier +prefixed by a hash .Sq # . For example, .Ql #load @@ -1300,7 +1302,7 @@ it at the same time. below). .Ss Numeric constants .Ic EQU -is used to define immutable numeric symbols. +is used to define numeric constant symbols. Unlike .Sq = above, constants defined this way cannot be redefined. @@ -1408,6 +1410,8 @@ This expansion is disabled in a few contexts: and .Ql MACRO name will not expand string constants in their names. +Expansion is also disabled if the string constant's name is a raw identifier prefixed by a hash +.Sq # . 
.Bd -literal -offset indent DEF COUNTREG EQUS "[hl+]" ld a, COUNTREG @@ -1873,7 +1877,7 @@ being the second, and so on. Since there are only nine digits, you can only use To use the rest, you put the argument number in angle brackets, like .Ic \e<10> . .Pp -This bracketed syntax supports decimal numbers and numeric constant symbols. +This bracketed syntax supports decimal numbers and numeric symbols. For example, .Ql \e<_NARG> will get the last argument. diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index 46f3419a..d981c596 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -1915,8 +1915,8 @@ static Token yylex_NORMAL() { // `token` is either a `SYMBOL` or a `LOCAL`, and both have a `std::string` value. assume(token.value.holds<std::string>()); - // Local symbols cannot be string expansions - if (token.type == T_(SYMBOL) && lexerState->expandStrings) { + // Raw symbols and local symbols cannot be string expansions + if (!raw && token.type == T_(SYMBOL) && lexerState->expandStrings) { // Attempt string expansion Symbol const *sym = sym_FindExactSymbol(token.value.get<std::string>()); diff --git a/src/asm/parser.y b/src/asm/parser.y index b18fea29..d051685e 100644 --- a/src/asm/parser.y +++ b/src/asm/parser.y @@ -80,6 +80,17 @@ static void failAssert(AssertionType type); static void failAssertMsg(AssertionType type, std::string const &message); + template<typename N, typename S> + static auto handleSymbolByType(std::string const &symName, N numCallback, S strCallback) { + if (Symbol *sym = sym_FindScopedSymbol(symName); sym && sym->type == SYM_EQUS) { + return strCallback(*sym->getEqus()); + } else { + Expression expr; + expr.makeSymbol(symName); + return numCallback(expr); + } + } + // The CPU encodes instructions in a logical way, so most instructions actually follow patterns. // These enums thus help with bit twiddling to compute opcodes. 
enum { REG_B, REG_C, REG_D, REG_E, REG_H, REG_L, REG_HL_IND, REG_A }; @@ -343,6 +354,7 @@ // Strings %type <std::string> string +%type <std::string> string_literal %type <std::string> strcat_args // Strings used for identifiers %type <std::string> def_id @@ -1210,10 +1222,17 @@ print_expr: relocexpr_no_str { printf("$%" PRIX32, $1.getConstVal()); } - | string { + | string_literal { // Allow printing NUL characters fwrite($1.data(), 1, $1.length(), stdout); } + | scoped_sym { + handleSymbolByType( + $1, + [](Expression const &expr) { printf("$%" PRIX32, expr.getConstVal()); }, + [](std::string const &str) { fwrite(str.data(), 1, str.length(), stdout); } + ); + } ; reloc_3bit: @@ -1233,10 +1252,23 @@ constlist_8bit_entry: $1.checkNBit(8); sect_RelByte($1, 0); } - | string { + | string_literal { std::vector<int32_t> output = charmap_Convert($1); sect_ByteString(output); } + | scoped_sym { + handleSymbolByType( + $1, + [](Expression const &expr) { + expr.checkNBit(8); + sect_RelByte(expr, 0); + }, + [](std::string const &str) { + std::vector<int32_t> output = charmap_Convert(str); + sect_ByteString(output); + } + ); + } ; constlist_16bit: @@ -1249,10 +1281,23 @@ constlist_16bit_entry: $1.checkNBit(16); sect_RelWord($1, 0); } - | string { + | string_literal { std::vector<int32_t> output = charmap_Convert($1); sect_WordString(output); } + | scoped_sym { + handleSymbolByType( + $1, + [](Expression const &expr) { + expr.checkNBit(16); + sect_RelWord(expr, 0); + }, + [](std::string const &str) { + std::vector<int32_t> output = charmap_Convert(str); + sect_WordString(output); + } + ); + } ; constlist_32bit: @@ -1264,10 +1309,20 @@ constlist_32bit_entry: relocexpr_no_str { sect_RelLong($1, 0); } - | string { + | string_literal { std::vector<int32_t> output = charmap_Convert($1); sect_LongString(output); } + | scoped_sym { + handleSymbolByType( + $1, + [](Expression const &expr) { sect_RelLong(expr, 0); }, + [](std::string const &str) { + std::vector<int32_t> output = charmap_Convert(str); + sect_LongString(output); + } + ); + } ; reloc_8bit: @@ -1299,17 +1354,26 @@ relocexpr: 
relocexpr_no_str { $$ = std::move($1); } - | string { + | string_literal { std::vector<int32_t> output = charmap_Convert($1); $$.makeNumber(strToNum(output)); } + | scoped_sym { + $$ = handleSymbolByType( + $1, + [](Expression const &expr) { return expr; }, + [](std::string const &str) { + std::vector<int32_t> output = charmap_Convert(str); + Expression expr; + expr.makeNumber(strToNum(output)); + return expr; + } + ); + } ; relocexpr_no_str: - scoped_sym { - $$.makeSymbol($1); - } - | NUMBER { + NUMBER { $$.makeNumber($1); } | OP_LOGICNOT relocexpr %prec NEG { @@ -1403,7 +1467,7 @@ relocexpr_no_str: // '@' is also a SYMBOL; it is handled here $$.makeBankSymbol($3); } - | OP_BANK LPAREN string RPAREN { + | OP_BANK LPAREN string_literal RPAREN { $$.makeBankSection($3); } | OP_SIZEOF LPAREN string RPAREN { @@ -1540,7 +1604,7 @@ precision_arg: } ; -string: +string_literal: STRING { $$ = std::move($1); } @@ -1625,6 +1689,19 @@ string: } ; +string: + string_literal { + $$ = std::move($1); + } + | scoped_sym { + if (Symbol *sym = sym_FindScopedSymbol($1); sym && sym->type == SYM_EQUS) { + $$ = *sym->getEqus(); + } else { + ::error("'%s' is not a string symbol\n", $1.c_str()); + } + } +; + strcat_args: string { $$ = std::move($1); @@ -1649,10 +1726,20 @@ strfmt_va_args: $$ = std::move($1); $$.args.push_back(static_cast<uint32_t>($3.getConstVal())); } - | strfmt_va_args COMMA string { + | strfmt_va_args COMMA string_literal { $$ = std::move($1); $$.args.push_back(std::move($3)); } + | strfmt_va_args COMMA scoped_sym { + $$ = std::move($1); + handleSymbolByType( + $3, + [&](Expression const &expr) { + $$.args.push_back(static_cast<uint32_t>(expr.getConstVal())); + }, + [&](std::string const &str) { $$.args.push_back(str); } + ); + } ; section: diff --git a/test/asm/raw-string-symbol-errors.asm b/test/asm/raw-string-symbol-errors.asm new file mode 100644 index 00000000..d9bad0be --- /dev/null +++ b/test/asm/raw-string-symbol-errors.asm @@ -0,0 +1,29 @@ +def n equ 42 +def s equs "hello" +macro m +endm + +assert 
(#n) == 42 +assert (#s) == $656c6c6f +assert (#m) == 0 +assert (#u) == 0 + +assert strlen(#n) == 0 +assert strlen(#s) == 5 +assert strlen(#m) == 0 +assert strlen(#u) == 0 + +def d_n = (#n) +def d_s = (#s) +def d_m = (#m) +def d_u = (#u) + +def s_n equs #n +def s_s equs #s +def s_m equs #m +def s_u equs #u + +purge #s +purge #s +assert (#s) == 0 +assert strlen(#s) == 0 diff --git a/test/asm/raw-string-symbol-errors.err b/test/asm/raw-string-symbol-errors.err new file mode 100644 index 00000000..2863df6f --- /dev/null +++ b/test/asm/raw-string-symbol-errors.err @@ -0,0 +1,27 @@ +warning: raw-string-symbol-errors.asm(7): [-Wobsolete] + Treating multi-unit strings as numbers is deprecated +error: raw-string-symbol-errors.asm(8): + 'm' is not a numeric symbol +error: raw-string-symbol-errors.asm(11): + 'n' is not a string symbol +error: raw-string-symbol-errors.asm(13): + 'm' is not a string symbol +error: raw-string-symbol-errors.asm(14): + 'u' is not a string symbol +warning: raw-string-symbol-errors.asm(17): [-Wobsolete] + Treating multi-unit strings as numbers is deprecated +error: raw-string-symbol-errors.asm(18): + 'm' is not a numeric symbol +error: raw-string-symbol-errors.asm(19): + Expected constant expression: 'u' is not constant at assembly time +error: raw-string-symbol-errors.asm(21): + 'n' is not a string symbol +error: raw-string-symbol-errors.asm(23): + 'm' is not a string symbol +error: raw-string-symbol-errors.asm(24): + 'u' is not a string symbol +error: raw-string-symbol-errors.asm(27): + 's' was already purged +error: raw-string-symbol-errors.asm(29): + 's' is not a string symbol +error: Assembly aborted (11 errors)! 
diff --git a/test/asm/raw-string-symbols.asm b/test/asm/raw-string-symbols.asm new file mode 100644 index 00000000..3f08cb72 --- /dev/null +++ b/test/asm/raw-string-symbols.asm @@ -0,0 +1,34 @@ +opt Wno-unmapped-char + +def hello equs "world" +def name equs "hello" +println "{name}" +println #name +assert !strcmp(strsub(#name, 1, 4), "hell") +assert strlen(#hello) == charlen(#hello) +assert strlen("{hello}") == 5 + +def multi equs """the quick +brown fox""" +println #multi + +def char equs "A" +def n = #char +println n +def n = (#char) +println n +def n = 1 + #char +println n +assert #char == $41 + +def fmt equs "%s %s %d" +println strfmt(#fmt, #name, #hello, (#char)) + +purge #name +assert !def(name) && !def(#name) && def(hello) + +section "test", rom0 +#label: +db #hello +dw #hello +dw BANK(#label), #label diff --git a/test/asm/raw-string-symbols.out b/test/asm/raw-string-symbols.out new file mode 100644 index 00000000..9f71d245 --- /dev/null +++ b/test/asm/raw-string-symbols.out @@ -0,0 +1,8 @@ +hello +hello +the quick +brown fox +$41 +$41 +$42 +hello world 65 diff --git a/test/asm/raw-string-symbols.out.bin b/test/asm/raw-string-symbols.out.bin new file mode 100644 index 0000000000000000000000000000000000000000..ad0d52e2eeaa13f70f1f17cdd2453acebf0690fc GIT binary patch literal 19 XcmXTVFUmrQ1 literal 0 HcmV?d00001