Disable EQUS expansion for raw symbols (by parsing them as strings) (#1648)

This commit is contained in:
Rangi
2025-02-15 10:44:51 +01:00
committed by GitHub
parent 3feb75f84f
commit b2e865ee2a
8 changed files with 206 additions and 17 deletions

View File

@@ -1125,7 +1125,9 @@ Additionally, label names can contain up to a single dot
.Ql \&. , .Ql \&. ,
which may not be the first character. which may not be the first character.
.Pp .Pp
A symbol cannot have the same name as a reserved keyword, unless it is prefixed by a hash A symbol cannot have the same name as a reserved keyword, unless its name is a
.Dq raw identifier
prefixed by a hash
.Sq # . .Sq # .
For example, For example,
.Ql #load .Ql #load
@@ -1300,7 +1302,7 @@ it at the same time.
below). below).
.Ss Numeric constants .Ss Numeric constants
.Ic EQU .Ic EQU
is used to define immutable numeric symbols. is used to define numeric constant symbols.
Unlike Unlike
.Sq = .Sq =
above, constants defined this way cannot be redefined. above, constants defined this way cannot be redefined.
@@ -1408,6 +1410,8 @@ This expansion is disabled in a few contexts:
and and
.Ql MACRO name .Ql MACRO name
will not expand string constants in their names. will not expand string constants in their names.
Expansion is also disabled if the string constant's name is a raw identifier prefixed by a hash
.Sq # .
.Bd -literal -offset indent .Bd -literal -offset indent
DEF COUNTREG EQUS "[hl+]" DEF COUNTREG EQUS "[hl+]"
ld a, COUNTREG ld a, COUNTREG
@@ -1873,7 +1877,7 @@ being the second, and so on. Since there are only nine digits, you can only use
To use the rest, you put the argument number in angle brackets, like To use the rest, you put the argument number in angle brackets, like
.Ic \e<10> . .Ic \e<10> .
.Pp .Pp
This bracketed syntax supports decimal numbers and numeric constant symbols. This bracketed syntax supports decimal numbers and numeric symbols.
For example, For example,
.Ql \e<_NARG> .Ql \e<_NARG>
will get the last argument. will get the last argument.

View File

@@ -1915,8 +1915,8 @@ static Token yylex_NORMAL() {
// `token` is either a `SYMBOL` or a `LOCAL`, and both have a `std::string` value. // `token` is either a `SYMBOL` or a `LOCAL`, and both have a `std::string` value.
assume(token.value.holds<std::string>()); assume(token.value.holds<std::string>());
// Local symbols cannot be string expansions // Raw symbols and local symbols cannot be string expansions
if (token.type == T_(SYMBOL) && lexerState->expandStrings) { if (!raw && token.type == T_(SYMBOL) && lexerState->expandStrings) {
// Attempt string expansion // Attempt string expansion
Symbol const *sym = sym_FindExactSymbol(token.value.get<std::string>()); Symbol const *sym = sym_FindExactSymbol(token.value.get<std::string>());

View File

@@ -80,6 +80,17 @@
static void failAssert(AssertionType type); static void failAssert(AssertionType type);
static void failAssertMsg(AssertionType type, std::string const &message); static void failAssertMsg(AssertionType type, std::string const &message);
template <typename N, typename S>
static auto handleSymbolByType(std::string const &symName, N numCallback, S strCallback) {
if (Symbol *sym = sym_FindScopedSymbol(symName); sym && sym->type == SYM_EQUS) {
return strCallback(*sym->getEqus());
} else {
Expression expr;
expr.makeSymbol(symName);
return numCallback(expr);
}
}
// The CPU encodes instructions in a logical way, so most instructions actually follow patterns. // The CPU encodes instructions in a logical way, so most instructions actually follow patterns.
// These enums thus help with bit twiddling to compute opcodes. // These enums thus help with bit twiddling to compute opcodes.
enum { REG_B, REG_C, REG_D, REG_E, REG_H, REG_L, REG_HL_IND, REG_A }; enum { REG_B, REG_C, REG_D, REG_E, REG_H, REG_L, REG_HL_IND, REG_A };
@@ -343,6 +354,7 @@
// Strings // Strings
%type <std::string> string %type <std::string> string
%type <std::string> string_literal
%type <std::string> strcat_args %type <std::string> strcat_args
// Strings used for identifiers // Strings used for identifiers
%type <std::string> def_id %type <std::string> def_id
@@ -1210,10 +1222,17 @@ print_expr:
relocexpr_no_str { relocexpr_no_str {
printf("$%" PRIX32, $1.getConstVal()); printf("$%" PRIX32, $1.getConstVal());
} }
| string { | string_literal {
// Allow printing NUL characters // Allow printing NUL characters
fwrite($1.data(), 1, $1.length(), stdout); fwrite($1.data(), 1, $1.length(), stdout);
} }
| scoped_sym {
handleSymbolByType(
$1,
[](Expression const &expr) { printf("$%" PRIX32, expr.getConstVal()); },
[](std::string const &str) { fwrite(str.data(), 1, str.length(), stdout); }
);
}
; ;
reloc_3bit: reloc_3bit:
@@ -1233,10 +1252,23 @@ constlist_8bit_entry:
$1.checkNBit(8); $1.checkNBit(8);
sect_RelByte($1, 0); sect_RelByte($1, 0);
} }
| string { | string_literal {
std::vector<int32_t> output = charmap_Convert($1); std::vector<int32_t> output = charmap_Convert($1);
sect_ByteString(output); sect_ByteString(output);
} }
| scoped_sym {
handleSymbolByType(
$1,
[](Expression const &expr) {
expr.checkNBit(8);
sect_RelByte(expr, 0);
},
[](std::string const &str) {
std::vector<int32_t> output = charmap_Convert(str);
sect_ByteString(output);
}
);
}
; ;
constlist_16bit: constlist_16bit:
@@ -1249,10 +1281,23 @@ constlist_16bit_entry:
$1.checkNBit(16); $1.checkNBit(16);
sect_RelWord($1, 0); sect_RelWord($1, 0);
} }
| string { | string_literal {
std::vector<int32_t> output = charmap_Convert($1); std::vector<int32_t> output = charmap_Convert($1);
sect_WordString(output); sect_WordString(output);
} }
| scoped_sym {
handleSymbolByType(
$1,
[](Expression const &expr) {
expr.checkNBit(16);
sect_RelWord(expr, 0);
},
[](std::string const &str) {
std::vector<int32_t> output = charmap_Convert(str);
sect_WordString(output);
}
);
}
; ;
constlist_32bit: constlist_32bit:
@@ -1264,10 +1309,20 @@ constlist_32bit_entry:
relocexpr_no_str { relocexpr_no_str {
sect_RelLong($1, 0); sect_RelLong($1, 0);
} }
| string { | string_literal {
std::vector<int32_t> output = charmap_Convert($1); std::vector<int32_t> output = charmap_Convert($1);
sect_LongString(output); sect_LongString(output);
} }
| scoped_sym {
handleSymbolByType(
$1,
[](Expression const &expr) { sect_RelLong(expr, 0); },
[](std::string const &str) {
std::vector<int32_t> output = charmap_Convert(str);
sect_LongString(output);
}
);
}
; ;
reloc_8bit: reloc_8bit:
@@ -1299,17 +1354,26 @@ relocexpr:
relocexpr_no_str { relocexpr_no_str {
$$ = std::move($1); $$ = std::move($1);
} }
| string { | string_literal {
std::vector<int32_t> output = charmap_Convert($1); std::vector<int32_t> output = charmap_Convert($1);
$$.makeNumber(strToNum(output)); $$.makeNumber(strToNum(output));
} }
| scoped_sym {
$$ = handleSymbolByType(
$1,
[](Expression const &expr) { return expr; },
[](std::string const &str) {
std::vector<int32_t> output = charmap_Convert(str);
Expression expr;
expr.makeNumber(strToNum(output));
return expr;
}
);
}
; ;
relocexpr_no_str: relocexpr_no_str:
scoped_sym { NUMBER {
$$.makeSymbol($1);
}
| NUMBER {
$$.makeNumber($1); $$.makeNumber($1);
} }
| OP_LOGICNOT relocexpr %prec NEG { | OP_LOGICNOT relocexpr %prec NEG {
@@ -1403,7 +1467,7 @@ relocexpr_no_str:
// '@' is also a SYMBOL; it is handled here // '@' is also a SYMBOL; it is handled here
$$.makeBankSymbol($3); $$.makeBankSymbol($3);
} }
| OP_BANK LPAREN string RPAREN { | OP_BANK LPAREN string_literal RPAREN {
$$.makeBankSection($3); $$.makeBankSection($3);
} }
| OP_SIZEOF LPAREN string RPAREN { | OP_SIZEOF LPAREN string RPAREN {
@@ -1540,7 +1604,7 @@ precision_arg:
} }
; ;
string: string_literal:
STRING { STRING {
$$ = std::move($1); $$ = std::move($1);
} }
@@ -1625,6 +1689,19 @@ string:
} }
; ;
// A string value is either a `string_literal`, or a symbol name that must refer to
// a string constant (`SYM_EQUS`). For any other symbol name (or one that is not found),
// an error is reported and `$$` is not assigned.
string:
	  string_literal {
		$$ = std::move($1);
	}
	| scoped_sym {
		Symbol *sym = sym_FindScopedSymbol($1);
		if (sym == nullptr || sym->type != SYM_EQUS) {
			::error("'%s' is not a string symbol\n", $1.c_str());
		} else {
			$$ = *sym->getEqus();
		}
	}
;
strcat_args: strcat_args:
string { string {
$$ = std::move($1); $$ = std::move($1);
@@ -1649,10 +1726,20 @@ strfmt_va_args:
$$ = std::move($1); $$ = std::move($1);
$$.args.push_back(static_cast<uint32_t>($3.getConstVal())); $$.args.push_back(static_cast<uint32_t>($3.getConstVal()));
} }
| strfmt_va_args COMMA string { | strfmt_va_args COMMA string_literal {
$$ = std::move($1); $$ = std::move($1);
$$.args.push_back(std::move($3)); $$.args.push_back(std::move($3));
} }
| strfmt_va_args COMMA scoped_sym {
$$ = std::move($1);
handleSymbolByType(
$3,
[&](Expression const &expr) {
$$.args.push_back(static_cast<uint32_t>(expr.getConstVal()));
},
[&](std::string const &str) { $$.args.push_back(str); }
);
}
; ;
section: section:

View File

@@ -0,0 +1,29 @@
def n equ 42 ; numeric constant (comments in this file are trailing only, so line numbers stay put)
def s equs "hello" ; string constant
macro m
endm
assert (#n) == 42
assert (#s) == $656c6c6f ; expected: -Wobsolete warning, multi-unit string treated as a number
assert (#m) == 0 ; expected error: 'm' is not a numeric symbol
assert (#u) == 0 ; u is never defined in this file
assert strlen(#n) == 0 ; expected error: 'n' is not a string symbol
assert strlen(#s) == 5
assert strlen(#m) == 0 ; expected error: 'm' is not a string symbol
assert strlen(#u) == 0 ; expected error: 'u' is not a string symbol
def d_n = (#n)
def d_s = (#s) ; expected: -Wobsolete warning, multi-unit string treated as a number
def d_m = (#m) ; expected error: 'm' is not a numeric symbol
def d_u = (#u) ; expected error: 'u' is not constant at assembly time
def s_n equs #n ; expected error: 'n' is not a string symbol
def s_s equs #s
def s_m equs #m ; expected error: 'm' is not a string symbol
def s_u equs #u ; expected error: 'u' is not a string symbol
purge #s
purge #s ; expected error: 's' was already purged
assert (#s) == 0
assert strlen(#s) == 0 ; expected error: 's' is not a string symbol

View File

@@ -0,0 +1,27 @@
warning: raw-string-symbol-errors.asm(7): [-Wobsolete]
Treating multi-unit strings as numbers is deprecated
error: raw-string-symbol-errors.asm(8):
'm' is not a numeric symbol
error: raw-string-symbol-errors.asm(11):
'n' is not a string symbol
error: raw-string-symbol-errors.asm(13):
'm' is not a string symbol
error: raw-string-symbol-errors.asm(14):
'u' is not a string symbol
warning: raw-string-symbol-errors.asm(17): [-Wobsolete]
Treating multi-unit strings as numbers is deprecated
error: raw-string-symbol-errors.asm(18):
'm' is not a numeric symbol
error: raw-string-symbol-errors.asm(19):
Expected constant expression: 'u' is not constant at assembly time
error: raw-string-symbol-errors.asm(21):
'n' is not a string symbol
error: raw-string-symbol-errors.asm(23):
'm' is not a string symbol
error: raw-string-symbol-errors.asm(24):
'u' is not a string symbol
error: raw-string-symbol-errors.asm(27):
's' was already purged
error: raw-string-symbol-errors.asm(29):
's' is not a string symbol
error: Assembly aborted (11 errors)!

View File

@@ -0,0 +1,34 @@
; Exercises raw symbols (names prefixed with a hash) in places that take strings or numbers.
opt Wno-unmapped-char
def hello equs "world"
def name equs "hello" ; the contents of name spell the name of another string constant
println "{name}" ; expected output: hello
println #name ; expected output: hello
assert !strcmp(strsub(#name, 1, 4), "hell")
assert strlen(#hello) == charlen(#hello)
assert strlen("{hello}") == 5
def multi equs """the quick
brown fox"""
println #multi ; expected output: the two lines of the string above
def char equs "A"
def n = #char
println n ; expected output: $41
def n = (#char)
println n ; expected output: $41
def n = 1 + #char
println n ; expected output: $42
assert #char == $41
def fmt equs "%s %s %d"
println strfmt(#fmt, #name, #hello, (#char)) ; expected output: hello world 65
purge #name
assert !def(name) && !def(#name) && def(hello)
section "test", rom0
#label: ; a raw identifier used as a label name
db #hello
dw #hello
dw BANK(#label), #label

View File

@@ -0,0 +1,8 @@
hello
hello
the quick
brown fox
$41
$41
$42
hello world 65

Binary file not shown.