Implement a '#' prefix for raw identifiers that may alias keywords (#1480)

* Implement a '#' prefix for raw identifiers that may alias keywords

* Address review comments

* Disallow hashless raw identifiers in interpolations

* Run clang-format
This commit is contained in:
Sylvie
2024-08-21 13:31:44 -04:00
committed by GitHub
parent 82e81ab1da
commit b438c83bda
11 changed files with 114 additions and 13 deletions

View File

@@ -1068,7 +1068,20 @@ Additionally, label names can contain up to a single dot
.Ql \&. , .Ql \&. ,
which may not be the first character. which may not be the first character.
.Pp .Pp
A symbol cannot have the same name as a reserved keyword. A symbol cannot have the same name as a reserved keyword, unless it is prefixed by a hash
.Sq # .
For example,
.Ql #load
denotes a symbol named
.Ql load ,
and
.Ql #LOAD
denotes a different symbol named
.Ql LOAD ;
in both cases the
.Sq #
prevents them from being treated as the keyword
.Ic LOAD .
.Ss Labels .Ss Labels
One of the assembler's main tasks is to keep track of addresses for you, so you can work with meaningful names instead of One of the assembler's main tasks is to keep track of addresses for you, so you can work with meaningful names instead of
.Dq magic .Dq magic

View File

@@ -596,7 +596,16 @@ static uint32_t readBracketedMacroArgNum() {
if (c >= '0' && c <= '9') { if (c >= '0' && c <= '9') {
num = readNumber(10, 0); num = readNumber(10, 0);
} else if (startsIdentifier(c)) { } else if (startsIdentifier(c) || c == '#') {
if (c == '#') {
shiftChar();
c = peek();
if (!startsIdentifier(c)) {
error("Empty raw symbol in bracketed macro argument\n");
return 0;
}
}
std::string symName; std::string symName;
for (; continuesIdentifier(c); c = peek()) { for (; continuesIdentifier(c); c = peek()) {
@@ -1138,8 +1147,7 @@ static bool continuesIdentifier(int c) {
return startsIdentifier(c) || (c <= '9' && c >= '0') || c == '#' || c == '@'; return startsIdentifier(c) || (c <= '9' && c >= '0') || c == '#' || c == '@';
} }
static Token readIdentifier(char firstChar) { static Token readIdentifier(char firstChar, bool raw) {
// Lex while checking for a keyword
std::string identifier(1, firstChar); std::string identifier(1, firstChar);
int tokenType = firstChar == '.' ? T_(LOCAL_ID) : T_(ID); int tokenType = firstChar == '.' ? T_(LOCAL_ID) : T_(ID);
@@ -1155,9 +1163,13 @@ static Token readIdentifier(char firstChar) {
tokenType = T_(LOCAL_ID); tokenType = T_(LOCAL_ID);
} }
// Attempt to check for a keyword // Attempt to check for a keyword if the identifier is not raw
auto search = keywordDict.find(identifier.c_str()); if (!raw) {
return search != keywordDict.end() ? Token(search->second) : Token(tokenType, identifier); if (auto search = keywordDict.find(identifier.c_str()); search != keywordDict.end())
return Token(search->second);
}
return Token(tokenType, identifier);
} }
// Functions to read strings // Functions to read strings
@@ -1207,6 +1219,19 @@ static std::shared_ptr<std::string> readInterpolation(size_t depth) {
// Don't return before `lexerState->disableInterpolation` is reset! // Don't return before `lexerState->disableInterpolation` is reset!
lexerState->disableInterpolation = disableInterpolation; lexerState->disableInterpolation = disableInterpolation;
if (fmtBuf.starts_with('#')) {
// Skip a '#' raw identifier prefix, but after expanding any nested interpolations.
fmtBuf.erase(0, 1);
} else if (keywordDict.find(fmtBuf.c_str()) != keywordDict.end()) {
// Don't allow symbols that alias keywords without a '#' prefix.
error(
"Interpolated symbol \"%s\" is a reserved keyword; add a '#' prefix to use it as a raw "
"symbol\n",
fmtBuf.c_str()
);
return nullptr;
}
Symbol const *sym = sym_FindScopedValidSymbol(fmtBuf); Symbol const *sym = sym_FindScopedValidSymbol(fmtBuf);
if (!sym || !sym->isDefined()) { if (!sym || !sym->isDefined()) {
@@ -1781,8 +1806,13 @@ static Token yylex_NORMAL() {
// Handle identifiers... or report garbage characters // Handle identifiers... or report garbage characters
default: default:
bool raw = c == '#';
if (raw && startsIdentifier(peek())) {
c = nextChar();
}
if (startsIdentifier(c)) { if (startsIdentifier(c)) {
Token token = readIdentifier(c); Token token = readIdentifier(c, raw);
// An ELIF after a taken IF needs to not evaluate its condition // An ELIF after a taken IF needs to not evaluate its condition
if (token.type == T_(POP_ELIF) && lexerState->lastToken == T_(NEWLINE) if (token.type == T_(POP_ELIF) && lexerState->lastToken == T_(NEWLINE)
@@ -2017,7 +2047,7 @@ static Token skipIfBlock(bool toEndc) {
if (startsIdentifier(c)) { if (startsIdentifier(c)) {
shiftChar(); shiftChar();
switch (Token token = readIdentifier(c); token.type) { switch (Token token = readIdentifier(c, false); token.type) {
case T_(POP_IF): case T_(POP_IF):
lexer_IncIFDepth(); lexer_IncIFDepth();
break; break;
@@ -2103,7 +2133,7 @@ static Token yylex_SKIP_TO_ENDR() {
if (startsIdentifier(c)) { if (startsIdentifier(c)) {
shiftChar(); shiftChar();
switch (readIdentifier(c).type) { switch (readIdentifier(c, false).type) {
case T_(POP_FOR): case T_(POP_FOR):
case T_(POP_REPT): case T_(POP_REPT):
depth++; depth++;
@@ -2250,7 +2280,7 @@ Capture lexer_CaptureRept() {
} while (isWhitespace(c)); } while (isWhitespace(c));
// Now, try to match `REPT`, `FOR` or `ENDR` as a **whole** identifier // Now, try to match `REPT`, `FOR` or `ENDR` as a **whole** identifier
if (startsIdentifier(c)) { if (startsIdentifier(c)) {
switch (readIdentifier(c).type) { switch (readIdentifier(c, false).type) {
case T_(POP_REPT): case T_(POP_REPT):
case T_(POP_FOR): case T_(POP_FOR):
depth++; depth++;
@@ -2303,7 +2333,7 @@ Capture lexer_CaptureMacro() {
} while (isWhitespace(c)); } while (isWhitespace(c));
// Now, try to match `ENDM` as a **whole** identifier // Now, try to match `ENDM` as a **whole** identifier
if (startsIdentifier(c)) { if (startsIdentifier(c)) {
switch (readIdentifier(c).type) { switch (readIdentifier(c, false).type) {
case T_(POP_ENDM): case T_(POP_ENDM):
endCapture(capture); endCapture(capture);
// The ENDM has been captured, but we don't want it! // The ENDM has been captured, but we don't want it!

View File

@@ -0,0 +1,5 @@
MACRO #macro
println "all args: \#"
println "bad args: \<?>, \<#>"
ENDM
#macro a, #b, c, 1, #2, 3

View File

@@ -0,0 +1,5 @@
error: empty-raw-identifier.asm(5) -> empty-raw-identifier.asm::macro(3):
Invalid character in bracketed macro argument '?'
error: empty-raw-identifier.asm(5) -> empty-raw-identifier.asm::macro(3):
Empty raw symbol in bracketed macro argument
error: Assembly aborted (2 errors)!

View File

@@ -0,0 +1,2 @@
all args: a,#b,c,1,#2,3
bad args: >, >

View File

@@ -27,3 +27,6 @@ PRINTLN "label {label}"
MACRO foo MACRO foo
ENDM ENDM
PRINTLN "foo {foo}" PRINTLN "foo {foo}"
; hashless keyword
PRINTLN "xor {xor}"

View File

@@ -4,4 +4,6 @@ error: interpolation.asm(22):
Interpolated symbol "label" does not exist Interpolated symbol "label" does not exist
error: interpolation.asm(29): error: interpolation.asm(29):
Interpolated symbol "foo" is not a numeric or string symbol Interpolated symbol "foo" is not a numeric or string symbol
error: Assembly aborted (3 errors)! error: interpolation.asm(32):
Interpolated symbol "xor" is a reserved keyword; add a '#' prefix to use it as a raw symbol
error: Assembly aborted (4 errors)!

View File

@@ -3,3 +3,4 @@ undef
label label
label $7E label $7E
foo foo
xor

View File

@@ -0,0 +1,36 @@
def #DEF equ 1
def #def equ 2
def #ghi equ 3
export #def, #ghi
def #align = 0
def #rb rb #def
MACRO #macro
println "\<#def> is not \<#DEF>"
ENDM
#macro first, second
purge #macro
assert !def(#macro)
section "section", rom0
#section::
dw #section
#.rom0:
db BANK(#section.rom0)
#section.romx:
println "section.romx is in ", SECTION(.romx)
def #sub equs "def"
{#sub} #add equs "#"
for #for, {{#add}{#sub}}
println "for == ", #for
endr
assert #for == 2
assert !{#sub}(#FOR)
newcharmap #charmap, #main
charmap "#", $42
setcharmap #charmap
db "#"

View File

@@ -0,0 +1,4 @@
second is not first
section.romx is in section
for == $0
for == $1

Binary file not shown.