Implement a '#' prefix for raw identifiers that may alias keywords (#1480)

* Implement a '#' prefix for raw identifiers that may alias keywords

* Review comments

* Disallow hashless raw identifiers in interpolations

* Run clang-format
This commit is contained in:
Sylvie
2024-08-21 13:31:44 -04:00
committed by GitHub
parent 82e81ab1da
commit b438c83bda
11 changed files with 114 additions and 13 deletions

View File

@@ -1068,7 +1068,20 @@ Additionally, label names can contain up to a single dot
.Ql \&. ,
which may not be the first character.
.Pp
A symbol cannot have the same name as a reserved keyword.
A symbol cannot have the same name as a reserved keyword, unless it is prefixed by a hash
.Sq # .
For example,
.Ql #load
denotes a symbol named
.Ql load ,
and
.Ql #LOAD
denotes a different symbol named
.Ql LOAD ;
in both cases the
.Sq #
prevents them from being treated as the keyword
.Ic LOAD .
.Ss Labels
One of the assembler's main tasks is to keep track of addresses for you, so you can work with meaningful names instead of
.Dq magic

View File

@@ -596,7 +596,16 @@ static uint32_t readBracketedMacroArgNum() {
if (c >= '0' && c <= '9') {
num = readNumber(10, 0);
} else if (startsIdentifier(c)) {
} else if (startsIdentifier(c) || c == '#') {
if (c == '#') {
shiftChar();
c = peek();
if (!startsIdentifier(c)) {
error("Empty raw symbol in bracketed macro argument\n");
return 0;
}
}
std::string symName;
for (; continuesIdentifier(c); c = peek()) {
@@ -1138,8 +1147,7 @@ static bool continuesIdentifier(int c) {
return startsIdentifier(c) || (c <= '9' && c >= '0') || c == '#' || c == '@';
}
static Token readIdentifier(char firstChar) {
// Lex while checking for a keyword
static Token readIdentifier(char firstChar, bool raw) {
std::string identifier(1, firstChar);
int tokenType = firstChar == '.' ? T_(LOCAL_ID) : T_(ID);
@@ -1155,9 +1163,13 @@ static Token readIdentifier(char firstChar) {
tokenType = T_(LOCAL_ID);
}
// Attempt to check for a keyword
auto search = keywordDict.find(identifier.c_str());
return search != keywordDict.end() ? Token(search->second) : Token(tokenType, identifier);
// Attempt to check for a keyword if the identifier is not raw
if (!raw) {
if (auto search = keywordDict.find(identifier.c_str()); search != keywordDict.end())
return Token(search->second);
}
return Token(tokenType, identifier);
}
// Functions to read strings
@@ -1207,6 +1219,19 @@ static std::shared_ptr<std::string> readInterpolation(size_t depth) {
// Don't return before `lexerState->disableInterpolation` is reset!
lexerState->disableInterpolation = disableInterpolation;
if (fmtBuf.starts_with('#')) {
// Skip a '#' raw identifier prefix, but after expanding any nested interpolations.
fmtBuf.erase(0, 1);
} else if (keywordDict.find(fmtBuf.c_str()) != keywordDict.end()) {
// Don't allow symbols that alias keywords without a '#' prefix.
error(
"Interpolated symbol \"%s\" is a reserved keyword; add a '#' prefix to use it as a raw "
"symbol\n",
fmtBuf.c_str()
);
return nullptr;
}
Symbol const *sym = sym_FindScopedValidSymbol(fmtBuf);
if (!sym || !sym->isDefined()) {
@@ -1781,8 +1806,13 @@ static Token yylex_NORMAL() {
// Handle identifiers... or report garbage characters
default:
bool raw = c == '#';
if (raw && startsIdentifier(peek())) {
c = nextChar();
}
if (startsIdentifier(c)) {
Token token = readIdentifier(c);
Token token = readIdentifier(c, raw);
// An ELIF after a taken IF needs to not evaluate its condition
if (token.type == T_(POP_ELIF) && lexerState->lastToken == T_(NEWLINE)
@@ -2017,7 +2047,7 @@ static Token skipIfBlock(bool toEndc) {
if (startsIdentifier(c)) {
shiftChar();
switch (Token token = readIdentifier(c); token.type) {
switch (Token token = readIdentifier(c, false); token.type) {
case T_(POP_IF):
lexer_IncIFDepth();
break;
@@ -2103,7 +2133,7 @@ static Token yylex_SKIP_TO_ENDR() {
if (startsIdentifier(c)) {
shiftChar();
switch (readIdentifier(c).type) {
switch (readIdentifier(c, false).type) {
case T_(POP_FOR):
case T_(POP_REPT):
depth++;
@@ -2250,7 +2280,7 @@ Capture lexer_CaptureRept() {
} while (isWhitespace(c));
// Now, try to match `REPT`, `FOR` or `ENDR` as a **whole** identifier
if (startsIdentifier(c)) {
switch (readIdentifier(c).type) {
switch (readIdentifier(c, false).type) {
case T_(POP_REPT):
case T_(POP_FOR):
depth++;
@@ -2303,7 +2333,7 @@ Capture lexer_CaptureMacro() {
} while (isWhitespace(c));
// Now, try to match `ENDM` as a **whole** identifier
if (startsIdentifier(c)) {
switch (readIdentifier(c).type) {
switch (readIdentifier(c, false).type) {
case T_(POP_ENDM):
endCapture(capture);
// The ENDM has been captured, but we don't want it!

View File

@@ -0,0 +1,5 @@
MACRO #macro ; the macro is named after the keyword 'macro', which the hash prefix makes usable as a symbol
println "all args: \#" ; prints every argument; hashes inside the arguments are passed through unchanged
println "bad args: \<?>, \<#>" ; both bracketed arguments are invalid: a stray '?' and a hash with no name after it
ENDM
#macro a, #b, c, 1, #2, 3

View File

@@ -0,0 +1,5 @@
error: empty-raw-identifier.asm(5) -> empty-raw-identifier.asm::macro(3):
Invalid character in bracketed macro argument '?'
error: empty-raw-identifier.asm(5) -> empty-raw-identifier.asm::macro(3):
Empty raw symbol in bracketed macro argument
error: Assembly aborted (2 errors)!

View File

@@ -0,0 +1,2 @@
all args: a,#b,c,1,#2,3
bad args: >, >

View File

@@ -27,3 +27,6 @@ PRINTLN "label {label}"
MACRO foo
ENDM
PRINTLN "foo {foo}"
; hashless keyword
PRINTLN "xor {xor}"

View File

@@ -4,4 +4,6 @@ error: interpolation.asm(22):
Interpolated symbol "label" does not exist
error: interpolation.asm(29):
Interpolated symbol "foo" is not a numeric or string symbol
error: Assembly aborted (3 errors)!
error: interpolation.asm(32):
Interpolated symbol "xor" is a reserved keyword; add a '#' prefix to use it as a raw symbol
error: Assembly aborted (4 errors)!

View File

@@ -3,3 +3,4 @@ undef
label
label $7E
foo
xor

View File

@@ -0,0 +1,36 @@
; Test for raw identifiers: a '#' prefix lets a symbol use a name that would
; otherwise be lexed as a reserved keyword.
; 'DEF' and 'def' are two distinct symbols, since symbol names are case-sensitive.
def #DEF equ 1
def #def equ 2
; NOTE(review): 'ghi' does not look like a keyword, so this presumably checks
; that the '#' prefix is also accepted on ordinary names - confirm.
def #ghi equ 3
export #def, #ghi
; Raw names are used with other definition forms as well ('=' and RB).
def #align = 0
def #rb rb #def
; A macro named 'macro'. Its body selects macro arguments by the value of a raw
; symbol: def is 2 and DEF is 1, so the call below prints "second is not first".
MACRO #macro
println "\<#def> is not \<#DEF>"
ENDM
#macro first, second
purge #macro
; Here the hashless 'def' is still the keyword; only its argument is a raw name.
assert !def(#macro)
section "section", rom0
; A global label named 'section', followed by local labels written both in the
; short form and in the fully qualified form.
#section::
dw #section
#.rom0:
db BANK(#section.rom0)
#section.romx:
println "section.romx is in ", SECTION(.romx)
; Interpolation: the string symbol 'sub' holds "def", so the second line below
; expands into a 'def' statement that defines a string symbol named 'add'.
def #sub equs "def"
{#sub} #add equs "#"
; Nested interpolation: the inner pair expands to "#" followed by "def", which
; names the symbol 'def' (2). The loop body runs with 'for' equal to 0 and 1,
; and 'for' is 2 once the loop has finished.
for #for, {{#add}{#sub}}
println "for == ", #for
endr
assert #for == 2
; Expands to a 'def' check on the upper-case name 'FOR', which was never defined.
assert !{#sub}(#FOR)
; Charmap names may be raw identifiers too.
; NOTE(review): a '#' inside a string literal appears to stay an ordinary character - confirm.
newcharmap #charmap, #main
charmap "#", $42
setcharmap #charmap
db "#"

View File

@@ -0,0 +1,4 @@
second is not first
section.romx is in section
for == $0
for == $1

Binary file not shown.