Parser refers to "symbol"s, "label"s, and "local label"s, not "identifier"s (#1652)

This better matches how the lexed tokens are discussed in rgbasm(5)
This commit is contained in:
Rangi
2025-02-06 18:01:33 +01:00
committed by GitHub
parent d9d381cb62
commit 4c916b8da8
8 changed files with 74 additions and 94 deletions

View File

@@ -134,11 +134,9 @@ struct CaseInsensitive {
} }
}; };
// Identifiers that are also keywords are listed here. This ONLY applies to ones // This map lists all RGBASM keywords which `yylex_NORMAL` lexes as identifiers
// that would normally be matched as identifiers! Check out `yylex_NORMAL` to // (see `startsIdentifier` and `continuesIdentifier` below). All non-identifier
// see how this is used. // tokens are lexed separately.
// Tokens / keywords not handled here are handled in `yylex_NORMAL`'s switch.
// This assumes that no two keywords have the same name.
static std::unordered_map<std::string, int, CaseInsensitive, CaseInsensitive> keywordDict = { static std::unordered_map<std::string, int, CaseInsensitive, CaseInsensitive> keywordDict = {
{"ADC", T_(SM83_ADC) }, {"ADC", T_(SM83_ADC) },
{"ADD", T_(SM83_ADD) }, {"ADD", T_(SM83_ADD) },
@@ -1179,7 +1177,7 @@ static uint32_t readGfxConstant() {
return bitPlaneUpper << 8 | bitPlaneLower; return bitPlaneUpper << 8 | bitPlaneLower;
} }
// Functions to read identifiers & keywords // Functions to read identifiers and keywords
static bool startsIdentifier(int c) { static bool startsIdentifier(int c) {
// Anonymous labels internally start with '!' // Anonymous labels internally start with '!'
@@ -1192,18 +1190,18 @@ static bool continuesIdentifier(int c) {
static Token readIdentifier(char firstChar, bool raw) { static Token readIdentifier(char firstChar, bool raw) {
std::string identifier(1, firstChar); std::string identifier(1, firstChar);
int tokenType = firstChar == '.' ? T_(LOCAL_ID) : T_(ID); int tokenType = firstChar == '.' ? T_(LOCAL) : T_(SYMBOL);
// Continue reading while the char is in the symbol charset // Continue reading while the char is in the identifier charset
for (int c = peek(); continuesIdentifier(c); c = peek()) { for (int c = peek(); continuesIdentifier(c); c = peek()) {
shiftChar(); shiftChar();
// Write the char to the identifier's name // Write the char to the identifier's name
identifier += c; identifier += c;
// If the char was a dot, mark the identifier as local // If the char was a dot, the identifier is a local label
if (c == '.') { if (c == '.') {
tokenType = T_(LOCAL_ID); tokenType = T_(LOCAL);
} }
} }
@@ -1219,7 +1217,7 @@ static Token readIdentifier(char firstChar, bool raw) {
// Label scopes `.` and `..` are the only nonlocal identifiers that start with a dot // Label scopes `.` and `..` are the only nonlocal identifiers that start with a dot
if (identifier.find_first_not_of('.') == identifier.npos) { if (identifier.find_first_not_of('.') == identifier.npos) {
tokenType = T_(ID); tokenType = T_(SYMBOL);
} }
return Token(tokenType, identifier); return Token(tokenType, identifier);
@@ -1276,7 +1274,7 @@ static std::shared_ptr<std::string> readInterpolation(size_t depth) {
lexerState->disableInterpolation = disableInterpolation; lexerState->disableInterpolation = disableInterpolation;
if (fmtBuf.starts_with('#')) { if (fmtBuf.starts_with('#')) {
// Skip a '#' raw identifier prefix, but after expanding any nested interpolations. // Skip a '#' raw symbol prefix, but after expanding any nested interpolations.
fmtBuf.erase(0, 1); fmtBuf.erase(0, 1);
} else if (keywordDict.find(fmtBuf) != keywordDict.end()) { } else if (keywordDict.find(fmtBuf) != keywordDict.end()) {
// Don't allow symbols that alias keywords without a '#' prefix. // Don't allow symbols that alias keywords without a '#' prefix.
@@ -1641,7 +1639,7 @@ static Token yylex_NORMAL() {
case '@': { case '@': {
std::string symName("@"); std::string symName("@");
return Token(T_(ID), symName); return Token(T_(SYMBOL), symName);
} }
case '[': case '[':
@@ -1903,15 +1901,15 @@ static Token yylex_NORMAL() {
} }
// If a keyword, don't try to expand // If a keyword, don't try to expand
if (token.type != T_(ID) && token.type != T_(LOCAL_ID)) { if (token.type != T_(SYMBOL) && token.type != T_(LOCAL)) {
return token; return token;
} }
// `token` is either an `ID` or a `LOCAL_ID`, and both have a `std::string` value. // `token` is either a `SYMBOL` or a `LOCAL`, and both have a `std::string` value.
assume(token.value.holds<std::string>()); assume(token.value.holds<std::string>());
// Local symbols cannot be string expansions // Local symbols cannot be string expansions
if (token.type == T_(ID) && lexerState->expandStrings) { if (token.type == T_(SYMBOL) && lexerState->expandStrings) {
// Attempt string expansion // Attempt string expansion
Symbol const *sym = sym_FindExactSymbol(token.value.get<std::string>()); Symbol const *sym = sym_FindExactSymbol(token.value.get<std::string>());
@@ -1925,18 +1923,18 @@ static Token yylex_NORMAL() {
} }
// This is a "lexer hack"! We need it to distinguish between label definitions // This is a "lexer hack"! We need it to distinguish between label definitions
// (which start with `LABEL`) and macro invocations (which start with `ID`). // (which start with `LABEL`) and macro invocations (which start with `SYMBOL`).
// //
// If we had one `IDENTIFIER` token, the parser would need to perform "lookahead" // If we had one `IDENTIFIER` token, the parser would need to perform "lookahead"
// to determine which rule applies. But since macros need to enter "raw" mode to // to determine which rule applies. But since macros need to enter "raw" mode to
// parse their arguments, which may not even be valid tokens in "normal" mode, we // parse their arguments, which may not even be valid tokens in "normal" mode, we
// cannot use lookahead to check for the presence of a `COLON`. // cannot use lookahead to check for the presence of a `COLON`.
// //
// Instead, we have separate `ID` and `LABEL` tokens, lexing as a `LABEL` if a ':' // Instead, we have separate `SYMBOL` and `LABEL` tokens, lexing as a `LABEL` if a
// character *immediately* follows the identifier. Thus, at the beginning of a line, // ':' character *immediately* follows the identifier. Thus, at the beginning of a
// "Label:" and "mac:" are treated as label definitions, but "Label :" and "mac :" // line, "Label:" and "mac:" are treated as label definitions, but "Label :" and
// are treated as macro invocations. // "mac :" are treated as macro invocations.
if (token.type == T_(ID) && peek() == ':') { if (token.type == T_(SYMBOL) && peek() == ':') {
token.type = T_(LABEL); token.type = T_(LABEL);
} }
@@ -2390,7 +2388,7 @@ Capture lexer_CaptureRept() {
do { // Discard initial whitespace do { // Discard initial whitespace
c = nextChar(); c = nextChar();
} while (isWhitespace(c)); } while (isWhitespace(c));
// Now, try to match `REPT`, `FOR` or `ENDR` as a **whole** identifier // Now, try to match `REPT`, `FOR` or `ENDR` as a **whole** keyword
if (startsIdentifier(c)) { if (startsIdentifier(c)) {
switch (readIdentifier(c, false).type) { switch (readIdentifier(c, false).type) {
case T_(POP_REPT): case T_(POP_REPT):
@@ -2443,7 +2441,7 @@ Capture lexer_CaptureMacro() {
do { // Discard initial whitespace do { // Discard initial whitespace
c = nextChar(); c = nextChar();
} while (isWhitespace(c)); } while (isWhitespace(c));
// Now, try to match `ENDM` as a **whole** identifier // Now, try to match `ENDM` as a **whole** keyword
if (startsIdentifier(c)) { if (startsIdentifier(c)) {
switch (readIdentifier(c, false).type) { switch (readIdentifier(c, false).type) {
case T_(POP_ENDM): case T_(POP_ENDM):

View File

@@ -313,18 +313,17 @@
// Literals // Literals
%token <int32_t> NUMBER "number" %token <int32_t> NUMBER "number"
%token <std::string> STRING "string" %token <std::string> STRING "string"
%token <std::string> SYMBOL "symbol"
%token <std::string> LABEL "label" %token <std::string> LABEL "label"
%token <std::string> ID "identifier" %token <std::string> LOCAL "local label"
%token <std::string> LOCAL_ID "local identifier"
%token <std::string> ANON "anonymous label" %token <std::string> ANON "anonymous label"
/******************** Data types ********************/ /******************** Data types ********************/
// The "no_str" types below are to distinguish numeric and string expressions, since many
// contexts treat strings differently than numbers, e.g. `db "string"` or `print "string"`.
// RPN expressions // RPN expressions
%type <Expression> relocexpr %type <Expression> relocexpr
// `relocexpr_no_str` exists because strings usually count as numeric expressions, but some
// contexts treat numbers and strings differently, e.g. `db "string"` or `print "string"`.
%type <Expression> relocexpr_no_str %type <Expression> relocexpr_no_str
%type <Expression> reloc_8bit %type <Expression> reloc_8bit
%type <Expression> reloc_8bit_offset %type <Expression> reloc_8bit_offset
@@ -355,8 +354,10 @@
%type <std::string> def_rl %type <std::string> def_rl
%type <std::string> def_equs %type <std::string> def_equs
%type <std::string> redef_equs %type <std::string> redef_equs
%type <std::string> scoped_id %type <std::string> scoped_sym
%type <std::string> scoped_anon_id // `scoped_sym_no_anon` exists because anonymous labels usually count as "scoped symbols", but some
// contexts treat anonymous labels and other labels/symbols differently, e.g. `purge` or `export`.
%type <std::string> scoped_sym_no_anon
// SM83 instruction parameters // SM83 instruction parameters
%type <int32_t> reg_r %type <int32_t> reg_r
@@ -513,7 +514,7 @@ endc:
def_id: def_id:
OP_DEF { OP_DEF {
lexer_ToggleStringExpansion(false); lexer_ToggleStringExpansion(false);
} ID { } SYMBOL {
lexer_ToggleStringExpansion(true); lexer_ToggleStringExpansion(true);
$$ = std::move($3); $$ = std::move($3);
} }
@@ -522,61 +523,42 @@ def_id:
redef_id: redef_id:
POP_REDEF { POP_REDEF {
lexer_ToggleStringExpansion(false); lexer_ToggleStringExpansion(false);
} ID { } SYMBOL {
lexer_ToggleStringExpansion(true); lexer_ToggleStringExpansion(true);
$$ = std::move($3); $$ = std::move($3);
} }
; ;
// LABEL covers identifiers followed by a double colon (e.g. `call Function::ret`, scoped_sym_no_anon: SYMBOL | LABEL | LOCAL;
// to be read as `call Function :: ret`). This should not conflict with anything.
scoped_id:
ID {
$$ = std::move($1);
}
| LOCAL_ID {
$$ = std::move($1);
}
| LABEL {
$$ = std::move($1);
}
;
scoped_anon_id: scoped_sym: scoped_sym_no_anon | ANON;
scoped_id {
$$ = std::move($1);
}
| ANON {
$$ = std::move($1);
}
;
label: label:
%empty %empty
| COLON {
sym_AddAnonLabel();
}
| LOCAL_ID {
sym_AddLocalLabel($1);
}
| LOCAL_ID COLON {
sym_AddLocalLabel($1);
}
| LABEL COLON { | LABEL COLON {
sym_AddLabel($1); sym_AddLabel($1);
} }
| LOCAL_ID DOUBLE_COLON {
sym_AddLocalLabel($1);
sym_Export($1);
}
| LABEL DOUBLE_COLON { | LABEL DOUBLE_COLON {
sym_AddLabel($1); sym_AddLabel($1);
sym_Export($1); sym_Export($1);
} }
| LOCAL {
sym_AddLocalLabel($1);
}
| LOCAL COLON {
sym_AddLocalLabel($1);
}
| LOCAL DOUBLE_COLON {
sym_AddLocalLabel($1);
sym_Export($1);
}
| COLON {
sym_AddAnonLabel();
}
; ;
macro: macro:
ID { SYMBOL {
// Parsing 'macro_args' will restore the lexer's normal mode // Parsing 'macro_args' will restore the lexer's normal mode
lexer_SetMode(LEXER_RAW); lexer_SetMode(LEXER_RAW);
} macro_args { } macro_args {
@@ -862,7 +844,7 @@ rept:
for: for:
POP_FOR { POP_FOR {
lexer_ToggleStringExpansion(false); lexer_ToggleStringExpansion(false);
} ID { } SYMBOL {
lexer_ToggleStringExpansion(true); lexer_ToggleStringExpansion(true);
} COMMA for_args NEWLINE capture_rept endofline { } COMMA for_args NEWLINE capture_rept endofline {
if ($8.span.ptr) { if ($8.span.ptr) {
@@ -906,7 +888,7 @@ break:
def_macro: def_macro:
POP_MACRO { POP_MACRO {
lexer_ToggleStringExpansion(false); lexer_ToggleStringExpansion(false);
} ID { } SYMBOL {
lexer_ToggleStringExpansion(true); lexer_ToggleStringExpansion(true);
} NEWLINE capture_macro endofline { } NEWLINE capture_macro endofline {
if ($6.span.ptr) { if ($6.span.ptr) {
@@ -1096,10 +1078,10 @@ purge:
; ;
purge_args: purge_args:
scoped_id { scoped_sym_no_anon {
$$.push_back($1); $$.push_back($1);
} }
| purge_args COMMA scoped_id { | purge_args COMMA scoped_sym_no_anon {
$$ = std::move($1); $$ = std::move($1);
$$.push_back($3); $$.push_back($3);
} }
@@ -1113,7 +1095,7 @@ export_list:
; ;
export_list_entry: export_list_entry:
scoped_id { scoped_sym_no_anon {
sym_Export($1); sym_Export($1);
} }
; ;
@@ -1171,16 +1153,16 @@ charmap_args:
; ;
newcharmap: newcharmap:
POP_NEWCHARMAP ID { POP_NEWCHARMAP SYMBOL {
charmap_New($2, nullptr); charmap_New($2, nullptr);
} }
| POP_NEWCHARMAP ID COMMA ID { | POP_NEWCHARMAP SYMBOL COMMA SYMBOL {
charmap_New($2, &$4); charmap_New($2, &$4);
} }
; ;
setcharmap: setcharmap:
POP_SETCHARMAP ID { POP_SETCHARMAP SYMBOL {
charmap_Set($2); charmap_Set($2);
} }
; ;
@@ -1192,7 +1174,7 @@ pushc:
; ;
pushc_setcharmap: pushc_setcharmap:
POP_PUSHC ID { POP_PUSHC SYMBOL {
charmap_Push(); charmap_Push();
charmap_Set($2); charmap_Set($2);
} }
@@ -1325,7 +1307,7 @@ relocexpr:
; ;
relocexpr_no_str: relocexpr_no_str:
scoped_anon_id { scoped_sym {
$$.makeSymbol($1); $$.makeSymbol($1);
} }
| NUMBER { | NUMBER {
@@ -1418,8 +1400,8 @@ relocexpr_no_str:
| OP_ISCONST LPAREN relocexpr RPAREN { | OP_ISCONST LPAREN relocexpr RPAREN {
$$.makeNumber($3.isKnown()); $$.makeNumber($3.isKnown());
} }
| OP_BANK LPAREN scoped_anon_id RPAREN { | OP_BANK LPAREN scoped_sym RPAREN {
// '@' is also an ID; it is handled here // '@' is also a SYMBOL; it is handled here
$$.makeBankSymbol($3); $$.makeBankSymbol($3);
} }
| OP_BANK LPAREN string RPAREN { | OP_BANK LPAREN string RPAREN {
@@ -1439,7 +1421,7 @@ relocexpr_no_str:
} }
| OP_DEF { | OP_DEF {
lexer_ToggleStringExpansion(false); lexer_ToggleStringExpansion(false);
} LPAREN scoped_anon_id RPAREN { } LPAREN scoped_sym RPAREN {
$$.makeNumber(sym_FindScopedValidSymbol($4) != nullptr); $$.makeNumber(sym_FindScopedValidSymbol($4) != nullptr);
lexer_ToggleStringExpansion(true); lexer_ToggleStringExpansion(true);
} }
@@ -1585,7 +1567,7 @@ string:
| OP_STRFMT LPAREN strfmt_args RPAREN { | OP_STRFMT LPAREN strfmt_args RPAREN {
$$ = strfmt($3.format, $3.args); $$ = strfmt($3.format, $3.args);
} }
| POP_SECTION LPAREN scoped_anon_id RPAREN { | POP_SECTION LPAREN scoped_sym RPAREN {
Symbol *sym = sym_FindScopedValidSymbol($3); Symbol *sym = sym_FindScopedValidSymbol($3);
if (!sym) { if (!sym) {

View File

@@ -5,7 +5,7 @@ error: anon-label-bad.asm(6):
error: anon-label-bad.asm(9): error: anon-label-bad.asm(9):
syntax error, unexpected anonymous label syntax error, unexpected anonymous label
error: anon-label-bad.asm(10): error: anon-label-bad.asm(10):
syntax error, unexpected anonymous label, expecting label or identifier or local identifier syntax error, unexpected anonymous label, expecting symbol or label or local label
error: anon-label-bad.asm(22): error: anon-label-bad.asm(22):
syntax error, unexpected :: syntax error, unexpected ::
error: Assembly aborted (5 errors)! error: Assembly aborted (5 errors)!

View File

@@ -1,7 +1,7 @@
error: def-scoped.asm(10): error: def-scoped.asm(10):
syntax error, unexpected local identifier, expecting identifier syntax error, unexpected local label, expecting symbol
error: def-scoped.asm(13): error: def-scoped.asm(13):
syntax error, unexpected local identifier, expecting identifier syntax error, unexpected local label, expecting symbol
error: def-scoped.asm(16): error: def-scoped.asm(16):
syntax error, unexpected local identifier, expecting identifier syntax error, unexpected local label, expecting symbol
error: Assembly aborted (3 errors)! error: Assembly aborted (3 errors)!

View File

@@ -1,5 +1,5 @@
error: error-recovery.asm(3): error: error-recovery.asm(3):
syntax error, unexpected number syntax error, unexpected number
error: error-recovery.asm(5) -> error-recovery.asm::REPT~1(7): error: error-recovery.asm(5) -> error-recovery.asm::REPT~1(7):
syntax error, unexpected identifier syntax error, unexpected symbol
error: Assembly aborted (2 errors)! error: Assembly aborted (2 errors)!

View File

@@ -1,15 +1,15 @@
error: label-macro-arg.asm(38) -> label-macro-arg.asm::test_char(25): error: label-macro-arg.asm(38) -> label-macro-arg.asm::test_char(25):
syntax error, unexpected local identifier, expecting identifier syntax error, unexpected local label, expecting symbol
while expanding symbol "VAR_DEF" while expanding symbol "VAR_DEF"
error: label-macro-arg.asm(38) -> label-macro-arg.asm::test_char(26): error: label-macro-arg.asm(38) -> label-macro-arg.asm::test_char(26):
syntax error, unexpected local identifier, expecting identifier syntax error, unexpected local label, expecting symbol
error: label-macro-arg.asm(38) -> label-macro-arg.asm::test_char(29): error: label-macro-arg.asm(38) -> label-macro-arg.asm::test_char(29):
Interpolated symbol "sizeof_.something" does not exist Interpolated symbol "sizeof_.something" does not exist
error: label-macro-arg.asm(39) -> label-macro-arg.asm::test_char(25): error: label-macro-arg.asm(39) -> label-macro-arg.asm::test_char(25):
syntax error, unexpected label, expecting identifier syntax error, unexpected label, expecting symbol
while expanding symbol "VAR_DEF" while expanding symbol "VAR_DEF"
error: label-macro-arg.asm(39) -> label-macro-arg.asm::test_char(26): error: label-macro-arg.asm(39) -> label-macro-arg.asm::test_char(26):
syntax error, unexpected label, expecting identifier syntax error, unexpected label, expecting symbol
error: label-macro-arg.asm(39) -> label-macro-arg.asm::test_char(29): error: label-macro-arg.asm(39) -> label-macro-arg.asm::test_char(29):
Invalid format spec 'sizeof_' Invalid format spec 'sizeof_'
error: label-macro-arg.asm(39) -> label-macro-arg.asm::test_char(29): error: label-macro-arg.asm(39) -> label-macro-arg.asm::test_char(29):

View File

@@ -9,9 +9,9 @@ error: null-outside-string.asm(4):
error: null-outside-string.asm(6): error: null-outside-string.asm(6):
Begun line continuation, but encountered character 'X' Begun line continuation, but encountered character 'X'
error: null-outside-string.asm(6): error: null-outside-string.asm(6):
syntax error, unexpected identifier syntax error, unexpected symbol
error: null-outside-string.asm(7): error: null-outside-string.asm(7):
Begun line continuation, but encountered character 'X' Begun line continuation, but encountered character 'X'
error: null-outside-string.asm(7): error: null-outside-string.asm(7):
syntax error, unexpected identifier syntax error, unexpected symbol
error: Assembly aborted (8 errors)! error: Assembly aborted (8 errors)!

View File

@@ -1,5 +1,5 @@
error: syntax-error-lexer-mode.asm(7): error: syntax-error-lexer-mode.asm(7):
syntax error, unexpected af syntax error, unexpected af
error: syntax-error-lexer-mode.asm(11): error: syntax-error-lexer-mode.asm(11):
syntax error, unexpected af, expecting identifier syntax error, unexpected af, expecting symbol
error: Assembly aborted (2 errors)! error: Assembly aborted (2 errors)!