diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index 3027baf7..2cfa7298 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -134,11 +134,9 @@ struct CaseInsensitive { } }; -// Identifiers that are also keywords are listed here. This ONLY applies to ones -// that would normally be matched as identifiers! Check out `yylex_NORMAL` to -// see how this is used. -// Tokens / keywords not handled here are handled in `yylex_NORMAL`'s switch. -// This assumes that no two keywords have the same name. +// This map lists all RGBASM keywords which `yylex_NORMAL` lexes as identifiers +// (see `startsIdentifier` and `continuesIdentifier` below). All non-identifier +// tokens are lexed separately. static std::unordered_map keywordDict = { {"ADC", T_(SM83_ADC) }, {"ADD", T_(SM83_ADD) }, @@ -1179,7 +1177,7 @@ static uint32_t readGfxConstant() { return bitPlaneUpper << 8 | bitPlaneLower; } -// Functions to read identifiers & keywords +// Functions to read identifiers and keywords static bool startsIdentifier(int c) { // Anonymous labels internally start with '!' @@ -1192,18 +1190,18 @@ static bool continuesIdentifier(int c) { static Token readIdentifier(char firstChar, bool raw) { std::string identifier(1, firstChar); - int tokenType = firstChar == '.' ? T_(LOCAL_ID) : T_(ID); + int tokenType = firstChar == '.' ? T_(LOCAL) : T_(SYMBOL); - // Continue reading while the char is in the symbol charset + // Continue reading while the char is in the identifier charset for (int c = peek(); continuesIdentifier(c); c = peek()) { shiftChar(); // Write the char to the identifier's name identifier += c; - // If the char was a dot, mark the identifier as local + // If the char was a dot, the identifier is a local label if (c == '.') { - tokenType = T_(LOCAL_ID); + tokenType = T_(LOCAL); } } @@ -1219,7 +1217,7 @@ static Token readIdentifier(char firstChar, bool raw) { // Label scopes `.` and `..` are the only nonlocal identifiers that start with a dot if (identifier.find_first_not_of('.') == identifier.npos) { - tokenType = T_(ID); + tokenType = T_(SYMBOL); } return Token(tokenType, identifier); @@ -1276,7 +1274,7 @@ static std::shared_ptr readInterpolation(size_t depth) { lexerState->disableInterpolation = disableInterpolation; if (fmtBuf.starts_with('#')) { - // Skip a '#' raw identifier prefix, but after expanding any nested interpolations. + // Skip a '#' raw symbol prefix, but after expanding any nested interpolations. fmtBuf.erase(0, 1); } else if (keywordDict.find(fmtBuf) != keywordDict.end()) { // Don't allow symbols that alias keywords without a '#' prefix. @@ -1641,7 +1639,7 @@ static Token yylex_NORMAL() { case '@': { std::string symName("@"); - return Token(T_(ID), symName); + return Token(T_(SYMBOL), symName); } case '[': @@ -1903,15 +1901,15 @@ static Token yylex_NORMAL() { } // If a keyword, don't try to expand - if (token.type != T_(ID) && token.type != T_(LOCAL_ID)) { + if (token.type != T_(SYMBOL) && token.type != T_(LOCAL)) { return token; } - // `token` is either an `ID` or a `LOCAL_ID`, and both have a `std::string` value. + // `token` is either a `SYMBOL` or a `LOCAL`, and both have a `std::string` value. assume(token.value.holds()); // Local symbols cannot be string expansions - if (token.type == T_(ID) && lexerState->expandStrings) { + if (token.type == T_(SYMBOL) && lexerState->expandStrings) { // Attempt string expansion Symbol const *sym = sym_FindExactSymbol(token.value.get()); @@ -1925,18 +1923,18 @@ static Token yylex_NORMAL() { } // This is a "lexer hack"! We need it to distinguish between label definitions - // (which start with `LABEL`) and macro invocations (which start with `ID`). + // (which start with `LABEL`) and macro invocations (which start with `SYMBOL`). // // If we had one `IDENTIFIER` token, the parser would need to perform "lookahead" // to determine which rule applies. But since macros need to enter "raw" mode to // parse their arguments, which may not even be valid tokens in "normal" mode, we // cannot use lookahead to check for the presence of a `COLON`. // - // Instead, we have separate `ID` and `LABEL` tokens, lexing as a `LABEL` if a ':' - // character *immediately* follows the identifier. Thus, at the beginning of a line, - // "Label:" and "mac:" are treated as label definitions, but "Label :" and "mac :" - // are treated as macro invocations. - if (token.type == T_(ID) && peek() == ':') { + // Instead, we have separate `SYMBOL` and `LABEL` tokens, lexing as a `LABEL` if a + // ':' character *immediately* follows the identifier. Thus, at the beginning of a + // line, "Label:" and "mac:" are treated as label definitions, but "Label :" and + // "mac :" are treated as macro invocations. + if (token.type == T_(SYMBOL) && peek() == ':') { token.type = T_(LABEL); } @@ -2390,7 +2388,7 @@ Capture lexer_CaptureRept() { do { // Discard initial whitespace c = nextChar(); } while (isWhitespace(c)); - // Now, try to match `REPT`, `FOR` or `ENDR` as a **whole** identifier + // Now, try to match `REPT`, `FOR` or `ENDR` as a **whole** keyword if (startsIdentifier(c)) { switch (readIdentifier(c, false).type) { case T_(POP_REPT): @@ -2443,7 +2441,7 @@ Capture lexer_CaptureMacro() { do { // Discard initial whitespace c = nextChar(); } while (isWhitespace(c)); - // Now, try to match `ENDM` as a **whole** identifier + // Now, try to match `ENDM` as a **whole** keyword if (startsIdentifier(c)) { switch (readIdentifier(c, false).type) { case T_(POP_ENDM): diff --git a/src/asm/parser.y b/src/asm/parser.y index e6b82c5c..746f6279 100644 --- a/src/asm/parser.y +++ b/src/asm/parser.y @@ -313,18 +313,17 @@ // Literals %token NUMBER "number" %token STRING "string" +%token SYMBOL "symbol" %token LABEL "label" -%token ID "identifier" -%token LOCAL_ID "local identifier" +%token LOCAL "local label" %token ANON "anonymous label" /******************** Data types ********************/ -// The "no_str" types below are to distinguish numeric and string expressions, since many -// contexts treat strings differently than numbers, e.g. `db "string"` or `print "string"`. - // RPN expressions %type relocexpr +// `relocexpr_no_str` exists because strings usually count as numeric expressions, but some +// contexts treat numbers and strings differently, e.g. `db "string"` or `print "string"`. %type relocexpr_no_str %type reloc_8bit %type reloc_8bit_offset @@ -355,8 +354,10 @@ %type def_rl %type def_equs %type redef_equs -%type scoped_id -%type scoped_anon_id +%type scoped_sym +// `scoped_sym_no_anon` exists because anonymous labels usually count as "scoped symbols", but some +// contexts treat anonymous labels and other labels/symbols differently, e.g. `purge` or `export`. +%type scoped_sym_no_anon // SM83 instruction parameters %type reg_r @@ -513,7 +514,7 @@ endc: def_id: OP_DEF { lexer_ToggleStringExpansion(false); - } ID { + } SYMBOL { lexer_ToggleStringExpansion(true); $$ = std::move($3); } @@ -522,61 +523,42 @@ def_id: redef_id: POP_REDEF { lexer_ToggleStringExpansion(false); - } ID { + } SYMBOL { lexer_ToggleStringExpansion(true); $$ = std::move($3); } ; -// LABEL covers identifiers followed by a double colon (e.g. `call Function::ret`, -// to be read as `call Function :: ret`). This should not conflict with anything. -scoped_id: - ID { - $$ = std::move($1); - } - | LOCAL_ID { - $$ = std::move($1); - } - | LABEL { - $$ = std::move($1); - } -; +scoped_sym_no_anon: SYMBOL | LABEL | LOCAL; -scoped_anon_id: - scoped_id { - $$ = std::move($1); - } - | ANON { - $$ = std::move($1); - } -; +scoped_sym: scoped_sym_no_anon | ANON; label: %empty - | COLON { - sym_AddAnonLabel(); - } - | LOCAL_ID { - sym_AddLocalLabel($1); - } - | LOCAL_ID COLON { - sym_AddLocalLabel($1); - } | LABEL COLON { sym_AddLabel($1); } - | LOCAL_ID DOUBLE_COLON { - sym_AddLocalLabel($1); - sym_Export($1); - } | LABEL DOUBLE_COLON { sym_AddLabel($1); sym_Export($1); } + | LOCAL { + sym_AddLocalLabel($1); + } + | LOCAL COLON { + sym_AddLocalLabel($1); + } + | LOCAL DOUBLE_COLON { + sym_AddLocalLabel($1); + sym_Export($1); + } + | COLON { + sym_AddAnonLabel(); + } ; macro: - ID { + SYMBOL { // Parsing 'macro_args' will restore the lexer's normal mode lexer_SetMode(LEXER_RAW); } macro_args { @@ -862,7 +844,7 @@ rept: for: POP_FOR { lexer_ToggleStringExpansion(false); - } ID { + } SYMBOL { lexer_ToggleStringExpansion(true); } COMMA for_args NEWLINE capture_rept endofline { if ($8.span.ptr) { @@ -906,7 +888,7 @@ break: def_macro: POP_MACRO { lexer_ToggleStringExpansion(false); - } ID { + } SYMBOL { lexer_ToggleStringExpansion(true); } NEWLINE capture_macro endofline { if ($6.span.ptr) { @@ -1096,10 +1078,10 @@ purge: ; purge_args: - scoped_id { + scoped_sym_no_anon { $$.push_back($1); } - | purge_args COMMA scoped_id { + | purge_args COMMA scoped_sym_no_anon { $$ = std::move($1); $$.push_back($3); } @@ -1113,7 +1095,7 @@ export_list: ; export_list_entry: - scoped_id { + scoped_sym_no_anon { sym_Export($1); } ; @@ -1171,16 +1153,16 @@ charmap_args: ; newcharmap: - POP_NEWCHARMAP ID { + POP_NEWCHARMAP SYMBOL { charmap_New($2, nullptr); } - | POP_NEWCHARMAP ID COMMA ID { + | POP_NEWCHARMAP SYMBOL COMMA SYMBOL { charmap_New($2, &$4); } ; setcharmap: - POP_SETCHARMAP ID { + POP_SETCHARMAP SYMBOL { charmap_Set($2); } ; @@ -1192,7 +1174,7 @@ pushc: ; pushc_setcharmap: - POP_PUSHC ID { + POP_PUSHC SYMBOL { charmap_Push(); charmap_Set($2); } @@ -1325,7 +1307,7 @@ relocexpr: ; relocexpr_no_str: - scoped_anon_id { + scoped_sym { $$.makeSymbol($1); } | NUMBER { @@ -1418,8 +1400,8 @@ relocexpr_no_str: | OP_ISCONST LPAREN relocexpr RPAREN { $$.makeNumber($3.isKnown()); } - | OP_BANK LPAREN scoped_anon_id RPAREN { - // '@' is also an ID; it is handled here + | OP_BANK LPAREN scoped_sym RPAREN { + // '@' is also a SYMBOL; it is handled here $$.makeBankSymbol($3); } | OP_BANK LPAREN string RPAREN { @@ -1439,7 +1421,7 @@ relocexpr_no_str: } | OP_DEF { lexer_ToggleStringExpansion(false); - } LPAREN scoped_anon_id RPAREN { + } LPAREN scoped_sym RPAREN { $$.makeNumber(sym_FindScopedValidSymbol($4) != nullptr); lexer_ToggleStringExpansion(true); } @@ -1585,7 +1567,7 @@ string: | OP_STRFMT LPAREN strfmt_args RPAREN { $$ = strfmt($3.format, $3.args); } - | POP_SECTION LPAREN scoped_anon_id RPAREN { + | POP_SECTION LPAREN scoped_sym RPAREN { Symbol *sym = sym_FindScopedValidSymbol($3); if (!sym) { diff --git a/test/asm/anon-label-bad.err b/test/asm/anon-label-bad.err index 5fa5cf6a..31b225ba 100644 --- a/test/asm/anon-label-bad.err +++ b/test/asm/anon-label-bad.err @@ -5,7 +5,7 @@ error: anon-label-bad.asm(6): error: anon-label-bad.asm(9): syntax error, unexpected anonymous label error: anon-label-bad.asm(10): - syntax error, unexpected anonymous label, expecting label or identifier or local identifier + syntax error, unexpected anonymous label, expecting symbol or label or local label error: anon-label-bad.asm(22): syntax error, unexpected :: error: Assembly aborted (5 errors)! diff --git a/test/asm/def-scoped.err b/test/asm/def-scoped.err index ac3a412e..f357e2a7 100644 --- a/test/asm/def-scoped.err +++ b/test/asm/def-scoped.err @@ -1,7 +1,7 @@ error: def-scoped.asm(10): - syntax error, unexpected local identifier, expecting identifier + syntax error, unexpected local label, expecting symbol error: def-scoped.asm(13): - syntax error, unexpected local identifier, expecting identifier + syntax error, unexpected local label, expecting symbol error: def-scoped.asm(16): - syntax error, unexpected local identifier, expecting identifier + syntax error, unexpected local label, expecting symbol error: Assembly aborted (3 errors)! diff --git a/test/asm/error-recovery.err b/test/asm/error-recovery.err index c3fdd914..2ebd07c4 100644 --- a/test/asm/error-recovery.err +++ b/test/asm/error-recovery.err @@ -1,5 +1,5 @@ error: error-recovery.asm(3): syntax error, unexpected number error: error-recovery.asm(5) -> error-recovery.asm::REPT~1(7): - syntax error, unexpected identifier + syntax error, unexpected symbol error: Assembly aborted (2 errors)! diff --git a/test/asm/label-macro-arg.err b/test/asm/label-macro-arg.err index c2843b93..39f1e2cb 100644 --- a/test/asm/label-macro-arg.err +++ b/test/asm/label-macro-arg.err @@ -1,15 +1,15 @@ error: label-macro-arg.asm(38) -> label-macro-arg.asm::test_char(25): - syntax error, unexpected local identifier, expecting identifier + syntax error, unexpected local label, expecting symbol while expanding symbol "VAR_DEF" error: label-macro-arg.asm(38) -> label-macro-arg.asm::test_char(26): - syntax error, unexpected local identifier, expecting identifier + syntax error, unexpected local label, expecting symbol error: label-macro-arg.asm(38) -> label-macro-arg.asm::test_char(29): Interpolated symbol "sizeof_.something" does not exist error: label-macro-arg.asm(39) -> label-macro-arg.asm::test_char(25): - syntax error, unexpected label, expecting identifier + syntax error, unexpected label, expecting symbol while expanding symbol "VAR_DEF" error: label-macro-arg.asm(39) -> label-macro-arg.asm::test_char(26): - syntax error, unexpected label, expecting identifier + syntax error, unexpected label, expecting symbol error: label-macro-arg.asm(39) -> label-macro-arg.asm::test_char(29): Invalid format spec 'sizeof_' error: label-macro-arg.asm(39) -> label-macro-arg.asm::test_char(29): diff --git a/test/asm/null-outside-string.err b/test/asm/null-outside-string.err index f5261a8c..03d39622 100644 --- a/test/asm/null-outside-string.err +++ b/test/asm/null-outside-string.err @@ -9,9 +9,9 @@ error: null-outside-string.asm(4): error: null-outside-string.asm(6): Begun line continuation, but encountered character 'X' error: null-outside-string.asm(6): - syntax error, unexpected identifier + syntax error, unexpected symbol error: null-outside-string.asm(7): Begun line continuation, but encountered character 'X' error: null-outside-string.asm(7): - syntax error, unexpected identifier + syntax error, unexpected symbol error: Assembly aborted (8 errors)! diff --git a/test/asm/syntax-error-lexer-mode.err b/test/asm/syntax-error-lexer-mode.err index 9f18a14e..319cbe01 100644 --- a/test/asm/syntax-error-lexer-mode.err +++ b/test/asm/syntax-error-lexer-mode.err @@ -1,5 +1,5 @@ error: syntax-error-lexer-mode.asm(7): syntax error, unexpected af error: syntax-error-lexer-mode.asm(11): - syntax error, unexpected af, expecting identifier + syntax error, unexpected af, expecting symbol error: Assembly aborted (2 errors)!