mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-20 18:22:07 +00:00
Parser refers to "symbol"s, "label"s, and "local label"s, not "identifier"s (#1652)
This better matches how the lexed tokens are discussed in rgbasm(5)
This commit is contained in:
@@ -134,11 +134,9 @@ struct CaseInsensitive {
|
||||
}
|
||||
};
|
||||
|
||||
// Identifiers that are also keywords are listed here. This ONLY applies to ones
|
||||
// that would normally be matched as identifiers! Check out `yylex_NORMAL` to
|
||||
// see how this is used.
|
||||
// Tokens / keywords not handled here are handled in `yylex_NORMAL`'s switch.
|
||||
// This assumes that no two keywords have the same name.
|
||||
// This map lists all RGBASM keywords which `yylex_NORMAL` lexes as identifiers
|
||||
// (see `startsIdentifier` and `continuesIdentifier` below). All non-identifier
|
||||
// tokens are lexed separately.
|
||||
static std::unordered_map<std::string, int, CaseInsensitive, CaseInsensitive> keywordDict = {
|
||||
{"ADC", T_(SM83_ADC) },
|
||||
{"ADD", T_(SM83_ADD) },
|
||||
@@ -1179,7 +1177,7 @@ static uint32_t readGfxConstant() {
|
||||
return bitPlaneUpper << 8 | bitPlaneLower;
|
||||
}
|
||||
|
||||
// Functions to read identifiers & keywords
|
||||
// Functions to read identifiers and keywords
|
||||
|
||||
static bool startsIdentifier(int c) {
|
||||
// Anonymous labels internally start with '!'
|
||||
@@ -1192,18 +1190,18 @@ static bool continuesIdentifier(int c) {
|
||||
|
||||
static Token readIdentifier(char firstChar, bool raw) {
|
||||
std::string identifier(1, firstChar);
|
||||
int tokenType = firstChar == '.' ? T_(LOCAL_ID) : T_(ID);
|
||||
int tokenType = firstChar == '.' ? T_(LOCAL) : T_(SYMBOL);
|
||||
|
||||
// Continue reading while the char is in the symbol charset
|
||||
// Continue reading while the char is in the identifier charset
|
||||
for (int c = peek(); continuesIdentifier(c); c = peek()) {
|
||||
shiftChar();
|
||||
|
||||
// Write the char to the identifier's name
|
||||
identifier += c;
|
||||
|
||||
// If the char was a dot, mark the identifier as local
|
||||
// If the char was a dot, the identifier is a local label
|
||||
if (c == '.') {
|
||||
tokenType = T_(LOCAL_ID);
|
||||
tokenType = T_(LOCAL);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1219,7 +1217,7 @@ static Token readIdentifier(char firstChar, bool raw) {
|
||||
|
||||
// Label scopes `.` and `..` are the only nonlocal identifiers that start with a dot
|
||||
if (identifier.find_first_not_of('.') == identifier.npos) {
|
||||
tokenType = T_(ID);
|
||||
tokenType = T_(SYMBOL);
|
||||
}
|
||||
|
||||
return Token(tokenType, identifier);
|
||||
@@ -1276,7 +1274,7 @@ static std::shared_ptr<std::string> readInterpolation(size_t depth) {
|
||||
lexerState->disableInterpolation = disableInterpolation;
|
||||
|
||||
if (fmtBuf.starts_with('#')) {
|
||||
// Skip a '#' raw identifier prefix, but after expanding any nested interpolations.
|
||||
// Skip a '#' raw symbol prefix, but after expanding any nested interpolations.
|
||||
fmtBuf.erase(0, 1);
|
||||
} else if (keywordDict.find(fmtBuf) != keywordDict.end()) {
|
||||
// Don't allow symbols that alias keywords without a '#' prefix.
|
||||
@@ -1641,7 +1639,7 @@ static Token yylex_NORMAL() {
|
||||
|
||||
case '@': {
|
||||
std::string symName("@");
|
||||
return Token(T_(ID), symName);
|
||||
return Token(T_(SYMBOL), symName);
|
||||
}
|
||||
|
||||
case '[':
|
||||
@@ -1903,15 +1901,15 @@ static Token yylex_NORMAL() {
|
||||
}
|
||||
|
||||
// If a keyword, don't try to expand
|
||||
if (token.type != T_(ID) && token.type != T_(LOCAL_ID)) {
|
||||
if (token.type != T_(SYMBOL) && token.type != T_(LOCAL)) {
|
||||
return token;
|
||||
}
|
||||
|
||||
// `token` is either an `ID` or a `LOCAL_ID`, and both have a `std::string` value.
|
||||
// `token` is either a `SYMBOL` or a `LOCAL`, and both have a `std::string` value.
|
||||
assume(token.value.holds<std::string>());
|
||||
|
||||
// Local symbols cannot be string expansions
|
||||
if (token.type == T_(ID) && lexerState->expandStrings) {
|
||||
if (token.type == T_(SYMBOL) && lexerState->expandStrings) {
|
||||
// Attempt string expansion
|
||||
Symbol const *sym = sym_FindExactSymbol(token.value.get<std::string>());
|
||||
|
||||
@@ -1925,18 +1923,18 @@ static Token yylex_NORMAL() {
|
||||
}
|
||||
|
||||
// This is a "lexer hack"! We need it to distinguish between label definitions
|
||||
// (which start with `LABEL`) and macro invocations (which start with `ID`).
|
||||
// (which start with `LABEL`) and macro invocations (which start with `SYMBOL`).
|
||||
//
|
||||
// If we had one `IDENTIFIER` token, the parser would need to perform "lookahead"
|
||||
// to determine which rule applies. But since macros need to enter "raw" mode to
|
||||
// parse their arguments, which may not even be valid tokens in "normal" mode, we
|
||||
// cannot use lookahead to check for the presence of a `COLON`.
|
||||
//
|
||||
// Instead, we have separate `ID` and `LABEL` tokens, lexing as a `LABEL` if a ':'
|
||||
// character *immediately* follows the identifier. Thus, at the beginning of a line,
|
||||
// "Label:" and "mac:" are treated as label definitions, but "Label :" and "mac :"
|
||||
// are treated as macro invocations.
|
||||
if (token.type == T_(ID) && peek() == ':') {
|
||||
// Instead, we have separate `SYMBOL` and `LABEL` tokens, lexing as a `LABEL` if a
|
||||
// ':' character *immediately* follows the identifier. Thus, at the beginning of a
|
||||
// line, "Label:" and "mac:" are treated as label definitions, but "Label :" and
|
||||
// "mac :" are treated as macro invocations.
|
||||
if (token.type == T_(SYMBOL) && peek() == ':') {
|
||||
token.type = T_(LABEL);
|
||||
}
|
||||
|
||||
@@ -2390,7 +2388,7 @@ Capture lexer_CaptureRept() {
|
||||
do { // Discard initial whitespace
|
||||
c = nextChar();
|
||||
} while (isWhitespace(c));
|
||||
// Now, try to match `REPT`, `FOR` or `ENDR` as a **whole** identifier
|
||||
// Now, try to match `REPT`, `FOR` or `ENDR` as a **whole** keyword
|
||||
if (startsIdentifier(c)) {
|
||||
switch (readIdentifier(c, false).type) {
|
||||
case T_(POP_REPT):
|
||||
@@ -2443,7 +2441,7 @@ Capture lexer_CaptureMacro() {
|
||||
do { // Discard initial whitespace
|
||||
c = nextChar();
|
||||
} while (isWhitespace(c));
|
||||
// Now, try to match `ENDM` as a **whole** identifier
|
||||
// Now, try to match `ENDM` as a **whole** keyword
|
||||
if (startsIdentifier(c)) {
|
||||
switch (readIdentifier(c, false).type) {
|
||||
case T_(POP_ENDM):
|
||||
|
||||
@@ -313,18 +313,17 @@
|
||||
// Literals
|
||||
%token <int32_t> NUMBER "number"
|
||||
%token <std::string> STRING "string"
|
||||
%token <std::string> SYMBOL "symbol"
|
||||
%token <std::string> LABEL "label"
|
||||
%token <std::string> ID "identifier"
|
||||
%token <std::string> LOCAL_ID "local identifier"
|
||||
%token <std::string> LOCAL "local label"
|
||||
%token <std::string> ANON "anonymous label"
|
||||
|
||||
/******************** Data types ********************/
|
||||
|
||||
// The "no_str" types below are to distinguish numeric and string expressions, since many
|
||||
// contexts treat strings differently than numbers, e.g. `db "string"` or `print "string"`.
|
||||
|
||||
// RPN expressions
|
||||
%type <Expression> relocexpr
|
||||
// `relocexpr_no_str` exists because strings usually count as numeric expressions, but some
|
||||
// contexts treat numbers and strings differently, e.g. `db "string"` or `print "string"`.
|
||||
%type <Expression> relocexpr_no_str
|
||||
%type <Expression> reloc_8bit
|
||||
%type <Expression> reloc_8bit_offset
|
||||
@@ -355,8 +354,10 @@
|
||||
%type <std::string> def_rl
|
||||
%type <std::string> def_equs
|
||||
%type <std::string> redef_equs
|
||||
%type <std::string> scoped_id
|
||||
%type <std::string> scoped_anon_id
|
||||
%type <std::string> scoped_sym
|
||||
// `scoped_sym_no_anon` exists because anonymous labels usually count as "scoped symbols", but some
|
||||
// contexts treat anonymous labels and other labels/symbols differently, e.g. `purge` or `export`.
|
||||
%type <std::string> scoped_sym_no_anon
|
||||
|
||||
// SM83 instruction parameters
|
||||
%type <int32_t> reg_r
|
||||
@@ -513,7 +514,7 @@ endc:
|
||||
def_id:
|
||||
OP_DEF {
|
||||
lexer_ToggleStringExpansion(false);
|
||||
} ID {
|
||||
} SYMBOL {
|
||||
lexer_ToggleStringExpansion(true);
|
||||
$$ = std::move($3);
|
||||
}
|
||||
@@ -522,61 +523,42 @@ def_id:
|
||||
redef_id:
|
||||
POP_REDEF {
|
||||
lexer_ToggleStringExpansion(false);
|
||||
} ID {
|
||||
} SYMBOL {
|
||||
lexer_ToggleStringExpansion(true);
|
||||
$$ = std::move($3);
|
||||
}
|
||||
;
|
||||
|
||||
// LABEL covers identifiers followed by a double colon (e.g. `call Function::ret`,
|
||||
// to be read as `call Function :: ret`). This should not conflict with anything.
|
||||
scoped_id:
|
||||
ID {
|
||||
$$ = std::move($1);
|
||||
}
|
||||
| LOCAL_ID {
|
||||
$$ = std::move($1);
|
||||
}
|
||||
| LABEL {
|
||||
$$ = std::move($1);
|
||||
}
|
||||
;
|
||||
scoped_sym_no_anon: SYMBOL | LABEL | LOCAL;
|
||||
|
||||
scoped_anon_id:
|
||||
scoped_id {
|
||||
$$ = std::move($1);
|
||||
}
|
||||
| ANON {
|
||||
$$ = std::move($1);
|
||||
}
|
||||
;
|
||||
scoped_sym: scoped_sym_no_anon | ANON;
|
||||
|
||||
label:
|
||||
%empty
|
||||
| COLON {
|
||||
sym_AddAnonLabel();
|
||||
}
|
||||
| LOCAL_ID {
|
||||
sym_AddLocalLabel($1);
|
||||
}
|
||||
| LOCAL_ID COLON {
|
||||
sym_AddLocalLabel($1);
|
||||
}
|
||||
| LABEL COLON {
|
||||
sym_AddLabel($1);
|
||||
}
|
||||
| LOCAL_ID DOUBLE_COLON {
|
||||
sym_AddLocalLabel($1);
|
||||
sym_Export($1);
|
||||
}
|
||||
| LABEL DOUBLE_COLON {
|
||||
sym_AddLabel($1);
|
||||
sym_Export($1);
|
||||
}
|
||||
| LOCAL {
|
||||
sym_AddLocalLabel($1);
|
||||
}
|
||||
| LOCAL COLON {
|
||||
sym_AddLocalLabel($1);
|
||||
}
|
||||
| LOCAL DOUBLE_COLON {
|
||||
sym_AddLocalLabel($1);
|
||||
sym_Export($1);
|
||||
}
|
||||
| COLON {
|
||||
sym_AddAnonLabel();
|
||||
}
|
||||
;
|
||||
|
||||
macro:
|
||||
ID {
|
||||
SYMBOL {
|
||||
// Parsing 'macro_args' will restore the lexer's normal mode
|
||||
lexer_SetMode(LEXER_RAW);
|
||||
} macro_args {
|
||||
@@ -862,7 +844,7 @@ rept:
|
||||
for:
|
||||
POP_FOR {
|
||||
lexer_ToggleStringExpansion(false);
|
||||
} ID {
|
||||
} SYMBOL {
|
||||
lexer_ToggleStringExpansion(true);
|
||||
} COMMA for_args NEWLINE capture_rept endofline {
|
||||
if ($8.span.ptr) {
|
||||
@@ -906,7 +888,7 @@ break:
|
||||
def_macro:
|
||||
POP_MACRO {
|
||||
lexer_ToggleStringExpansion(false);
|
||||
} ID {
|
||||
} SYMBOL {
|
||||
lexer_ToggleStringExpansion(true);
|
||||
} NEWLINE capture_macro endofline {
|
||||
if ($6.span.ptr) {
|
||||
@@ -1096,10 +1078,10 @@ purge:
|
||||
;
|
||||
|
||||
purge_args:
|
||||
scoped_id {
|
||||
scoped_sym_no_anon {
|
||||
$$.push_back($1);
|
||||
}
|
||||
| purge_args COMMA scoped_id {
|
||||
| purge_args COMMA scoped_sym_no_anon {
|
||||
$$ = std::move($1);
|
||||
$$.push_back($3);
|
||||
}
|
||||
@@ -1113,7 +1095,7 @@ export_list:
|
||||
;
|
||||
|
||||
export_list_entry:
|
||||
scoped_id {
|
||||
scoped_sym_no_anon {
|
||||
sym_Export($1);
|
||||
}
|
||||
;
|
||||
@@ -1171,16 +1153,16 @@ charmap_args:
|
||||
;
|
||||
|
||||
newcharmap:
|
||||
POP_NEWCHARMAP ID {
|
||||
POP_NEWCHARMAP SYMBOL {
|
||||
charmap_New($2, nullptr);
|
||||
}
|
||||
| POP_NEWCHARMAP ID COMMA ID {
|
||||
| POP_NEWCHARMAP SYMBOL COMMA SYMBOL {
|
||||
charmap_New($2, &$4);
|
||||
}
|
||||
;
|
||||
|
||||
setcharmap:
|
||||
POP_SETCHARMAP ID {
|
||||
POP_SETCHARMAP SYMBOL {
|
||||
charmap_Set($2);
|
||||
}
|
||||
;
|
||||
@@ -1192,7 +1174,7 @@ pushc:
|
||||
;
|
||||
|
||||
pushc_setcharmap:
|
||||
POP_PUSHC ID {
|
||||
POP_PUSHC SYMBOL {
|
||||
charmap_Push();
|
||||
charmap_Set($2);
|
||||
}
|
||||
@@ -1325,7 +1307,7 @@ relocexpr:
|
||||
;
|
||||
|
||||
relocexpr_no_str:
|
||||
scoped_anon_id {
|
||||
scoped_sym {
|
||||
$$.makeSymbol($1);
|
||||
}
|
||||
| NUMBER {
|
||||
@@ -1418,8 +1400,8 @@ relocexpr_no_str:
|
||||
| OP_ISCONST LPAREN relocexpr RPAREN {
|
||||
$$.makeNumber($3.isKnown());
|
||||
}
|
||||
| OP_BANK LPAREN scoped_anon_id RPAREN {
|
||||
// '@' is also an ID; it is handled here
|
||||
| OP_BANK LPAREN scoped_sym RPAREN {
|
||||
// '@' is also a SYMBOL; it is handled here
|
||||
$$.makeBankSymbol($3);
|
||||
}
|
||||
| OP_BANK LPAREN string RPAREN {
|
||||
@@ -1439,7 +1421,7 @@ relocexpr_no_str:
|
||||
}
|
||||
| OP_DEF {
|
||||
lexer_ToggleStringExpansion(false);
|
||||
} LPAREN scoped_anon_id RPAREN {
|
||||
} LPAREN scoped_sym RPAREN {
|
||||
$$.makeNumber(sym_FindScopedValidSymbol($4) != nullptr);
|
||||
lexer_ToggleStringExpansion(true);
|
||||
}
|
||||
@@ -1585,7 +1567,7 @@ string:
|
||||
| OP_STRFMT LPAREN strfmt_args RPAREN {
|
||||
$$ = strfmt($3.format, $3.args);
|
||||
}
|
||||
| POP_SECTION LPAREN scoped_anon_id RPAREN {
|
||||
| POP_SECTION LPAREN scoped_sym RPAREN {
|
||||
Symbol *sym = sym_FindScopedValidSymbol($3);
|
||||
|
||||
if (!sym) {
|
||||
|
||||
@@ -5,7 +5,7 @@ error: anon-label-bad.asm(6):
|
||||
error: anon-label-bad.asm(9):
|
||||
syntax error, unexpected anonymous label
|
||||
error: anon-label-bad.asm(10):
|
||||
syntax error, unexpected anonymous label, expecting label or identifier or local identifier
|
||||
syntax error, unexpected anonymous label, expecting symbol or label or local label
|
||||
error: anon-label-bad.asm(22):
|
||||
syntax error, unexpected ::
|
||||
error: Assembly aborted (5 errors)!
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
error: def-scoped.asm(10):
|
||||
syntax error, unexpected local identifier, expecting identifier
|
||||
syntax error, unexpected local label, expecting symbol
|
||||
error: def-scoped.asm(13):
|
||||
syntax error, unexpected local identifier, expecting identifier
|
||||
syntax error, unexpected local label, expecting symbol
|
||||
error: def-scoped.asm(16):
|
||||
syntax error, unexpected local identifier, expecting identifier
|
||||
syntax error, unexpected local label, expecting symbol
|
||||
error: Assembly aborted (3 errors)!
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
error: error-recovery.asm(3):
|
||||
syntax error, unexpected number
|
||||
error: error-recovery.asm(5) -> error-recovery.asm::REPT~1(7):
|
||||
syntax error, unexpected identifier
|
||||
syntax error, unexpected symbol
|
||||
error: Assembly aborted (2 errors)!
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
error: label-macro-arg.asm(38) -> label-macro-arg.asm::test_char(25):
|
||||
syntax error, unexpected local identifier, expecting identifier
|
||||
syntax error, unexpected local label, expecting symbol
|
||||
while expanding symbol "VAR_DEF"
|
||||
error: label-macro-arg.asm(38) -> label-macro-arg.asm::test_char(26):
|
||||
syntax error, unexpected local identifier, expecting identifier
|
||||
syntax error, unexpected local label, expecting symbol
|
||||
error: label-macro-arg.asm(38) -> label-macro-arg.asm::test_char(29):
|
||||
Interpolated symbol "sizeof_.something" does not exist
|
||||
error: label-macro-arg.asm(39) -> label-macro-arg.asm::test_char(25):
|
||||
syntax error, unexpected label, expecting identifier
|
||||
syntax error, unexpected label, expecting symbol
|
||||
while expanding symbol "VAR_DEF"
|
||||
error: label-macro-arg.asm(39) -> label-macro-arg.asm::test_char(26):
|
||||
syntax error, unexpected label, expecting identifier
|
||||
syntax error, unexpected label, expecting symbol
|
||||
error: label-macro-arg.asm(39) -> label-macro-arg.asm::test_char(29):
|
||||
Invalid format spec 'sizeof_'
|
||||
error: label-macro-arg.asm(39) -> label-macro-arg.asm::test_char(29):
|
||||
|
||||
@@ -9,9 +9,9 @@ error: null-outside-string.asm(4):
|
||||
error: null-outside-string.asm(6):
|
||||
Begun line continuation, but encountered character 'X'
|
||||
error: null-outside-string.asm(6):
|
||||
syntax error, unexpected identifier
|
||||
syntax error, unexpected symbol
|
||||
error: null-outside-string.asm(7):
|
||||
Begun line continuation, but encountered character 'X'
|
||||
error: null-outside-string.asm(7):
|
||||
syntax error, unexpected identifier
|
||||
syntax error, unexpected symbol
|
||||
error: Assembly aborted (8 errors)!
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
error: syntax-error-lexer-mode.asm(7):
|
||||
syntax error, unexpected af
|
||||
error: syntax-error-lexer-mode.asm(11):
|
||||
syntax error, unexpected af, expecting identifier
|
||||
syntax error, unexpected af, expecting symbol
|
||||
error: Assembly aborted (2 errors)!
|
||||
|
||||
Reference in New Issue
Block a user