mirror of
https://github.com/gbdev/rgbds.git
synced 2026-05-08 10:59:36 +00:00
Consolidate, refactor, and bugfix the lexer's handling of captures and skips (#1957)
- Do not error about local labels following keywords in skips or captures (fixes #1955)
- Do not incompletely attempt to handle line continuations in skips (fixes #1956)
- Rename `skipToLeadingIdentifier` to `skipToLeadingKeyword`, refactor to merge `skipToEOL` into it, and use it for both skips and captures
This commit is contained in:
+44
-57
@@ -2142,42 +2142,39 @@ finish: // Can't `break` out of a nested `for`-`switch`
|
||||
return Token(T_(YYEOF));
|
||||
}
|
||||
|
||||
static int skipPastEOL() {
|
||||
if (lexerState->atLineStart) {
|
||||
lexerState->atLineStart = false;
|
||||
return skipChars(isBlankSpace);
|
||||
}
|
||||
|
||||
// This function is called when capturing `REPT`/`FOR` loops and `MACRO` bodies,
|
||||
// and when skipping unexecuted `IF`/`ELIF`/`ELSE` blocks and `REPT`/`FOR` loops.
|
||||
// It expects that these constructs' `ENDC`/`ENDR`/`ENDM` closing tokens are only
|
||||
// valid at the start of their lines, which enables ignoring everything except
|
||||
// the leading keyword in lines that have one (as well as line continuations).
|
||||
//
|
||||
// Note that when these constructs are *evaluated*, they can perform expansions
|
||||
// (for macro args, interpolations, and macro invocations) which may produce
|
||||
// tokens that would change how these constructs were captured or skipped, if
|
||||
// they had been produced during the capture/skip non-evaluating phase.
|
||||
static Token skipToLeadingKeyword() {
|
||||
for (;;) {
|
||||
if (lexerState->atLineStart) {
|
||||
lexerState->atLineStart = false;
|
||||
if (int c = skipChars(isBlankSpace); c == EOF) {
|
||||
return Token(T_(YYEOF));
|
||||
} else if (startsIdentifier(c) && c != '.') {
|
||||
shiftChar();
|
||||
std::string keyword(1, c);
|
||||
for (c = peek(); continuesIdentifier(c) && c != '.'; c = nextChar()) {
|
||||
keyword += c;
|
||||
}
|
||||
if (auto search = keywords.find(keyword); search != keywords.end()) {
|
||||
return Token(search->second);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (int c = bumpChar(); c == EOF) {
|
||||
return EOF;
|
||||
return Token(T_(YYEOF));
|
||||
} else if (isNewline(c)) {
|
||||
handleCRLF(c);
|
||||
nextLine();
|
||||
return skipChars(isBlankSpace);
|
||||
} else if (c == '\\') {
|
||||
// Unconditionally skip the next char, including line continuations
|
||||
c = bumpChar();
|
||||
if (isNewline(c)) {
|
||||
handleCRLF(c);
|
||||
nextLine();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This function uses the fact that `IF` and `REPT` constructs are only valid
|
||||
// when there's nothing before them on their lines. This enables filtering
|
||||
// "meaningful" tokens (at line start) vs. "meaningless" (everything else) ones.
|
||||
// It's especially important due to macro args not being handled in this
|
||||
// state, and lexing them in "normal" mode potentially producing such tokens.
|
||||
static Token skipToLeadingIdentifier() {
|
||||
for (;;) {
|
||||
if (int c = skipPastEOL(); c == EOF) {
|
||||
return Token(T_(YYEOF));
|
||||
} else if (startsIdentifier(c)) {
|
||||
shiftChar();
|
||||
return readIdentifier(c, false);
|
||||
lexerState->atLineStart = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2187,7 +2184,7 @@ static Token skipIfBlock(bool toEndc) {
|
||||
|
||||
Defer reenableExpansions = scopedDisableExpansions();
|
||||
for (uint32_t startingDepth = lexer_GetIFDepth();;) {
|
||||
switch (Token token = skipToLeadingIdentifier(); token.type) {
|
||||
switch (Token token = skipToLeadingKeyword(); token.type) {
|
||||
case T_(YYEOF):
|
||||
return token;
|
||||
|
||||
@@ -2241,7 +2238,7 @@ static Token yylex_SKIP_TO_ENDR() {
|
||||
// context, which yields an EOF.
|
||||
Defer reenableExpansions = scopedDisableExpansions();
|
||||
for (;;) {
|
||||
switch (Token token = skipToLeadingIdentifier(); token.type) {
|
||||
switch (Token token = skipToLeadingKeyword(); token.type) {
|
||||
case T_(YYEOF):
|
||||
return token;
|
||||
|
||||
@@ -2323,38 +2320,28 @@ static Capture makeCapture(char const *name, CallbackFnT callback) {
|
||||
assume(capture.span.ptr == nullptr);
|
||||
}
|
||||
|
||||
nextLine();
|
||||
|
||||
Defer reenableExpansions = scopedDisableExpansions();
|
||||
for (;;) {
|
||||
nextLine();
|
||||
|
||||
if (int c = skipChars(isBlankSpace); startsIdentifier(c)) {
|
||||
shiftChar();
|
||||
int tokenType = readIdentifier(c, false).type;
|
||||
if (size_t endTokenLength = callback(tokenType); endTokenLength > 0) {
|
||||
if (!capture.span.ptr) {
|
||||
// Retrieve the capture buffer now that we're done capturing
|
||||
capture.span.ptr = lexerState->makeSharedCaptureBufPtr();
|
||||
}
|
||||
// Subtract the length of the ending token; we know we have read it exactly, not
|
||||
// e.g. an interpolation or EQUS expansion, since those are disabled.
|
||||
capture.span.size = lexerState->captureSize - endTokenLength;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Just consume characters until EOL or EOF
|
||||
if (int c = skipChars([](int d) { return d != EOF && !isNewline(d); }); c == EOF) {
|
||||
if (Token token = skipToLeadingKeyword(); token.type == T_(YYEOF)) {
|
||||
error("Unterminated %s", name);
|
||||
capture.span = {.ptr = nullptr, .size = lexerState->captureSize};
|
||||
break;
|
||||
} else {
|
||||
assume(isNewline(c));
|
||||
shiftChar();
|
||||
handleCRLF(c);
|
||||
} else if (size_t endTokenLength = callback(token.type); endTokenLength > 0) {
|
||||
if (!capture.span.ptr) {
|
||||
// Retrieve the capture buffer now that we're done capturing
|
||||
capture.span.ptr = lexerState->makeSharedCaptureBufPtr();
|
||||
}
|
||||
// Subtract the length of the ending token; we know we have read it exactly,
|
||||
// not e.g. an interpolation or EQUS expansion, since those are disabled.
|
||||
capture.span.size = lexerState->captureSize - endTokenLength;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
lexerState->atLineStart = false; // The ending token or EOF puts us past the start of the line
|
||||
assume(!lexerState->atLineStart); // `skipToLeadingKeyword` moves past the start of the line
|
||||
|
||||
lexerState->capturing = false;
|
||||
lexerState->captureBuf = nullptr;
|
||||
return capture;
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
MACRO m
|
||||
ENDM
|
||||
|
||||
IF 0
|
||||
m
|
||||
ENDC
|
||||
|
||||
IF 0
|
||||
m \
|
||||
ENDC
|
||||
|
||||
IF 1
|
||||
m
|
||||
ELSE
|
||||
m
|
||||
ENDC
|
||||
|
||||
IF 1
|
||||
m
|
||||
ELSE
|
||||
m \
|
||||
ENDC
|
||||
|
||||
IF 1
|
||||
m
|
||||
ELIF 0
|
||||
m
|
||||
ENDC
|
||||
|
||||
IF 1
|
||||
m
|
||||
ELIF 0
|
||||
m \
|
||||
ENDC
|
||||
@@ -0,0 +1,20 @@
|
||||
section "test", rom0
|
||||
|
||||
if 0
|
||||
section.local "oops"
|
||||
else
|
||||
println "*sips coffee*"
|
||||
endc
|
||||
|
||||
rept 0
|
||||
assert.local "lol"
|
||||
endr
|
||||
rept 1
|
||||
println "this is fine"
|
||||
endr
|
||||
|
||||
macro m
|
||||
db.local 42
|
||||
endm
|
||||
|
||||
db.local 123
|
||||
@@ -0,0 +1,5 @@
|
||||
error: Identifier "db.local" begins with a keyword; did you mean to put a space between them?
|
||||
at local-after-keyword.asm(20)
|
||||
error: syntax error, unexpected number
|
||||
at local-after-keyword.asm(20)
|
||||
Assembly aborted with 2 errors
|
||||
@@ -0,0 +1,2 @@
|
||||
*sips coffee*
|
||||
this is fine
|
||||
Reference in New Issue
Block a user