mirror of
https://github.com/gbdev/rgbds.git
synced 2026-05-09 03:19:35 +00:00
Consolidate, refactor, and bugfix the lexer's handling of captures and skips (#1957)
- Do not error about local labels following keywords in skips or captures (fixes #1955) - Do not incompletely attempt to handle line continuations in skips (fixes #1956) - Rename `skipToLeadingIdentifier` to `skipToLeadingKeyword`, refactor to merge `skipToEOL` into it, and use it for both skips and captures
This commit is contained in:
+44
-57
@@ -2142,42 +2142,39 @@ finish: // Can't `break` out of a nested `for`-`switch`
|
||||
return Token(T_(YYEOF));
|
||||
}
|
||||
|
||||
static int skipPastEOL() {
|
||||
if (lexerState->atLineStart) {
|
||||
lexerState->atLineStart = false;
|
||||
return skipChars(isBlankSpace);
|
||||
}
|
||||
|
||||
// This function is called when capturing `REPT`/`FOR` loops and `MACRO` bodies,
|
||||
// and when skipping unexecuted `IF`/`ELIF`/`ELSE` blocks and `REPT`/`FOR` loops.
|
||||
// It expects that these constructs' `ENDC`/`ENDR`/`ENDM` closing tokens are only
|
||||
// valid at the start of their lines, which enables ignoring everything except
|
||||
// the leading keyword in lines that have one (as well as line continuations).
|
||||
//
|
||||
// Note that when these constructs are *evaluated*, they can perform expansions
|
||||
// (for macro args, interpolations, and macro invocations) which may produce
|
||||
// tokens that would change how these constructs were captured or skipped, if
|
||||
// they had been produced during the capture/skip non-evaluating phase.
|
||||
static Token skipToLeadingKeyword() {
|
||||
for (;;) {
|
||||
if (lexerState->atLineStart) {
|
||||
lexerState->atLineStart = false;
|
||||
if (int c = skipChars(isBlankSpace); c == EOF) {
|
||||
return Token(T_(YYEOF));
|
||||
} else if (startsIdentifier(c) && c != '.') {
|
||||
shiftChar();
|
||||
std::string keyword(1, c);
|
||||
for (c = peek(); continuesIdentifier(c) && c != '.'; c = nextChar()) {
|
||||
keyword += c;
|
||||
}
|
||||
if (auto search = keywords.find(keyword); search != keywords.end()) {
|
||||
return Token(search->second);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (int c = bumpChar(); c == EOF) {
|
||||
return EOF;
|
||||
return Token(T_(YYEOF));
|
||||
} else if (isNewline(c)) {
|
||||
handleCRLF(c);
|
||||
nextLine();
|
||||
return skipChars(isBlankSpace);
|
||||
} else if (c == '\\') {
|
||||
// Unconditionally skip the next char, including line continuations
|
||||
c = bumpChar();
|
||||
if (isNewline(c)) {
|
||||
handleCRLF(c);
|
||||
nextLine();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This function uses the fact that `IF` and `REPT` constructs are only valid
|
||||
// when there's nothing before them on their lines. This enables filtering
|
||||
// "meaningful" tokens (at line start) vs. "meaningless" (everything else) ones.
|
||||
// It's especially important due to macro args not being handled in this
|
||||
// state, and lexing them in "normal" mode potentially producing such tokens.
|
||||
static Token skipToLeadingIdentifier() {
|
||||
for (;;) {
|
||||
if (int c = skipPastEOL(); c == EOF) {
|
||||
return Token(T_(YYEOF));
|
||||
} else if (startsIdentifier(c)) {
|
||||
shiftChar();
|
||||
return readIdentifier(c, false);
|
||||
lexerState->atLineStart = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2187,7 +2184,7 @@ static Token skipIfBlock(bool toEndc) {
|
||||
|
||||
Defer reenableExpansions = scopedDisableExpansions();
|
||||
for (uint32_t startingDepth = lexer_GetIFDepth();;) {
|
||||
switch (Token token = skipToLeadingIdentifier(); token.type) {
|
||||
switch (Token token = skipToLeadingKeyword(); token.type) {
|
||||
case T_(YYEOF):
|
||||
return token;
|
||||
|
||||
@@ -2241,7 +2238,7 @@ static Token yylex_SKIP_TO_ENDR() {
|
||||
// context, which yields an EOF.
|
||||
Defer reenableExpansions = scopedDisableExpansions();
|
||||
for (;;) {
|
||||
switch (Token token = skipToLeadingIdentifier(); token.type) {
|
||||
switch (Token token = skipToLeadingKeyword(); token.type) {
|
||||
case T_(YYEOF):
|
||||
return token;
|
||||
|
||||
@@ -2323,38 +2320,28 @@ static Capture makeCapture(char const *name, CallbackFnT callback) {
|
||||
assume(capture.span.ptr == nullptr);
|
||||
}
|
||||
|
||||
nextLine();
|
||||
|
||||
Defer reenableExpansions = scopedDisableExpansions();
|
||||
for (;;) {
|
||||
nextLine();
|
||||
|
||||
if (int c = skipChars(isBlankSpace); startsIdentifier(c)) {
|
||||
shiftChar();
|
||||
int tokenType = readIdentifier(c, false).type;
|
||||
if (size_t endTokenLength = callback(tokenType); endTokenLength > 0) {
|
||||
if (!capture.span.ptr) {
|
||||
// Retrieve the capture buffer now that we're done capturing
|
||||
capture.span.ptr = lexerState->makeSharedCaptureBufPtr();
|
||||
}
|
||||
// Subtract the length of the ending token; we know we have read it exactly, not
|
||||
// e.g. an interpolation or EQUS expansion, since those are disabled.
|
||||
capture.span.size = lexerState->captureSize - endTokenLength;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Just consume characters until EOL or EOF
|
||||
if (int c = skipChars([](int d) { return d != EOF && !isNewline(d); }); c == EOF) {
|
||||
if (Token token = skipToLeadingKeyword(); token.type == T_(YYEOF)) {
|
||||
error("Unterminated %s", name);
|
||||
capture.span = {.ptr = nullptr, .size = lexerState->captureSize};
|
||||
break;
|
||||
} else {
|
||||
assume(isNewline(c));
|
||||
shiftChar();
|
||||
handleCRLF(c);
|
||||
} else if (size_t endTokenLength = callback(token.type); endTokenLength > 0) {
|
||||
if (!capture.span.ptr) {
|
||||
// Retrieve the capture buffer now that we're done capturing
|
||||
capture.span.ptr = lexerState->makeSharedCaptureBufPtr();
|
||||
}
|
||||
// Subtract the length of the ending token; we know we have read it exactly,
|
||||
// not e.g. an interpolation or EQUS expansion, since those are disabled.
|
||||
capture.span.size = lexerState->captureSize - endTokenLength;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
lexerState->atLineStart = false; // The ending token or EOF puts us past the start of the line
|
||||
assume(!lexerState->atLineStart); // `skipToLeadingKeyword` moves past the start of the line
|
||||
|
||||
lexerState->capturing = false;
|
||||
lexerState->captureBuf = nullptr;
|
||||
return capture;
|
||||
|
||||
Reference in New Issue
Block a user