Consolidate, refactor, and bugfix the lexer's handling of captures and skips (#1957)

- Do not error about local labels following keywords in skips or captures (fixes #1955)
- Do not incompletely attempt to handle line continuations in skips (fixes #1956)
- Rename `skipToLeadingIdentifier` to `skipToLeadingKeyword`, refactor to merge
  `skipToEOL` into it, and use it for both skips and captures
This commit is contained in:
Rangi
2026-04-20 13:04:20 +02:00
committed by GitHub
parent 12186fdccc
commit cfec017fed
5 changed files with 105 additions and 57 deletions
+44 -57
View File
@@ -2142,42 +2142,39 @@ finish: // Can't `break` out of a nested `for`-`switch`
return Token(T_(YYEOF)); return Token(T_(YYEOF));
} }
static int skipPastEOL() { // This function is called when capturing `REPT`/`FOR` loops and `MACRO` bodies,
if (lexerState->atLineStart) { // and when skipping unexecuted `IF`/`ELIF`/`ELSE` blocks and `REPT`/`FOR` loops.
lexerState->atLineStart = false; // It expects that these constructs' `ENDC`/`ENDR`/`ENDM` closing tokens are only
return skipChars(isBlankSpace); // valid at the start of their lines, which enables ignoring everything except
} // the leading keyword in lines that have one (as well as line continuations).
//
// Note that when these constructs are *evaluated*, they can perform expansions
// (for macro args, interpolations, and macro invocations) which may produce
// tokens that would change how these constructs were captured or skipped, if
// they had been produced during the capture/skip non-evaluating phase.
static Token skipToLeadingKeyword() {
for (;;) { for (;;) {
if (lexerState->atLineStart) {
lexerState->atLineStart = false;
if (int c = skipChars(isBlankSpace); c == EOF) {
return Token(T_(YYEOF));
} else if (startsIdentifier(c) && c != '.') {
shiftChar();
std::string keyword(1, c);
for (c = peek(); continuesIdentifier(c) && c != '.'; c = nextChar()) {
keyword += c;
}
if (auto search = keywords.find(keyword); search != keywords.end()) {
return Token(search->second);
}
}
}
if (int c = bumpChar(); c == EOF) { if (int c = bumpChar(); c == EOF) {
return EOF; return Token(T_(YYEOF));
} else if (isNewline(c)) { } else if (isNewline(c)) {
handleCRLF(c); handleCRLF(c);
nextLine(); nextLine();
return skipChars(isBlankSpace); lexerState->atLineStart = true;
} else if (c == '\\') {
// Unconditionally skip the next char, including line continuations
c = bumpChar();
if (isNewline(c)) {
handleCRLF(c);
nextLine();
}
}
}
}
// This function uses the fact that `IF` and `REPT` constructs are only valid
// when there's nothing before them on their lines. This enables filtering
// "meaningful" tokens (at line start) vs. "meaningless" (everything else) ones.
// It's especially important due to macro args not being handled in this
// state, and lexing them in "normal" mode potentially producing such tokens.
static Token skipToLeadingIdentifier() {
for (;;) {
if (int c = skipPastEOL(); c == EOF) {
return Token(T_(YYEOF));
} else if (startsIdentifier(c)) {
shiftChar();
return readIdentifier(c, false);
} }
} }
} }
@@ -2187,7 +2184,7 @@ static Token skipIfBlock(bool toEndc) {
Defer reenableExpansions = scopedDisableExpansions(); Defer reenableExpansions = scopedDisableExpansions();
for (uint32_t startingDepth = lexer_GetIFDepth();;) { for (uint32_t startingDepth = lexer_GetIFDepth();;) {
switch (Token token = skipToLeadingIdentifier(); token.type) { switch (Token token = skipToLeadingKeyword(); token.type) {
case T_(YYEOF): case T_(YYEOF):
return token; return token;
@@ -2241,7 +2238,7 @@ static Token yylex_SKIP_TO_ENDR() {
// context, which yields an EOF. // context, which yields an EOF.
Defer reenableExpansions = scopedDisableExpansions(); Defer reenableExpansions = scopedDisableExpansions();
for (;;) { for (;;) {
switch (Token token = skipToLeadingIdentifier(); token.type) { switch (Token token = skipToLeadingKeyword(); token.type) {
case T_(YYEOF): case T_(YYEOF):
return token; return token;
@@ -2323,38 +2320,28 @@ static Capture makeCapture(char const *name, CallbackFnT callback) {
assume(capture.span.ptr == nullptr); assume(capture.span.ptr == nullptr);
} }
nextLine();
Defer reenableExpansions = scopedDisableExpansions(); Defer reenableExpansions = scopedDisableExpansions();
for (;;) { for (;;) {
nextLine(); if (Token token = skipToLeadingKeyword(); token.type == T_(YYEOF)) {
if (int c = skipChars(isBlankSpace); startsIdentifier(c)) {
shiftChar();
int tokenType = readIdentifier(c, false).type;
if (size_t endTokenLength = callback(tokenType); endTokenLength > 0) {
if (!capture.span.ptr) {
// Retrieve the capture buffer now that we're done capturing
capture.span.ptr = lexerState->makeSharedCaptureBufPtr();
}
// Subtract the length of the ending token; we know we have read it exactly, not
// e.g. an interpolation or EQUS expansion, since those are disabled.
capture.span.size = lexerState->captureSize - endTokenLength;
break;
}
}
// Just consume characters until EOL or EOF
if (int c = skipChars([](int d) { return d != EOF && !isNewline(d); }); c == EOF) {
error("Unterminated %s", name); error("Unterminated %s", name);
capture.span = {.ptr = nullptr, .size = lexerState->captureSize}; capture.span = {.ptr = nullptr, .size = lexerState->captureSize};
break; break;
} else { } else if (size_t endTokenLength = callback(token.type); endTokenLength > 0) {
assume(isNewline(c)); if (!capture.span.ptr) {
shiftChar(); // Retrieve the capture buffer now that we're done capturing
handleCRLF(c); capture.span.ptr = lexerState->makeSharedCaptureBufPtr();
}
// Subtract the length of the ending token; we know we have read it exactly,
// not e.g. an interpolation or EQUS expansion, since those are disabled.
capture.span.size = lexerState->captureSize - endTokenLength;
break;
} }
} }
lexerState->atLineStart = false; // The ending token or EOF puts us past the start of the line assume(!lexerState->atLineStart); // `skipToLeadingKeyword` moves past the start of the line
lexerState->capturing = false; lexerState->capturing = false;
lexerState->captureBuf = nullptr; lexerState->captureBuf = nullptr;
return capture; return capture;
+34
View File
@@ -0,0 +1,34 @@
MACRO m ; empty macro; `m` appears below in both executed and skipped blocks
ENDM
IF 0 ; false condition: this block's body is skipped
m
ENDC
IF 0 ; false condition: the skipped line ends with a backslash right before the newline
m \
ENDC
IF 1 ; true condition: this body is evaluated
m
ELSE ; not taken, so this branch is skipped
m
ENDC
IF 1 ; true condition: this body is evaluated
m
ELSE ; not taken; the skipped line ends with a backslash right before the newline
m \
ENDC
IF 1 ; true condition: this body is evaluated
m
ELIF 0 ; not taken, so this branch is skipped
m
ENDC
IF 1 ; true condition: this body is evaluated
m
ELIF 0 ; not taken; the skipped line ends with a backslash right before the newline
m \
ENDC
+20
View File
@@ -0,0 +1,20 @@
section "test", rom0
if 0 ; false condition: the keyword-plus-".local" line below is skipped
section.local "oops"
else ; taken branch: prints the first expected line of output
println "*sips coffee*"
endc
rept 0 ; zero iterations: the keyword-plus-".local" line below is skipped
assert.local "lol"
endr
rept 1 ; runs once: prints the second expected line of output
println "this is fine"
endr
macro m ; body is only captured here; `m` is never invoked in this file
db.local 42
endm
db.local 123 ; evaluated code: expected to report the "begins with a keyword" error
+5
View File
@@ -0,0 +1,5 @@
error: Identifier "db.local" begins with a keyword; did you mean to put a space between them?
at local-after-keyword.asm(20)
error: syntax error, unexpected number
at local-after-keyword.asm(20)
Assembly aborted with 2 errors
+2
View File
@@ -0,0 +1,2 @@
*sips coffee*
this is fine