mirror of
https://github.com/gbdev/rgbds.git
synced 2026-05-08 10:59:36 +00:00
Consolidate, refactor, and bugfix the lexer's handling of captures and skips (#1957)
- Do not error about local labels following keywords in skips or captures (fixes #1955)
- Do not incompletely attempt to handle line continuations in skips (fixes #1956)
- Rename `skipToLeadingIdentifier` to `skipToLeadingKeyword`, refactor to merge `skipToEOL` into it, and use it for both skips and captures
This commit is contained in:
+44
-57
@@ -2142,42 +2142,39 @@ finish: // Can't `break` out of a nested `for`-`switch`
|
|||||||
return Token(T_(YYEOF));
|
return Token(T_(YYEOF));
|
||||||
}
|
}
|
||||||
|
|
||||||
static int skipPastEOL() {
|
// This function is called when capturing `REPT`/`FOR` loops and `MACRO` bodies,
|
||||||
if (lexerState->atLineStart) {
|
// and when skipping unexecuted `IF`/`ELIF`/`ELSE` blocks and `REPT`/`FOR` loops.
|
||||||
lexerState->atLineStart = false;
|
// It expects that these constructs' `ENDC`/`ENDR`/`ENDM` closing tokens are only
|
||||||
return skipChars(isBlankSpace);
|
// valid at the start of their lines, which enables ignoring everything except
|
||||||
}
|
// the leading keyword in lines that have one (as well as line continuations).
|
||||||
|
//
|
||||||
|
// Note that when these constructs are *evaluated*, they can perform expansions
|
||||||
|
// (for macro args, interpolations, and macro invocations) which may produce
|
||||||
|
// tokens that would change how these constructs were captured or skipped, if
|
||||||
|
// they had been produced during the capture/skip non-evaluating phase.
|
||||||
|
static Token skipToLeadingKeyword() {
|
||||||
for (;;) {
|
for (;;) {
|
||||||
|
if (lexerState->atLineStart) {
|
||||||
|
lexerState->atLineStart = false;
|
||||||
|
if (int c = skipChars(isBlankSpace); c == EOF) {
|
||||||
|
return Token(T_(YYEOF));
|
||||||
|
} else if (startsIdentifier(c) && c != '.') {
|
||||||
|
shiftChar();
|
||||||
|
std::string keyword(1, c);
|
||||||
|
for (c = peek(); continuesIdentifier(c) && c != '.'; c = nextChar()) {
|
||||||
|
keyword += c;
|
||||||
|
}
|
||||||
|
if (auto search = keywords.find(keyword); search != keywords.end()) {
|
||||||
|
return Token(search->second);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
if (int c = bumpChar(); c == EOF) {
|
if (int c = bumpChar(); c == EOF) {
|
||||||
return EOF;
|
return Token(T_(YYEOF));
|
||||||
} else if (isNewline(c)) {
|
} else if (isNewline(c)) {
|
||||||
handleCRLF(c);
|
handleCRLF(c);
|
||||||
nextLine();
|
nextLine();
|
||||||
return skipChars(isBlankSpace);
|
lexerState->atLineStart = true;
|
||||||
} else if (c == '\\') {
|
|
||||||
// Unconditionally skip the next char, including line continuations
|
|
||||||
c = bumpChar();
|
|
||||||
if (isNewline(c)) {
|
|
||||||
handleCRLF(c);
|
|
||||||
nextLine();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// This function uses the fact that `IF` and `REPT` constructs are only valid
|
|
||||||
// when there's nothing before them on their lines. This enables filtering
|
|
||||||
// "meaningful" tokens (at line start) vs. "meaningless" (everything else) ones.
|
|
||||||
// It's especially important due to macro args not being handled in this
|
|
||||||
// state, and lexing them in "normal" mode potentially producing such tokens.
|
|
||||||
static Token skipToLeadingIdentifier() {
|
|
||||||
for (;;) {
|
|
||||||
if (int c = skipPastEOL(); c == EOF) {
|
|
||||||
return Token(T_(YYEOF));
|
|
||||||
} else if (startsIdentifier(c)) {
|
|
||||||
shiftChar();
|
|
||||||
return readIdentifier(c, false);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2187,7 +2184,7 @@ static Token skipIfBlock(bool toEndc) {
|
|||||||
|
|
||||||
Defer reenableExpansions = scopedDisableExpansions();
|
Defer reenableExpansions = scopedDisableExpansions();
|
||||||
for (uint32_t startingDepth = lexer_GetIFDepth();;) {
|
for (uint32_t startingDepth = lexer_GetIFDepth();;) {
|
||||||
switch (Token token = skipToLeadingIdentifier(); token.type) {
|
switch (Token token = skipToLeadingKeyword(); token.type) {
|
||||||
case T_(YYEOF):
|
case T_(YYEOF):
|
||||||
return token;
|
return token;
|
||||||
|
|
||||||
@@ -2241,7 +2238,7 @@ static Token yylex_SKIP_TO_ENDR() {
|
|||||||
// context, which yields an EOF.
|
// context, which yields an EOF.
|
||||||
Defer reenableExpansions = scopedDisableExpansions();
|
Defer reenableExpansions = scopedDisableExpansions();
|
||||||
for (;;) {
|
for (;;) {
|
||||||
switch (Token token = skipToLeadingIdentifier(); token.type) {
|
switch (Token token = skipToLeadingKeyword(); token.type) {
|
||||||
case T_(YYEOF):
|
case T_(YYEOF):
|
||||||
return token;
|
return token;
|
||||||
|
|
||||||
@@ -2323,38 +2320,28 @@ static Capture makeCapture(char const *name, CallbackFnT callback) {
|
|||||||
assume(capture.span.ptr == nullptr);
|
assume(capture.span.ptr == nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nextLine();
|
||||||
|
|
||||||
Defer reenableExpansions = scopedDisableExpansions();
|
Defer reenableExpansions = scopedDisableExpansions();
|
||||||
for (;;) {
|
for (;;) {
|
||||||
nextLine();
|
if (Token token = skipToLeadingKeyword(); token.type == T_(YYEOF)) {
|
||||||
|
|
||||||
if (int c = skipChars(isBlankSpace); startsIdentifier(c)) {
|
|
||||||
shiftChar();
|
|
||||||
int tokenType = readIdentifier(c, false).type;
|
|
||||||
if (size_t endTokenLength = callback(tokenType); endTokenLength > 0) {
|
|
||||||
if (!capture.span.ptr) {
|
|
||||||
// Retrieve the capture buffer now that we're done capturing
|
|
||||||
capture.span.ptr = lexerState->makeSharedCaptureBufPtr();
|
|
||||||
}
|
|
||||||
// Subtract the length of the ending token; we know we have read it exactly, not
|
|
||||||
// e.g. an interpolation or EQUS expansion, since those are disabled.
|
|
||||||
capture.span.size = lexerState->captureSize - endTokenLength;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Just consume characters until EOL or EOF
|
|
||||||
if (int c = skipChars([](int d) { return d != EOF && !isNewline(d); }); c == EOF) {
|
|
||||||
error("Unterminated %s", name);
|
error("Unterminated %s", name);
|
||||||
capture.span = {.ptr = nullptr, .size = lexerState->captureSize};
|
capture.span = {.ptr = nullptr, .size = lexerState->captureSize};
|
||||||
break;
|
break;
|
||||||
} else {
|
} else if (size_t endTokenLength = callback(token.type); endTokenLength > 0) {
|
||||||
assume(isNewline(c));
|
if (!capture.span.ptr) {
|
||||||
shiftChar();
|
// Retrieve the capture buffer now that we're done capturing
|
||||||
handleCRLF(c);
|
capture.span.ptr = lexerState->makeSharedCaptureBufPtr();
|
||||||
|
}
|
||||||
|
// Subtract the length of the ending token; we know we have read it exactly,
|
||||||
|
// not e.g. an interpolation or EQUS expansion, since those are disabled.
|
||||||
|
capture.span.size = lexerState->captureSize - endTokenLength;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
lexerState->atLineStart = false; // The ending token or EOF puts us past the start of the line
|
assume(!lexerState->atLineStart); // `skipToLeadingKeyword` moves past the start of the line
|
||||||
|
|
||||||
lexerState->capturing = false;
|
lexerState->capturing = false;
|
||||||
lexerState->captureBuf = nullptr;
|
lexerState->captureBuf = nullptr;
|
||||||
return capture;
|
return capture;
|
||||||
|
|||||||
@@ -0,0 +1,34 @@
|
|||||||
|
MACRO m
|
||||||
|
ENDM
|
||||||
|
|
||||||
|
IF 0
|
||||||
|
m
|
||||||
|
ENDC
|
||||||
|
|
||||||
|
IF 0
|
||||||
|
m \
|
||||||
|
ENDC
|
||||||
|
|
||||||
|
IF 1
|
||||||
|
m
|
||||||
|
ELSE
|
||||||
|
m
|
||||||
|
ENDC
|
||||||
|
|
||||||
|
IF 1
|
||||||
|
m
|
||||||
|
ELSE
|
||||||
|
m \
|
||||||
|
ENDC
|
||||||
|
|
||||||
|
IF 1
|
||||||
|
m
|
||||||
|
ELIF 0
|
||||||
|
m
|
||||||
|
ENDC
|
||||||
|
|
||||||
|
IF 1
|
||||||
|
m
|
||||||
|
ELIF 0
|
||||||
|
m \
|
||||||
|
ENDC
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
section "test", rom0
|
||||||
|
|
||||||
|
if 0
|
||||||
|
section.local "oops"
|
||||||
|
else
|
||||||
|
println "*sips coffee*"
|
||||||
|
endc
|
||||||
|
|
||||||
|
rept 0
|
||||||
|
assert.local "lol"
|
||||||
|
endr
|
||||||
|
rept 1
|
||||||
|
println "this is fine"
|
||||||
|
endr
|
||||||
|
|
||||||
|
macro m
|
||||||
|
db.local 42
|
||||||
|
endm
|
||||||
|
|
||||||
|
db.local 123
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
error: Identifier "db.local" begins with a keyword; did you mean to put a space between them?
|
||||||
|
at local-after-keyword.asm(20)
|
||||||
|
error: syntax error, unexpected number
|
||||||
|
at local-after-keyword.asm(20)
|
||||||
|
Assembly aborted with 2 errors
|
||||||
@@ -0,0 +1,2 @@
|
|||||||
|
*sips coffee*
|
||||||
|
this is fine
|
||||||
Reference in New Issue
Block a user