diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index 8801093d..49d5f4fc 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -2142,42 +2142,39 @@ finish: // Can't `break` out of a nested `for`-`switch` return Token(T_(YYEOF)); } -static int skipPastEOL() { - if (lexerState->atLineStart) { - lexerState->atLineStart = false; - return skipChars(isBlankSpace); - } - +// This function is called when capturing `REPT`/`FOR` loops and `MACRO` bodies, and when skipping +// unexecuted `IF`/`ELIF`/`ELSE` blocks and `REPT`/`FOR` loops. It returns the first keyword that +// leads a line (the leading word stops at the first `.`), or `YYEOF` if the input ends first. +// It expects that these constructs' `ENDC`/`ENDR`/`ENDM` closing tokens are only valid at the start +// of their lines, which enables ignoring everything except the leading keyword in lines that have +// one. Line continuations are ignored too: they get no special handling here. +// +// Note that when these constructs are *evaluated*, they can perform expansions (for macro args, +// interpolations, and macro invocations) which may produce tokens that would change how these +// constructs were captured or skipped, had they been produced during the non-evaluating phase. 
+static Token skipToLeadingKeyword() { for (;;) { + if (lexerState->atLineStart) { + lexerState->atLineStart = false; + if (int c = skipChars(isBlankSpace); c == EOF) { + return Token(T_(YYEOF)); + } else if (startsIdentifier(c) && c != '.') { + shiftChar(); + std::string keyword(1, c); + for (c = peek(); continuesIdentifier(c) && c != '.'; c = nextChar()) { + keyword += c; + } + if (auto search = keywords.find(keyword); search != keywords.end()) { + return Token(search->second); + } + } + } if (int c = bumpChar(); c == EOF) { - return EOF; + return Token(T_(YYEOF)); } else if (isNewline(c)) { handleCRLF(c); nextLine(); - return skipChars(isBlankSpace); - } else if (c == '\\') { - // Unconditionally skip the next char, including line continuations - c = bumpChar(); - if (isNewline(c)) { - handleCRLF(c); - nextLine(); - } - } - } -} - -// This function uses the fact that `IF` and `REPT` constructs are only valid -// when there's nothing before them on their lines. This enables filtering -// "meaningful" tokens (at line start) vs. "meaningless" (everything else) ones. -// It's especially important due to macro args not being handled in this -// state, and lexing them in "normal" mode potentially producing such tokens. -static Token skipToLeadingIdentifier() { - for (;;) { - if (int c = skipPastEOL(); c == EOF) { - return Token(T_(YYEOF)); - } else if (startsIdentifier(c)) { - shiftChar(); - return readIdentifier(c, false); + lexerState->atLineStart = true; } } } @@ -2187,7 +2184,7 @@ static Token skipIfBlock(bool toEndc) { Defer reenableExpansions = scopedDisableExpansions(); for (uint32_t startingDepth = lexer_GetIFDepth();;) { - switch (Token token = skipToLeadingIdentifier(); token.type) { + switch (Token token = skipToLeadingKeyword(); token.type) { case T_(YYEOF): return token; @@ -2241,7 +2238,7 @@ static Token yylex_SKIP_TO_ENDR() { // context, which yields an EOF. 
Defer reenableExpansions = scopedDisableExpansions(); for (;;) { - switch (Token token = skipToLeadingIdentifier(); token.type) { + switch (Token token = skipToLeadingKeyword(); token.type) { case T_(YYEOF): return token; @@ -2323,38 +2320,28 @@ static Capture makeCapture(char const *name, CallbackFnT callback) { assume(capture.span.ptr == nullptr); } + nextLine(); + Defer reenableExpansions = scopedDisableExpansions(); for (;;) { - nextLine(); - - if (int c = skipChars(isBlankSpace); startsIdentifier(c)) { - shiftChar(); - int tokenType = readIdentifier(c, false).type; - if (size_t endTokenLength = callback(tokenType); endTokenLength > 0) { - if (!capture.span.ptr) { - // Retrieve the capture buffer now that we're done capturing - capture.span.ptr = lexerState->makeSharedCaptureBufPtr(); - } - // Subtract the length of the ending token; we know we have read it exactly, not - // e.g. an interpolation or EQUS expansion, since those are disabled. - capture.span.size = lexerState->captureSize - endTokenLength; - break; - } - } - - // Just consume characters until EOL or EOF - if (int c = skipChars([](int d) { return d != EOF && !isNewline(d); }); c == EOF) { + if (Token token = skipToLeadingKeyword(); token.type == T_(YYEOF)) { error("Unterminated %s", name); capture.span = {.ptr = nullptr, .size = lexerState->captureSize}; break; - } else { - assume(isNewline(c)); - shiftChar(); - handleCRLF(c); + } else if (size_t endTokenLength = callback(token.type); endTokenLength > 0) { + if (!capture.span.ptr) { + // Retrieve the capture buffer now that we're done capturing + capture.span.ptr = lexerState->makeSharedCaptureBufPtr(); + } + // Subtract the length of the ending token; we know we have read it exactly, + // not e.g. an interpolation or EQUS expansion, since those are disabled. 
+ capture.span.size = lexerState->captureSize - endTokenLength; + break; } } - lexerState->atLineStart = false; // The ending token or EOF puts us past the start of the line + assume(!lexerState->atLineStart); // `skipToLeadingKeyword` moves past the start of the line + lexerState->capturing = false; lexerState->captureBuf = nullptr; return capture; diff --git a/test/asm/line-continuation-if.asm b/test/asm/line-continuation-if.asm new file mode 100644 index 00000000..ff809e8f --- /dev/null +++ b/test/asm/line-continuation-if.asm @@ -0,0 +1,34 @@ +MACRO m +ENDM + +IF 0 + m +ENDC + +IF 0 + m \ +ENDC + +IF 1 + m +ELSE + m +ENDC + +IF 1 + m +ELSE + m \ +ENDC + +IF 1 + m +ELIF 0 + m +ENDC + +IF 1 + m +ELIF 0 + m \ +ENDC diff --git a/test/asm/local-after-keyword.asm b/test/asm/local-after-keyword.asm new file mode 100644 index 00000000..296aad95 --- /dev/null +++ b/test/asm/local-after-keyword.asm @@ -0,0 +1,20 @@ +section "test", rom0 + +if 0 + section.local "oops" +else + println "*sips coffee*" +endc + +rept 0 + assert.local "lol" +endr +rept 1 + println "this is fine" +endr + +macro m + db.local 42 +endm + +db.local 123 diff --git a/test/asm/local-after-keyword.err b/test/asm/local-after-keyword.err new file mode 100644 index 00000000..75548ae1 --- /dev/null +++ b/test/asm/local-after-keyword.err @@ -0,0 +1,5 @@ +error: Identifier "db.local" begins with a keyword; did you mean to put a space between them? + at local-after-keyword.asm(20) +error: syntax error, unexpected number + at local-after-keyword.asm(20) +Assembly aborted with 2 errors diff --git a/test/asm/local-after-keyword.out b/test/asm/local-after-keyword.out new file mode 100644 index 00000000..1822e886 --- /dev/null +++ b/test/asm/local-after-keyword.out @@ -0,0 +1,2 @@ +*sips coffee* +this is fine