mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-20 10:12:06 +00:00
Refactor some redundant lexer code
This commit is contained in:
@@ -257,6 +257,10 @@ static bool isWhitespace(int c) {
|
|||||||
return c == ' ' || c == '\t';
|
return c == ' ' || c == '\t';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool isNewline(int c) {
|
||||||
|
return c == '\r' || c == '\n';
|
||||||
|
}
|
||||||
|
|
||||||
static LexerState *lexerState = nullptr;
|
static LexerState *lexerState = nullptr;
|
||||||
static LexerState *lexerStateEOL = nullptr;
|
static LexerState *lexerStateEOL = nullptr;
|
||||||
|
|
||||||
@@ -288,6 +292,13 @@ static void nextLine() {
|
|||||||
++lexerState->lineNo;
|
++lexerState->lineNo;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void nextLineOutsideExpansion() {
|
||||||
|
// Newlines read within an expansion should not increase the line count
|
||||||
|
if (lexerState->expansions.empty()) {
|
||||||
|
nextLine();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t lexer_GetIFDepth() {
|
uint32_t lexer_GetIFDepth() {
|
||||||
return lexerState->ifStack.size();
|
return lexerState->ifStack.size();
|
||||||
}
|
}
|
||||||
@@ -757,7 +768,9 @@ static int peek() {
|
|||||||
static void shiftChar() {
|
static void shiftChar() {
|
||||||
if (lexerState->capturing) {
|
if (lexerState->capturing) {
|
||||||
if (lexerState->captureBuf) {
|
if (lexerState->captureBuf) {
|
||||||
lexerState->captureBuf->push_back(peek());
|
int c = peek();
|
||||||
|
assume(c != EOF); // Avoid calling `shiftChar()` when it could be EOF while capturing
|
||||||
|
lexerState->captureBuf->push_back(c);
|
||||||
}
|
}
|
||||||
++lexerState->captureSize;
|
++lexerState->captureSize;
|
||||||
}
|
}
|
||||||
@@ -801,9 +814,7 @@ static bool consumeChar(int c) {
|
|||||||
|
|
||||||
static int bumpChar() {
|
static int bumpChar() {
|
||||||
int c = peek();
|
int c = peek();
|
||||||
if (c != EOF) {
|
|
||||||
shiftChar();
|
shiftChar();
|
||||||
}
|
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -872,9 +883,7 @@ static void discardBlockComment() {
|
|||||||
handleCRLF(c);
|
handleCRLF(c);
|
||||||
[[fallthrough]];
|
[[fallthrough]];
|
||||||
case '\n':
|
case '\n':
|
||||||
if (lexerState->expansions.empty()) {
|
nextLineOutsideExpansion();
|
||||||
nextLine();
|
|
||||||
}
|
|
||||||
continue;
|
continue;
|
||||||
case '/':
|
case '/':
|
||||||
if (peek() == '*') {
|
if (peek() == '*') {
|
||||||
@@ -895,23 +904,17 @@ static void discardBlockComment() {
|
|||||||
|
|
||||||
static void discardComment() {
|
static void discardComment() {
|
||||||
Defer reenableExpansions = scopedDisableExpansions();
|
Defer reenableExpansions = scopedDisableExpansions();
|
||||||
for (;; shiftChar()) {
|
skipChars([](int c) { return c != EOF && !isNewline(c); });
|
||||||
if (int c = peek(); c == EOF || c == '\r' || c == '\n') {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void discardLineContinuation() {
|
static void discardLineContinuation() {
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if (int c = peek(); isWhitespace(c)) {
|
if (int c = peek(); isWhitespace(c)) {
|
||||||
shiftChar();
|
shiftChar();
|
||||||
} else if (c == '\r' || c == '\n') {
|
} else if (isNewline(c)) {
|
||||||
shiftChar();
|
shiftChar();
|
||||||
handleCRLF(c);
|
handleCRLF(c);
|
||||||
if (lexerState->expansions.empty()) {
|
nextLineOutsideExpansion();
|
||||||
nextLine();
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
} else if (c == ';') {
|
} else if (c == ';') {
|
||||||
discardComment();
|
discardComment();
|
||||||
@@ -1244,7 +1247,7 @@ static std::pair<Symbol const *, std::shared_ptr<std::string>> readInterpolation
|
|||||||
beginExpansion(interp.second, interp.first->name);
|
beginExpansion(interp.second, interp.first->name);
|
||||||
}
|
}
|
||||||
continue; // Restart, reading from the new buffer
|
continue; // Restart, reading from the new buffer
|
||||||
} else if (int c = peek(); c == EOF || c == '\r' || c == '\n' || c == '"') {
|
} else if (int c = peek(); c == EOF || isNewline(c) || c == '"') {
|
||||||
error("Missing }");
|
error("Missing }");
|
||||||
break;
|
break;
|
||||||
} else if (c == '}') {
|
} else if (c == '}') {
|
||||||
@@ -1464,7 +1467,7 @@ static void readString(std::string &str, bool rawString) {
|
|||||||
int c = peek();
|
int c = peek();
|
||||||
|
|
||||||
// '\r', '\n' or EOF ends a single-line string early
|
// '\r', '\n' or EOF ends a single-line string early
|
||||||
if (c == EOF || (!multiline && (c == '\r' || c == '\n'))) {
|
if (c == EOF || (!multiline && isNewline(c))) {
|
||||||
error("Unterminated string");
|
error("Unterminated string");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -1473,7 +1476,7 @@ static void readString(std::string &str, bool rawString) {
|
|||||||
shiftChar();
|
shiftChar();
|
||||||
|
|
||||||
// Handle '\r' or '\n' (in multiline strings only, already handled above otherwise)
|
// Handle '\r' or '\n' (in multiline strings only, already handled above otherwise)
|
||||||
if (c == '\r' || c == '\n') {
|
if (isNewline(c)) {
|
||||||
handleCRLF(c);
|
handleCRLF(c);
|
||||||
nextLine();
|
nextLine();
|
||||||
str += '\n';
|
str += '\n';
|
||||||
@@ -1948,7 +1951,7 @@ static Token yylex_RAW() {
|
|||||||
} else if (c == '\\') {
|
} else if (c == '\\') {
|
||||||
c = nextChar();
|
c = nextChar();
|
||||||
// If not a line continuation, handle as a normal char
|
// If not a line continuation, handle as a normal char
|
||||||
if (!isWhitespace(c) && c != '\n' && c != '\r') {
|
if (!isWhitespace(c) && !isNewline(c)) {
|
||||||
goto backslash;
|
goto backslash;
|
||||||
}
|
}
|
||||||
// Line continuations count as "whitespace"
|
// Line continuations count as "whitespace"
|
||||||
@@ -2096,9 +2099,10 @@ finish: // Can't `break` out of a nested `for`-`switch`
|
|||||||
if (!str.empty()) {
|
if (!str.empty()) {
|
||||||
return Token(T_(STRING), str);
|
return Token(T_(STRING), str);
|
||||||
}
|
}
|
||||||
|
|
||||||
lexer_SetMode(LEXER_NORMAL);
|
lexer_SetMode(LEXER_NORMAL);
|
||||||
|
|
||||||
if (c == '\r' || c == '\n') {
|
if (isNewline(c)) {
|
||||||
shiftChar();
|
shiftChar();
|
||||||
handleCRLF(c);
|
handleCRLF(c);
|
||||||
return Token(T_(NEWLINE));
|
return Token(T_(NEWLINE));
|
||||||
@@ -2107,54 +2111,57 @@ finish: // Can't `break` out of a nested `for`-`switch`
|
|||||||
return Token(T_(YYEOF));
|
return Token(T_(YYEOF));
|
||||||
}
|
}
|
||||||
|
|
||||||
// This function uses the fact that `if`, etc. constructs are only valid when
|
static int skipPastEOL() {
|
||||||
// there's nothing before them on their lines. This enables filtering
|
if (lexerState->atLineStart) {
|
||||||
// "meaningful" (= at line start) vs. "meaningless" (everything else) tokens.
|
lexerState->atLineStart = false;
|
||||||
// It's especially important due to macro args not being handled in this
|
return skipChars(isWhitespace);
|
||||||
// state, and lexing them in "normal" mode potentially producing such tokens.
|
}
|
||||||
static Token skipIfBlock(bool toEndc) {
|
|
||||||
lexer_SetMode(LEXER_NORMAL);
|
|
||||||
uint32_t startingDepth = lexer_GetIFDepth();
|
|
||||||
|
|
||||||
bool atLineStart = lexerState->atLineStart;
|
for (;;) {
|
||||||
Defer notAtLineStart{[&] { lexerState->atLineStart = false; }};
|
if (int c = bumpChar(); c == EOF) {
|
||||||
|
return EOF;
|
||||||
Defer reenableExpansions = scopedDisableExpansions();
|
} else if (isNewline(c)) {
|
||||||
|
handleCRLF(c);
|
||||||
for (int c;; atLineStart = false) {
|
nextLine();
|
||||||
// Read chars until EOL
|
return skipChars(isWhitespace);
|
||||||
while (!atLineStart) {
|
|
||||||
c = bumpChar();
|
|
||||||
|
|
||||||
if (c == EOF) {
|
|
||||||
return Token(T_(YYEOF));
|
|
||||||
} else if (c == '\\') {
|
} else if (c == '\\') {
|
||||||
// Unconditionally skip the next char, including line continuations
|
// Unconditionally skip the next char, including line continuations
|
||||||
c = bumpChar();
|
c = bumpChar();
|
||||||
} else if (c == '\r' || c == '\n') {
|
if (isNewline(c)) {
|
||||||
atLineStart = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (c == '\r' || c == '\n') {
|
|
||||||
handleCRLF(c);
|
handleCRLF(c);
|
||||||
// Do this both on line continuations and plain EOLs
|
|
||||||
nextLine();
|
nextLine();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip leading whitespace
|
|
||||||
for (;; shiftChar()) {
|
|
||||||
c = peek();
|
|
||||||
if (!isWhitespace(c)) {
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!startsIdentifier(c)) {
|
// This function uses the fact that `IF` and `REPT` constructs are only valid
|
||||||
continue;
|
// when there's nothing before them on their lines. This enables filtering
|
||||||
}
|
// "meaningful" tokens (at line start) vs. "meaningless" (everything else) ones.
|
||||||
|
// It's especially important due to macro args not being handled in this
|
||||||
|
// state, and lexing them in "normal" mode potentially producing such tokens.
|
||||||
|
static Token skipToLeadingIdentifier() {
|
||||||
|
for (;;) {
|
||||||
|
if (int c = skipPastEOL(); c == EOF) {
|
||||||
|
return Token(T_(YYEOF));
|
||||||
|
} else if (startsIdentifier(c)) {
|
||||||
shiftChar();
|
shiftChar();
|
||||||
switch (Token token = readIdentifier(c, false); token.type) {
|
return readIdentifier(c, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static Token skipIfBlock(bool toEndc) {
|
||||||
|
uint32_t startingDepth = lexer_GetIFDepth();
|
||||||
|
|
||||||
|
lexer_SetMode(LEXER_NORMAL);
|
||||||
|
|
||||||
|
Defer reenableExpansions = scopedDisableExpansions();
|
||||||
|
for (;;) {
|
||||||
|
switch (Token token = skipToLeadingIdentifier(); token.type) {
|
||||||
|
case T_(YYEOF):
|
||||||
|
return token;
|
||||||
|
|
||||||
case T_(POP_IF):
|
case T_(POP_IF):
|
||||||
lexer_IncIFDepth();
|
lexer_IncIFDepth();
|
||||||
break;
|
break;
|
||||||
@@ -2185,9 +2192,6 @@ static Token skipIfBlock(bool toEndc) {
|
|||||||
}
|
}
|
||||||
lexer_DecIFDepth();
|
lexer_DecIFDepth();
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2203,39 +2207,15 @@ static Token yylex_SKIP_TO_ENDC() {
|
|||||||
static Token yylex_SKIP_TO_ENDR() {
|
static Token yylex_SKIP_TO_ENDR() {
|
||||||
lexer_SetMode(LEXER_NORMAL);
|
lexer_SetMode(LEXER_NORMAL);
|
||||||
|
|
||||||
bool atLineStart = lexerState->atLineStart;
|
// This does not have to look for an `ENDR` token because the entire `REPT` or `FOR` body has
|
||||||
Defer notAtLineStart{[&] { lexerState->atLineStart = false; }};
|
// been captured into the current fstack context, so it can just skip to the end of that
|
||||||
|
// context, which yields an EOF.
|
||||||
Defer reenableExpansions = scopedDisableExpansions();
|
Defer reenableExpansions = scopedDisableExpansions();
|
||||||
|
for (;;) {
|
||||||
|
switch (Token token = skipToLeadingIdentifier(); token.type) {
|
||||||
|
case T_(YYEOF):
|
||||||
|
return token;
|
||||||
|
|
||||||
for (int c;; atLineStart = false) {
|
|
||||||
// Read chars until EOL
|
|
||||||
while (!atLineStart) {
|
|
||||||
c = bumpChar();
|
|
||||||
|
|
||||||
if (c == EOF) {
|
|
||||||
return Token(T_(YYEOF));
|
|
||||||
} else if (c == '\\') {
|
|
||||||
// Unconditionally skip the next char, including line continuations
|
|
||||||
c = bumpChar();
|
|
||||||
} else if (c == '\r' || c == '\n') {
|
|
||||||
atLineStart = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (c == '\r' || c == '\n') {
|
|
||||||
handleCRLF(c);
|
|
||||||
// Do this both on line continuations and plain EOLs
|
|
||||||
nextLine();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
c = skipChars(isWhitespace);
|
|
||||||
|
|
||||||
if (!startsIdentifier(c)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
shiftChar();
|
|
||||||
switch (readIdentifier(c, false).type) {
|
|
||||||
case T_(POP_IF):
|
case T_(POP_IF):
|
||||||
lexer_IncIFDepth();
|
lexer_IncIFDepth();
|
||||||
break;
|
break;
|
||||||
@@ -2243,9 +2223,6 @@ static Token yylex_SKIP_TO_ENDR() {
|
|||||||
case T_(POP_ENDC):
|
case T_(POP_ENDC):
|
||||||
lexer_DecIFDepth();
|
lexer_DecIFDepth();
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2258,9 +2235,8 @@ yy::parser::symbol_type yylex() {
|
|||||||
if (lexerState->lastToken == T_(EOB) && yywrap()) {
|
if (lexerState->lastToken == T_(EOB) && yywrap()) {
|
||||||
return yy::parser::make_YYEOF();
|
return yy::parser::make_YYEOF();
|
||||||
}
|
}
|
||||||
// Newlines read within an expansion should not increase the line count
|
if (lexerState->atLineStart) {
|
||||||
if (lexerState->atLineStart && lexerState->expansions.empty()) {
|
nextLineOutsideExpansion();
|
||||||
nextLine();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static Token (* const lexerModeFuncs[NB_LEXER_MODES])() = {
|
static Token (* const lexerModeFuncs[NB_LEXER_MODES])() = {
|
||||||
@@ -2338,23 +2314,20 @@ Capture lexer_CaptureRept() {
|
|||||||
Capture capture = startCapture();
|
Capture capture = startCapture();
|
||||||
|
|
||||||
Defer reenableExpansions = scopedDisableExpansions();
|
Defer reenableExpansions = scopedDisableExpansions();
|
||||||
|
for (size_t depth = 0;;) {
|
||||||
size_t depth = 0;
|
|
||||||
|
|
||||||
for (int c;;) {
|
|
||||||
nextLine();
|
nextLine();
|
||||||
// We're at line start, so attempt to match a `REPT` or `ENDR` token
|
int c = skipChars(isWhitespace);
|
||||||
do { // Discard initial whitespace
|
if (c != EOF) {
|
||||||
c = bumpChar();
|
shiftChar();
|
||||||
} while (isWhitespace(c));
|
}
|
||||||
// Now, try to match `REPT`, `FOR` or `ENDR` as a **whole** keyword
|
|
||||||
|
// We're at line start, so attempt to match a `REPT`, `FOR`, or `ENDR` token
|
||||||
if (startsIdentifier(c)) {
|
if (startsIdentifier(c)) {
|
||||||
switch (readIdentifier(c, false).type) {
|
switch (readIdentifier(c, false).type) {
|
||||||
case T_(POP_REPT):
|
case T_(POP_REPT):
|
||||||
case T_(POP_FOR):
|
case T_(POP_FOR):
|
||||||
++depth;
|
++depth;
|
||||||
break; // Ignore the rest of that line
|
break; // Ignore the rest of that line
|
||||||
|
|
||||||
case T_(POP_ENDR):
|
case T_(POP_ENDR):
|
||||||
if (depth) {
|
if (depth) {
|
||||||
--depth;
|
--depth;
|
||||||
@@ -2365,23 +2338,24 @@ Capture lexer_CaptureRept() {
|
|||||||
// We know we have read exactly "ENDR", not e.g. an EQUS
|
// We know we have read exactly "ENDR", not e.g. an EQUS
|
||||||
capture.span.size -= literal_strlen("ENDR");
|
capture.span.size -= literal_strlen("ENDR");
|
||||||
return capture;
|
return capture;
|
||||||
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Just consume characters until EOL or EOF
|
// Just consume characters until EOL or EOF
|
||||||
for (;; c = bumpChar()) {
|
for (;;) {
|
||||||
if (c == EOF) {
|
if (c == EOF) {
|
||||||
error("Unterminated REPT/FOR block");
|
error("Unterminated REPT/FOR block");
|
||||||
endCapture(capture);
|
endCapture(capture);
|
||||||
capture.span.ptr = nullptr; // Indicates that it reached EOF before an ENDR
|
capture.span.ptr = nullptr; // Indicates that it reached EOF before an ENDR
|
||||||
return capture;
|
return capture;
|
||||||
} else if (c == '\n' || c == '\r') {
|
} else if (isNewline(c)) {
|
||||||
handleCRLF(c);
|
handleCRLF(c);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
c = peek();
|
||||||
|
if (c != EOF) {
|
||||||
|
shiftChar();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2390,14 +2364,14 @@ Capture lexer_CaptureMacro() {
|
|||||||
Capture capture = startCapture();
|
Capture capture = startCapture();
|
||||||
|
|
||||||
Defer reenableExpansions = scopedDisableExpansions();
|
Defer reenableExpansions = scopedDisableExpansions();
|
||||||
|
for (;;) {
|
||||||
for (int c;;) {
|
|
||||||
nextLine();
|
nextLine();
|
||||||
|
int c = skipChars(isWhitespace);
|
||||||
|
if (c != EOF) {
|
||||||
|
shiftChar();
|
||||||
|
}
|
||||||
|
|
||||||
// We're at line start, so attempt to match an `ENDM` token
|
// We're at line start, so attempt to match an `ENDM` token
|
||||||
do { // Discard initial whitespace
|
|
||||||
c = bumpChar();
|
|
||||||
} while (isWhitespace(c));
|
|
||||||
// Now, try to match `ENDM` as a **whole** keyword
|
|
||||||
if (startsIdentifier(c) && readIdentifier(c, false).type == T_(POP_ENDM)) {
|
if (startsIdentifier(c) && readIdentifier(c, false).type == T_(POP_ENDM)) {
|
||||||
endCapture(capture);
|
endCapture(capture);
|
||||||
// The ENDM has been captured, but we don't want it!
|
// The ENDM has been captured, but we don't want it!
|
||||||
@@ -2407,16 +2381,20 @@ Capture lexer_CaptureMacro() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Just consume characters until EOL or EOF
|
// Just consume characters until EOL or EOF
|
||||||
for (;; c = bumpChar()) {
|
for (;;) {
|
||||||
if (c == EOF) {
|
if (c == EOF) {
|
||||||
error("Unterminated macro definition");
|
error("Unterminated macro definition");
|
||||||
endCapture(capture);
|
endCapture(capture);
|
||||||
capture.span.ptr = nullptr; // Indicates that it reached EOF before an ENDM
|
capture.span.ptr = nullptr; // Indicates that it reached EOF before an ENDM
|
||||||
return capture;
|
return capture;
|
||||||
} else if (c == '\n' || c == '\r') {
|
} else if (isNewline(c)) {
|
||||||
handleCRLF(c);
|
handleCRLF(c);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
c = peek();
|
||||||
|
if (c != EOF) {
|
||||||
|
shiftChar();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user