Return a marker token at the end of any buffer

Removes the lexer hack mentioned in #778
This commit is contained in:
Rangi
2021-04-18 20:25:09 -04:00
committed by Eldred Habert
parent be2572edca
commit 7ac8bd6e24
6 changed files with 52 additions and 42 deletions

View File

@@ -354,6 +354,7 @@ struct LexerState {
uint32_t colNo;
int lastToken;
int nextToken;
bool isAtEOF;
struct IfStack *ifStack;
@@ -378,6 +379,7 @@ static void initState(struct LexerState *state)
state->atLineStart = true; /* yylex() will init colNo due to this */
state->lastToken = T_EOF;
state->nextToken = 0;
state->isAtEOF = false;
state->ifStack = NULL;
@@ -2278,11 +2280,13 @@ finish:
int yylex(void)
{
restart:
if (lexerState->atLineStart && lexerStateEOL) {
if (lexerStateEOL) {
lexer_SetState(lexerStateEOL);
lexerStateEOL = NULL;
}
/* `lexer_SetState` updates `lexerState`, so check for EOF after it */
if (lexerState->isAtEOF)
return T_EOF;
if (lexerState->atLineStart) {
/* Newlines read within an expansion should not increase the line count */
if (!lexerState->expansions)
@@ -2299,23 +2303,19 @@ restart:
int token = lexerModeFuncs[lexerState->mode]();
if (token == T_EOF) {
if (lexerState->lastToken != T_NEWLINE) {
dbgPrint("Forcing EOL at EOF\n");
token = T_NEWLINE;
} else {
/* Try to switch to new buffer; if it succeeds, scan again */
dbgPrint("Reached EOF!\n");
/* Captures end at their buffer's boundary no matter what */
if (!lexerState->capturing) {
if (!yywrap())
goto restart;
/* Try to switch to new buffer; if it succeeds, scan again */
dbgPrint("Reached EOB!\n");
/* Captures end at their buffer's boundary no matter what */
if (!lexerState->capturing) {
if (yywrap()) {
dbgPrint("Reached end of input.\n");
return T_EOF;
lexerState->isAtEOF = true;
}
token = T_EOB;
}
}
lexerState->lastToken = token;
lexerState->atLineStart = token == T_NEWLINE;
lexerState->atLineStart = token == T_NEWLINE || token == T_EOB;
return token;
}
@@ -2338,6 +2338,7 @@ static char *startCapture(void)
void lexer_CaptureRept(struct CaptureBody *capture)
{
capture->unterminated = false;
capture->lineNo = lexer_GetLineNo();
char *captureStart = startCapture();
@@ -2372,7 +2373,6 @@ void lexer_CaptureRept(struct CaptureBody *capture)
* We know we have read exactly "ENDR", not e.g. an EQUS
*/
lexerState->captureSize -= strlen("ENDR");
lexerState->lastToken = T_POP_ENDR; // Force EOL at EOF
goto finish;
}
level--;
@@ -2383,6 +2383,7 @@ void lexer_CaptureRept(struct CaptureBody *capture)
for (;;) {
if (c == EOF) {
error("Unterminated REPT/FOR block\n");
capture->unterminated = true;
goto finish;
} else if (c == '\n' || c == '\r') {
handleCRLF(c);
@@ -2404,6 +2405,7 @@ finish:
void lexer_CaptureMacroBody(struct CaptureBody *capture)
{
capture->unterminated = false;
capture->lineNo = lexer_GetLineNo();
char *captureStart = startCapture();
@@ -2434,7 +2436,6 @@ void lexer_CaptureMacroBody(struct CaptureBody *capture)
* We know we have read exactly "ENDM", not e.g. an EQUS
*/
lexerState->captureSize -= strlen("ENDM");
lexerState->lastToken = T_POP_ENDM; // Force EOL at EOF
goto finish;
}
}
@@ -2443,6 +2444,7 @@ void lexer_CaptureMacroBody(struct CaptureBody *capture)
for (;;) {
if (c == EOF) {
error("Unterminated macro definition\n");
capture->unterminated = true;
goto finish;
} else if (c == '\n' || c == '\r') {
handleCRLF(c);