Restore the "EOF-newline" lexer hack

This was removed in b3c0db218d
(along with two unrelated changes).

Removing this hack introduced issue #742, whereby INCLUDing
a file without a trailing newline can cause a syntax error.

A more proper fix would involve Bison's location tracking,
but for now the EOF-newline hack fixes the issue while only
affecting the wording of some reported errors (which now
mention "newline" instead of "end of file").

Fixes #742
This commit is contained in:
Rangi
2021-03-02 19:35:24 -05:00
committed by Rangi
parent 40c6b840f8
commit 6655e04ef0
4 changed files with 31 additions and 24 deletions

View File

@@ -349,6 +349,7 @@ struct LexerState {
bool atLineStart; bool atLineStart;
uint32_t lineNo; uint32_t lineNo;
uint32_t colNo; uint32_t colNo;
int lastToken;
struct IfStack *ifStack; struct IfStack *ifStack;
@@ -372,6 +373,7 @@ static void initState(struct LexerState *state)
{ {
state->mode = LEXER_NORMAL; state->mode = LEXER_NORMAL;
state->atLineStart = true; /* yylex() will init colNo due to this */ state->atLineStart = true; /* yylex() will init colNo due to this */
state->lastToken = T_EOF;
state->ifStack = NULL; state->ifStack = NULL;
@@ -2235,19 +2237,17 @@ finish:
return T_STRING; return T_STRING;
lexer_SetMode(LEXER_NORMAL); lexer_SetMode(LEXER_NORMAL);
// If a macro is invoked on the last line of a file, with no blank
// line afterwards, returning EOF afterwards will cause Bison to
// stop parsing, despite the lexer being ready to output more.
// To avoid this, return T_NEWLINE for EOF as well.
if (c == '\r' || c == '\n') { if (c == '\r' || c == '\n') {
shiftChars(1); shiftChars(1);
/* Handle CRLF */ /* Handle CRLF */
if (c == '\r' && peek(0) == '\n') if (c == '\r' && peek(0) == '\n')
shiftChars(1); shiftChars(1);
}
return T_NEWLINE; return T_NEWLINE;
} }
return T_EOF;
}
#undef append_yylval_tzString #undef append_yylval_tzString
/* /*
@@ -2456,8 +2456,12 @@ restart:
int token = lexerModeFuncs[lexerState->mode](); int token = lexerModeFuncs[lexerState->mode]();
if (token == T_EOF) { if (token == T_EOF) {
if (lexerState->lastToken != T_NEWLINE) {
dbgPrint("Forcing EOL at EOF\n");
token = T_NEWLINE;
} else {
/* Try to switch to new buffer; if it succeeds, scan again */ /* Try to switch to new buffer; if it succeeds, scan again */
dbgPrint("Reached EOB!\n"); dbgPrint("Reached EOF!\n");
/* Captures end at their buffer's boundary no matter what */ /* Captures end at their buffer's boundary no matter what */
if (!lexerState->capturing) { if (!lexerState->capturing) {
if (!yywrap()) if (!yywrap())
@@ -2466,6 +2470,8 @@ restart:
return T_EOF; return T_EOF;
} }
} }
}
lexerState->lastToken = token;
lexerState->atLineStart = token == T_NEWLINE; lexerState->atLineStart = token == T_NEWLINE;
@@ -2524,6 +2530,7 @@ void lexer_CaptureRept(struct CaptureBody *capture)
* We know we have read exactly "ENDR", not e.g. an EQUS * We know we have read exactly "ENDR", not e.g. an EQUS
*/ */
lexerState->captureSize -= strlen("ENDR"); lexerState->captureSize -= strlen("ENDR");
lexerState->lastToken = T_POP_ENDR; // Force EOL at EOF
goto finish; goto finish;
} }
level--; level--;
@@ -2586,6 +2593,7 @@ void lexer_CaptureMacroBody(struct CaptureBody *capture)
* We know we have read exactly "ENDM", not e.g. an EQUS * We know we have read exactly "ENDM", not e.g. an EQUS
*/ */
lexerState->captureSize -= strlen("ENDM"); lexerState->captureSize -= strlen("ENDM");
lexerState->lastToken = T_POP_ENDM; // Force EOL at EOF
goto finish; goto finish;
} }
} }

View File

@@ -603,20 +603,19 @@ enum {
%% %%
asmfile : lines last_line asmfile : lines
; ;
/* Note: The lexer adds T_NEWLINE at the end of the input */
lines : %empty lines : %empty
| lines line | lines line
; ;
last_line : label line : label T_NEWLINE
| label cpu_command | label cpu_command T_NEWLINE
| label macro | label macro T_NEWLINE
| label directive | label directive T_NEWLINE
| assignment_directive | assignment_directive T_NEWLINE
;
line : last_line T_NEWLINE
| line_directive /* Directives that manage newlines themselves */ | line_directive /* Directives that manage newlines themselves */
| error T_NEWLINE { /* Continue parsing the next line on a syntax error */ | error T_NEWLINE { /* Continue parsing the next line on a syntax error */
fstk_StopRept(); fstk_StopRept();

View File

@@ -1,5 +1,5 @@
ERROR: block-comment-termination-error.asm(1): ERROR: block-comment-termination-error.asm(1):
Unterminated block comment Unterminated block comment
ERROR: block-comment-termination-error.asm(1): ERROR: block-comment-termination-error.asm(1):
syntax error, unexpected end of file syntax error, unexpected newline
error: Assembly aborted (2 errors)! error: Assembly aborted (2 errors)!

View File

@@ -7,9 +7,9 @@ ERROR: code-after-endm-endr-endc.asm(12):
ERROR: code-after-endm-endr-endc.asm(17): ERROR: code-after-endm-endr-endc.asm(17):
syntax error, unexpected PRINTLN, expecting newline syntax error, unexpected PRINTLN, expecting newline
ERROR: code-after-endm-endr-endc.asm(19): ERROR: code-after-endm-endr-endc.asm(19):
syntax error, unexpected PRINTLN, expecting end of file or newline syntax error, unexpected PRINTLN, expecting newline
ERROR: code-after-endm-endr-endc.asm(23): ERROR: code-after-endm-endr-endc.asm(23):
syntax error, unexpected PRINTLN, expecting newline syntax error, unexpected PRINTLN, expecting newline
ERROR: code-after-endm-endr-endc.asm(25): ERROR: code-after-endm-endr-endc.asm(25):
syntax error, unexpected PRINTLN, expecting end of file or newline syntax error, unexpected PRINTLN, expecting newline
error: Assembly aborted (7 errors)! error: Assembly aborted (7 errors)!