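Return a marker token (T_EOB) at the end of any buffer

Removes the lexer hack mentioned in #778. In this change, yylex() no longer forces a T_NEWLINE at EOF; it returns T_EOB at each buffer boundary instead, and the grammar's new `endofline` rule accepts either T_NEWLINE or T_EOB.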
This commit is contained in:
Rangi
2021-04-18 20:25:09 -04:00
committed by Eldred Habert
parent be2572edca
commit 7ac8bd6e24
6 changed files with 52 additions and 42 deletions

View File

@@ -81,6 +81,7 @@ struct CaptureBody {
uint32_t lineNo; uint32_t lineNo;
char *body; char *body;
size_t size; size_t size;
bool unterminated;
}; };
char const *lexer_GetFileName(void); char const *lexer_GetFileName(void);

View File

@@ -354,6 +354,7 @@ struct LexerState {
uint32_t colNo; uint32_t colNo;
int lastToken; int lastToken;
int nextToken; int nextToken;
bool isAtEOF;
struct IfStack *ifStack; struct IfStack *ifStack;
@@ -378,6 +379,7 @@ static void initState(struct LexerState *state)
state->atLineStart = true; /* yylex() will init colNo due to this */ state->atLineStart = true; /* yylex() will init colNo due to this */
state->lastToken = T_EOF; state->lastToken = T_EOF;
state->nextToken = 0; state->nextToken = 0;
state->isAtEOF = false;
state->ifStack = NULL; state->ifStack = NULL;
@@ -2278,11 +2280,13 @@ finish:
int yylex(void) int yylex(void)
{ {
restart: if (lexerStateEOL) {
if (lexerState->atLineStart && lexerStateEOL) {
lexer_SetState(lexerStateEOL); lexer_SetState(lexerStateEOL);
lexerStateEOL = NULL; lexerStateEOL = NULL;
} }
/* `lexer_SetState` updates `lexerState`, so check for EOF after it */
if (lexerState->isAtEOF)
return T_EOF;
if (lexerState->atLineStart) { if (lexerState->atLineStart) {
/* Newlines read within an expansion should not increase the line count */ /* Newlines read within an expansion should not increase the line count */
if (!lexerState->expansions) if (!lexerState->expansions)
@@ -2299,23 +2303,19 @@ restart:
int token = lexerModeFuncs[lexerState->mode](); int token = lexerModeFuncs[lexerState->mode]();
if (token == T_EOF) { if (token == T_EOF) {
if (lexerState->lastToken != T_NEWLINE) {
dbgPrint("Forcing EOL at EOF\n");
token = T_NEWLINE;
} else {
/* Try to switch to new buffer; if it succeeds, scan again */ /* Try to switch to new buffer; if it succeeds, scan again */
dbgPrint("Reached EOF!\n"); dbgPrint("Reached EOB!\n");
/* Captures end at their buffer's boundary no matter what */ /* Captures end at their buffer's boundary no matter what */
if (!lexerState->capturing) { if (!lexerState->capturing) {
if (!yywrap()) if (yywrap()) {
goto restart;
dbgPrint("Reached end of input.\n"); dbgPrint("Reached end of input.\n");
return T_EOF; lexerState->isAtEOF = true;
} }
token = T_EOB;
} }
} }
lexerState->lastToken = token; lexerState->lastToken = token;
lexerState->atLineStart = token == T_NEWLINE; lexerState->atLineStart = token == T_NEWLINE || token == T_EOB;
return token; return token;
} }
@@ -2338,6 +2338,7 @@ static char *startCapture(void)
void lexer_CaptureRept(struct CaptureBody *capture) void lexer_CaptureRept(struct CaptureBody *capture)
{ {
capture->unterminated = false;
capture->lineNo = lexer_GetLineNo(); capture->lineNo = lexer_GetLineNo();
char *captureStart = startCapture(); char *captureStart = startCapture();
@@ -2372,7 +2373,6 @@ void lexer_CaptureRept(struct CaptureBody *capture)
* We know we have read exactly "ENDR", not e.g. an EQUS * We know we have read exactly "ENDR", not e.g. an EQUS
*/ */
lexerState->captureSize -= strlen("ENDR"); lexerState->captureSize -= strlen("ENDR");
lexerState->lastToken = T_POP_ENDR; // Force EOL at EOF
goto finish; goto finish;
} }
level--; level--;
@@ -2383,6 +2383,7 @@ void lexer_CaptureRept(struct CaptureBody *capture)
for (;;) { for (;;) {
if (c == EOF) { if (c == EOF) {
error("Unterminated REPT/FOR block\n"); error("Unterminated REPT/FOR block\n");
capture->unterminated = true;
goto finish; goto finish;
} else if (c == '\n' || c == '\r') { } else if (c == '\n' || c == '\r') {
handleCRLF(c); handleCRLF(c);
@@ -2404,6 +2405,7 @@ finish:
void lexer_CaptureMacroBody(struct CaptureBody *capture) void lexer_CaptureMacroBody(struct CaptureBody *capture)
{ {
capture->unterminated = false;
capture->lineNo = lexer_GetLineNo(); capture->lineNo = lexer_GetLineNo();
char *captureStart = startCapture(); char *captureStart = startCapture();
@@ -2434,7 +2436,6 @@ void lexer_CaptureMacroBody(struct CaptureBody *capture)
* We know we have read exactly "ENDM", not e.g. an EQUS * We know we have read exactly "ENDM", not e.g. an EQUS
*/ */
lexerState->captureSize -= strlen("ENDM"); lexerState->captureSize -= strlen("ENDM");
lexerState->lastToken = T_POP_ENDM; // Force EOL at EOF
goto finish; goto finish;
} }
} }
@@ -2443,6 +2444,7 @@ void lexer_CaptureMacroBody(struct CaptureBody *capture)
for (;;) { for (;;) {
if (c == EOF) { if (c == EOF) {
error("Unterminated macro definition\n"); error("Unterminated macro definition\n");
capture->unterminated = true;
goto finish; goto finish;
} else if (c == '\n' || c == '\r') { } else if (c == '\n' || c == '\r') {
handleCRLF(c); handleCRLF(c);

View File

@@ -646,6 +646,7 @@ enum {
%type <expr> op_mem_ind %type <expr> op_mem_ind
%type <assertType> assert_type %type <assertType> assert_type
%token T_EOB "end of buffer"
%token T_EOF 0 "end of file" %token T_EOF 0 "end of file"
%start asmfile %start asmfile
@@ -654,14 +655,13 @@ enum {
asmfile : lines asmfile : lines
; ;
/*
* The lexer adds T_NEWLINE at the end of the file if one was not
* already present, so we can rely on it to end a line.
*/
lines : %empty lines : %empty
| lines line | lines line
; ;
endofline : T_NEWLINE | T_EOB
;
plain_directive : label plain_directive : label
| label cpu_command | label cpu_command
| label macro | label macro
@@ -669,9 +669,9 @@ plain_directive : label
| assignment_directive | assignment_directive
; ;
line : plain_directive T_NEWLINE line : plain_directive endofline
| line_directive /* Directives that manage newlines themselves */ | line_directive /* Directives that manage newlines themselves */
| error T_NEWLINE { /* Continue parsing the next line on a syntax error */ | error endofline { /* Continue parsing the next line on a syntax error */
fstk_StopRept(); fstk_StopRept();
} }
; ;
@@ -686,6 +686,7 @@ line_directive : macrodef
| rept | rept
| for | for
| break | break
| include
| if | if
/* It's important that all of these require being at line start for `skipIfBlock` */ /* It's important that all of these require being at line start for `skipIfBlock` */
| elif | elif
@@ -807,8 +808,7 @@ assignment_directive : equ
| equs | equs
; ;
directive : include directive : endc
| endc
| print | print
| println | println
| printf | printf
@@ -988,8 +988,10 @@ load : T_POP_LOAD sectmod string T_COMMA sectiontype sectorg sectattrs {
rept : T_POP_REPT uconst T_NEWLINE { rept : T_POP_REPT uconst T_NEWLINE {
lexer_CaptureRept(&captureBody); lexer_CaptureRept(&captureBody);
} T_NEWLINE { } endofline {
fstk_RunRept($2, captureBody.lineNo, captureBody.body, captureBody.size); if (!captureBody.unterminated)
fstk_RunRept($2, captureBody.lineNo, captureBody.body,
captureBody.size);
} }
; ;
@@ -999,7 +1001,8 @@ for : T_POP_FOR {
lexer_ToggleStringExpansion(true); lexer_ToggleStringExpansion(true);
} T_COMMA for_args T_NEWLINE { } T_COMMA for_args T_NEWLINE {
lexer_CaptureRept(&captureBody); lexer_CaptureRept(&captureBody);
} T_NEWLINE { } endofline {
if (!captureBody.unterminated)
fstk_RunFor($3, $6.start, $6.stop, $6.step, captureBody.lineNo, fstk_RunFor($3, $6.start, $6.stop, $6.step, captureBody.lineNo,
captureBody.body, captureBody.size); captureBody.body, captureBody.size);
} }
@@ -1021,7 +1024,7 @@ for_args : const {
} }
; ;
break : T_POP_BREAK T_NEWLINE { break : label T_POP_BREAK endofline {
if (fstk_Break()) if (fstk_Break())
lexer_SetMode(LEXER_SKIP_TO_ENDR); lexer_SetMode(LEXER_SKIP_TO_ENDR);
} }
@@ -1033,13 +1036,17 @@ macrodef : T_POP_MACRO {
lexer_ToggleStringExpansion(true); lexer_ToggleStringExpansion(true);
} T_NEWLINE { } T_NEWLINE {
lexer_CaptureMacroBody(&captureBody); lexer_CaptureMacroBody(&captureBody);
} T_NEWLINE { } endofline {
sym_AddMacro($3, captureBody.lineNo, captureBody.body, captureBody.size); if (!captureBody.unterminated)
sym_AddMacro($3, captureBody.lineNo, captureBody.body,
captureBody.size);
} }
| T_LABEL T_COLON T_POP_MACRO T_NEWLINE { | T_LABEL T_COLON T_POP_MACRO T_NEWLINE {
lexer_CaptureMacroBody(&captureBody); lexer_CaptureMacroBody(&captureBody);
} T_NEWLINE { } endofline {
sym_AddMacro($1, captureBody.lineNo, captureBody.body, captureBody.size); if (!captureBody.unterminated)
sym_AddMacro($1, captureBody.lineNo, captureBody.body,
captureBody.size);
} }
; ;
@@ -1162,8 +1169,8 @@ export_list : export_list_entry
export_list_entry : scoped_id { sym_Export($1); } export_list_entry : scoped_id { sym_Export($1); }
; ;
include : T_POP_INCLUDE string { include : label T_POP_INCLUDE string endofline {
fstk_RunInclude($2); fstk_RunInclude($3);
if (failedOnMissingInclude) if (failedOnMissingInclude)
YYACCEPT; YYACCEPT;
} }

View File

@@ -1,5 +1,5 @@
ERROR: block-comment-termination-error.asm(1): ERROR: block-comment-termination-error.asm(1):
Unterminated block comment Unterminated block comment
ERROR: block-comment-termination-error.asm(1): ERROR: block-comment-termination-error.asm(1):
syntax error, unexpected newline syntax error, unexpected end of buffer
error: Assembly aborted (2 errors)! error: Assembly aborted (2 errors)!

View File

@@ -1,15 +1,15 @@
ERROR: code-after-endm-endr-endc.asm(6): ERROR: code-after-endm-endr-endc.asm(6):
syntax error, unexpected PRINTLN, expecting newline syntax error, unexpected PRINTLN, expecting newline or end of buffer
ERROR: code-after-endm-endr-endc.asm(7): ERROR: code-after-endm-endr-endc.asm(7):
Macro "mac" not defined Macro "mac" not defined
ERROR: code-after-endm-endr-endc.asm(12): ERROR: code-after-endm-endr-endc.asm(12):
syntax error, unexpected PRINTLN, expecting newline syntax error, unexpected PRINTLN, expecting newline or end of buffer
ERROR: code-after-endm-endr-endc.asm(17): ERROR: code-after-endm-endr-endc.asm(17):
syntax error, unexpected PRINTLN, expecting newline syntax error, unexpected PRINTLN, expecting newline
ERROR: code-after-endm-endr-endc.asm(19): ERROR: code-after-endm-endr-endc.asm(19):
syntax error, unexpected PRINTLN, expecting newline syntax error, unexpected PRINTLN, expecting newline or end of buffer
ERROR: code-after-endm-endr-endc.asm(23): ERROR: code-after-endm-endr-endc.asm(23):
syntax error, unexpected PRINTLN, expecting newline syntax error, unexpected PRINTLN, expecting newline
ERROR: code-after-endm-endr-endc.asm(25): ERROR: code-after-endm-endr-endc.asm(25):
syntax error, unexpected PRINTLN, expecting newline syntax error, unexpected PRINTLN, expecting newline or end of buffer
error: Assembly aborted (7 errors)! error: Assembly aborted (7 errors)!

View File

@@ -3,5 +3,5 @@ warning: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(22): [-Wuser]
ERROR: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(24): ERROR: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(24):
Unterminated macro definition Unterminated macro definition
ERROR: nested-macrodef.asm(27): ERROR: nested-macrodef.asm(27):
syntax error, unexpected identifier, expecting newline Macro "inner" not defined
error: Assembly aborted (2 errors)! error: Assembly aborted (2 errors)!