diff --git a/include/asm/lexer.h b/include/asm/lexer.h index 1f497f0a..31bd951c 100644 --- a/include/asm/lexer.h +++ b/include/asm/lexer.h @@ -63,12 +63,18 @@ enum LexerMode { void lexer_SetMode(enum LexerMode mode); void lexer_ToggleStringExpansion(bool enable); +struct CaptureBody { + uint32_t lineNo; + char *body; + size_t size; +}; + char const *lexer_GetFileName(void); uint32_t lexer_GetLineNo(void); uint32_t lexer_GetColNo(void); void lexer_DumpStringExpansions(void); int yylex(void); -void lexer_CaptureRept(char **capture, size_t *size); -void lexer_CaptureMacroBody(char **capture, size_t *size); +void lexer_CaptureRept(struct CaptureBody *capture); +void lexer_CaptureMacroBody(struct CaptureBody *capture); #endif /* RGBDS_ASM_LEXER_H */ diff --git a/src/asm/fstack.c b/src/asm/fstack.c index 444e53a8..7416a667 100644 --- a/src/asm/fstack.c +++ b/src/asm/fstack.c @@ -413,9 +413,8 @@ void fstk_RunMacro(char const *macroName, struct MacroArgs *args) memcpy(dest, macro->name, macroNameLen + 1); newContext((struct FileStackNode *)fileInfo); - /* Line minus 1 because buffer begins with a newline */ contextStack->lexerState = lexer_OpenFileView(macro->macro, macro->macroSize, - macro->fileLine - 1); + macro->fileLine); if (!contextStack->lexerState) fatalerror("Failed to set up lexer for macro invocation\n"); lexer_SetStateAtEOL(contextStack->lexerState); diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 520ca071..627829a1 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -995,10 +995,21 @@ static void discardBlockComment(void) lexerState->disableMacroArgs = true; lexerState->disableInterpolation = true; for (;;) { - switch (nextChar()) { + int c = nextChar(); + + switch (c) { case EOF: error("Unterminated block comment\n"); goto finish; + case '\r': + /* Handle CRLF before nextLine() since shiftChars updates colNo */ + if (peek(0) == '\n') + shiftChars(1); + /* fallthrough */ + case '\n': + if (!lexerState->expansions || lexerState->expansions->distance) + nextLine(); + continue; case '/': if (peek(0) == '*') { warning(WARNING_NESTED_COMMENT, @@ -2194,8 +2205,10 @@ static char *startCapture(void) } } -void lexer_CaptureRept(char **capture, size_t *size) +void lexer_CaptureRept(struct CaptureBody *capture) { + capture->lineNo = lexer_GetLineNo(); + char *captureStart = startCapture(); unsigned int level = 0; int c; @@ -2228,14 +2241,7 @@ void lexer_CaptureRept(char **capture, size_t *size) * We know we have read exactly "ENDR", not e.g. an EQUS */ lexerState->captureSize -= strlen("ENDR"); - /* Read (but don't capture) until EOL or EOF */ - lexerState->capturing = false; - do { - c = nextChar(); - } while (c != EOF && c != '\r' && c != '\n'); - /* Handle Windows CRLF */ - if (c == '\r' && peek(0) == '\n') - shiftChars(1); + lexerState->lastToken = T_POP_ENDR; // Force EOL at EOF goto finish; } level--; @@ -2246,7 +2252,6 @@ void lexer_CaptureRept(char **capture, size_t *size) for (;;) { if (c == EOF) { error("Unterminated REPT/FOR block\n"); - lexerState->capturing = false; goto finish; } else if (c == '\n' || c == '\r') { if (c == '\r' && peek(0) == '\n') @@ -2258,76 +2263,72 @@ void lexer_CaptureRept(char **capture, size_t *size) } finish: - assert(!lexerState->capturing); - *capture = captureStart; - *size = lexerState->captureSize; + capture->body = captureStart; + capture->size = lexerState->captureSize; + lexerState->capturing = false; lexerState->captureBuf = NULL; lexerState->disableMacroArgs = false; lexerState->disableInterpolation = false; + lexerState->atLineStart = false; } -void lexer_CaptureMacroBody(char **capture, size_t *size) +void lexer_CaptureMacroBody(struct CaptureBody *capture) { + capture->lineNo = lexer_GetLineNo(); + char *captureStart = startCapture(); - int c = peek(0); + int c; /* If the file is `mmap`ed, we need not to unmap it to keep access to the macro */ if (lexerState->isMmapped) lexerState->isReferenced = true; /* - * Due to parser internals, it does not read the EOL after the T_POP_MACRO before calling - * this. Thus, we need to keep one in the buffer afterwards. - * (Note that this also means the captured buffer begins with a newline and maybe comment) + * Due to parser internals, it reads the EOL after the expression before calling this. + * Thus, we don't need to keep one in the buffer afterwards. * The following assertion checks that. */ - assert(!lexerState->atLineStart); + assert(lexerState->atLineStart); for (;;) { - /* Just consume characters until EOL or EOF */ - for (;;) { - if (c == EOF) { - error("Unterminated macro definition\n"); - lexerState->capturing = false; - goto finish; - } else if (c == '\n') { - break; - } else if (c == '\r') { - if (peek(0) == '\n') - shiftChars(1); - break; - } - c = nextChar(); - } - - /* We're at line start, attempt to match a `label: MACRO` line or `ENDM` token */ + nextLine(); + /* We're at line start, so attempt to match an `ENDM` token */ do { /* Discard initial whitespace */ c = nextChar(); } while (isWhitespace(c)); /* Now, try to match `ENDM` as a **whole** identifier */ if (startsIdentifier(c)) { - if (readIdentifier(c) == T_POP_ENDM) { - /* Read (but don't capture) until EOL or EOF */ - lexerState->capturing = false; - do { - c = peek(0); - if (c == EOF || c == '\r' || c == '\n') - break; - shiftChars(1); - } while (c != EOF && c != '\r' && c != '\n'); - /* Handle Windows CRLF */ - if (c == '\r' && peek(1) == '\n') - shiftChars(1); + switch (readIdentifier(c)) { + case T_POP_ENDM: + /* + * The ENDM has been captured, but we don't want it! + * We know we have read exactly "ENDM", not e.g. an EQUS + */ + lexerState->captureSize -= strlen("ENDM"); + lexerState->lastToken = T_POP_ENDM; // Force EOL at EOF goto finish; } } - nextLine(); + + /* Just consume characters until EOL or EOF */ + for (;;) { + if (c == EOF) { + error("Unterminated macro definition\n"); + goto finish; + } else if (c == '\n' || c == '\r') { + if (c == '\r' && peek(0) == '\n') + shiftChars(1); + break; + } + c = nextChar(); + } } finish: - assert(!lexerState->capturing); - *capture = captureStart; - *size = lexerState->captureSize - strlen("ENDM"); + capture->body = captureStart; + capture->size = lexerState->captureSize; + lexerState->capturing = false; lexerState->captureBuf = NULL; lexerState->disableMacroArgs = false; lexerState->disableInterpolation = false; + lexerState->atLineStart = false; } diff --git a/src/asm/parser.y b/src/asm/parser.y index 45bd52db..2c80ab9b 100644 --- a/src/asm/parser.y +++ b/src/asm/parser.y @@ -36,9 +36,11 @@ #include "linkdefs.h" #include "platform.h" // strncasecmp, strdup -uint32_t nListCountEmpty; -int32_t nPCOffset; -bool executeElseBlock; /* If this is set, ELIFs cannot be executed anymore */ +int32_t nPCOffset; /* Read by rpn_Symbol */ + +static uint32_t nListCountEmpty; +static bool executeElseBlock; /* If this is set, ELIFs cannot be executed anymore */ +static struct CaptureBody captureBody; /* Captures a REPT/FOR or MACRO */ static void upperstring(char *dest, char const *src) { @@ -596,17 +598,21 @@ line : label T_NEWLINE | label cpu_command T_NEWLINE | label macro T_NEWLINE | label simple_pseudoop T_NEWLINE - | pseudoop T_NEWLINE - | conditional /* May not necessarily be followed by a newline, see below */ + | assignment_pseudoop T_NEWLINE + | entire_line /* Commands that manage newlines themselves */ ; /* - * For "logistical" reasons, conditionals must manage newlines themselves. + * For "logistical" reasons, these commands must manage newlines themselves. * This is because we need to switch the lexer's mode *after* the newline has been read, * and to avoid causing some grammar conflicts (token reducing is finicky). * This is DEFINITELY one of the more FRAGILE parts of the codebase, handle with care. */ -conditional : if +entire_line : macrodef + | rept + | for + | break + | if /* It's important that all of these require being at line start for `skipIfBlock` */ | elif | else @@ -699,13 +705,13 @@ macroargs : /* empty */ { } ; -pseudoop : equ +/* These commands start with a T_LABEL. */ +assignment_pseudoop : equ | set | rb | rw | rl | equs - | macrodef ; simple_pseudoop : include @@ -733,10 +739,7 @@ simple_pseudoop : include | pushc | popc | load - | rept - | for | shift - | break | fail | warn | assert @@ -851,21 +854,18 @@ load : T_POP_LOAD string T_COMMA sectiontype sectorg sectattrs { | T_POP_ENDL { out_EndLoadSection(); } ; -rept : T_POP_REPT uconst { - uint32_t nDefinitionLineNo = lexer_GetLineNo(); - char *body; - size_t size; - lexer_CaptureRept(&body, &size); - fstk_RunRept($2, nDefinitionLineNo, body, size); +rept : T_POP_REPT uconst T_NEWLINE { + lexer_CaptureRept(&captureBody); + } T_NEWLINE { + fstk_RunRept($2, captureBody.lineNo, captureBody.body, captureBody.size); } ; -for : T_POP_FOR T_ID T_COMMA for_args { - uint32_t nDefinitionLineNo = lexer_GetLineNo(); - char *body; - size_t size; - lexer_CaptureRept(&body, &size); - fstk_RunFor($2, $4.start, $4.stop, $4.step, nDefinitionLineNo, body, size); +for : T_POP_FOR T_ID T_COMMA for_args T_NEWLINE { + lexer_CaptureRept(&captureBody); + } T_NEWLINE { + fstk_RunFor($2, $4.start, $4.stop, $4.step, captureBody.lineNo, + captureBody.body, captureBody.size); } for_args : const { @@ -885,18 +885,16 @@ for_args : const { } ; -break : T_POP_BREAK { +break : T_POP_BREAK T_NEWLINE { if (fstk_Break()) lexer_SetMode(LEXER_SKIP_TO_ENDR); } ; -macrodef : T_LABEL T_COLON T_POP_MACRO { - int32_t nDefinitionLineNo = lexer_GetLineNo(); - char *body; - size_t size; - lexer_CaptureMacroBody(&body, &size); - sym_AddMacro($1, nDefinitionLineNo, body, size); +macrodef : T_LABEL T_COLON T_POP_MACRO T_NEWLINE { + lexer_CaptureMacroBody(&captureBody); + } T_NEWLINE { + sym_AddMacro($1, captureBody.lineNo, captureBody.body, captureBody.size); } ; diff --git a/test/asm/break.err b/test/asm/break.err index 11acbf2c..2cbec5d6 100644 --- a/test/asm/break.err +++ b/test/asm/break.err @@ -2,5 +2,5 @@ warning: break.asm(9): [-Wuser] done 5 warning: break.asm(17): [-Wuser] OK -FATAL: break.asm(18) -> break.asm::REPT~1(23): +FATAL: break.asm(18) -> break.asm::REPT~1(22): Ended block with 1 unterminated IF construct diff --git a/test/asm/invalid-utf-8.asm b/test/asm/invalid-utf-8.asm index fbc3745c..f27de3c6 100644 --- a/test/asm/invalid-utf-8.asm +++ b/test/asm/invalid-utf-8.asm @@ -1,5 +1,6 @@ ; This test tries to pass invalid UTF-8 through a macro argument ; to exercise the lexer's reportGarbageChar -m:MACRO \1 +m:MACRO + \1 ENDM m ΟΣ diff --git a/test/asm/invalid-utf-8.err b/test/asm/invalid-utf-8.err index e342cc92..25352e6e 100644 --- a/test/asm/invalid-utf-8.err +++ b/test/asm/invalid-utf-8.err @@ -1,5 +1,5 @@ -ERROR: invalid-utf-8.asm(4) -> invalid-utf-8.asm::m(3): +ERROR: invalid-utf-8.asm(6) -> invalid-utf-8.asm::m(4): Unknown character 0xCF -ERROR: invalid-utf-8.asm(4) -> invalid-utf-8.asm::m(3): +ERROR: invalid-utf-8.asm(6) -> invalid-utf-8.asm::m(4): Unknown character 0xD3 error: Assembly aborted (2 errors)! diff --git a/test/asm/nested-macrodef.err b/test/asm/nested-macrodef.err index eea37314..e0e0473b 100644 --- a/test/asm/nested-macrodef.err +++ b/test/asm/nested-macrodef.err @@ -1,5 +1,7 @@ warning: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(22): [-Wuser] Nested macros shouldn't work, whose argument would be \1? -ERROR: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(25): +ERROR: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(24): Unterminated macro definition -error: Assembly aborted (1 errors)! +ERROR: nested-macrodef.asm(27): + syntax error, unexpected identifier, expecting newline +error: Assembly aborted (2 errors)! diff --git a/test/asm/nested-macrodef.simple.err b/test/asm/nested-macrodef.simple.err new file mode 100644 index 00000000..8f797ded --- /dev/null +++ b/test/asm/nested-macrodef.simple.err @@ -0,0 +1,7 @@ +warning: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(22): [-Wuser] + Nested macros shouldn't work, whose argument would be \1? +ERROR: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(24): + Unterminated macro definition +ERROR: nested-macrodef.asm(27): + syntax error +error: Assembly aborted (2 errors)!