Handle MACRO and REPT/FOR bodies differently

Fixes #697
This commit is contained in:
Rangi
2021-01-12 21:30:13 -05:00
committed by Eldred Habert
parent a4ebb87858
commit df16e64fc6
9 changed files with 108 additions and 94 deletions

View File

@@ -413,9 +413,8 @@ void fstk_RunMacro(char const *macroName, struct MacroArgs *args)
memcpy(dest, macro->name, macroNameLen + 1);
newContext((struct FileStackNode *)fileInfo);
/* Formerly "line minus 1" because the captured buffer began with a newline; it no longer does, so the line is passed unchanged */
contextStack->lexerState = lexer_OpenFileView(macro->macro, macro->macroSize,
macro->fileLine - 1);
macro->fileLine);
if (!contextStack->lexerState)
fatalerror("Failed to set up lexer for macro invocation\n");
lexer_SetStateAtEOL(contextStack->lexerState);

View File

@@ -995,10 +995,21 @@ static void discardBlockComment(void)
lexerState->disableMacroArgs = true;
lexerState->disableInterpolation = true;
for (;;) {
switch (nextChar()) {
int c = nextChar();
switch (c) {
case EOF:
error("Unterminated block comment\n");
goto finish;
case '\r':
/* Handle CRLF before nextLine() since shiftChars updates colNo */
if (peek(0) == '\n')
shiftChars(1);
/* fallthrough */
case '\n':
if (!lexerState->expansions || lexerState->expansions->distance)
nextLine();
continue;
case '/':
if (peek(0) == '*') {
warning(WARNING_NESTED_COMMENT,
@@ -2194,8 +2205,10 @@ static char *startCapture(void)
}
}
void lexer_CaptureRept(char **capture, size_t *size)
void lexer_CaptureRept(struct CaptureBody *capture)
{
capture->lineNo = lexer_GetLineNo();
char *captureStart = startCapture();
unsigned int level = 0;
int c;
@@ -2228,14 +2241,7 @@ void lexer_CaptureRept(char **capture, size_t *size)
* We know we have read exactly "ENDR", not e.g. an EQUS
*/
lexerState->captureSize -= strlen("ENDR");
/* Read (but don't capture) until EOL or EOF */
lexerState->capturing = false;
do {
c = nextChar();
} while (c != EOF && c != '\r' && c != '\n');
/* Handle Windows CRLF */
if (c == '\r' && peek(0) == '\n')
shiftChars(1);
lexerState->lastToken = T_POP_ENDR; // Force EOL at EOF
goto finish;
}
level--;
@@ -2246,7 +2252,6 @@ void lexer_CaptureRept(char **capture, size_t *size)
for (;;) {
if (c == EOF) {
error("Unterminated REPT/FOR block\n");
lexerState->capturing = false;
goto finish;
} else if (c == '\n' || c == '\r') {
if (c == '\r' && peek(0) == '\n')
@@ -2258,76 +2263,72 @@ void lexer_CaptureRept(char **capture, size_t *size)
}
finish:
assert(!lexerState->capturing);
*capture = captureStart;
*size = lexerState->captureSize;
capture->body = captureStart;
capture->size = lexerState->captureSize;
lexerState->capturing = false;
lexerState->captureBuf = NULL;
lexerState->disableMacroArgs = false;
lexerState->disableInterpolation = false;
lexerState->atLineStart = false;
}
void lexer_CaptureMacroBody(char **capture, size_t *size)
void lexer_CaptureMacroBody(struct CaptureBody *capture)
{
capture->lineNo = lexer_GetLineNo();
char *captureStart = startCapture();
int c = peek(0);
int c;
/* If the file is `mmap`ed, it must not be unmapped, so that the macro body stays accessible */
if (lexerState->isMmapped)
lexerState->isReferenced = true;
/*
* Due to parser internals, it does not read the EOL after the T_POP_MACRO before calling
* this. Thus, we need to keep one in the buffer afterwards.
* (Note that this also means the captured buffer begins with a newline and maybe comment)
* Due to parser internals, the parser reads the EOL after the T_POP_MACRO before calling this.
* Thus, we don't need to keep one in the buffer afterwards.
* The following assertion checks that.
*/
assert(!lexerState->atLineStart);
assert(lexerState->atLineStart);
for (;;) {
/* Just consume characters until EOL or EOF */
for (;;) {
if (c == EOF) {
error("Unterminated macro definition\n");
lexerState->capturing = false;
goto finish;
} else if (c == '\n') {
break;
} else if (c == '\r') {
if (peek(0) == '\n')
shiftChars(1);
break;
}
c = nextChar();
}
/* We're at line start, attempt to match a `label: MACRO` line or `ENDM` token */
nextLine();
/* We're at line start, so attempt to match an `ENDM` token */
do { /* Discard initial whitespace */
c = nextChar();
} while (isWhitespace(c));
/* Now, try to match `ENDM` as a **whole** identifier */
if (startsIdentifier(c)) {
if (readIdentifier(c) == T_POP_ENDM) {
/* Read (but don't capture) until EOL or EOF */
lexerState->capturing = false;
do {
c = peek(0);
if (c == EOF || c == '\r' || c == '\n')
break;
shiftChars(1);
} while (c != EOF && c != '\r' && c != '\n');
/* Handle Windows CRLF */
if (c == '\r' && peek(1) == '\n')
shiftChars(1);
switch (readIdentifier(c)) {
case T_POP_ENDM:
/*
* The ENDM has been captured, but we don't want it!
* We know we have read exactly "ENDM", not e.g. an EQUS
*/
lexerState->captureSize -= strlen("ENDM");
lexerState->lastToken = T_POP_ENDM; // Force EOL at EOF
goto finish;
}
}
nextLine();
/* Just consume characters until EOL or EOF */
for (;;) {
if (c == EOF) {
error("Unterminated macro definition\n");
goto finish;
} else if (c == '\n' || c == '\r') {
if (c == '\r' && peek(0) == '\n')
shiftChars(1);
break;
}
c = nextChar();
}
}
finish:
assert(!lexerState->capturing);
*capture = captureStart;
*size = lexerState->captureSize - strlen("ENDM");
capture->body = captureStart;
capture->size = lexerState->captureSize;
lexerState->capturing = false;
lexerState->captureBuf = NULL;
lexerState->disableMacroArgs = false;
lexerState->disableInterpolation = false;
lexerState->atLineStart = false;
}

View File

@@ -36,9 +36,11 @@
#include "linkdefs.h"
#include "platform.h" // strncasecmp, strdup
uint32_t nListCountEmpty;
int32_t nPCOffset;
bool executeElseBlock; /* If this is set, ELIFs cannot be executed anymore */
int32_t nPCOffset; /* Read by rpn_Symbol */
static uint32_t nListCountEmpty;
static bool executeElseBlock; /* If this is set, ELIFs cannot be executed anymore */
static struct CaptureBody captureBody; /* Captures a REPT/FOR or MACRO */
static void upperstring(char *dest, char const *src)
{
@@ -596,17 +598,21 @@ line : label T_NEWLINE
| label cpu_command T_NEWLINE
| label macro T_NEWLINE
| label simple_pseudoop T_NEWLINE
| pseudoop T_NEWLINE
| conditional /* May not necessarily be followed by a newline, see below */
| assignment_pseudoop T_NEWLINE
| entire_line /* Commands that manage newlines themselves */
;
/*
* For "logistical" reasons, conditionals must manage newlines themselves.
* For "logistical" reasons, these commands must manage newlines themselves.
* This is because we need to switch the lexer's mode *after* the newline has been read,
* and to avoid causing some grammar conflicts (token reducing is finicky).
* This is DEFINITELY one of the more FRAGILE parts of the codebase, handle with care.
*/
conditional : if
entire_line : macrodef
| rept
| for
| break
| if
/* It's important that all of these require being at line start for `skipIfBlock` */
| elif
| else
@@ -699,13 +705,13 @@ macroargs : /* empty */ {
}
;
pseudoop : equ
/* These commands start with a T_LABEL. */
assignment_pseudoop : equ
| set
| rb
| rw
| rl
| equs
| macrodef
;
simple_pseudoop : include
@@ -733,10 +739,7 @@ simple_pseudoop : include
| pushc
| popc
| load
| rept
| for
| shift
| break
| fail
| warn
| assert
@@ -851,21 +854,18 @@ load : T_POP_LOAD string T_COMMA sectiontype sectorg sectattrs {
| T_POP_ENDL { out_EndLoadSection(); }
;
rept : T_POP_REPT uconst {
uint32_t nDefinitionLineNo = lexer_GetLineNo();
char *body;
size_t size;
lexer_CaptureRept(&body, &size);
fstk_RunRept($2, nDefinitionLineNo, body, size);
rept : T_POP_REPT uconst T_NEWLINE {
lexer_CaptureRept(&captureBody);
} T_NEWLINE {
fstk_RunRept($2, captureBody.lineNo, captureBody.body, captureBody.size);
}
;
for : T_POP_FOR T_ID T_COMMA for_args {
uint32_t nDefinitionLineNo = lexer_GetLineNo();
char *body;
size_t size;
lexer_CaptureRept(&body, &size);
fstk_RunFor($2, $4.start, $4.stop, $4.step, nDefinitionLineNo, body, size);
for : T_POP_FOR T_ID T_COMMA for_args T_NEWLINE {
lexer_CaptureRept(&captureBody);
} T_NEWLINE {
fstk_RunFor($2, $4.start, $4.stop, $4.step, captureBody.lineNo,
captureBody.body, captureBody.size);
}
for_args : const {
@@ -885,18 +885,16 @@ for_args : const {
}
;
break : T_POP_BREAK {
break : T_POP_BREAK T_NEWLINE {
if (fstk_Break())
lexer_SetMode(LEXER_SKIP_TO_ENDR);
}
;
macrodef : T_LABEL T_COLON T_POP_MACRO {
int32_t nDefinitionLineNo = lexer_GetLineNo();
char *body;
size_t size;
lexer_CaptureMacroBody(&body, &size);
sym_AddMacro($1, nDefinitionLineNo, body, size);
macrodef : T_LABEL T_COLON T_POP_MACRO T_NEWLINE {
lexer_CaptureMacroBody(&captureBody);
} T_NEWLINE {
sym_AddMacro($1, captureBody.lineNo, captureBody.body, captureBody.size);
}
;