From 81a77a9b884eb3208ff6c9a38062e184301ae1f6 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sat, 1 Aug 2020 17:50:59 +0200 Subject: [PATCH] Re-implement block copy to avoid expanding macro args They were expanded during the capture, and there was no easy way to avoid expanding them (believe me, after three hours and somehow an OOM, I gave up trying). --- include/asm/lexer.h | 4 +- src/asm/asmy.y | 10 +-- src/asm/lexer.c | 187 +++++++++++++++++++++++++++++++++++++------- 3 files changed, 163 insertions(+), 38 deletions(-) diff --git a/include/asm/lexer.h b/include/asm/lexer.h index d1cfd08b..0f33c916 100644 --- a/include/asm/lexer.h +++ b/include/asm/lexer.h @@ -51,7 +51,7 @@ uint32_t lexer_GetLineNo(void); uint32_t lexer_GetColNo(void); void lexer_DumpStringExpansions(void); int yylex(void); -void lexer_CaptureBlock(int blockStartToken, int blockEndToken, char **capture, size_t *size, - char const *name); +void lexer_CaptureRept(char **capture, size_t *size); +void lexer_CaptureMacroBody(char **capture, size_t *size); #endif /* RGBDS_ASM_LEXER_H */ diff --git a/src/asm/asmy.y b/src/asm/asmy.y index ac08843d..81093c72 100644 --- a/src/asm/asmy.y +++ b/src/asm/asmy.y @@ -599,9 +599,8 @@ rept : T_POP_REPT uconst { uint32_t nDefinitionLineNo = lexer_GetLineNo(); char *body; size_t size; - lexer_CaptureBlock(T_POP_REPT, T_POP_ENDR, &body, &size, - "REPT block"); - fstk_RunRept($2, nDefinitionLineNo, body, size - strlen("ENDR")); + lexer_CaptureRept(&body, &size); + fstk_RunRept($2, nDefinitionLineNo, body, size); } ; @@ -609,9 +608,8 @@ macrodef : T_LABEL ':' T_POP_MACRO { int32_t nDefinitionLineNo = lexer_GetLineNo(); char *body; size_t size; - lexer_CaptureBlock(T_POP_MACRO, T_POP_ENDM, &body, &size, - "macro definition"); - sym_AddMacro($1, nDefinitionLineNo, body, size - strlen("ENDM")); + lexer_CaptureMacroBody(&body, &size); + sym_AddMacro($1, nDefinitionLineNo, body, size); } ; diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 26c4a107..cfb6ce51 100644 --- 
a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -626,19 +626,19 @@ static int peek(uint8_t distance) /* Do not perform expansions while capturing */ if (!lexerState->capturing) { - /* Scan the newly-inserted chars for any macro args */ - bool escaped = false; + /* Scan the new chars for any macro args */ +#define BUF_OFS (lexerState->offset + lexerState->nbChars) + while (lexerState->nbChars <= distance) { + char c = lexerState->ptr[BUF_OFS]; - while (lexerState->nbChars < distance && !escaped) { - char c = lexerState->ptr[lexerState->offset - + lexerState->nbChars++]; - - if (escaped) { - escaped = false; + lexerState->nbChars++; + if (c == '\\') { + if (lexerState->size <= BUF_OFS) + break; /* This was the last char in the buffer */ + c = lexerState->ptr[BUF_OFS]; + lexerState->nbChars++; if ((c >= '1' && c <= '9') || c == '@') fatalerror("Macro arg expansion is not implemented yet\n"); - } else if (c == '\\') { - escaped = true; } } } @@ -774,13 +774,13 @@ nextExpansion: if (lexerState->isMmapped) { lexerState->offset += distance; } else { - lexerState->nbChars -= distance; lexerState->index += distance; /* Wrap around if necessary */ if (lexerState->index >= LEXER_BUF_SIZE) lexerState->index %= LEXER_BUF_SIZE; } + lexerState->nbChars -= distance; lexerState->colNo += distance; } @@ -975,6 +975,11 @@ static void readGfxConstant(void) /* Function to read identifiers & keywords */ +static bool startsIdentifier(int c) +{ + return (c <= 'Z' && c >= 'A') || (c <= 'z' && c >= 'a') || c == '.' || c == '_'; +} + static int readIdentifier(char firstChar) { /* Lex while checking for a keyword */ @@ -1449,9 +1454,7 @@ static int yylex_NORMAL(void) /* Handle identifiers... or error out */ default: - if ((c <= 'Z' && c >= 'A') - || (c <= 'z' && c >= 'a') - || c == '.' 
|| c == '_') { + if (startsIdentifier(c)) { int tokenType = readIdentifier(c); /* If a keyword, don't try to expand */ @@ -1672,39 +1675,163 @@ restart: return token; } -void lexer_CaptureBlock(int blockStartToken, int blockEndToken, char **capture, size_t *size, - char const *name) +static char *startCapture(void) { assert(!lexerState->expansions); lexerState->capturing = true; lexerState->captureSize = 0; - unsigned int level = 0; - char *captureStart; if (lexerState->isMmapped) { - captureStart = &lexerState->ptr[lexerState->offset]; + return &lexerState->ptr[lexerState->offset]; } else { lexerState->captureCapacity = 128; /* The initial size will be twice that */ reallocCaptureBuf(); - captureStart = lexerState->captureBuf; + return lexerState->captureBuf; } +} +void lexer_CaptureRept(char **capture, size_t *size) +{ + char *captureStart = startCapture(); + unsigned int level = 0; + int c; + + /* + * Due to parser internals, it reads the EOL after the expression before calling this. + * Thus, we don't need to keep one in the buffer afterwards. + * The following assertion checks that. 
+ */ + assert(lexerState->atLineStart); for (;;) { - int token = yylex(); + /* We're at line start, so attempt to match a `REPT` or `ENDR` token */ + do { /* Discard initial whitespace */ + c = nextChar(); + } while (isWhitespace(c)); + /* Now, try to match either `REPT` or `ENDR` as a **whole** identifier */ + if (startsIdentifier(c)) { + switch (readIdentifier(c)) { + case T_POP_REPT: + level++; + /* Ignore the rest of that line */ + break; - if (level == 0 && token == blockEndToken) - break; + case T_POP_ENDR: + if (!level) { + /* Read (but don't capture) until EOL or EOF */ + lexerState->capturing = false; + do { + c = nextChar(); + } while (c != EOF && c != '\r' && c != '\n'); + /* Handle Windows CRLF */ + if (c == '\r' && peek(0) == '\n') + shiftChars(1); + goto finish; + } + level--; + } + } + lexerState->lineNo++; - if (token == EOF) - error("Unterminated %s\n", name); - else if (token == blockStartToken) - level++; - else if (token == blockEndToken) - level--; + /* Just consume characters until EOL or EOF */ + for (;;) { + if (c == EOF) { + error("Unterminated REPT block\n"); + goto finish; + } else if (c == '\n') { + break; + } else if (c == '\r') { + if (peek(0) == '\n') + shiftChars(1); + break; + } + c = nextChar(); + } } +finish: *capture = captureStart; - *size = lexerState->captureSize; + *size = lexerState->captureSize - strlen("ENDR"); + lexerState->captureBuf = NULL; +} + +void lexer_CaptureMacroBody(char **capture, size_t *size) +{ + char *captureStart = startCapture(); + unsigned int level = 0; + int c = peek(0); + + /* + * Due to parser internals, it does not read the EOL after the T_POP_MACRO before calling + * this. Thus, we need to keep one in the buffer afterwards. + * (Note that this also means the captured buffer begins with a newline and maybe comment) + * The following assertion checks that. 
+ */ + assert(!lexerState->atLineStart); + for (;;) { + /* Just consume characters until EOL or EOF */ + for (;;) { + if (c == EOF) { + error("Unterminated macro definition\n"); + goto finish; + } else if (c == '\n') { + break; + } else if (c == '\r') { + if (peek(0) == '\n') + shiftChars(1); + break; + } + c = nextChar(); + } + + /* We're at line start, attempt to match a `label: MACRO` line or `ENDM` token */ + do { /* Discard initial whitespace */ + c = nextChar(); + } while (isWhitespace(c)); + /* Now, try to match either a label or `ENDM` as a **whole** identifier */ + if (startsIdentifier(c)) { + switch (readIdentifier(c)) { + case T_ID: + /* We have an initial label, look for a single colon */ + do { + c = nextChar(); + } while (isWhitespace(c)); + if (c != ':') /* If not a colon, give up */ + break; + /* And finally, a `MACRO` token */ + do { + c = nextChar(); + } while (isWhitespace(c)); + if (!startsIdentifier(c)) + break; + if (readIdentifier(c) != T_POP_MACRO) + break; + level++; + break; + + case T_POP_ENDM: + if (!level) { + /* Read (but don't capture) until EOL or EOF */ + lexerState->capturing = false; + do { + c = peek(0); + if (c == EOF || c == '\r' || c == '\n') + break; + shiftChars(1); + } while (c != EOF && c != '\r' && c != '\n'); + /* Handle Windows CRLF */ + if (c == '\r' && peek(1) == '\n') + shiftChars(1); + goto finish; + } + level--; + } + } + lexerState->lineNo++; + } + +finish: + *capture = captureStart; + *size = lexerState->captureSize - strlen("ENDM"); lexerState->captureBuf = NULL; }