mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-20 18:22:07 +00:00
Re-implement block copy to avoid expanding macro args
Macro args were being expanded while a REPT or MACRO body was captured, and there was no easy way to avoid expanding them (believe me, after three hours and somehow an OOM, I gave up trying).
This commit is contained in:
@@ -51,7 +51,7 @@ uint32_t lexer_GetLineNo(void);
|
|||||||
uint32_t lexer_GetColNo(void);
|
uint32_t lexer_GetColNo(void);
|
||||||
void lexer_DumpStringExpansions(void);
|
void lexer_DumpStringExpansions(void);
|
||||||
int yylex(void);
|
int yylex(void);
|
||||||
void lexer_CaptureBlock(int blockStartToken, int blockEndToken, char **capture, size_t *size,
|
void lexer_CaptureRept(char **capture, size_t *size);
|
||||||
char const *name);
|
void lexer_CaptureMacroBody(char **capture, size_t *size);
|
||||||
|
|
||||||
#endif /* RGBDS_ASM_LEXER_H */
|
#endif /* RGBDS_ASM_LEXER_H */
|
||||||
|
|||||||
@@ -599,9 +599,8 @@ rept : T_POP_REPT uconst {
|
|||||||
uint32_t nDefinitionLineNo = lexer_GetLineNo();
|
uint32_t nDefinitionLineNo = lexer_GetLineNo();
|
||||||
char *body;
|
char *body;
|
||||||
size_t size;
|
size_t size;
|
||||||
lexer_CaptureBlock(T_POP_REPT, T_POP_ENDR, &body, &size,
|
lexer_CaptureRept(&body, &size);
|
||||||
"REPT block");
|
fstk_RunRept($2, nDefinitionLineNo, body, size);
|
||||||
fstk_RunRept($2, nDefinitionLineNo, body, size - strlen("ENDR"));
|
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
@@ -609,9 +608,8 @@ macrodef : T_LABEL ':' T_POP_MACRO {
|
|||||||
int32_t nDefinitionLineNo = lexer_GetLineNo();
|
int32_t nDefinitionLineNo = lexer_GetLineNo();
|
||||||
char *body;
|
char *body;
|
||||||
size_t size;
|
size_t size;
|
||||||
lexer_CaptureBlock(T_POP_MACRO, T_POP_ENDM, &body, &size,
|
lexer_CaptureMacroBody(&body, &size);
|
||||||
"macro definition");
|
sym_AddMacro($1, nDefinitionLineNo, body, size);
|
||||||
sym_AddMacro($1, nDefinitionLineNo, body, size - strlen("ENDM"));
|
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|||||||
185
src/asm/lexer.c
185
src/asm/lexer.c
@@ -626,19 +626,19 @@ static int peek(uint8_t distance)
|
|||||||
|
|
||||||
/* Do not perform expansions while capturing */
|
/* Do not perform expansions while capturing */
|
||||||
if (!lexerState->capturing) {
|
if (!lexerState->capturing) {
|
||||||
/* Scan the newly-inserted chars for any macro args */
|
/* Scan the new chars for any macro args */
|
||||||
bool escaped = false;
|
#define BUF_OFS (lexerState->offset + lexerState->nbChars)
|
||||||
|
while (lexerState->nbChars <= distance) {
|
||||||
|
char c = lexerState->ptr[BUF_OFS];
|
||||||
|
|
||||||
while (lexerState->nbChars < distance && !escaped) {
|
lexerState->nbChars++;
|
||||||
char c = lexerState->ptr[lexerState->offset
|
if (c == '\\') {
|
||||||
+ lexerState->nbChars++];
|
if (lexerState->size <= BUF_OFS)
|
||||||
|
break; /* This was the last char in the buffer */
|
||||||
if (escaped) {
|
c = lexerState->ptr[BUF_OFS];
|
||||||
escaped = false;
|
lexerState->nbChars++;
|
||||||
if ((c >= '1' && c <= '9') || c == '@')
|
if ((c >= '1' && c <= '9') || c == '@')
|
||||||
fatalerror("Macro arg expansion is not implemented yet\n");
|
fatalerror("Macro arg expansion is not implemented yet\n");
|
||||||
} else if (c == '\\') {
|
|
||||||
escaped = true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -774,13 +774,13 @@ nextExpansion:
|
|||||||
if (lexerState->isMmapped) {
|
if (lexerState->isMmapped) {
|
||||||
lexerState->offset += distance;
|
lexerState->offset += distance;
|
||||||
} else {
|
} else {
|
||||||
lexerState->nbChars -= distance;
|
|
||||||
lexerState->index += distance;
|
lexerState->index += distance;
|
||||||
/* Wrap around if necessary */
|
/* Wrap around if necessary */
|
||||||
if (lexerState->index >= LEXER_BUF_SIZE)
|
if (lexerState->index >= LEXER_BUF_SIZE)
|
||||||
lexerState->index %= LEXER_BUF_SIZE;
|
lexerState->index %= LEXER_BUF_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
lexerState->nbChars -= distance;
|
||||||
lexerState->colNo += distance;
|
lexerState->colNo += distance;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -975,6 +975,11 @@ static void readGfxConstant(void)
|
|||||||
|
|
||||||
/* Function to read identifiers & keywords */
|
/* Function to read identifiers & keywords */
|
||||||
|
|
||||||
|
/*
 * Tells whether `c` may begin an identifier: an ASCII letter, a period, or an underscore.
 * `c` is an `int` so that callers can pass whatever the character-reading functions
 * return; any value outside the ranges below (including a negative EOF) yields false.
 */
static bool startsIdentifier(int c)
{
	return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '.' || c == '_';
}
|
||||||
|
|
||||||
static int readIdentifier(char firstChar)
|
static int readIdentifier(char firstChar)
|
||||||
{
|
{
|
||||||
/* Lex while checking for a keyword */
|
/* Lex while checking for a keyword */
|
||||||
@@ -1449,9 +1454,7 @@ static int yylex_NORMAL(void)
|
|||||||
/* Handle identifiers... or error out */
|
/* Handle identifiers... or error out */
|
||||||
|
|
||||||
default:
|
default:
|
||||||
if ((c <= 'Z' && c >= 'A')
|
if (startsIdentifier(c)) {
|
||||||
|| (c <= 'z' && c >= 'a')
|
|
||||||
|| c == '.' || c == '_') {
|
|
||||||
int tokenType = readIdentifier(c);
|
int tokenType = readIdentifier(c);
|
||||||
|
|
||||||
/* If a keyword, don't try to expand */
|
/* If a keyword, don't try to expand */
|
||||||
@@ -1672,39 +1675,163 @@ restart:
|
|||||||
return token;
|
return token;
|
||||||
}
|
}
|
||||||
|
|
||||||
void lexer_CaptureBlock(int blockStartToken, int blockEndToken, char **capture, size_t *size,
|
static char *startCapture(void)
|
||||||
char const *name)
|
|
||||||
{
|
{
|
||||||
assert(!lexerState->expansions);
|
assert(!lexerState->expansions);
|
||||||
|
|
||||||
lexerState->capturing = true;
|
lexerState->capturing = true;
|
||||||
lexerState->captureSize = 0;
|
lexerState->captureSize = 0;
|
||||||
unsigned int level = 0;
|
|
||||||
char *captureStart;
|
|
||||||
|
|
||||||
if (lexerState->isMmapped) {
|
if (lexerState->isMmapped) {
|
||||||
captureStart = &lexerState->ptr[lexerState->offset];
|
return &lexerState->ptr[lexerState->offset];
|
||||||
} else {
|
} else {
|
||||||
lexerState->captureCapacity = 128; /* The initial size will be twice that */
|
lexerState->captureCapacity = 128; /* The initial size will be twice that */
|
||||||
reallocCaptureBuf();
|
reallocCaptureBuf();
|
||||||
captureStart = lexerState->captureBuf;
|
return lexerState->captureBuf;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void lexer_CaptureRept(char **capture, size_t *size)
|
||||||
|
{
|
||||||
|
char *captureStart = startCapture();
|
||||||
|
unsigned int level = 0;
|
||||||
|
int c;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Due to parser internals, it reads the EOL after the expression before calling this.
|
||||||
|
* Thus, we don't need to keep one in the buffer afterwards.
|
||||||
|
* The following assertion checks that.
|
||||||
|
*/
|
||||||
|
assert(lexerState->atLineStart);
|
||||||
for (;;) {
|
for (;;) {
|
||||||
int token = yylex();
|
/* We're at line start, so attempt to match a `REPT` or `ENDR` token */
|
||||||
|
do { /* Discard initial whitespace */
|
||||||
if (level == 0 && token == blockEndToken)
|
c = nextChar();
|
||||||
|
} while (isWhitespace(c));
|
||||||
|
/* Now, try to match either `REPT` or `ENDR` as a **whole** identifier */
|
||||||
|
if (startsIdentifier(c)) {
|
||||||
|
switch (readIdentifier(c)) {
|
||||||
|
case T_POP_REPT:
|
||||||
|
level++;
|
||||||
|
/* Ignore the rest of that line */
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (token == EOF)
|
case T_POP_ENDR:
|
||||||
error("Unterminated %s\n", name);
|
if (!level) {
|
||||||
else if (token == blockStartToken)
|
/* Read (but don't capture) until EOL or EOF */
|
||||||
level++;
|
lexerState->capturing = false;
|
||||||
else if (token == blockEndToken)
|
do {
|
||||||
|
c = nextChar();
|
||||||
|
} while (c != EOF && c != '\r' && c != '\n');
|
||||||
|
/* Handle Windows CRLF */
|
||||||
|
if (c == '\r' && peek(0) == '\n')
|
||||||
|
shiftChars(1);
|
||||||
|
goto finish;
|
||||||
|
}
|
||||||
level--;
|
level--;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
lexerState->lineNo++;
|
||||||
|
|
||||||
|
/* Just consume characters until EOL or EOF */
|
||||||
|
for (;;) {
|
||||||
|
if (c == EOF) {
|
||||||
|
error("Unterminated REPT block\n");
|
||||||
|
goto finish;
|
||||||
|
} else if (c == '\n') {
|
||||||
|
break;
|
||||||
|
} else if (c == '\r') {
|
||||||
|
if (peek(0) == '\n')
|
||||||
|
shiftChars(1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
c = nextChar();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
finish:
|
||||||
*capture = captureStart;
|
*capture = captureStart;
|
||||||
*size = lexerState->captureSize;
|
*size = lexerState->captureSize - strlen("ENDR");
|
||||||
|
lexerState->captureBuf = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void lexer_CaptureMacroBody(char **capture, size_t *size)
|
||||||
|
{
|
||||||
|
char *captureStart = startCapture();
|
||||||
|
unsigned int level = 0;
|
||||||
|
int c = peek(0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Due to parser internals, it does not read the EOL after the T_POP_MACRO before calling
|
||||||
|
* this. Thus, we need to keep one in the buffer afterwards.
|
||||||
|
* (Note that this also means the captured buffer begins with a newline and maybe comment)
|
||||||
|
* The following assertion checks that.
|
||||||
|
*/
|
||||||
|
assert(!lexerState->atLineStart);
|
||||||
|
for (;;) {
|
||||||
|
/* Just consume characters until EOL or EOF */
|
||||||
|
for (;;) {
|
||||||
|
if (c == EOF) {
|
||||||
|
error("Unterminated macro definition\n");
|
||||||
|
goto finish;
|
||||||
|
} else if (c == '\n') {
|
||||||
|
break;
|
||||||
|
} else if (c == '\r') {
|
||||||
|
if (peek(0) == '\n')
|
||||||
|
shiftChars(1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
c = nextChar();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We're at line start, attempt to match a `label: MACRO` line or `ENDM` token */
|
||||||
|
do { /* Discard initial whitespace */
|
||||||
|
c = nextChar();
|
||||||
|
} while (isWhitespace(c));
|
||||||
|
/* Now, try to match either `REPT` or `ENDR` as a **whole** identifier */
|
||||||
|
if (startsIdentifier(c)) {
|
||||||
|
switch (readIdentifier(c)) {
|
||||||
|
case T_ID:
|
||||||
|
/* We have an initial label, look for a single colon */
|
||||||
|
do {
|
||||||
|
c = nextChar();
|
||||||
|
} while (isWhitespace(c));
|
||||||
|
if (c != ':') /* If not a colon, give up */
|
||||||
|
break;
|
||||||
|
/* And finally, a `MACRO` token */
|
||||||
|
do {
|
||||||
|
c = nextChar();
|
||||||
|
} while (isWhitespace(c));
|
||||||
|
if (!startsIdentifier(c))
|
||||||
|
break;
|
||||||
|
if (readIdentifier(c) != T_POP_MACRO)
|
||||||
|
break;
|
||||||
|
level++;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case T_POP_ENDM:
|
||||||
|
if (!level) {
|
||||||
|
/* Read (but don't capture) until EOL or EOF */
|
||||||
|
lexerState->capturing = false;
|
||||||
|
do {
|
||||||
|
c = peek(0);
|
||||||
|
if (c == EOF || c == '\r' || c == '\n')
|
||||||
|
break;
|
||||||
|
shiftChars(1);
|
||||||
|
} while (c != EOF && c != '\r' && c != '\n');
|
||||||
|
/* Handle Windows CRLF */
|
||||||
|
if (c == '\r' && peek(1) == '\n')
|
||||||
|
shiftChars(1);
|
||||||
|
goto finish;
|
||||||
|
}
|
||||||
|
level--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lexerState->lineNo++;
|
||||||
|
}
|
||||||
|
|
||||||
|
finish:
|
||||||
|
*capture = captureStart;
|
||||||
|
*size = lexerState->captureSize - strlen("ENDM");
|
||||||
lexerState->captureBuf = NULL;
|
lexerState->captureBuf = NULL;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user