Re-implement block copy to avoid expanding macro args

Macro args were being expanded while the block was being captured, and there was no easy way to prevent that (believe me: after three hours of trying, and somehow an OOM, I gave up).
2026-03-03 11:33:04 +00:00 · 2020-08-01 17:50:59 +02:00
parent 6e805cd318
commit 81a77a9b88
3 changed files with 163 additions and 38 deletions
--- a/include/asm/lexer.h
+++ b/include/asm/lexer.h
@@ -51,7 +51,7 @@ uint32_t lexer_GetLineNo(void);
 uint32_t lexer_GetColNo(void);
 void lexer_DumpStringExpansions(void);
 int yylex(void);
-void lexer_CaptureBlock(int blockStartToken, int blockEndToken, char **capture, size_t *size,
+void lexer_CaptureRept(char **capture, size_t *size);
-			char const *name);
+void lexer_CaptureMacroBody(char **capture, size_t *size);
 #endif /* RGBDS_ASM_LEXER_H */
--- a/src/asm/asmy.y
+++ b/src/asm/asmy.y
@@ -599,9 +599,8 @@ rept		: T_POP_REPT uconst {
 			uint32_t nDefinitionLineNo = lexer_GetLineNo();
 			char *body;
 			size_t size;
-			lexer_CaptureBlock(T_POP_REPT, T_POP_ENDR, &body, &size,
+			lexer_CaptureRept(&body, &size);
-					   "REPT block");
+			fstk_RunRept($2, nDefinitionLineNo, body, size);
 			fstk_RunRept($2, nDefinitionLineNo, body, size - strlen("ENDR"));
 		}
 ;
@@ -609,9 +608,8 @@ macrodef	: T_LABEL ':' T_POP_MACRO {
 			int32_t nDefinitionLineNo = lexer_GetLineNo();
 			char *body;
 			size_t size;
-			lexer_CaptureBlock(T_POP_MACRO, T_POP_ENDM, &body, &size,
+			lexer_CaptureMacroBody(&body, &size);
-					   "macro definition");
+			sym_AddMacro($1, nDefinitionLineNo, body, size);
 			sym_AddMacro($1, nDefinitionLineNo, body, size - strlen("ENDM"));
 		}
 ;
--- a/src/asm/lexer.c
+++ b/src/asm/lexer.c
@@ -626,19 +626,19 @@ static int peek(uint8_t distance)
 		/* Do not perform expansions while capturing */
 		if (!lexerState->capturing) {
-			/* Scan the newly-inserted chars for any macro args */
+			/* Scan the new chars for any macro args */
-			bool escaped = false;
+#define BUF_OFS (lexerState->offset + lexerState->nbChars)
 			while (lexerState->nbChars <= distance) {
 				char c = lexerState->ptr[BUF_OFS];
-			while (lexerState->nbChars < distance && !escaped) {
+				lexerState->nbChars++;
-				char c = lexerState->ptr[lexerState->offset
+				if (c == '\\') {
-							 + lexerState->nbChars++];
+					if (lexerState->size <= BUF_OFS)
-
+						break; /* This was the last char in the buffer */
-				if (escaped) {
+					c = lexerState->ptr[BUF_OFS];
-					escaped = false;
+					lexerState->nbChars++;
 					if ((c >= '1' && c <= '9') || c == '@')
 						fatalerror("Macro arg expansion is not implemented yet\n");
 				} else if (c == '\\') {
 					escaped = true;
 				}
 			}
 		}
@@ -774,13 +774,13 @@ nextExpansion:
 	if (lexerState->isMmapped) {
 		lexerState->offset += distance;
 	} else {
 		lexerState->nbChars -= distance;
 		lexerState->index += distance;
 		/* Wrap around if necessary */
 		if (lexerState->index >= LEXER_BUF_SIZE)
 			lexerState->index %= LEXER_BUF_SIZE;
 	}
 	lexerState->nbChars -= distance;
 	lexerState->colNo += distance;
 }
@@ -975,6 +975,11 @@ static void readGfxConstant(void)
 /* Function to read identifiers & keywords */
 /* Tell whether `c` can begin an identifier: a letter in A-Z or a-z, a `.`, or a `_` */
 static bool startsIdentifier(int c)
 {
 	if (c == '.' || c == '_')
 		return true;
 	if (c >= 'A' && c <= 'Z')
 		return true;
 	return c >= 'a' && c <= 'z';
 }
 static int readIdentifier(char firstChar)
 {
 	/* Lex while checking for a keyword */
@@ -1449,9 +1454,7 @@ static int yylex_NORMAL(void)
 		/* Handle identifiers... or error out */
 		default:
-			if ((c <= 'Z' && c >= 'A')
+			if (startsIdentifier(c)) {
 			 || (c <= 'z' && c >= 'a')
 			 || c == '.' || c == '_') {
 				int tokenType = readIdentifier(c);
 				/* If a keyword, don't try to expand */
@@ -1672,39 +1675,163 @@ restart:
 	return token;
 }
-void lexer_CaptureBlock(int blockStartToken, int blockEndToken, char **capture, size_t *size,
+static char *startCapture(void)
 			char const *name)
 {
 	assert(!lexerState->expansions);
 	lexerState->capturing = true;
 	lexerState->captureSize = 0;
 	unsigned int level = 0;
 	char *captureStart;
 	if (lexerState->isMmapped) {
-		captureStart = &lexerState->ptr[lexerState->offset];
+		return &lexerState->ptr[lexerState->offset];
 	} else {
 		lexerState->captureCapacity = 128; /* The initial size will be twice that */
 		reallocCaptureBuf();
-		captureStart = lexerState->captureBuf;
+		return lexerState->captureBuf;
 	}
 }
 void lexer_CaptureRept(char **capture, size_t *size)
 {
 	char *captureStart = startCapture();
 	unsigned int level = 0;
 	int c;
 	/*
 	 * Due to parser internals, it reads the EOL after the expression before calling this.
 	 * Thus, we don't need to keep one in the buffer afterwards.
 	 * The following assertion checks that.
 	 */
 	assert(lexerState->atLineStart);
 	for (;;) {
-		int token = yylex();
+		/* We're at line start, so attempt to match a `REPT` or `ENDR` token */
-
+		do { /* Discard initial whitespace */
-		if (level == 0 && token == blockEndToken)
+			c = nextChar();
 		} while (isWhitespace(c));
 		/* Now, try to match either `REPT` or `ENDR` as a **whole** identifier */
 		if (startsIdentifier(c)) {
 			switch (readIdentifier(c)) {
 			case T_POP_REPT:
 				level++;
 				/* Ignore the rest of that line */
 				break;
-		if (token == EOF)
+			case T_POP_ENDR:
-			error("Unterminated %s\n", name);
+				if (!level) {
-		else if (token == blockStartToken)
+					/* Read (but don't capture) until EOL or EOF */
-			level++;
+					lexerState->capturing = false;
-		else if (token == blockEndToken)
+					do {
 						c = nextChar();
 					} while (c != EOF && c != '\r' && c != '\n');
 					/* Handle Windows CRLF */
 					if (c == '\r' && peek(0) == '\n')
 						shiftChars(1);
 					goto finish;
 				}
 				level--;
 			}
 		}
 		lexerState->lineNo++;
 		/* Just consume characters until EOL or EOF */
 		for (;;) {
 			if (c == EOF) {
 				error("Unterminated REPT block\n");
 				goto finish;
 			} else if (c == '\n') {
 				break;
 			} else if (c == '\r') {
 				if (peek(0) == '\n')
 					shiftChars(1);
 				break;
 			}
 			c = nextChar();
 		}
 	}
 finish:
 	*capture = captureStart;
-	*size = lexerState->captureSize;
+	*size = lexerState->captureSize - strlen("ENDR");
 	lexerState->captureBuf = NULL;
 }
 /*
  * Capture the raw text of a macro body, character by character, so that macro args
  * inside it are not expanded (no expansion is performed while `capturing` is set).
  *
  * Must be called right after the `MACRO` token, before its end-of-line has been read
  * (asserted below); as a result, the captured text begins with the rest of that line.
  * Nested `label: MACRO` lines are tracked so that only the matching `ENDM` ends the
  * capture. The rest of the `ENDM` line is consumed without being captured, and its
  * line feed is left unread for the parser.
  *
  * @param capture Receives the pointer returned by startCapture() (start of the text)
  * @param size    Receives the number of captured bytes, not counting the closing `ENDM`
  *
  * If EOF is reached first, "Unterminated macro definition" is reported and everything
  * captured so far is returned unmodified.
  */
 void lexer_CaptureMacroBody(char **capture, size_t *size)
 {
 	char *captureStart = startCapture();
 	unsigned int level = 0; /* Nesting depth of inner `label: MACRO` definitions */
 	size_t terminatorLen = 0; /* Non-zero only once the closing `ENDM` has been read */
 	int c = peek(0);
 	/*
 	 * Due to parser internals, it does not read the EOL after the T_POP_MACRO before calling
 	 * this. Thus, we need to keep one in the buffer afterwards.
 	 * (Note that this also means the captured buffer begins with a newline and maybe comment)
 	 * The following assertion checks that.
 	 */
 	assert(!lexerState->atLineStart);
 	for (;;) {
 		/* Just consume characters until EOL or EOF */
 		for (;;) {
 			if (c == EOF) {
 				error("Unterminated macro definition\n");
 				goto finish;
 			} else if (c == '\n') {
 				break;
 			} else if (c == '\r') {
 				if (peek(0) == '\n')
 					shiftChars(1);
 				break;
 			}
 			c = nextChar();
 		}
 		/* We're at line start, attempt to match a `label: MACRO` line or `ENDM` token */
 		do { /* Discard initial whitespace */
 			c = nextChar();
 		} while (isWhitespace(c));
 		/* Now, try to match either a label or `ENDM` as a **whole** identifier */
 		if (startsIdentifier(c)) {
 			switch (readIdentifier(c)) {
 			case T_ID:
 				/* We have an initial label, look for a single colon */
 				do {
 					c = nextChar();
 				} while (isWhitespace(c));
 				if (c != ':') /* If not a colon, give up */
 					break;
 				/* And finally, a `MACRO` token */
 				do {
 					c = nextChar();
 				} while (isWhitespace(c));
 				if (startsIdentifier(c) && readIdentifier(c) == T_POP_MACRO)
 					level++;
 				break;
 			case T_POP_ENDM:
 				if (!level) {
 					/*
 					 * Only on this path does the capture end with `ENDM`, so only
 					 * here may it be trimmed off the reported size.
 					 */
 					terminatorLen = strlen("ENDM");
 					/* Read (but don't capture) until EOL or EOF */
 					lexerState->capturing = false;
 					for (;;) {
 						c = peek(0);
 						if (c == EOF || c == '\r' || c == '\n')
 							break;
 						shiftChars(1);
 					}
 					/* Handle Windows CRLF: drop the CR, leave the LF unread */
 					if (c == '\r' && peek(1) == '\n')
 						shiftChars(1);
 					goto finish;
 				}
 				level--;
 				break;
 			default: /* Any other identifier is just part of the body */
 				break;
 			}
 		}
 		lexerState->lineNo++;
 	}
 finish:
 	/*
 	 * Leave capture mode on every exit path: the EOF path above would otherwise return
 	 * with `capturing` still set while `captureBuf` is reset below.
 	 */
 	lexerState->capturing = false;
 	*capture = captureStart;
 	/* On the EOF path terminatorLen is 0, so nothing is cut and the size cannot wrap */
 	*size = lexerState->captureSize - terminatorLen;
 	lexerState->captureBuf = NULL;
 }