Handle MACRO and REPT/FOR bodies differently

Fixes #697
2026-03-03 11:33:04 +00:00 · 2021-01-12 21:30:13 -05:00
parent a4ebb87858
commit df16e64fc6
9 changed files with 108 additions and 94 deletions
--- a/include/asm/lexer.h
+++ b/include/asm/lexer.h
@@ -63,12 +63,18 @@ enum LexerMode {
 void lexer_SetMode(enum LexerMode mode);
 void lexer_ToggleStringExpansion(bool enable);
 /*
  * Describes a captured block of source text.
  * Filled in by lexer_CaptureRept() and lexer_CaptureMacroBody().
  */
 struct CaptureBody {
 	uint32_t lineNo; /* Value of lexer_GetLineNo() taken right before the capture starts */
 	char *body; /* Start of the captured text, as returned when the capture began */
 	size_t size; /* Lexer's capture size once the capture finished (presumably in bytes) */
 };
 char const *lexer_GetFileName(void);
 uint32_t lexer_GetLineNo(void);
 uint32_t lexer_GetColNo(void);
 void lexer_DumpStringExpansions(void);
 int yylex(void);
-void lexer_CaptureRept(char **capture, size_t *size);
+void lexer_CaptureRept(struct CaptureBody *capture);
-void lexer_CaptureMacroBody(char **capture, size_t *size);
+void lexer_CaptureMacroBody(struct CaptureBody *capture);
 #endif /* RGBDS_ASM_LEXER_H */
--- a/src/asm/fstack.c
+++ b/src/asm/fstack.c
@@ -413,9 +413,8 @@ void fstk_RunMacro(char const *macroName, struct MacroArgs *args)
 	memcpy(dest, macro->name, macroNameLen + 1);
 	newContext((struct FileStackNode *)fileInfo);
 	/* Line minus 1 because buffer begins with a newline */
 	contextStack->lexerState = lexer_OpenFileView(macro->macro, macro->macroSize,
-						      macro->fileLine - 1);
+						      macro->fileLine);
 	if (!contextStack->lexerState)
 		fatalerror("Failed to set up lexer for macro invocation\n");
 	lexer_SetStateAtEOL(contextStack->lexerState);
--- a/src/asm/lexer.c
+++ b/src/asm/lexer.c
@@ -995,10 +995,21 @@ static void discardBlockComment(void)
 	lexerState->disableMacroArgs = true;
 	lexerState->disableInterpolation = true;
 	for (;;) {
-		switch (nextChar()) {
+		int c = nextChar();
 		switch (c) {
 		case EOF:
 			error("Unterminated block comment\n");
 			goto finish;
 		case '\r':
 			/* Handle CRLF before nextLine() since shiftChars updates colNo */
 			if (peek(0) == '\n')
 				shiftChars(1);
 			/* fallthrough */
 		case '\n':
 			if (!lexerState->expansions || lexerState->expansions->distance)
 				nextLine();
 			continue;
 		case '/':
 			if (peek(0) == '*') {
 				warning(WARNING_NESTED_COMMENT,
@@ -2194,8 +2205,10 @@ static char *startCapture(void)
 	}
 }
-void lexer_CaptureRept(char **capture, size_t *size)
+void lexer_CaptureRept(struct CaptureBody *capture)
 {
 	capture->lineNo = lexer_GetLineNo();
 	char *captureStart = startCapture();
 	unsigned int level = 0;
 	int c;
@@ -2228,14 +2241,7 @@ void lexer_CaptureRept(char **capture, size_t *size)
 					 * We know we have read exactly "ENDR", not e.g. an EQUS
 					 */
 					lexerState->captureSize -= strlen("ENDR");
-					/* Read (but don't capture) until EOL or EOF */
+					lexerState->lastToken = T_POP_ENDR; // Force EOL at EOF
 					lexerState->capturing = false;
 					do {
 						c = nextChar();
 					} while (c != EOF && c != '\r' && c != '\n');
 					/* Handle Windows CRLF */
 					if (c == '\r' && peek(0) == '\n')
 						shiftChars(1);
 					goto finish;
 				}
 				level--;
@@ -2246,7 +2252,6 @@ void lexer_CaptureRept(char **capture, size_t *size)
 		for (;;) {
 			if (c == EOF) {
 				error("Unterminated REPT/FOR block\n");
 				lexerState->capturing = false;
 				goto finish;
 			} else if (c == '\n' || c == '\r') {
 				if (c == '\r' && peek(0) == '\n')
@@ -2258,76 +2263,72 @@ void lexer_CaptureRept(char **capture, size_t *size)
 	}
 finish:
-	assert(!lexerState->capturing);
+	capture->body = captureStart;
-	*capture = captureStart;
+	capture->size = lexerState->captureSize;
-	*size = lexerState->captureSize;
+	lexerState->capturing = false;
 	lexerState->captureBuf = NULL;
 	lexerState->disableMacroArgs = false;
 	lexerState->disableInterpolation = false;
 	lexerState->atLineStart = false;
 }
-void lexer_CaptureMacroBody(char **capture, size_t *size)
+void lexer_CaptureMacroBody(struct CaptureBody *capture)
 {
 	capture->lineNo = lexer_GetLineNo();
 	char *captureStart = startCapture();
-	int c = peek(0);
+	int c;
 	/* If the file is `mmap`ed, it must not be unmapped, so that the macro stays accessible */
 	if (lexerState->isMmapped)
 		lexerState->isReferenced = true;
 	/*
-	 * Due to parser internals, it does not read the EOL after the T_POP_MACRO before calling
+	 * Due to parser internals, it reads the EOL after the expression before calling this.
-	 * this. Thus, we need to keep one in the buffer afterwards.
+	 * Thus, we don't need to keep one in the buffer afterwards.
 	 * (Note that this also means the captured buffer no longer begins with a newline)
 	 * The following assertion checks that.
 	 */
-	assert(!lexerState->atLineStart);
+	assert(lexerState->atLineStart);
 	for (;;) {
-		/* Just consume characters until EOL or EOF */
+		nextLine();
-		for (;;) {
+		/* We're at line start, so attempt to match an `ENDM` token */
 			if (c == EOF) {
 				error("Unterminated macro definition\n");
 				lexerState->capturing = false;
 				goto finish;
 			} else if (c == '\n') {
 				break;
 			} else if (c == '\r') {
 				if (peek(0) == '\n')
 					shiftChars(1);
 				break;
 			}
 			c = nextChar();
 		}
 		/* We're at line start, attempt to match a `label: MACRO` line or `ENDM` token */
 		do { /* Discard initial whitespace */
 			c = nextChar();
 		} while (isWhitespace(c));
 		/* Now, try to match `ENDM` as a **whole** identifier */
 		if (startsIdentifier(c)) {
-			if (readIdentifier(c) == T_POP_ENDM) {
+			switch (readIdentifier(c)) {
-				/* Read (but don't capture) until EOL or EOF */
+			case T_POP_ENDM:
-				lexerState->capturing = false;
+				/*
-				do {
+				 * The ENDM has been captured, but we don't want it!
-					c = peek(0);
+				 * We know we have read exactly "ENDM", not e.g. an EQUS
-					if (c == EOF || c == '\r' || c == '\n')
+				 */
-						break;
+				lexerState->captureSize -= strlen("ENDM");
-					shiftChars(1);
+				lexerState->lastToken = T_POP_ENDM; // Force EOL at EOF
 				} while (c != EOF && c != '\r' && c != '\n');
 				/* Handle Windows CRLF */
 				if (c == '\r' && peek(1) == '\n')
 					shiftChars(1);
 				goto finish;
 			}
 		}
-		nextLine();
+
 		/* Just consume characters until EOL or EOF */
 		for (;;) {
 			if (c == EOF) {
 				error("Unterminated macro definition\n");
 				goto finish;
 			} else if (c == '\n' || c == '\r') {
 				if (c == '\r' && peek(0) == '\n')
 					shiftChars(1);
 				break;
 			}
 			c = nextChar();
 		}
 	}
 finish:
-	assert(!lexerState->capturing);
+	capture->body = captureStart;
-	*capture = captureStart;
+	capture->size = lexerState->captureSize;
-	*size = lexerState->captureSize - strlen("ENDM");
+	lexerState->capturing = false;
 	lexerState->captureBuf = NULL;
 	lexerState->disableMacroArgs = false;
 	lexerState->disableInterpolation = false;
 	lexerState->atLineStart = false;
 }
--- a/src/asm/parser.y
+++ b/src/asm/parser.y
@@ -36,9 +36,11 @@
 #include "linkdefs.h"
 #include "platform.h" // strncasecmp, strdup
-uint32_t nListCountEmpty;
+int32_t nPCOffset; /* Read by rpn_Symbol */
-int32_t nPCOffset;
+
-bool executeElseBlock; /* If this is set, ELIFs cannot be executed anymore */
+static uint32_t nListCountEmpty;
 static bool executeElseBlock; /* If this is set, ELIFs cannot be executed anymore */
 static struct CaptureBody captureBody; /* Captures a REPT/FOR or MACRO */
 static void upperstring(char *dest, char const *src)
 {
@@ -596,17 +598,21 @@ line		: label T_NEWLINE
 		| label cpu_command T_NEWLINE
 		| label macro T_NEWLINE
 		| label simple_pseudoop T_NEWLINE
-		| pseudoop T_NEWLINE
+		| assignment_pseudoop T_NEWLINE
-		| conditional /* May not necessarily be followed by a newline, see below */
+		| entire_line /* Commands that manage newlines themselves */
 ;
 /*
- * For "logistical" reasons, conditionals must manage newlines themselves.
+ * For "logistical" reasons, these commands must manage newlines themselves.
 * This is because we need to switch the lexer's mode *after* the newline has been read,
 * and to avoid causing some grammar conflicts (token reducing is finicky).
 * This is DEFINITELY one of the more FRAGILE parts of the codebase, handle with care.
 */
-conditional	: if
+entire_line	: macrodef
 		| rept
 		| for
 		| break
 		| if
 		/* It's important that all of these require being at line start for `skipIfBlock` */
 		| elif
 		| else
@@ -699,13 +705,13 @@ macroargs	: /* empty */ {
 		}
 ;
-pseudoop	: equ
+/* These commands start with a T_LABEL. */
 assignment_pseudoop	: equ
 		| set
 		| rb
 		| rw
 		| rl
 		| equs
 		| macrodef
 ;
 simple_pseudoop : include
@@ -733,10 +739,7 @@ simple_pseudoop : include
 		| pushc
 		| popc
 		| load
 		| rept
 		| for
 		| shift
 		| break
 		| fail
 		| warn
 		| assert
@@ -851,21 +854,18 @@ load		: T_POP_LOAD string T_COMMA sectiontype sectorg sectattrs {
 		| T_POP_ENDL	{ out_EndLoadSection(); }
 ;
-rept		: T_POP_REPT uconst {
+rept		: T_POP_REPT uconst T_NEWLINE {
-			uint32_t nDefinitionLineNo = lexer_GetLineNo();
+			lexer_CaptureRept(&captureBody);
-			char *body;
+		} T_NEWLINE {
-			size_t size;
+			fstk_RunRept($2, captureBody.lineNo, captureBody.body, captureBody.size);
 			lexer_CaptureRept(&body, &size);
 			fstk_RunRept($2, nDefinitionLineNo, body, size);
 		}
 ;
-for		: T_POP_FOR T_ID T_COMMA for_args {
+for		: T_POP_FOR T_ID T_COMMA for_args T_NEWLINE {
-			uint32_t nDefinitionLineNo = lexer_GetLineNo();
+			lexer_CaptureRept(&captureBody);
-			char *body;
+		} T_NEWLINE {
-			size_t size;
+			fstk_RunFor($2, $4.start, $4.stop, $4.step, captureBody.lineNo,
-			lexer_CaptureRept(&body, &size);
+				    captureBody.body, captureBody.size);
 			fstk_RunFor($2, $4.start, $4.stop, $4.step, nDefinitionLineNo, body, size);
 		}
 for_args	: const {
@@ -885,18 +885,16 @@ for_args	: const {
 		}
 ;
-break		: T_POP_BREAK		{
+break		: T_POP_BREAK T_NEWLINE {
 			if (fstk_Break())
 				lexer_SetMode(LEXER_SKIP_TO_ENDR);
 		}
 ;
-macrodef	: T_LABEL T_COLON T_POP_MACRO {
+macrodef	: T_LABEL T_COLON T_POP_MACRO T_NEWLINE {
-			int32_t nDefinitionLineNo = lexer_GetLineNo();
+			lexer_CaptureMacroBody(&captureBody);
-			char *body;
+		} T_NEWLINE {
-			size_t size;
+			sym_AddMacro($1, captureBody.lineNo, captureBody.body, captureBody.size);
 			lexer_CaptureMacroBody(&body, &size);
 			sym_AddMacro($1, nDefinitionLineNo, body, size);
 		}
 ;
--- a/test/asm/break.err
+++ b/test/asm/break.err
@@ -2,5 +2,5 @@ warning: break.asm(9): [-Wuser]
    done 5
 warning: break.asm(17): [-Wuser]
    OK
-FATAL: break.asm(18) -> break.asm::REPT~1(23):
+FATAL: break.asm(18) -> break.asm::REPT~1(22):
    Ended block with 1 unterminated IF construct
--- a/test/asm/invalid-utf-8.asm
+++ b/test/asm/invalid-utf-8.asm
@@ -1,5 +1,6 @@
 ; This test tries to pass invalid UTF-8 through a macro argument
 ; to exercise the lexer's reportGarbageChar
-m:MACRO \1
+m:MACRO
 	\1
 ENDM
 	m ÏÓ
--- a/test/asm/invalid-utf-8.err
+++ b/test/asm/invalid-utf-8.err
@@ -1,5 +1,5 @@
-ERROR: invalid-utf-8.asm(4) -> invalid-utf-8.asm::m(3):
+ERROR: invalid-utf-8.asm(6) -> invalid-utf-8.asm::m(4):
    Unknown character 0xCF
-ERROR: invalid-utf-8.asm(4) -> invalid-utf-8.asm::m(3):
+ERROR: invalid-utf-8.asm(6) -> invalid-utf-8.asm::m(4):
    Unknown character 0xD3
 error: Assembly aborted (2 errors)!
--- a/test/asm/nested-macrodef.err
+++ b/test/asm/nested-macrodef.err
@@ -1,5 +1,7 @@
 warning: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(22): [-Wuser]
    Nested macros shouldn't work, whose argument would be \1?
-ERROR: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(25):
+ERROR: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(24):
    Unterminated macro definition
-error: Assembly aborted (1 errors)!
+ERROR: nested-macrodef.asm(27):
    syntax error, unexpected identifier, expecting newline
 error: Assembly aborted (2 errors)!
--- a/test/asm/nested-macrodef.simple.err
+++ b/test/asm/nested-macrodef.simple.err
@@ -0,0 +1,7 @@
 warning: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(22): [-Wuser]
    Nested macros shouldn't work, whose argument would be \1?
 ERROR: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(24):
    Unterminated macro definition
 ERROR: nested-macrodef.asm(27):
    syntax error
 error: Assembly aborted (2 errors)!