mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-20 18:22:07 +00:00
@@ -63,12 +63,18 @@ enum LexerMode {
|
||||
void lexer_SetMode(enum LexerMode mode);
|
||||
void lexer_ToggleStringExpansion(bool enable);
|
||||
|
||||
struct CaptureBody {
|
||||
uint32_t lineNo;
|
||||
char *body;
|
||||
size_t size;
|
||||
};
|
||||
|
||||
char const *lexer_GetFileName(void);
|
||||
uint32_t lexer_GetLineNo(void);
|
||||
uint32_t lexer_GetColNo(void);
|
||||
void lexer_DumpStringExpansions(void);
|
||||
int yylex(void);
|
||||
void lexer_CaptureRept(char **capture, size_t *size);
|
||||
void lexer_CaptureMacroBody(char **capture, size_t *size);
|
||||
void lexer_CaptureRept(struct CaptureBody *capture);
|
||||
void lexer_CaptureMacroBody(struct CaptureBody *capture);
|
||||
|
||||
#endif /* RGBDS_ASM_LEXER_H */
|
||||
|
||||
@@ -413,9 +413,8 @@ void fstk_RunMacro(char const *macroName, struct MacroArgs *args)
|
||||
memcpy(dest, macro->name, macroNameLen + 1);
|
||||
|
||||
newContext((struct FileStackNode *)fileInfo);
|
||||
/* Line minus 1 because buffer begins with a newline */
|
||||
contextStack->lexerState = lexer_OpenFileView(macro->macro, macro->macroSize,
|
||||
macro->fileLine - 1);
|
||||
macro->fileLine);
|
||||
if (!contextStack->lexerState)
|
||||
fatalerror("Failed to set up lexer for macro invocation\n");
|
||||
lexer_SetStateAtEOL(contextStack->lexerState);
|
||||
|
||||
107
src/asm/lexer.c
107
src/asm/lexer.c
@@ -995,10 +995,21 @@ static void discardBlockComment(void)
|
||||
lexerState->disableMacroArgs = true;
|
||||
lexerState->disableInterpolation = true;
|
||||
for (;;) {
|
||||
switch (nextChar()) {
|
||||
int c = nextChar();
|
||||
|
||||
switch (c) {
|
||||
case EOF:
|
||||
error("Unterminated block comment\n");
|
||||
goto finish;
|
||||
case '\r':
|
||||
/* Handle CRLF before nextLine() since shiftChars updates colNo */
|
||||
if (peek(0) == '\n')
|
||||
shiftChars(1);
|
||||
/* fallthrough */
|
||||
case '\n':
|
||||
if (!lexerState->expansions || lexerState->expansions->distance)
|
||||
nextLine();
|
||||
continue;
|
||||
case '/':
|
||||
if (peek(0) == '*') {
|
||||
warning(WARNING_NESTED_COMMENT,
|
||||
@@ -2194,8 +2205,10 @@ static char *startCapture(void)
|
||||
}
|
||||
}
|
||||
|
||||
void lexer_CaptureRept(char **capture, size_t *size)
|
||||
void lexer_CaptureRept(struct CaptureBody *capture)
|
||||
{
|
||||
capture->lineNo = lexer_GetLineNo();
|
||||
|
||||
char *captureStart = startCapture();
|
||||
unsigned int level = 0;
|
||||
int c;
|
||||
@@ -2228,14 +2241,7 @@ void lexer_CaptureRept(char **capture, size_t *size)
|
||||
* We know we have read exactly "ENDR", not e.g. an EQUS
|
||||
*/
|
||||
lexerState->captureSize -= strlen("ENDR");
|
||||
/* Read (but don't capture) until EOL or EOF */
|
||||
lexerState->capturing = false;
|
||||
do {
|
||||
c = nextChar();
|
||||
} while (c != EOF && c != '\r' && c != '\n');
|
||||
/* Handle Windows CRLF */
|
||||
if (c == '\r' && peek(0) == '\n')
|
||||
shiftChars(1);
|
||||
lexerState->lastToken = T_POP_ENDR; // Force EOL at EOF
|
||||
goto finish;
|
||||
}
|
||||
level--;
|
||||
@@ -2246,7 +2252,6 @@ void lexer_CaptureRept(char **capture, size_t *size)
|
||||
for (;;) {
|
||||
if (c == EOF) {
|
||||
error("Unterminated REPT/FOR block\n");
|
||||
lexerState->capturing = false;
|
||||
goto finish;
|
||||
} else if (c == '\n' || c == '\r') {
|
||||
if (c == '\r' && peek(0) == '\n')
|
||||
@@ -2258,76 +2263,72 @@ void lexer_CaptureRept(char **capture, size_t *size)
|
||||
}
|
||||
|
||||
finish:
|
||||
assert(!lexerState->capturing);
|
||||
*capture = captureStart;
|
||||
*size = lexerState->captureSize;
|
||||
capture->body = captureStart;
|
||||
capture->size = lexerState->captureSize;
|
||||
lexerState->capturing = false;
|
||||
lexerState->captureBuf = NULL;
|
||||
lexerState->disableMacroArgs = false;
|
||||
lexerState->disableInterpolation = false;
|
||||
lexerState->atLineStart = false;
|
||||
}
|
||||
|
||||
void lexer_CaptureMacroBody(char **capture, size_t *size)
|
||||
void lexer_CaptureMacroBody(struct CaptureBody *capture)
|
||||
{
|
||||
capture->lineNo = lexer_GetLineNo();
|
||||
|
||||
char *captureStart = startCapture();
|
||||
int c = peek(0);
|
||||
int c;
|
||||
|
||||
/* If the file is `mmap`ed, we need not to unmap it to keep access to the macro */
|
||||
if (lexerState->isMmapped)
|
||||
lexerState->isReferenced = true;
|
||||
|
||||
/*
|
||||
* Due to parser internals, it does not read the EOL after the T_POP_MACRO before calling
|
||||
* this. Thus, we need to keep one in the buffer afterwards.
|
||||
* (Note that this also means the captured buffer begins with a newline and maybe comment)
|
||||
* Due to parser internals, it reads the EOL after the expression before calling this.
|
||||
* Thus, we don't need to keep one in the buffer afterwards.
|
||||
* The following assertion checks that.
|
||||
*/
|
||||
assert(!lexerState->atLineStart);
|
||||
assert(lexerState->atLineStart);
|
||||
for (;;) {
|
||||
/* Just consume characters until EOL or EOF */
|
||||
for (;;) {
|
||||
if (c == EOF) {
|
||||
error("Unterminated macro definition\n");
|
||||
lexerState->capturing = false;
|
||||
goto finish;
|
||||
} else if (c == '\n') {
|
||||
break;
|
||||
} else if (c == '\r') {
|
||||
if (peek(0) == '\n')
|
||||
shiftChars(1);
|
||||
break;
|
||||
}
|
||||
c = nextChar();
|
||||
}
|
||||
|
||||
/* We're at line start, attempt to match a `label: MACRO` line or `ENDM` token */
|
||||
nextLine();
|
||||
/* We're at line start, so attempt to match an `ENDM` token */
|
||||
do { /* Discard initial whitespace */
|
||||
c = nextChar();
|
||||
} while (isWhitespace(c));
|
||||
/* Now, try to match `ENDM` as a **whole** identifier */
|
||||
if (startsIdentifier(c)) {
|
||||
if (readIdentifier(c) == T_POP_ENDM) {
|
||||
/* Read (but don't capture) until EOL or EOF */
|
||||
lexerState->capturing = false;
|
||||
do {
|
||||
c = peek(0);
|
||||
if (c == EOF || c == '\r' || c == '\n')
|
||||
break;
|
||||
shiftChars(1);
|
||||
} while (c != EOF && c != '\r' && c != '\n');
|
||||
/* Handle Windows CRLF */
|
||||
if (c == '\r' && peek(1) == '\n')
|
||||
shiftChars(1);
|
||||
switch (readIdentifier(c)) {
|
||||
case T_POP_ENDM:
|
||||
/*
|
||||
* The ENDM has been captured, but we don't want it!
|
||||
* We know we have read exactly "ENDM", not e.g. an EQUS
|
||||
*/
|
||||
lexerState->captureSize -= strlen("ENDM");
|
||||
lexerState->lastToken = T_POP_ENDM; // Force EOL at EOF
|
||||
goto finish;
|
||||
}
|
||||
}
|
||||
nextLine();
|
||||
|
||||
/* Just consume characters until EOL or EOF */
|
||||
for (;;) {
|
||||
if (c == EOF) {
|
||||
error("Unterminated macro definition\n");
|
||||
goto finish;
|
||||
} else if (c == '\n' || c == '\r') {
|
||||
if (c == '\r' && peek(0) == '\n')
|
||||
shiftChars(1);
|
||||
break;
|
||||
}
|
||||
c = nextChar();
|
||||
}
|
||||
}
|
||||
|
||||
finish:
|
||||
assert(!lexerState->capturing);
|
||||
*capture = captureStart;
|
||||
*size = lexerState->captureSize - strlen("ENDM");
|
||||
capture->body = captureStart;
|
||||
capture->size = lexerState->captureSize;
|
||||
lexerState->capturing = false;
|
||||
lexerState->captureBuf = NULL;
|
||||
lexerState->disableMacroArgs = false;
|
||||
lexerState->disableInterpolation = false;
|
||||
lexerState->atLineStart = false;
|
||||
}
|
||||
|
||||
@@ -36,9 +36,11 @@
|
||||
#include "linkdefs.h"
|
||||
#include "platform.h" // strncasecmp, strdup
|
||||
|
||||
uint32_t nListCountEmpty;
|
||||
int32_t nPCOffset;
|
||||
bool executeElseBlock; /* If this is set, ELIFs cannot be executed anymore */
|
||||
int32_t nPCOffset; /* Read by rpn_Symbol */
|
||||
|
||||
static uint32_t nListCountEmpty;
|
||||
static bool executeElseBlock; /* If this is set, ELIFs cannot be executed anymore */
|
||||
static struct CaptureBody captureBody; /* Captures a REPT/FOR or MACRO */
|
||||
|
||||
static void upperstring(char *dest, char const *src)
|
||||
{
|
||||
@@ -596,17 +598,21 @@ line : label T_NEWLINE
|
||||
| label cpu_command T_NEWLINE
|
||||
| label macro T_NEWLINE
|
||||
| label simple_pseudoop T_NEWLINE
|
||||
| pseudoop T_NEWLINE
|
||||
| conditional /* May not necessarily be followed by a newline, see below */
|
||||
| assignment_pseudoop T_NEWLINE
|
||||
| entire_line /* Commands that manage newlines themselves */
|
||||
;
|
||||
|
||||
/*
|
||||
* For "logistical" reasons, conditionals must manage newlines themselves.
|
||||
* For "logistical" reasons, these commands must manage newlines themselves.
|
||||
* This is because we need to switch the lexer's mode *after* the newline has been read,
|
||||
* and to avoid causing some grammar conflicts (token reducing is finicky).
|
||||
* This is DEFINITELY one of the more FRAGILE parts of the codebase, handle with care.
|
||||
*/
|
||||
conditional : if
|
||||
entire_line : macrodef
|
||||
| rept
|
||||
| for
|
||||
| break
|
||||
| if
|
||||
/* It's important that all of these require being at line start for `skipIfBlock` */
|
||||
| elif
|
||||
| else
|
||||
@@ -699,13 +705,13 @@ macroargs : /* empty */ {
|
||||
}
|
||||
;
|
||||
|
||||
pseudoop : equ
|
||||
/* These commands start with a T_LABEL. */
|
||||
assignment_pseudoop : equ
|
||||
| set
|
||||
| rb
|
||||
| rw
|
||||
| rl
|
||||
| equs
|
||||
| macrodef
|
||||
;
|
||||
|
||||
simple_pseudoop : include
|
||||
@@ -733,10 +739,7 @@ simple_pseudoop : include
|
||||
| pushc
|
||||
| popc
|
||||
| load
|
||||
| rept
|
||||
| for
|
||||
| shift
|
||||
| break
|
||||
| fail
|
||||
| warn
|
||||
| assert
|
||||
@@ -851,21 +854,18 @@ load : T_POP_LOAD string T_COMMA sectiontype sectorg sectattrs {
|
||||
| T_POP_ENDL { out_EndLoadSection(); }
|
||||
;
|
||||
|
||||
rept : T_POP_REPT uconst {
|
||||
uint32_t nDefinitionLineNo = lexer_GetLineNo();
|
||||
char *body;
|
||||
size_t size;
|
||||
lexer_CaptureRept(&body, &size);
|
||||
fstk_RunRept($2, nDefinitionLineNo, body, size);
|
||||
rept : T_POP_REPT uconst T_NEWLINE {
|
||||
lexer_CaptureRept(&captureBody);
|
||||
} T_NEWLINE {
|
||||
fstk_RunRept($2, captureBody.lineNo, captureBody.body, captureBody.size);
|
||||
}
|
||||
;
|
||||
|
||||
for : T_POP_FOR T_ID T_COMMA for_args {
|
||||
uint32_t nDefinitionLineNo = lexer_GetLineNo();
|
||||
char *body;
|
||||
size_t size;
|
||||
lexer_CaptureRept(&body, &size);
|
||||
fstk_RunFor($2, $4.start, $4.stop, $4.step, nDefinitionLineNo, body, size);
|
||||
for : T_POP_FOR T_ID T_COMMA for_args T_NEWLINE {
|
||||
lexer_CaptureRept(&captureBody);
|
||||
} T_NEWLINE {
|
||||
fstk_RunFor($2, $4.start, $4.stop, $4.step, captureBody.lineNo,
|
||||
captureBody.body, captureBody.size);
|
||||
}
|
||||
|
||||
for_args : const {
|
||||
@@ -885,18 +885,16 @@ for_args : const {
|
||||
}
|
||||
;
|
||||
|
||||
break : T_POP_BREAK {
|
||||
break : T_POP_BREAK T_NEWLINE {
|
||||
if (fstk_Break())
|
||||
lexer_SetMode(LEXER_SKIP_TO_ENDR);
|
||||
}
|
||||
;
|
||||
|
||||
macrodef : T_LABEL T_COLON T_POP_MACRO {
|
||||
int32_t nDefinitionLineNo = lexer_GetLineNo();
|
||||
char *body;
|
||||
size_t size;
|
||||
lexer_CaptureMacroBody(&body, &size);
|
||||
sym_AddMacro($1, nDefinitionLineNo, body, size);
|
||||
macrodef : T_LABEL T_COLON T_POP_MACRO T_NEWLINE {
|
||||
lexer_CaptureMacroBody(&captureBody);
|
||||
} T_NEWLINE {
|
||||
sym_AddMacro($1, captureBody.lineNo, captureBody.body, captureBody.size);
|
||||
}
|
||||
;
|
||||
|
||||
|
||||
@@ -2,5 +2,5 @@ warning: break.asm(9): [-Wuser]
|
||||
done 5
|
||||
warning: break.asm(17): [-Wuser]
|
||||
OK
|
||||
FATAL: break.asm(18) -> break.asm::REPT~1(23):
|
||||
FATAL: break.asm(18) -> break.asm::REPT~1(22):
|
||||
Ended block with 1 unterminated IF construct
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
; This test tries to pass invalid UTF-8 through a macro argument
|
||||
; to exercise the lexer's reportGarbageChar
|
||||
m:MACRO \1
|
||||
m:MACRO
|
||||
\1
|
||||
ENDM
|
||||
m <EFBFBD><EFBFBD>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
ERROR: invalid-utf-8.asm(4) -> invalid-utf-8.asm::m(3):
|
||||
ERROR: invalid-utf-8.asm(6) -> invalid-utf-8.asm::m(4):
|
||||
Unknown character 0xCF
|
||||
ERROR: invalid-utf-8.asm(4) -> invalid-utf-8.asm::m(3):
|
||||
ERROR: invalid-utf-8.asm(6) -> invalid-utf-8.asm::m(4):
|
||||
Unknown character 0xD3
|
||||
error: Assembly aborted (2 errors)!
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
warning: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(22): [-Wuser]
|
||||
Nested macros shouldn't work, whose argument would be \1?
|
||||
ERROR: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(25):
|
||||
ERROR: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(24):
|
||||
Unterminated macro definition
|
||||
error: Assembly aborted (1 errors)!
|
||||
ERROR: nested-macrodef.asm(27):
|
||||
syntax error, unexpected identifier, expecting newline
|
||||
error: Assembly aborted (2 errors)!
|
||||
|
||||
7
test/asm/nested-macrodef.simple.err
Normal file
7
test/asm/nested-macrodef.simple.err
Normal file
@@ -0,0 +1,7 @@
|
||||
warning: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(22): [-Wuser]
|
||||
Nested macros shouldn't work, whose argument would be \1?
|
||||
ERROR: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(24):
|
||||
Unterminated macro definition
|
||||
ERROR: nested-macrodef.asm(27):
|
||||
syntax error
|
||||
error: Assembly aborted (2 errors)!
|
||||
Reference in New Issue
Block a user