mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-20 18:22:07 +00:00
@@ -63,12 +63,18 @@ enum LexerMode {
|
|||||||
void lexer_SetMode(enum LexerMode mode);
|
void lexer_SetMode(enum LexerMode mode);
|
||||||
void lexer_ToggleStringExpansion(bool enable);
|
void lexer_ToggleStringExpansion(bool enable);
|
||||||
|
|
||||||
|
struct CaptureBody {
|
||||||
|
uint32_t lineNo;
|
||||||
|
char *body;
|
||||||
|
size_t size;
|
||||||
|
};
|
||||||
|
|
||||||
char const *lexer_GetFileName(void);
|
char const *lexer_GetFileName(void);
|
||||||
uint32_t lexer_GetLineNo(void);
|
uint32_t lexer_GetLineNo(void);
|
||||||
uint32_t lexer_GetColNo(void);
|
uint32_t lexer_GetColNo(void);
|
||||||
void lexer_DumpStringExpansions(void);
|
void lexer_DumpStringExpansions(void);
|
||||||
int yylex(void);
|
int yylex(void);
|
||||||
void lexer_CaptureRept(char **capture, size_t *size);
|
void lexer_CaptureRept(struct CaptureBody *capture);
|
||||||
void lexer_CaptureMacroBody(char **capture, size_t *size);
|
void lexer_CaptureMacroBody(struct CaptureBody *capture);
|
||||||
|
|
||||||
#endif /* RGBDS_ASM_LEXER_H */
|
#endif /* RGBDS_ASM_LEXER_H */
|
||||||
|
|||||||
@@ -413,9 +413,8 @@ void fstk_RunMacro(char const *macroName, struct MacroArgs *args)
|
|||||||
memcpy(dest, macro->name, macroNameLen + 1);
|
memcpy(dest, macro->name, macroNameLen + 1);
|
||||||
|
|
||||||
newContext((struct FileStackNode *)fileInfo);
|
newContext((struct FileStackNode *)fileInfo);
|
||||||
/* Line minus 1 because buffer begins with a newline */
|
|
||||||
contextStack->lexerState = lexer_OpenFileView(macro->macro, macro->macroSize,
|
contextStack->lexerState = lexer_OpenFileView(macro->macro, macro->macroSize,
|
||||||
macro->fileLine - 1);
|
macro->fileLine);
|
||||||
if (!contextStack->lexerState)
|
if (!contextStack->lexerState)
|
||||||
fatalerror("Failed to set up lexer for macro invocation\n");
|
fatalerror("Failed to set up lexer for macro invocation\n");
|
||||||
lexer_SetStateAtEOL(contextStack->lexerState);
|
lexer_SetStateAtEOL(contextStack->lexerState);
|
||||||
|
|||||||
107
src/asm/lexer.c
107
src/asm/lexer.c
@@ -995,10 +995,21 @@ static void discardBlockComment(void)
|
|||||||
lexerState->disableMacroArgs = true;
|
lexerState->disableMacroArgs = true;
|
||||||
lexerState->disableInterpolation = true;
|
lexerState->disableInterpolation = true;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
switch (nextChar()) {
|
int c = nextChar();
|
||||||
|
|
||||||
|
switch (c) {
|
||||||
case EOF:
|
case EOF:
|
||||||
error("Unterminated block comment\n");
|
error("Unterminated block comment\n");
|
||||||
goto finish;
|
goto finish;
|
||||||
|
case '\r':
|
||||||
|
/* Handle CRLF before nextLine() since shiftChars updates colNo */
|
||||||
|
if (peek(0) == '\n')
|
||||||
|
shiftChars(1);
|
||||||
|
/* fallthrough */
|
||||||
|
case '\n':
|
||||||
|
if (!lexerState->expansions || lexerState->expansions->distance)
|
||||||
|
nextLine();
|
||||||
|
continue;
|
||||||
case '/':
|
case '/':
|
||||||
if (peek(0) == '*') {
|
if (peek(0) == '*') {
|
||||||
warning(WARNING_NESTED_COMMENT,
|
warning(WARNING_NESTED_COMMENT,
|
||||||
@@ -2194,8 +2205,10 @@ static char *startCapture(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void lexer_CaptureRept(char **capture, size_t *size)
|
void lexer_CaptureRept(struct CaptureBody *capture)
|
||||||
{
|
{
|
||||||
|
capture->lineNo = lexer_GetLineNo();
|
||||||
|
|
||||||
char *captureStart = startCapture();
|
char *captureStart = startCapture();
|
||||||
unsigned int level = 0;
|
unsigned int level = 0;
|
||||||
int c;
|
int c;
|
||||||
@@ -2228,14 +2241,7 @@ void lexer_CaptureRept(char **capture, size_t *size)
|
|||||||
* We know we have read exactly "ENDR", not e.g. an EQUS
|
* We know we have read exactly "ENDR", not e.g. an EQUS
|
||||||
*/
|
*/
|
||||||
lexerState->captureSize -= strlen("ENDR");
|
lexerState->captureSize -= strlen("ENDR");
|
||||||
/* Read (but don't capture) until EOL or EOF */
|
lexerState->lastToken = T_POP_ENDR; // Force EOL at EOF
|
||||||
lexerState->capturing = false;
|
|
||||||
do {
|
|
||||||
c = nextChar();
|
|
||||||
} while (c != EOF && c != '\r' && c != '\n');
|
|
||||||
/* Handle Windows CRLF */
|
|
||||||
if (c == '\r' && peek(0) == '\n')
|
|
||||||
shiftChars(1);
|
|
||||||
goto finish;
|
goto finish;
|
||||||
}
|
}
|
||||||
level--;
|
level--;
|
||||||
@@ -2246,7 +2252,6 @@ void lexer_CaptureRept(char **capture, size_t *size)
|
|||||||
for (;;) {
|
for (;;) {
|
||||||
if (c == EOF) {
|
if (c == EOF) {
|
||||||
error("Unterminated REPT/FOR block\n");
|
error("Unterminated REPT/FOR block\n");
|
||||||
lexerState->capturing = false;
|
|
||||||
goto finish;
|
goto finish;
|
||||||
} else if (c == '\n' || c == '\r') {
|
} else if (c == '\n' || c == '\r') {
|
||||||
if (c == '\r' && peek(0) == '\n')
|
if (c == '\r' && peek(0) == '\n')
|
||||||
@@ -2258,76 +2263,72 @@ void lexer_CaptureRept(char **capture, size_t *size)
|
|||||||
}
|
}
|
||||||
|
|
||||||
finish:
|
finish:
|
||||||
assert(!lexerState->capturing);
|
capture->body = captureStart;
|
||||||
*capture = captureStart;
|
capture->size = lexerState->captureSize;
|
||||||
*size = lexerState->captureSize;
|
lexerState->capturing = false;
|
||||||
lexerState->captureBuf = NULL;
|
lexerState->captureBuf = NULL;
|
||||||
lexerState->disableMacroArgs = false;
|
lexerState->disableMacroArgs = false;
|
||||||
lexerState->disableInterpolation = false;
|
lexerState->disableInterpolation = false;
|
||||||
|
lexerState->atLineStart = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void lexer_CaptureMacroBody(char **capture, size_t *size)
|
void lexer_CaptureMacroBody(struct CaptureBody *capture)
|
||||||
{
|
{
|
||||||
|
capture->lineNo = lexer_GetLineNo();
|
||||||
|
|
||||||
char *captureStart = startCapture();
|
char *captureStart = startCapture();
|
||||||
int c = peek(0);
|
int c;
|
||||||
|
|
||||||
/* If the file is `mmap`ed, we need not to unmap it to keep access to the macro */
|
/* If the file is `mmap`ed, we need not to unmap it to keep access to the macro */
|
||||||
if (lexerState->isMmapped)
|
if (lexerState->isMmapped)
|
||||||
lexerState->isReferenced = true;
|
lexerState->isReferenced = true;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Due to parser internals, it does not read the EOL after the T_POP_MACRO before calling
|
* Due to parser internals, it reads the EOL after the expression before calling this.
|
||||||
* this. Thus, we need to keep one in the buffer afterwards.
|
* Thus, we don't need to keep one in the buffer afterwards.
|
||||||
* (Note that this also means the captured buffer begins with a newline and maybe comment)
|
|
||||||
* The following assertion checks that.
|
* The following assertion checks that.
|
||||||
*/
|
*/
|
||||||
assert(!lexerState->atLineStart);
|
assert(lexerState->atLineStart);
|
||||||
for (;;) {
|
for (;;) {
|
||||||
/* Just consume characters until EOL or EOF */
|
nextLine();
|
||||||
for (;;) {
|
/* We're at line start, so attempt to match an `ENDM` token */
|
||||||
if (c == EOF) {
|
|
||||||
error("Unterminated macro definition\n");
|
|
||||||
lexerState->capturing = false;
|
|
||||||
goto finish;
|
|
||||||
} else if (c == '\n') {
|
|
||||||
break;
|
|
||||||
} else if (c == '\r') {
|
|
||||||
if (peek(0) == '\n')
|
|
||||||
shiftChars(1);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
c = nextChar();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* We're at line start, attempt to match a `label: MACRO` line or `ENDM` token */
|
|
||||||
do { /* Discard initial whitespace */
|
do { /* Discard initial whitespace */
|
||||||
c = nextChar();
|
c = nextChar();
|
||||||
} while (isWhitespace(c));
|
} while (isWhitespace(c));
|
||||||
/* Now, try to match `ENDM` as a **whole** identifier */
|
/* Now, try to match `ENDM` as a **whole** identifier */
|
||||||
if (startsIdentifier(c)) {
|
if (startsIdentifier(c)) {
|
||||||
if (readIdentifier(c) == T_POP_ENDM) {
|
switch (readIdentifier(c)) {
|
||||||
/* Read (but don't capture) until EOL or EOF */
|
case T_POP_ENDM:
|
||||||
lexerState->capturing = false;
|
/*
|
||||||
do {
|
* The ENDM has been captured, but we don't want it!
|
||||||
c = peek(0);
|
* We know we have read exactly "ENDM", not e.g. an EQUS
|
||||||
if (c == EOF || c == '\r' || c == '\n')
|
*/
|
||||||
break;
|
lexerState->captureSize -= strlen("ENDM");
|
||||||
shiftChars(1);
|
lexerState->lastToken = T_POP_ENDM; // Force EOL at EOF
|
||||||
} while (c != EOF && c != '\r' && c != '\n');
|
|
||||||
/* Handle Windows CRLF */
|
|
||||||
if (c == '\r' && peek(1) == '\n')
|
|
||||||
shiftChars(1);
|
|
||||||
goto finish;
|
goto finish;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
nextLine();
|
|
||||||
|
/* Just consume characters until EOL or EOF */
|
||||||
|
for (;;) {
|
||||||
|
if (c == EOF) {
|
||||||
|
error("Unterminated macro definition\n");
|
||||||
|
goto finish;
|
||||||
|
} else if (c == '\n' || c == '\r') {
|
||||||
|
if (c == '\r' && peek(0) == '\n')
|
||||||
|
shiftChars(1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
c = nextChar();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
finish:
|
finish:
|
||||||
assert(!lexerState->capturing);
|
capture->body = captureStart;
|
||||||
*capture = captureStart;
|
capture->size = lexerState->captureSize;
|
||||||
*size = lexerState->captureSize - strlen("ENDM");
|
lexerState->capturing = false;
|
||||||
lexerState->captureBuf = NULL;
|
lexerState->captureBuf = NULL;
|
||||||
lexerState->disableMacroArgs = false;
|
lexerState->disableMacroArgs = false;
|
||||||
lexerState->disableInterpolation = false;
|
lexerState->disableInterpolation = false;
|
||||||
|
lexerState->atLineStart = false;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -36,9 +36,11 @@
|
|||||||
#include "linkdefs.h"
|
#include "linkdefs.h"
|
||||||
#include "platform.h" // strncasecmp, strdup
|
#include "platform.h" // strncasecmp, strdup
|
||||||
|
|
||||||
uint32_t nListCountEmpty;
|
int32_t nPCOffset; /* Read by rpn_Symbol */
|
||||||
int32_t nPCOffset;
|
|
||||||
bool executeElseBlock; /* If this is set, ELIFs cannot be executed anymore */
|
static uint32_t nListCountEmpty;
|
||||||
|
static bool executeElseBlock; /* If this is set, ELIFs cannot be executed anymore */
|
||||||
|
static struct CaptureBody captureBody; /* Captures a REPT/FOR or MACRO */
|
||||||
|
|
||||||
static void upperstring(char *dest, char const *src)
|
static void upperstring(char *dest, char const *src)
|
||||||
{
|
{
|
||||||
@@ -596,17 +598,21 @@ line : label T_NEWLINE
|
|||||||
| label cpu_command T_NEWLINE
|
| label cpu_command T_NEWLINE
|
||||||
| label macro T_NEWLINE
|
| label macro T_NEWLINE
|
||||||
| label simple_pseudoop T_NEWLINE
|
| label simple_pseudoop T_NEWLINE
|
||||||
| pseudoop T_NEWLINE
|
| assignment_pseudoop T_NEWLINE
|
||||||
| conditional /* May not necessarily be followed by a newline, see below */
|
| entire_line /* Commands that manage newlines themselves */
|
||||||
;
|
;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For "logistical" reasons, conditionals must manage newlines themselves.
|
* For "logistical" reasons, these commands must manage newlines themselves.
|
||||||
* This is because we need to switch the lexer's mode *after* the newline has been read,
|
* This is because we need to switch the lexer's mode *after* the newline has been read,
|
||||||
* and to avoid causing some grammar conflicts (token reducing is finicky).
|
* and to avoid causing some grammar conflicts (token reducing is finicky).
|
||||||
* This is DEFINITELY one of the more FRAGILE parts of the codebase, handle with care.
|
* This is DEFINITELY one of the more FRAGILE parts of the codebase, handle with care.
|
||||||
*/
|
*/
|
||||||
conditional : if
|
entire_line : macrodef
|
||||||
|
| rept
|
||||||
|
| for
|
||||||
|
| break
|
||||||
|
| if
|
||||||
/* It's important that all of these require being at line start for `skipIfBlock` */
|
/* It's important that all of these require being at line start for `skipIfBlock` */
|
||||||
| elif
|
| elif
|
||||||
| else
|
| else
|
||||||
@@ -699,13 +705,13 @@ macroargs : /* empty */ {
|
|||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
pseudoop : equ
|
/* These commands start with a T_LABEL. */
|
||||||
|
assignment_pseudoop : equ
|
||||||
| set
|
| set
|
||||||
| rb
|
| rb
|
||||||
| rw
|
| rw
|
||||||
| rl
|
| rl
|
||||||
| equs
|
| equs
|
||||||
| macrodef
|
|
||||||
;
|
;
|
||||||
|
|
||||||
simple_pseudoop : include
|
simple_pseudoop : include
|
||||||
@@ -733,10 +739,7 @@ simple_pseudoop : include
|
|||||||
| pushc
|
| pushc
|
||||||
| popc
|
| popc
|
||||||
| load
|
| load
|
||||||
| rept
|
|
||||||
| for
|
|
||||||
| shift
|
| shift
|
||||||
| break
|
|
||||||
| fail
|
| fail
|
||||||
| warn
|
| warn
|
||||||
| assert
|
| assert
|
||||||
@@ -851,21 +854,18 @@ load : T_POP_LOAD string T_COMMA sectiontype sectorg sectattrs {
|
|||||||
| T_POP_ENDL { out_EndLoadSection(); }
|
| T_POP_ENDL { out_EndLoadSection(); }
|
||||||
;
|
;
|
||||||
|
|
||||||
rept : T_POP_REPT uconst {
|
rept : T_POP_REPT uconst T_NEWLINE {
|
||||||
uint32_t nDefinitionLineNo = lexer_GetLineNo();
|
lexer_CaptureRept(&captureBody);
|
||||||
char *body;
|
} T_NEWLINE {
|
||||||
size_t size;
|
fstk_RunRept($2, captureBody.lineNo, captureBody.body, captureBody.size);
|
||||||
lexer_CaptureRept(&body, &size);
|
|
||||||
fstk_RunRept($2, nDefinitionLineNo, body, size);
|
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
for : T_POP_FOR T_ID T_COMMA for_args {
|
for : T_POP_FOR T_ID T_COMMA for_args T_NEWLINE {
|
||||||
uint32_t nDefinitionLineNo = lexer_GetLineNo();
|
lexer_CaptureRept(&captureBody);
|
||||||
char *body;
|
} T_NEWLINE {
|
||||||
size_t size;
|
fstk_RunFor($2, $4.start, $4.stop, $4.step, captureBody.lineNo,
|
||||||
lexer_CaptureRept(&body, &size);
|
captureBody.body, captureBody.size);
|
||||||
fstk_RunFor($2, $4.start, $4.stop, $4.step, nDefinitionLineNo, body, size);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for_args : const {
|
for_args : const {
|
||||||
@@ -885,18 +885,16 @@ for_args : const {
|
|||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
break : T_POP_BREAK {
|
break : T_POP_BREAK T_NEWLINE {
|
||||||
if (fstk_Break())
|
if (fstk_Break())
|
||||||
lexer_SetMode(LEXER_SKIP_TO_ENDR);
|
lexer_SetMode(LEXER_SKIP_TO_ENDR);
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
macrodef : T_LABEL T_COLON T_POP_MACRO {
|
macrodef : T_LABEL T_COLON T_POP_MACRO T_NEWLINE {
|
||||||
int32_t nDefinitionLineNo = lexer_GetLineNo();
|
lexer_CaptureMacroBody(&captureBody);
|
||||||
char *body;
|
} T_NEWLINE {
|
||||||
size_t size;
|
sym_AddMacro($1, captureBody.lineNo, captureBody.body, captureBody.size);
|
||||||
lexer_CaptureMacroBody(&body, &size);
|
|
||||||
sym_AddMacro($1, nDefinitionLineNo, body, size);
|
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|||||||
@@ -2,5 +2,5 @@ warning: break.asm(9): [-Wuser]
|
|||||||
done 5
|
done 5
|
||||||
warning: break.asm(17): [-Wuser]
|
warning: break.asm(17): [-Wuser]
|
||||||
OK
|
OK
|
||||||
FATAL: break.asm(18) -> break.asm::REPT~1(23):
|
FATAL: break.asm(18) -> break.asm::REPT~1(22):
|
||||||
Ended block with 1 unterminated IF construct
|
Ended block with 1 unterminated IF construct
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
; This test tries to pass invalid UTF-8 through a macro argument
|
; This test tries to pass invalid UTF-8 through a macro argument
|
||||||
; to exercise the lexer's reportGarbageChar
|
; to exercise the lexer's reportGarbageChar
|
||||||
m:MACRO \1
|
m:MACRO
|
||||||
|
\1
|
||||||
ENDM
|
ENDM
|
||||||
m <EFBFBD><EFBFBD>
|
m <EFBFBD><EFBFBD>
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
ERROR: invalid-utf-8.asm(4) -> invalid-utf-8.asm::m(3):
|
ERROR: invalid-utf-8.asm(6) -> invalid-utf-8.asm::m(4):
|
||||||
Unknown character 0xCF
|
Unknown character 0xCF
|
||||||
ERROR: invalid-utf-8.asm(4) -> invalid-utf-8.asm::m(3):
|
ERROR: invalid-utf-8.asm(6) -> invalid-utf-8.asm::m(4):
|
||||||
Unknown character 0xD3
|
Unknown character 0xD3
|
||||||
error: Assembly aborted (2 errors)!
|
error: Assembly aborted (2 errors)!
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
warning: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(22): [-Wuser]
|
warning: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(22): [-Wuser]
|
||||||
Nested macros shouldn't work, whose argument would be \1?
|
Nested macros shouldn't work, whose argument would be \1?
|
||||||
ERROR: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(25):
|
ERROR: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(24):
|
||||||
Unterminated macro definition
|
Unterminated macro definition
|
||||||
error: Assembly aborted (1 errors)!
|
ERROR: nested-macrodef.asm(27):
|
||||||
|
syntax error, unexpected identifier, expecting newline
|
||||||
|
error: Assembly aborted (2 errors)!
|
||||||
|
|||||||
7
test/asm/nested-macrodef.simple.err
Normal file
7
test/asm/nested-macrodef.simple.err
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
warning: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(22): [-Wuser]
|
||||||
|
Nested macros shouldn't work, whose argument would be \1?
|
||||||
|
ERROR: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(24):
|
||||||
|
Unterminated macro definition
|
||||||
|
ERROR: nested-macrodef.asm(27):
|
||||||
|
syntax error
|
||||||
|
error: Assembly aborted (2 errors)!
|
||||||
Reference in New Issue
Block a user