diff --git a/CMakeLists.txt b/CMakeLists.txt index f4f234e9..4106b98a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,7 +46,7 @@ if(MSVC) add_definitions(/D_CRT_SECURE_NO_WARNINGS) else() if(DEVELOP) - add_compile_options(-Werror -Wall -Wextra -pedantic + add_compile_options(-Werror -Wall -Wextra -pedantic -Wno-type-limits -Wno-sign-compare -Wformat -Wformat-security -Wformat-overflow=2 -Wformat-truncation=1 -Wformat-y2k -Wswitch-enum -Wunused -Wuninitialized -Wunknown-pragmas -Wstrict-overflow=5 diff --git a/Makefile b/Makefile index f7f82459..fe1cf548 100644 --- a/Makefile +++ b/Makefile @@ -56,7 +56,6 @@ rgbasm_obj := \ src/asm/asmy.o \ src/asm/charmap.o \ src/asm/fstack.o \ - src/asm/globlex.o \ src/asm/lexer.o \ src/asm/macro.o \ src/asm/main.o \ @@ -73,7 +72,7 @@ rgbasm_obj := \ src/hashmap.o \ src/linkdefs.o -src/asm/globlex.o src/asm/lexer.o src/asm/constexpr.o: src/asm/asmy.h +src/asm/lexer.o src/asm/main.o: src/asm/asmy.h rgblink_obj := \ src/link/assign.o \ @@ -187,7 +186,7 @@ checkpatch: # compilation and make the continous integration infrastructure return failure. develop: - $Qenv $(MAKE) -j WARNFLAGS="-Werror -Wall -Wextra -Wpedantic \ + $Qenv $(MAKE) -j WARNFLAGS="-Werror -Wall -Wextra -Wpedantic -Wno-type-limits \ -Wno-sign-compare -Wformat -Wformat-security -Wformat-overflow=2 \ -Wformat-truncation=1 -Wformat-y2k -Wswitch-enum -Wunused \ -Wuninitialized -Wunknown-pragmas -Wstrict-overflow=5 \ @@ -199,7 +198,7 @@ develop: -fsanitize=unreachable -fsanitize=vla-bound \ -fsanitize=signed-integer-overflow -fsanitize=bounds \ -fsanitize=object-size -fsanitize=bool -fsanitize=enum \ - -fsanitize=alignment -fsanitize=null -DDEVELOP" CFLAGS="-g -O0" + -fsanitize=alignment -fsanitize=null -DDEVELOP" CFLAGS="-ggdb3 -O0" # Targets for the project maintainer to easily create Windows exes. # This is not for Windows users! 
diff --git a/include/asm/asm.h b/include/asm/asm.h index ff5de973..47d7d256 100644 --- a/include/asm/asm.h +++ b/include/asm/asm.h @@ -24,15 +24,8 @@ #define MAXMACROARGS 99999 #define MAXINCPATHS 128 -extern int32_t nLineNo; extern uint32_t nTotalLines; extern uint32_t nIFDepth; -extern bool skipElif; -extern char tzCurrentFileName[_MAX_PATH + 1]; extern struct Section *pCurrentSection; -extern bool oDontExpandStrings; - -size_t symvaluetostring(char *dest, size_t maxLength, char *sym, - const char *mode); #endif /* RGBDS_ASM_ASM_H */ diff --git a/include/asm/fstack.h b/include/asm/fstack.h index 41fe24c9..80e2096d 100644 --- a/include/asm/fstack.h +++ b/include/asm/fstack.h @@ -21,36 +21,59 @@ #include "types.h" -struct MacroArgs; +struct FileStackNode { + struct FileStackNode *parent; /* Pointer to parent node, for error reporting */ + /* Line at which the parent context was exited; meaningless for the root level */ + uint32_t lineNo; -struct sContext { - YY_BUFFER_STATE FlexHandle; - struct Symbol const *pMacro; - struct sContext *next; - char tzFileName[_MAX_PATH + 1]; - struct MacroArgs *macroArgs; - uint32_t uniqueID; - int32_t nLine; - uint32_t nStatus; - FILE *pFile; - char *pREPTBlock; - uint32_t nREPTBlockCount; - uint32_t nREPTBlockSize; - int32_t nREPTBodyFirstLine; - int32_t nREPTBodyLastLine; + struct FileStackNode *next; /* Next node in the output linked list */ + bool referenced; /* If referenced, don't free! 
*/ + uint32_t ID; /* Set only if referenced: ID within the object file, -1 if not output yet */ + + enum { + NODE_REPT, + NODE_FILE, + NODE_MACRO, + } type; }; -extern unsigned int nMaxRecursionDepth; +struct FileStackReptNode { /* NODE_REPT */ + struct FileStackNode node; + uint32_t reptDepth; + /* WARNING: if changing this type, change overflow check in `fstk_Init` */ + uint32_t iters[]; /* REPT iteration counts since last named node, in reverse depth order */ +}; -void fstk_RunInclude(char *tzFileName); -void fstk_Init(char *s); -void fstk_Dump(void); -void fstk_DumpToStr(char *buf, size_t len); -void fstk_DumpStringExpansions(void); -void fstk_AddIncludePath(char *s); -void fstk_RunMacro(char *s, struct MacroArgs *args); -void fstk_RunRept(uint32_t count, int32_t nReptLineNo); -FILE *fstk_FindFile(char const *fname, char **incPathUsed); -int32_t fstk_GetLine(void); +struct FileStackNamedNode { /* NODE_FILE, NODE_MACRO */ + struct FileStackNode node; + char name[]; /* File name for files, file::macro name for macros */ +}; + +extern size_t nMaxRecursionDepth; + +struct MacroArgs; + +void fstk_Dump(struct FileStackNode const *node, uint32_t lineNo); +void fstk_DumpCurrent(void); +struct FileStackNode *fstk_GetFileStack(void); +/* The lifetime of the returned chars is until reaching the end of that file */ +char const *fstk_GetFileName(void); + +void fstk_AddIncludePath(char const *s); +/** + * @param path The user-provided file name + * @param fullPath The address of a pointer, which will be made to point at the full path + * The pointer's value must be a valid argument to `realloc`, including NULL + * @param size Current size of the buffer, or 0 if the pointer is NULL + * @return True if the file was found, false if no path worked + */ +bool fstk_FindFile(char const *path, char **fullPath, size_t *size); + +bool yywrap(void); +void fstk_RunInclude(char const *path); +void fstk_RunMacro(char const *macroName, struct MacroArgs *args); +void fstk_RunRept(uint32_t 
count, int32_t nReptLineNo, char *body, size_t size); + +void fstk_Init(char const *mainPath, size_t maxRecursionDepth); #endif /* RGBDS_ASM_FSTACK_H */ diff --git a/include/asm/lexer.h b/include/asm/lexer.h index 7d095e53..9494ab6f 100644 --- a/include/asm/lexer.h +++ b/include/asm/lexer.h @@ -9,78 +9,65 @@ #ifndef RGBDS_ASM_LEXER_H #define RGBDS_ASM_LEXER_H -#include -#include - -#define LEXHASHSIZE (1 << 11) #define MAXSTRLEN 255 -struct sLexInitString { - char *tzName; - uint32_t nToken; +struct LexerState; +extern struct LexerState *lexerState; +extern struct LexerState *lexerStateEOL; + +static inline struct LexerState *lexer_GetState(void) +{ + return lexerState; +} + +static inline void lexer_SetState(struct LexerState *state) +{ + lexerState = state; +} + +static inline void lexer_SetStateAtEOL(struct LexerState *state) +{ + lexerStateEOL = state; +} + +extern char const *binDigits; +extern char const *gfxDigits; + +static inline void lexer_SetBinDigits(char const *digits) +{ + binDigits = digits; +} + +static inline void lexer_SetGfxDigits(char const *digits) +{ + gfxDigits = digits; +} + +/* + * `path` is referenced, but not held onto..! 
+ */ +struct LexerState *lexer_OpenFile(char const *path); +struct LexerState *lexer_OpenFileView(char *buf, size_t size, uint32_t lineNo); +void lexer_RestartRept(uint32_t lineNo); +void lexer_DeleteState(struct LexerState *state); +void lexer_Init(void); + +enum LexerMode { + LEXER_NORMAL, + LEXER_RAW, + LEXER_SKIP_TO_ELIF, + LEXER_SKIP_TO_ENDC }; -struct sLexFloat { - uint32_t (*Callback)(char *s, uint32_t size); - uint32_t nToken; -}; +void lexer_SetMode(enum LexerMode mode); +void lexer_ToggleStringExpansion(bool enable); -struct yy_buffer_state { - /* Actual starting address */ - char *pBufferRealStart; - /* Address where the data is initially written after a safety margin */ - char *pBufferStart; - char *pBuffer; - size_t nBufferSize; - uint32_t oAtLineStart; -}; - -enum eLexerState { - LEX_STATE_NORMAL, - LEX_STATE_MACROARGS -}; - -struct sStringExpansionPos { - char *tzName; - char *pBuffer; - char *pBufferPos; - struct sStringExpansionPos *pParent; -}; - -#define INITIAL 0 -#define macroarg 3 - -typedef struct yy_buffer_state *YY_BUFFER_STATE; - -void setup_lexer(void); - -void yy_set_state(enum eLexerState i); -YY_BUFFER_STATE yy_create_buffer(FILE *f); -YY_BUFFER_STATE yy_scan_bytes(char const *mem, uint32_t size); -void yy_delete_buffer(YY_BUFFER_STATE buf); -void yy_switch_to_buffer(YY_BUFFER_STATE buf); -uint32_t lex_FloatAlloc(const struct sLexFloat *tok); -void lex_FloatAddRange(uint32_t id, uint16_t start, uint16_t end); -void lex_FloatDeleteRange(uint32_t id, uint16_t start, uint16_t end); -void lex_FloatAddFirstRange(uint32_t id, uint16_t start, uint16_t end); -void lex_FloatDeleteFirstRange(uint32_t id, uint16_t start, uint16_t end); -void lex_FloatAddSecondRange(uint32_t id, uint16_t start, uint16_t end); -void lex_FloatDeleteSecondRange(uint32_t id, uint16_t start, uint16_t end); -void lex_Init(void); -void lex_AddStrings(const struct sLexInitString *lex); -void lex_SetBuffer(char *buffer, uint32_t len); -void lex_BeginStringExpansion(const 
char *tzName); -int yywrap(void); +char const *lexer_GetFileName(void); +uint32_t lexer_GetLineNo(void); +uint32_t lexer_GetColNo(void); +void lexer_DumpStringExpansions(void); int yylex(void); -void yyunput(char c); -void yyunputstr(const char *s); -void yyskipbytes(uint32_t count); -void yyunputbytes(uint32_t count); - -extern YY_BUFFER_STATE pCurrentBuffer; -extern struct sStringExpansionPos *pCurrentStringExpansion; - -void upperstring(char *s); -void lowerstring(char *s); +void lexer_CaptureRept(char **capture, size_t *size); +void lexer_CaptureMacroBody(char **capture, size_t *size); #endif /* RGBDS_ASM_LEXER_H */ diff --git a/include/asm/macro.h b/include/asm/macro.h index 2142ea71..855133f8 100644 --- a/include/asm/macro.h +++ b/include/asm/macro.h @@ -28,6 +28,7 @@ char const *macro_GetArg(uint32_t i); uint32_t macro_GetUniqueID(void); char const *macro_GetUniqueIDStr(void); void macro_SetUniqueID(uint32_t id); +uint32_t macro_UseNewUniqueID(void); void macro_ShiftCurrentArgs(void); uint32_t macro_NbArgs(void); diff --git a/include/asm/main.h b/include/asm/main.h index 874ad5a6..c2820d4c 100644 --- a/include/asm/main.h +++ b/include/asm/main.h @@ -43,6 +43,10 @@ void opt_Push(void); void opt_Pop(void); void opt_Parse(char *s); +void upperstring(char *s); +void lowerstring(char *s); + +/* TODO: are these really needed? 
*/ #define YY_FATAL_ERROR fatalerror #ifdef YYLMAX diff --git a/include/asm/output.h b/include/asm/output.h index e6a45df7..6f8895b6 100644 --- a/include/asm/output.h +++ b/include/asm/output.h @@ -18,6 +18,8 @@ struct Expression; extern char *tzObjectname; extern struct Section *pSectionList, *pCurrentSection; +void out_RegisterNode(struct FileStackNode *node); +void out_ReplaceNode(struct FileStackNode *node); void out_SetFileName(char *s); void out_CreatePatch(uint32_t type, struct Expression const *expr, uint32_t ofs); diff --git a/include/asm/symbol.h b/include/asm/symbol.h index 52f61a52..fa02a821 100644 --- a/include/asm/symbol.h +++ b/include/asm/symbol.h @@ -35,18 +35,21 @@ struct Symbol { bool isExported; /* Whether the symbol is to be exported */ bool isBuiltin; /* Whether the symbol is a built-in */ struct Section *section; - char fileName[_MAX_PATH + 1]; /* File where the symbol was defined. */ - uint32_t fileLine; /* Line where the symbol was defined. */ + struct FileStackNode *src; /* Where the symbol was defined */ + uint32_t fileLine; /* Line where the symbol was defined */ + bool hasCallback; union { - struct { /* If sym_IsNumeric */ - int32_t value; - int32_t (*callback)(void); - }; - struct { /* For SYM_MACRO */ - uint32_t macroSize; + /* If sym_IsNumeric */ + int32_t value; + int32_t (*numCallback)(void); + /* For SYM_MACRO */ + struct { + size_t macroSize; char *macro; }; + /* For SYM_EQUS, TODO: separate "base" fields from SYM_MACRO */ + char const *(*strCallback)(void); /* For SYM_EQUS */ }; uint32_t ID; /* ID of the symbol in the object file (-1 if none) */ @@ -101,6 +104,8 @@ static inline bool sym_IsExported(struct Symbol const *sym) */ static inline char const *sym_GetStringValue(struct Symbol const *sym) { + if (sym->hasCallback) + return sym->strCallback(); return sym->macro; } @@ -114,9 +119,10 @@ void sym_Export(char const *symName); struct Symbol *sym_AddEqu(char const *symName, int32_t value); struct Symbol *sym_AddSet(char const 
*symName, int32_t value); uint32_t sym_GetPCValue(void); +uint32_t sym_GetConstantSymValue(struct Symbol const *sym); uint32_t sym_GetConstantValue(char const *s); struct Symbol *sym_FindSymbol(char const *symName); -struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo); +struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char *body, size_t size); struct Symbol *sym_Ref(char const *symName); struct Symbol *sym_AddString(char const *symName, char const *value); uint32_t sym_GetDefinedValue(char const *s); diff --git a/include/asm/util.h b/include/asm/util.h index c03281f5..ca2e43e0 100644 --- a/include/asm/util.h +++ b/include/asm/util.h @@ -12,6 +12,7 @@ #include uint32_t calchash(const char *s); +char const *print(int c); size_t readUTF8Char(uint8_t *dest, char const *src); #endif /* RGBDS_UTIL_H */ diff --git a/include/link/main.h b/include/link/main.h index 25a69a9a..d55e0a8e 100644 --- a/include/link/main.h +++ b/include/link/main.h @@ -29,15 +29,45 @@ extern bool beVerbose; extern bool isWRA0Mode; extern bool disablePadding; +struct FileStackNode { + struct FileStackNode *parent; + /* Line at which the parent context was exited; meaningless for the root level */ + uint32_t lineNo; + + enum { + NODE_REPT, + NODE_FILE, + NODE_MACRO, + } type; + union { + char *name; /* NODE_FILE, NODE_MACRO */ + struct { /* NODE_REPT */ + uint32_t reptDepth; + uint32_t *iters; + }; + }; +}; + /* Helper macro for printing verbose-mode messages */ #define verbosePrint(...) do { \ if (beVerbose) \ fprintf(stderr, __VA_ARGS__); \ } while (0) -void error(char const *fmt, ...); +/** + * Dump a file stack to stderr + * @param node The leaf node to dump the context of + */ +char const *dumpFileStack(struct FileStackNode const *node); -noreturn_ void fatal(char const *fmt, ...); +void warning(struct FileStackNode const *where, uint32_t lineNo, + char const *fmt, ...) 
format_(printf, 3, 4); + +void error(struct FileStackNode const *where, uint32_t lineNo, + char const *fmt, ...) format_(printf, 3, 4); + +noreturn_ void fatal(struct FileStackNode const *where, uint32_t lineNo, + char const *fmt, ...) format_(printf, 3, 4); /** * Opens a file if specified, and aborts on error. diff --git a/include/link/object.h b/include/link/object.h index 2ecb0046..b43d728b 100644 --- a/include/link/object.h +++ b/include/link/object.h @@ -14,8 +14,9 @@ /** * Read an object (.o) file, and add its info to the data structures. * @param fileName A path to the object file to be read + * @param i The ID of the file */ -void obj_ReadFile(char const *fileName); +void obj_ReadFile(char const *fileName, unsigned int i); /** * Perform validation on the object files' contents @@ -27,6 +28,12 @@ void obj_DoSanityChecks(void); */ void obj_CheckAssertions(void); +/** + * Sets up object file reading + * @param nbFiles The number of object files that will be read + */ +void obj_Setup(unsigned int nbFiles); + /** * `free`s all object memory that was allocated. 
*/ diff --git a/include/link/section.h b/include/link/section.h index 24280412..d9da1a5b 100644 --- a/include/link/section.h +++ b/include/link/section.h @@ -19,6 +19,7 @@ #include "linkdefs.h" +struct FileStackNode; struct Section; struct AttachedSymbol { @@ -27,7 +28,8 @@ struct AttachedSymbol { }; struct Patch { - char *fileName; + struct FileStackNode const *src; + uint32_t lineNo; int32_t offset; uint32_t pcSectionID; uint32_t pcOffset; diff --git a/include/link/symbol.h b/include/link/symbol.h index 14210f7e..d0085795 100644 --- a/include/link/symbol.h +++ b/include/link/symbol.h @@ -16,12 +16,14 @@ #include "linkdefs.h" +struct FileStackNode; + struct Symbol { /* Info contained in the object files */ char *name; enum ExportLevel type; char const *objFileName; - char *fileName; + struct FileStackNode const *src; int32_t lineNo; int32_t sectionID; union { diff --git a/include/linkdefs.h b/include/linkdefs.h index 029501ca..e088a511 100644 --- a/include/linkdefs.h +++ b/include/linkdefs.h @@ -14,7 +14,7 @@ #define RGBDS_OBJECT_VERSION_STRING "RGB%1u" #define RGBDS_OBJECT_VERSION_NUMBER 9U -#define RGBDS_OBJECT_REV 5U +#define RGBDS_OBJECT_REV 6U enum AssertionType { ASSERT_WARN, diff --git a/include/platform.h b/include/platform.h index 6c6170d6..4c060e96 100644 --- a/include/platform.h +++ b/include/platform.h @@ -32,4 +32,11 @@ # define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR) #endif +/* MSVC doesn't use POSIX types or defines for `read` */ +#ifdef _MSC_VER +# define STDIN_FILENO 0 +# define ssize_t int +# define SSIZE_MAX INT_MAX +#endif + #endif /* RGBDS_PLATFORM_H */ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 861c85d1..63f155df 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -31,7 +31,6 @@ set(rgbasm_src "${BISON_ASMy_OUTPUT_SOURCE}" "asm/charmap.c" "asm/fstack.c" - "asm/globlex.c" "asm/lexer.c" "asm/macro.c" "asm/main.c" diff --git a/src/asm/asmy.y b/src/asm/asmy.y index 6a1694f7..653c001e 100644 --- a/src/asm/asmy.y +++ 
b/src/asm/asmy.y @@ -39,62 +39,7 @@ uint32_t nListCountEmpty; char *tzNewMacro; uint32_t ulNewMacroSize; int32_t nPCOffset; - -size_t symvaluetostring(char *dest, size_t maxLength, char *symName, - const char *mode) -{ - size_t length; - struct Symbol *sym = sym_FindSymbol(symName); - - if (sym && sym->type == SYM_EQUS) { - char const *src = sym_GetStringValue(sym); - size_t i; - - if (mode) - error("Print types are only allowed for numbers\n"); - - for (i = 0; src[i] != 0; i++) { - if (i >= maxLength) - fatalerror("Symbol value too long to fit buffer\n"); - - dest[i] = src[i]; - } - - length = i; - - } else { - uint32_t value = sym_GetConstantValue(symName); - int32_t fullLength; - - /* Special cheat for binary */ - if (mode && !mode[0]) { - char binary[33]; /* 32 bits + 1 terminator */ - char *write_ptr = binary + 32; - fullLength = 0; - binary[32] = 0; - do { - *(--write_ptr) = (value & 1) + '0'; - value >>= 1; - fullLength++; - } while(value); - strncpy(dest, write_ptr, maxLength + 1); - } else { - fullLength = snprintf(dest, maxLength + 1, - mode ? 
mode : "$%" PRIX32, - value); - } - - if (fullLength < 0) { - fatalerror("snprintf encoding error\n"); - } else { - length = (size_t)fullLength; - if (length > maxLength) - fatalerror("Symbol value too long to fit buffer\n"); - } - } - - return length; -} +bool executeElseBlock; /* If this is set, ELIFs cannot be executed anymore */ static uint32_t str2int2(uint8_t *s, int32_t length) { @@ -111,278 +56,6 @@ static uint32_t str2int2(uint8_t *s, int32_t length) return r; } -static uint32_t isWhiteSpace(char s) -{ - return (s == ' ') || (s == '\t') || (s == '\0') || (s == '\n'); -} - -static uint32_t isRept(char *s) -{ - return (strncasecmp(s, "REPT", 4) == 0) - && isWhiteSpace(*(s - 1)) && isWhiteSpace(s[4]); -} - -static uint32_t isEndr(char *s) -{ - return (strncasecmp(s, "ENDR", 4) == 0) - && isWhiteSpace(*(s - 1)) && isWhiteSpace(s[4]); -} - -static void copyrept(void) -{ - int32_t level = 1, len, instring = 0; - char *src = pCurrentBuffer->pBuffer; - char *bufferEnd = pCurrentBuffer->pBufferStart - + pCurrentBuffer->nBufferSize; - - while (src < bufferEnd && level) { - if (instring == 0) { - if (isRept(src)) { - level++; - src += 4; - } else if (isEndr(src)) { - level--; - src += 4; - } else { - if (*src == '\"') - instring = 1; - src++; - } - } else { - if (*src == '\\') { - src += 2; - } else if (*src == '\"') { - src++; - instring = 0; - } else { - src++; - } - } - } - - if (level != 0) - fatalerror("Unterminated REPT block\n"); - - len = src - pCurrentBuffer->pBuffer - 4; - - src = pCurrentBuffer->pBuffer; - ulNewMacroSize = len; - - tzNewMacro = malloc(ulNewMacroSize + 1); - - if (tzNewMacro == NULL) - fatalerror("Not enough memory for REPT block.\n"); - - uint32_t i; - - tzNewMacro[ulNewMacroSize] = 0; - for (i = 0; i < ulNewMacroSize; i++) { - tzNewMacro[i] = src[i]; - if (src[i] == '\n') - nLineNo++; - } - - yyskipbytes(ulNewMacroSize + 4); - -} - -static uint32_t isMacro(char *s) -{ - return (strncasecmp(s, "MACRO", 4) == 0) - && isWhiteSpace(*(s - 1)) 
&& isWhiteSpace(s[5]); -} - -static uint32_t isEndm(char *s) -{ - return (strncasecmp(s, "ENDM", 4) == 0) - && isWhiteSpace(*(s - 1)) && isWhiteSpace(s[4]); -} - -static void copymacro(void) -{ - int32_t level = 1, len, instring = 0; - char *src = pCurrentBuffer->pBuffer; - char *bufferEnd = pCurrentBuffer->pBufferStart - + pCurrentBuffer->nBufferSize; - - while (src < bufferEnd && level) { - if (instring == 0) { - if (isMacro(src)) { - level++; - src += 4; - } else if (isEndm(src)) { - level--; - src += 4; - } else { - if(*src == '\"') - instring = 1; - src++; - } - } else { - if (*src == '\\') { - src += 2; - } else if (*src == '\"') { - src++; - instring = 0; - } else { - src++; - } - } - } - - if (level != 0) - fatalerror("Unterminated MACRO definition.\n"); - - len = src - pCurrentBuffer->pBuffer - 4; - - src = pCurrentBuffer->pBuffer; - ulNewMacroSize = len; - - tzNewMacro = (char *)malloc(ulNewMacroSize + 1); - if (tzNewMacro == NULL) - fatalerror("Not enough memory for MACRO definition.\n"); - - uint32_t i; - - tzNewMacro[ulNewMacroSize] = 0; - for (i = 0; i < ulNewMacroSize; i++) { - tzNewMacro[i] = src[i]; - if (src[i] == '\n') - nLineNo++; - } - - yyskipbytes(ulNewMacroSize + 4); -} - -static bool endsIf(char c) -{ - return isWhiteSpace(c) || c == '(' || c == '{'; -} - -static uint32_t isIf(char *s) -{ - return (strncasecmp(s, "IF", 2) == 0) - && isWhiteSpace(s[-1]) && endsIf(s[2]); -} - -static uint32_t isElif(char *s) -{ - return (strncasecmp(s, "ELIF", 4) == 0) - && isWhiteSpace(s[-1]) && endsIf(s[4]); -} - -static uint32_t isElse(char *s) -{ - return (strncasecmp(s, "ELSE", 4) == 0) - && isWhiteSpace(s[-1]) && isWhiteSpace(s[4]); -} - -static uint32_t isEndc(char *s) -{ - return (strncasecmp(s, "ENDC", 4) == 0) - && isWhiteSpace(s[-1]) && isWhiteSpace(s[4]); -} - -static void if_skip_to_else(void) -{ - int32_t level = 1; - bool inString = false; - char *src = pCurrentBuffer->pBuffer; - - while (*src && level) { - if (*src == '\n') - nLineNo++; - - if 
(!inString) { - if (isIf(src)) { - level++; - src += 2; - - } else if (level == 1 && isElif(src)) { - level--; - skipElif = false; - - } else if (level == 1 && isElse(src)) { - level--; - src += 4; - - } else if (isEndc(src)) { - level--; - if (level != 0) - src += 4; - - } else { - if (*src == '\"') - inString = true; - src++; - } - } else { - if (*src == '\"') { - inString = false; - } else if (*src == '\\') { - /* Escaped quotes don't end the string */ - if (*++src != '\"') - src--; - } - src++; - } - } - - if (level != 0) - fatalerror("Unterminated IF construct\n"); - - int32_t len = src - pCurrentBuffer->pBuffer; - - yyskipbytes(len); - yyunput('\n'); - nLineNo--; -} - -static void if_skip_to_endc(void) -{ - int32_t level = 1; - bool inString = false; - char *src = pCurrentBuffer->pBuffer; - - while (*src && level) { - if (*src == '\n') - nLineNo++; - - if (!inString) { - if (isIf(src)) { - level++; - src += 2; - } else if (isEndc(src)) { - level--; - if (level != 0) - src += 4; - } else { - if (*src == '\"') - inString = true; - src++; - } - } else { - if (*src == '\"') { - inString = false; - } else if (*src == '\\') { - /* Escaped quotes don't end the string */ - if (*++src != '\"') - src--; - } - src++; - } - } - - if (level != 0) - fatalerror("Unterminated IF construct\n"); - - int32_t len = src - pCurrentBuffer->pBuffer; - - yyskipbytes(len); - yyunput('\n'); - nLineNo--; -} - static size_t strlenUTF8(const char *s) { size_t len = 0; @@ -659,17 +332,70 @@ lines : /* empty */ | lines { nListCountEmpty = 0; nPCOffset = 0; - } line '\n' { - nLineNo++; + } line { nTotalLines++; } ; -line : label - | label cpu_command - | label macro - | label simple_pseudoop - | pseudoop +line : label '\n' + | label cpu_command '\n' + | label macro '\n' + | label simple_pseudoop '\n' + | pseudoop '\n' + | conditional /* May not necessarily be followed by a newline, see below */ +; + +/* + * For "logistical" reasons, conditionals must manage newlines themselves. 
+ * This is because we need to switch the lexer's mode *after* the newline has been read, + * and to avoid causing some grammar conflicts (token reducing is finicky). + * This is DEFINITELY one of the more FRAGILE parts of the codebase, handle with care. + */ +conditional : if + /* It's important that all of these require being at line start for `skipIfBlock` */ + | elif + | else + | endc +; + +if : T_POP_IF const '\n' { + nIFDepth++; + executeElseBlock = !$2; + if (executeElseBlock) + lexer_SetMode(LEXER_SKIP_TO_ELIF); + } +; + +elif : T_POP_ELIF const '\n' { + if (nIFDepth <= 0) + fatalerror("Found ELIF outside an IF construct\n"); + + if (!executeElseBlock) { + lexer_SetMode(LEXER_SKIP_TO_ENDC); + } else { + executeElseBlock = !$2; + if (executeElseBlock) + lexer_SetMode(LEXER_SKIP_TO_ELIF); + } + } +; + +else : T_POP_ELSE '\n' { + if (nIFDepth <= 0) + fatalerror("Found ELSE outside an IF construct\n"); + + if (!executeElseBlock) + lexer_SetMode(LEXER_SKIP_TO_ENDC); + } +; + +endc : T_POP_ENDC '\n' { + if (nIFDepth <= 0) + fatalerror("Found ENDC outside an IF construct\n"); + + nIFDepth--; + executeElseBlock = false; + } ; scoped_id : T_ID | T_LOCAL_ID ; @@ -699,9 +425,9 @@ label : /* empty */ ; macro : T_ID { - yy_set_state(LEX_STATE_MACROARGS); + lexer_SetMode(LEXER_RAW); } macroargs { - yy_set_state(LEX_STATE_NORMAL); + lexer_SetMode(LEXER_NORMAL); fstk_RunMacro($1, $3); } ; @@ -732,10 +458,6 @@ simple_pseudoop : include | printt | printv | printi - | if - | elif - | else - | endc | export | db | dw @@ -786,9 +508,9 @@ align : T_OP_ALIGN uconst { ; opt : T_POP_OPT { - yy_set_state(LEX_STATE_MACROARGS); + lexer_SetMode(LEXER_RAW); } opt_list { - yy_set_state(LEX_STATE_NORMAL); + lexer_SetMode(LEXER_NORMAL); } ; @@ -875,16 +597,20 @@ load : T_POP_LOAD string ',' sectiontype sectorg sectattrs { ; rept : T_POP_REPT uconst { - uint32_t nDefinitionLineNo = nLineNo; - copyrept(); - fstk_RunRept($2, nDefinitionLineNo); + uint32_t nDefinitionLineNo = 
lexer_GetLineNo(); + char *body; + size_t size; + lexer_CaptureRept(&body, &size); + fstk_RunRept($2, nDefinitionLineNo, body, size); } ; macrodef : T_LABEL ':' T_POP_MACRO { - int32_t nDefinitionLineNo = nLineNo; - copymacro(); - sym_AddMacro($1, nDefinitionLineNo); + int32_t nDefinitionLineNo = lexer_GetLineNo(); + char *body; + size_t size; + lexer_CaptureMacroBody(&body, &size); + sym_AddMacro($1, nDefinitionLineNo, body, size); } ; @@ -956,9 +682,9 @@ dl : T_POP_DL constlist_32bit_entry ',' constlist_32bit { ; purge : T_POP_PURGE { - oDontExpandStrings = true; + lexer_ToggleStringExpansion(false); } purge_list { - oDontExpandStrings = false; + lexer_ToggleStringExpansion(true); } ; @@ -1052,62 +778,6 @@ printi : T_POP_PRINTI const { printf("%" PRId32, $2); } printf : T_POP_PRINTF const { math_Print($2); } ; -if : T_POP_IF const { - nIFDepth++; - if (!$2) - if_skip_to_else(); - } -; - -elif : T_POP_ELIF const { - if (nIFDepth <= 0) - fatalerror("Found ELIF outside an IF construct\n"); - - if (skipElif) { - /* - * Executed when ELIF is reached at the end of - * an IF or ELIF block for which the condition - * was true. - * - * Continue parsing at ENDC keyword - */ - if_skip_to_endc(); - } else { - /* - * Executed when ELIF is skipped to because the - * condition of the previous IF or ELIF block - * was false. - */ - skipElif = true; - - if (!$2) { - /* - * Continue parsing after ELSE, or at - * ELIF or ENDC keyword. 
- */ - if_skip_to_else(); - } - } - } -; - -else : T_POP_ELSE { - if (nIFDepth <= 0) - fatalerror("Found ELSE outside an IF construct\n"); - - /* Continue parsing at ENDC keyword */ - if_skip_to_endc(); - } -; - -endc : T_POP_ENDC { - if (nIFDepth <= 0) - fatalerror("Found ENDC outside an IF construct\n"); - - nIFDepth--; - } -; - const_3bit : const { int32_t value = $1; @@ -1267,13 +937,13 @@ relocexpr_no_str : scoped_id { rpn_Symbol(&$$, $1); } } | T_OP_BANK '(' string ')' { rpn_BankSection(&$$, $3); } | T_OP_DEF { - oDontExpandStrings = true; + lexer_ToggleStringExpansion(false); } '(' scoped_id ')' { struct Symbol const *sym = sym_FindSymbol($4); rpn_Number(&$$, !!sym); - oDontExpandStrings = false; + lexer_ToggleStringExpansion(true); } | T_OP_ROUND '(' const ')' { rpn_Number(&$$, math_Round($3)); diff --git a/src/asm/fstack.c b/src/asm/fstack.c index c667acea..33387ad7 100644 --- a/src/asm/fstack.c +++ b/src/asm/fstack.c @@ -6,554 +6,470 @@ * SPDX-License-Identifier: MIT */ -/* - * FileStack routines - */ - +#include +#include #include #include -#include #include #include #include -#include -#include -#include #include "asm/fstack.h" -#include "asm/lexer.h" #include "asm/macro.h" #include "asm/main.h" -#include "asm/output.h" +#include "asm/symbol.h" #include "asm/warning.h" +#include "platform.h" /* S_ISDIR (stat macro) */ -#include "extern/err.h" +#ifdef LEXER_DEBUG + #define dbgPrint(...) fprintf(stderr, "[lexer] " __VA_ARGS__) +#else + #define dbgPrint(...) 
+#endif -#include "platform.h" // S_ISDIR (stat macro) -#include "types.h" +struct Context { + struct Context *parent; + struct FileStackNode *fileInfo; + struct LexerState *lexerState; + uint32_t uniqueID; + struct MacroArgs *macroArgs; /* Macro args are *saved* here */ + uint32_t nbReptIters; +}; -static struct sContext *pFileStack; -static unsigned int nFileStackDepth; -unsigned int nMaxRecursionDepth; -static struct Symbol const *pCurrentMacro; -static YY_BUFFER_STATE CurrentFlexHandle; -static FILE *pCurrentFile; -static uint32_t nCurrentStatus; -char tzCurrentFileName[_MAX_PATH + 1]; -static char IncludePaths[MAXINCPATHS][_MAX_PATH + 1]; -static int32_t NextIncPath; -static uint32_t nMacroCount; +static struct Context *contextStack; +static size_t contextDepth = 0; +#define DEFAULT_MAX_DEPTH 64 +size_t nMaxRecursionDepth; -static char *pCurrentREPTBlock; -static uint32_t nCurrentREPTBlockSize; -static uint32_t nCurrentREPTBlockCount; -static int32_t nCurrentREPTBodyFirstLine; -static int32_t nCurrentREPTBodyLastLine; +static unsigned int nbIncPaths = 0; +static char const *includePaths[MAXINCPATHS]; -uint32_t ulMacroReturnValue; - -/* - * defines for nCurrentStatus - */ -#define STAT_isInclude 0 /* 'Normal' state as well */ -#define STAT_isMacro 1 -#define STAT_isMacroArg 2 -#define STAT_isREPTBlock 3 - -/* Max context stack size */ - -/* - * Context push and pop - */ -static void pushcontext(void) +char const *dumpNodeAndParents(struct FileStackNode const *node) { - struct sContext **ppFileStack; + char const *name; - if (++nFileStackDepth > nMaxRecursionDepth) - fatalerror("Recursion limit (%u) exceeded\n", nMaxRecursionDepth); + if (node->type == NODE_REPT) { + assert(node->parent); /* REPT nodes should always have a parent */ + struct FileStackReptNode const *reptInfo = (struct FileStackReptNode const *)node; - ppFileStack = &pFileStack; - while (*ppFileStack) - ppFileStack = &((*ppFileStack)->next); - - *ppFileStack = malloc(sizeof(struct sContext)); - - 
if (*ppFileStack == NULL) - fatalerror("No memory for context\n"); - - (*ppFileStack)->FlexHandle = CurrentFlexHandle; - (*ppFileStack)->next = NULL; - strcpy((char *)(*ppFileStack)->tzFileName, (char *)tzCurrentFileName); - (*ppFileStack)->nLine = nLineNo; - - switch ((*ppFileStack)->nStatus = nCurrentStatus) { - case STAT_isMacroArg: - case STAT_isMacro: - (*ppFileStack)->macroArgs = macro_GetCurrentArgs(); - (*ppFileStack)->pMacro = pCurrentMacro; - break; - case STAT_isInclude: - (*ppFileStack)->pFile = pCurrentFile; - break; - case STAT_isREPTBlock: - (*ppFileStack)->macroArgs = macro_GetCurrentArgs(); - (*ppFileStack)->pREPTBlock = pCurrentREPTBlock; - (*ppFileStack)->nREPTBlockSize = nCurrentREPTBlockSize; - (*ppFileStack)->nREPTBlockCount = nCurrentREPTBlockCount; - (*ppFileStack)->nREPTBodyFirstLine = nCurrentREPTBodyFirstLine; - (*ppFileStack)->nREPTBodyLastLine = nCurrentREPTBodyLastLine; - break; - default: - fatalerror("%s: Internal error.\n", __func__); - } - (*ppFileStack)->uniqueID = macro_GetUniqueID(); - - nLineNo = 0; -} - -static int32_t popcontext(void) -{ - struct sContext *pLastFile, **ppLastFile; - - if (nCurrentStatus == STAT_isREPTBlock) { - if (--nCurrentREPTBlockCount) { - char *pREPTIterationWritePtr; - unsigned long nREPTIterationNo; - int nNbCharsWritten; - int nNbCharsLeft; - - yy_delete_buffer(CurrentFlexHandle); - CurrentFlexHandle = - yy_scan_bytes(pCurrentREPTBlock, - nCurrentREPTBlockSize); - yy_switch_to_buffer(CurrentFlexHandle); - macro_SetUniqueID(nMacroCount++); - - /* Increment REPT count in file path */ - pREPTIterationWritePtr = - strrchr(tzCurrentFileName, '~') + 1; - nREPTIterationNo = - strtoul(pREPTIterationWritePtr, NULL, 10); - nNbCharsLeft = sizeof(tzCurrentFileName) - - (pREPTIterationWritePtr - tzCurrentFileName); - nNbCharsWritten = snprintf(pREPTIterationWritePtr, - nNbCharsLeft, "%lu", - nREPTIterationNo + 1); - if (nNbCharsWritten >= nNbCharsLeft) { - /* - * The string is probably corrupted somehow, - * 
revert the change to avoid a bad error - * output. - */ - sprintf(pREPTIterationWritePtr, "%lu", - nREPTIterationNo); - fatalerror("Cannot write REPT count to file path\n"); - } - - nLineNo = nCurrentREPTBodyFirstLine; - return 0; + name = dumpNodeAndParents(node->parent); + fprintf(stderr, "(%" PRIu32 ") -> %s", node->lineNo, name); + for (uint32_t i = reptInfo->reptDepth; i--; ) + fprintf(stderr, "::REPT~%" PRIu32, reptInfo->iters[i]); + } else { + name = ((struct FileStackNamedNode const *)node)->name; + if (node->parent) { + dumpNodeAndParents(node->parent); + fprintf(stderr, "(%" PRIu32 ") -> %s", node->lineNo, name); + } else { + fputs(name, stderr); } } - - pLastFile = pFileStack; - if (pLastFile == NULL) - return 1; - - ppLastFile = &pFileStack; - while (pLastFile->next) { - ppLastFile = &(pLastFile->next); - pLastFile = *ppLastFile; - } - - yy_delete_buffer(CurrentFlexHandle); - nLineNo = nCurrentStatus == STAT_isREPTBlock ? nCurrentREPTBodyLastLine - : pLastFile->nLine; - - if (nCurrentStatus == STAT_isInclude) - fclose(pCurrentFile); - - if (nCurrentStatus == STAT_isMacro - || nCurrentStatus == STAT_isREPTBlock) - nLineNo++; - - CurrentFlexHandle = pLastFile->FlexHandle; - strcpy((char *)tzCurrentFileName, (char *)pLastFile->tzFileName); - - switch (pLastFile->nStatus) { - struct MacroArgs *args; - - case STAT_isMacroArg: - case STAT_isMacro: - args = macro_GetCurrentArgs(); - if (nCurrentStatus == STAT_isMacro) { - macro_FreeArgs(args); - free(args); - } - macro_UseNewArgs(pLastFile->macroArgs); - pCurrentMacro = pLastFile->pMacro; - break; - case STAT_isInclude: - pCurrentFile = pLastFile->pFile; - break; - case STAT_isREPTBlock: - args = macro_GetCurrentArgs(); - if (nCurrentStatus == STAT_isMacro) { - macro_FreeArgs(args); - free(args); - } - macro_UseNewArgs(pLastFile->macroArgs); - pCurrentREPTBlock = pLastFile->pREPTBlock; - nCurrentREPTBlockSize = pLastFile->nREPTBlockSize; - nCurrentREPTBlockCount = pLastFile->nREPTBlockCount; - 
nCurrentREPTBodyFirstLine = pLastFile->nREPTBodyFirstLine; - break; - default: - fatalerror("%s: Internal error.\n", __func__); - } - macro_SetUniqueID(pLastFile->uniqueID); - - nCurrentStatus = pLastFile->nStatus; - - nFileStackDepth--; - - free(*ppLastFile); - *ppLastFile = NULL; - yy_switch_to_buffer(CurrentFlexHandle); - return 0; + return name; } -int32_t fstk_GetLine(void) +void fstk_Dump(struct FileStackNode const *node, uint32_t lineNo) { - struct sContext *pLastFile, **ppLastFile; - - switch (nCurrentStatus) { - case STAT_isInclude: - /* This is the normal mode, also used when including a file. */ - return nLineNo; - case STAT_isMacro: - break; /* Peek top file of the stack */ - case STAT_isMacroArg: - return nLineNo; /* ??? */ - case STAT_isREPTBlock: - break; /* Peek top file of the stack */ - default: - fatalerror("%s: Internal error.\n", __func__); - } - - pLastFile = pFileStack; - - if (pLastFile != NULL) { - while (pLastFile->next) { - ppLastFile = &(pLastFile->next); - pLastFile = *ppLastFile; - } - return pLastFile->nLine; - } - - /* - * This is only reached if the lexer is in REPT or MACRO mode but there - * are no saved contexts with the origin of said REPT or MACRO. 
- */ - fatalerror("%s: Internal error.\n", __func__); + dumpNodeAndParents(node); + fprintf(stderr, "(%" PRIu32 ")", lineNo); } -int yywrap(void) +void fstk_DumpCurrent(void) { - return popcontext(); -} - -/* - * Dump the context stack to stderr - */ -void fstk_Dump(void) -{ - const struct sContext *pLastFile; - - pLastFile = pFileStack; - - while (pLastFile) { - fprintf(stderr, "%s(%" PRId32 ") -> ", pLastFile->tzFileName, - pLastFile->nLine); - pLastFile = pLastFile->next; + if (!contextStack) { + fputs("at top level", stderr); + return; } - - fprintf(stderr, "%s(%" PRId32 ")", tzCurrentFileName, nLineNo); + fstk_Dump(contextStack->fileInfo, lexer_GetLineNo()); } -void fstk_DumpToStr(char *buf, size_t buflen) +struct FileStackNode *fstk_GetFileStack(void) { - const struct sContext *pLastFile = pFileStack; - int retcode; - size_t len = buflen; + struct FileStackNode *node = contextStack->fileInfo; - while (pLastFile) { - retcode = snprintf(&buf[buflen - len], len, "%s(%" PRId32 ") -> ", - pLastFile->tzFileName, pLastFile->nLine); - if (retcode < 0) - fatalerror("Failed to dump file stack to string: %s\n", strerror(errno)); - else if (retcode >= len) - len = 0; - else - len -= retcode; - pLastFile = pLastFile->next; + /* Mark node and all of its parents as referenced if not already so they don't get freed */ + while (node && !node->referenced) { + node->ID = -1; + node->referenced = true; + node = node->parent; } - - retcode = snprintf(&buf[buflen - len], len, "%s(%" PRId32 ")", - tzCurrentFileName, nLineNo); - if (retcode < 0) - fatalerror("Failed to dump file stack to string: %s\n", strerror(errno)); - else if (retcode >= len) - len = 0; - else - len -= retcode; - - if (!len) - warning(WARNING_LONG_STR, "File stack dump too long, got truncated\n"); + return contextStack->fileInfo; } -/* - * Dump the string expansion stack to stderr - */ -void fstk_DumpStringExpansions(void) +char const *fstk_GetFileName(void) { - const struct sStringExpansionPos *pExpansion = 
pCurrentStringExpansion; + /* Iterating via the nodes themselves skips nested REPTs */ + struct FileStackNode const *node = contextStack->fileInfo; - while (pExpansion) { - fprintf(stderr, "while expanding symbol \"%s\"\n", - pExpansion->tzName); - pExpansion = pExpansion->pParent; + while (node->type != NODE_FILE) + node = node->parent; + return ((struct FileStackNamedNode const *)node)->name; +} + +void fstk_AddIncludePath(char const *path) +{ + if (path[0] == '\0') + return; + if (nbIncPaths >= MAXINCPATHS) { + error("Too many include directories passed from command line\n"); + return; } + size_t len = strlen(path); + size_t allocSize = len + (path[len - 1] != '/') + 1; + char *str = malloc(allocSize); + + if (!str) { + /* Attempt to continue without that path */ + error("Failed to allocate new include path: %s\n", strerror(errno)); + return; + } + memcpy(str, path, len); + char *end = str + len - 1; + + if (*end++ != '/') + *end++ = '/'; + *end = '\0'; + includePaths[nbIncPaths++] = str; } -/* - * Extra includepath stuff - */ -void fstk_AddIncludePath(char *s) -{ - if (NextIncPath == MAXINCPATHS) - fatalerror("Too many include directories passed from command line\n"); - - // Find last occurrence of slash; is it at the end of the string? - char const *lastSlash = strrchr(s, '/'); - char const *pattern = lastSlash && *(lastSlash + 1) == 0 ? 
"%s" : "%s/"; - - if (snprintf(IncludePaths[NextIncPath++], _MAX_PATH, pattern, - s) >= _MAX_PATH) - fatalerror("Include path too long '%s'\n", s); -} - -static void printdep(const char *fileName) +static void printDep(char const *path) { if (dependfile) { - fprintf(dependfile, "%s: %s\n", tzTargetFileName, fileName); + fprintf(dependfile, "%s: %s\n", tzTargetFileName, path); if (oGeneratePhonyDeps) - fprintf(dependfile, "%s:\n", fileName); + fprintf(dependfile, "%s:\n", path); } } -static FILE *getFile(char const *pathname) +static bool isPathValid(char const *path) { struct stat statbuf; - if (stat(pathname, &statbuf) != 0) - return NULL; + if (stat(path, &statbuf) != 0) + return false; /* Reject directories */ - if (S_ISDIR(statbuf.st_mode)) - return NULL; - - return fopen(pathname, "rb"); + return !S_ISDIR(statbuf.st_mode); } -FILE *fstk_FindFile(char const *fname, char **incPathUsed) +bool fstk_FindFile(char const *path, char **fullPath, size_t *size) { - if (fname == NULL) - return NULL; - - char path[_MAX_PATH]; - FILE *f = getFile(fname); - - if (f) { - printdep(fname); - return f; + if (!*size) { + *size = 64; /* This is arbitrary, really */ + *fullPath = realloc(*fullPath, *size); + if (!*fullPath) + error("realloc error during include path search: %s\n", + strerror(errno)); } - for (size_t i = 0; i < NextIncPath; ++i) { - /* - * The function snprintf() does not write more than `size` bytes - * (including the terminating null byte ('\0')). If the output - * was truncated due to this limit, the return value is the - * number of characters (excluding the terminating null byte) - * which would have been written to the final string if enough - * space had been available. Thus, a return value of `size` or - * more means that the output was truncated. - */ - int fullpathlen = snprintf(path, sizeof(path), "%s%s", - IncludePaths[i], fname); + if (*fullPath) { + for (size_t i = 0; i <= nbIncPaths; ++i) { + char const *incPath = i ? 
includePaths[i - 1] : ""; + int len = snprintf(*fullPath, *size, "%s%s", incPath, path); - if (fullpathlen >= (int)sizeof(path)) - continue; + /* Oh how I wish `asnprintf` was standard... */ + if (len >= *size) { /* `len` doesn't include the terminator, `size` does */ + *size = len + 1; + *fullPath = realloc(*fullPath, *size); + if (!*fullPath) { + error("realloc error during include path search: %s\n", + strerror(errno)); + break; + } + len = sprintf(*fullPath, "%s%s", incPath, path); + } - f = getFile(path); - if (f) { - printdep(path); - - if (incPathUsed) - *incPathUsed = IncludePaths[i]; - return f; + if (len < 0) { + error("snprintf error during include path search: %s\n", + strerror(errno)); + } else if (isPathValid(*fullPath)) { + printDep(*fullPath); + return true; + } } } errno = ENOENT; if (oGeneratedMissingIncludes) - printdep(fname); - return NULL; + printDep(path); + return false; +} + +bool yywrap(void) +{ + if (contextStack->fileInfo->type == NODE_REPT) { /* The context is a REPT block, which may loop */ + struct FileStackReptNode *fileInfo = (struct FileStackReptNode *)contextStack->fileInfo; + + /* If the node is referenced, we can't edit it; duplicate it */ + if (contextStack->fileInfo->referenced) { + size_t size = sizeof(*fileInfo) + sizeof(fileInfo->iters[0]) * fileInfo->reptDepth; + struct FileStackReptNode *copy = malloc(size); + + if (!copy) + fatalerror("Failed to duplicate REPT file node: %s\n", strerror(errno)); + /* Copy all info but the referencing */ + memcpy(copy, fileInfo, size); + copy->node.next = NULL; + copy->node.referenced = false; + + fileInfo = copy; + contextStack->fileInfo = (struct FileStackNode *)fileInfo; + } + + fileInfo->iters[0]++; + /* If this wasn't the last iteration, wrap instead of popping */ + if (fileInfo->iters[0] <= contextStack->nbReptIters) { + lexer_RestartRept(contextStack->fileInfo->lineNo); + contextStack->uniqueID = macro_UseNewUniqueID(); + return false; + } + } else if (!contextStack->parent) { + 
return true; + } + dbgPrint("Popping context\n"); + + struct Context *context = contextStack; + + contextStack = contextStack->parent; + contextDepth--; + + lexer_DeleteState(context->lexerState); + /* Restore args if a macro (not REPT) saved them */ + if (context->fileInfo->type == NODE_MACRO) { + dbgPrint("Restoring macro args %p\n", contextStack->macroArgs); + macro_UseNewArgs(contextStack->macroArgs); + } + /* Free the file stack node */ + if (!context->fileInfo->referenced) + free(context->fileInfo); + /* Free the entry and make its parent the current entry */ + free(context); + + lexer_SetState(contextStack->lexerState); + macro_SetUniqueID(contextStack->uniqueID); + return false; } /* - * Set up an include file for parsing + * Make sure not to switch the lexer state before calling this, so the saved line no is correct + * BE CAREFUL!! This modifies the file stack directly, you should have set up the file info first */ -void fstk_RunInclude(char *tzFileName) +static void newContext(struct FileStackNode *fileInfo) { - char *incPathUsed = ""; - FILE *f = fstk_FindFile(tzFileName, &incPathUsed); + if (++contextDepth >= nMaxRecursionDepth) + fatalerror("Recursion limit (%zu) exceeded\n", nMaxRecursionDepth); + struct Context *context = malloc(sizeof(*context)); - if (f == NULL) { - if (oGeneratedMissingIncludes) { + if (!context) + fatalerror("Failed to allocate memory for new context: %s\n", strerror(errno)); + fileInfo->parent = contextStack->fileInfo; + fileInfo->lineNo = 0; /* Init to a default value, see struct definition for info */ + fileInfo->referenced = false; + fileInfo->lineNo = lexer_GetLineNo(); + context->fileInfo = fileInfo; + /* + * Link new entry to its parent so it's reachable later + * ERRORS SHOULD NOT OCCUR AFTER THIS!! 
+ */ + context->parent = contextStack; + contextStack = context; + +} + +void fstk_RunInclude(char const *path) +{ + dbgPrint("Including path \"%s\"\n", path); + + char *fullPath = NULL; + size_t size = 0; + + if (!fstk_FindFile(path, &fullPath, &size)) { + free(fullPath); + if (oGeneratedMissingIncludes) oFailedOnMissingInclude = true; - return; - } - error("Unable to open included file '%s': %s\n", tzFileName, strerror(errno)); + else + error("Unable to open included file '%s': %s\n", path, strerror(errno)); return; } + dbgPrint("Full path: \"%s\"\n", fullPath); - pushcontext(); - nLineNo = 1; - nCurrentStatus = STAT_isInclude; - snprintf(tzCurrentFileName, sizeof(tzCurrentFileName), "%s%s", - incPathUsed, tzFileName); - if (verbose) - printf("Assembling %s\n", tzCurrentFileName); - pCurrentFile = f; - CurrentFlexHandle = yy_create_buffer(pCurrentFile); - yy_switch_to_buffer(CurrentFlexHandle); + struct FileStackNamedNode *fileInfo = malloc(sizeof(*fileInfo) + size); - /* Dirty hack to give the INCLUDE directive a linefeed */ + if (!fileInfo) { + error("Failed to alloc file info for INCLUDE: %s\n", strerror(errno)); + return; + } + fileInfo->node.type = NODE_FILE; + strcpy(fileInfo->name, fullPath); + free(fullPath); - yyunput('\n'); - nLineNo--; + newContext((struct FileStackNode *)fileInfo); + contextStack->lexerState = lexer_OpenFile(fileInfo->name); + if (!contextStack->lexerState) + fatalerror("Failed to set up lexer for file include\n"); + lexer_SetStateAtEOL(contextStack->lexerState); + /* We're back at top-level, so most things are reset */ + contextStack->uniqueID = 0; + macro_SetUniqueID(0); } -/* - * Set up a macro for parsing - */ -void fstk_RunMacro(char *s, struct MacroArgs *args) +void fstk_RunMacro(char const *macroName, struct MacroArgs *args) { - struct Symbol const *sym = sym_FindSymbol(s); - int nPrintedChars; + dbgPrint("Running macro \"%s\"\n", macroName); - if (sym == NULL) { - error("Macro \"%s\" not defined\n", s); + struct Symbol *macro 
= sym_FindSymbol(macroName); + + if (!macro) { + error("Macro \"%s\" not defined\n", macroName); return; } - if (sym->type != SYM_MACRO) { - error("\"%s\" is not a macro\n", s); + if (macro->type != SYM_MACRO) { + error("\"%s\" is not a macro\n", macroName); return; } + contextStack->macroArgs = macro_GetCurrentArgs(); - pushcontext(); - macro_SetUniqueID(nMacroCount++); - /* Minus 1 because there is a newline at the beginning of the buffer */ - nLineNo = sym->fileLine - 1; + /* Compute total length of this node's name: :: */ + size_t reptNameLen = 0; + struct FileStackNode const *node = macro->src; + + if (node->type == NODE_REPT) { + struct FileStackReptNode const *reptNode = (struct FileStackReptNode const *)node; + + /* 4294967295 = 2^32 - 1, aka UINT32_MAX */ + reptNameLen += reptNode->reptDepth * strlen("::REPT~4294967295"); + /* Look for next named node */ + do { + node = node->parent; + } while (node->type == NODE_REPT); + } + struct FileStackNamedNode const *baseNode = (struct FileStackNamedNode const *)node; + size_t baseLen = strlen(baseNode->name); + size_t macroNameLen = strlen(macro->name); + struct FileStackNamedNode *fileInfo = malloc(sizeof(*fileInfo) + baseLen + + reptNameLen + 2 + macroNameLen + 1); + + if (!fileInfo) { + error("Failed to alloc file info for \"%s\": %s\n", macro->name, strerror(errno)); + return; + } + fileInfo->node.type = NODE_MACRO; + /* Print the name... 
*/ + char *dest = fileInfo->name; + + memcpy(dest, baseNode->name, baseLen); + dest += baseLen; + if (node->type == NODE_REPT) { + struct FileStackReptNode const *reptNode = (struct FileStackReptNode const *)node; + + for (uint32_t i = reptNode->reptDepth; i--; ) { + int nbChars = sprintf(dest, "::REPT~%" PRIu32, reptNode->iters[i]); + + if (nbChars < 0) + fatalerror("Failed to write macro invocation info: %s\n", + strerror(errno)); + dest += nbChars; + } + } + *dest++ = ':'; + *dest++ = ':'; + memcpy(dest, macro->name, macroNameLen + 1); + + newContext((struct FileStackNode *)fileInfo); + /* Line minus 1 because buffer begins with a newline */ + contextStack->lexerState = lexer_OpenFileView(macro->macro, macro->macroSize, + macro->fileLine - 1); + if (!contextStack->lexerState) + fatalerror("Failed to set up lexer for macro invocation\n"); + lexer_SetStateAtEOL(contextStack->lexerState); + contextStack->uniqueID = macro_UseNewUniqueID(); macro_UseNewArgs(args); - nCurrentStatus = STAT_isMacro; - nPrintedChars = snprintf(tzCurrentFileName, _MAX_PATH + 1, - "%s::%s", sym->fileName, s); - if (nPrintedChars > _MAX_PATH) { - popcontext(); - fatalerror("File name + macro name is too large to fit into buffer\n"); - } - - pCurrentMacro = sym; - /* TODO: why is `strlen` being used when there's a macro size field? 
*/ - CurrentFlexHandle = yy_scan_bytes(pCurrentMacro->macro, - strlen(pCurrentMacro->macro)); - yy_switch_to_buffer(CurrentFlexHandle); } -/* - * Set up a repeat block for parsing - */ -void fstk_RunRept(uint32_t count, int32_t nReptLineNo) +void fstk_RunRept(uint32_t count, int32_t reptLineNo, char *body, size_t size) { - if (count) { - static const char *tzReptStr = "::REPT~1"; + dbgPrint("Running REPT(%" PRIu32 ")\n", count); + if (count == 0) + return; - /* For error printing to make sense, fake nLineNo */ - nCurrentREPTBodyLastLine = nLineNo; - nLineNo = nReptLineNo; - pushcontext(); - macro_SetUniqueID(nMacroCount++); - nCurrentREPTBlockCount = count; - nCurrentStatus = STAT_isREPTBlock; - nCurrentREPTBlockSize = ulNewMacroSize; - pCurrentREPTBlock = tzNewMacro; - nCurrentREPTBodyFirstLine = nReptLineNo + 1; - nLineNo = nReptLineNo; + uint32_t reptDepth = contextStack->fileInfo->type == NODE_REPT + ? ((struct FileStackReptNode *)contextStack->fileInfo)->reptDepth + : 0; + struct FileStackReptNode *fileInfo = malloc(sizeof(*fileInfo) + + (reptDepth + 1) * sizeof(fileInfo->iters[0])); - if (strlen(tzCurrentFileName) + strlen(tzReptStr) > _MAX_PATH) - fatalerror("Cannot append \"%s\" to file path\n", tzReptStr); - strcat(tzCurrentFileName, tzReptStr); - - CurrentFlexHandle = - yy_scan_bytes(pCurrentREPTBlock, nCurrentREPTBlockSize); - yy_switch_to_buffer(CurrentFlexHandle); + if (!fileInfo) { + error("Failed to alloc file info for REPT: %s\n", strerror(errno)); + return; } + fileInfo->node.type = NODE_REPT; + fileInfo->reptDepth = reptDepth + 1; + fileInfo->iters[0] = 1; + if (reptDepth) + /* Copy all parent iter counts */ + memcpy(&fileInfo->iters[1], + ((struct FileStackReptNode *)contextStack->fileInfo)->iters, + reptDepth * sizeof(fileInfo->iters[0])); + + newContext((struct FileStackNode *)fileInfo); + /* Correct our line number, which currently points to the `ENDR` line */ + contextStack->fileInfo->lineNo = reptLineNo; + + contextStack->lexerState = 
lexer_OpenFileView(body, size, reptLineNo); + if (!contextStack->lexerState) + fatalerror("Failed to set up lexer for rept block\n"); + lexer_SetStateAtEOL(contextStack->lexerState); + contextStack->uniqueID = macro_UseNewUniqueID(); + contextStack->nbReptIters = count; + } -/* - * Initialize the filestack routines - */ -void fstk_Init(char *pFileName) +void fstk_Init(char const *mainPath, size_t maxRecursionDepth) { - char tzSymFileName[_MAX_PATH + 1 + 2]; + struct LexerState *state = lexer_OpenFile(mainPath); - char *c = pFileName; - int fileNameIndex = 0; + if (!state) + fatalerror("Failed to open main file!\n"); + lexer_SetState(state); + char const *fileName = lexer_GetFileName(); + size_t len = strlen(fileName); + struct Context *context = malloc(sizeof(*contextStack)); + struct FileStackNamedNode *fileInfo = malloc(sizeof(*fileInfo) + len + 1); - tzSymFileName[fileNameIndex++] = '"'; + if (!context) + fatalerror("Failed to allocate memory for main context: %s\n", strerror(errno)); + if (!fileInfo) + fatalerror("Failed to allocate memory for main file info: %s\n", strerror(errno)); - // minus 2 to account for trailing "\"\0" - // minus 1 to avoid a buffer overflow in extreme cases - while (*c && fileNameIndex < sizeof(tzSymFileName) - 2 - 1) { + context->fileInfo = (struct FileStackNode *)fileInfo; + /* lineNo and reptIter are unused on the top-level context */ + context->fileInfo->parent = NULL; + context->fileInfo->referenced = false; + context->fileInfo->type = NODE_FILE; + memcpy(fileInfo->name, fileName, len + 1); - if (*c == '"') { - tzSymFileName[fileNameIndex++] = '\\'; - } + context->parent = NULL; + context->lexerState = state; + context->uniqueID = 0; + macro_SetUniqueID(0); + context->nbReptIters = 0; - tzSymFileName[fileNameIndex++] = *c; - ++c; - } + /* Now that it's set up properly, register the context */ + contextStack = context; - tzSymFileName[fileNameIndex++] = '"'; - tzSymFileName[fileNameIndex] = '\0'; - - sym_AddString("__FILE__", 
tzSymFileName); - - pFileStack = NULL; - if (strcmp(pFileName, "-") == 0) { - pCurrentFile = stdin; + /* + * Check that max recursion depth won't allow overflowing node `malloc`s + * This assumes that the rept node is larger + */ +#define DEPTH_LIMIT ((SIZE_MAX - sizeof(struct FileStackReptNode)) / sizeof(uint32_t)) + if (maxRecursionDepth > DEPTH_LIMIT) { + error("Recursion depth may not be higher than %zu, defaulting to " + EXPAND_AND_STR(DEFAULT_MAX_DEPTH) "\n", DEPTH_LIMIT); + nMaxRecursionDepth = DEFAULT_MAX_DEPTH; } else { - pCurrentFile = fopen(pFileName, "rb"); - if (pCurrentFile == NULL) - fatalerror("Unable to open file '%s': %s\n", pFileName, strerror(errno)); + nMaxRecursionDepth = maxRecursionDepth; } - nFileStackDepth = 0; - - nMacroCount = 0; - nCurrentStatus = STAT_isInclude; - snprintf(tzCurrentFileName, _MAX_PATH + 1, "%s", pFileName); - CurrentFlexHandle = yy_create_buffer(pCurrentFile); - yy_switch_to_buffer(CurrentFlexHandle); - nLineNo = 1; + /* Make sure that the default of 64 is OK, though */ + assert(DEPTH_LIMIT >= DEFAULT_MAX_DEPTH); +#undef DEPTH_LIMIT } diff --git a/src/asm/globlex.c b/src/asm/globlex.c deleted file mode 100644 index 33d2e9c6..00000000 --- a/src/asm/globlex.c +++ /dev/null @@ -1,698 +0,0 @@ -/* - * This file is part of RGBDS. - * - * Copyright (c) 1997-2018, Carsten Sorensen and RGBDS contributors. 
- * - * SPDX-License-Identifier: MIT - */ - -#include -#include -#include -#include -#include -#include - -#include "asm/asm.h" -#include "asm/lexer.h" -#include "asm/macro.h" -#include "asm/main.h" -#include "asm/rpn.h" -#include "asm/section.h" -#include "asm/warning.h" - -#include "helpers.h" - -#include "asmy.h" - -bool oDontExpandStrings; -int32_t nGBGfxID = -1; -int32_t nBinaryID = -1; - -static int32_t gbgfx2bin(char ch) -{ - int32_t i; - - for (i = 0; i <= 3; i++) { - if (CurrentOptions.gbgfx[i] == ch) - return i; - } - - return 0; -} - -static int32_t binary2bin(char ch) -{ - int32_t i; - - for (i = 0; i <= 1; i++) { - if (CurrentOptions.binary[i] == ch) - return i; - } - - return 0; -} - -static int32_t char2bin(char ch) -{ - if (ch >= 'a' && ch <= 'f') - return (ch - 'a' + 10); - - if (ch >= 'A' && ch <= 'F') - return (ch - 'A' + 10); - - if (ch >= '0' && ch <= '9') - return (ch - '0'); - - return 0; -} - -typedef int32_t(*x2bin) (char ch); - -static int32_t ascii2bin(char *s) -{ - char *start = s; - uint32_t radix = 10; - uint32_t result = 0; - x2bin convertfunc = char2bin; - - switch (*s) { - case '$': - radix = 16; - s++; - convertfunc = char2bin; - break; - case '&': - radix = 8; - s++; - convertfunc = char2bin; - break; - case '`': - radix = 4; - s++; - convertfunc = gbgfx2bin; - break; - case '%': - radix = 2; - s++; - convertfunc = binary2bin; - break; - default: - /* Handle below */ - break; - } - - const uint32_t max_q = UINT32_MAX / radix; - const uint32_t max_r = UINT32_MAX % radix; - - if (*s == '\0') { - /* - * There are no digits after the radix prefix - * (or the string is empty, which shouldn't happen). - */ - error("Invalid integer constant\n"); - } else if (radix == 4) { - int32_t size = 0; - int32_t c; - - while (*s != '\0') { - c = convertfunc(*s++); - result = result * 2 + ((c & 2) << 7) + (c & 1); - size++; - } - - /* - * Extending a graphics constant longer than 8 pixels, - * the Game Boy tile width, produces a nonsensical result. 
- */ - if (size > 8) { - warning(WARNING_LARGE_CONSTANT, "Graphics constant '%s' is too long\n", - start); - } - } else { - bool overflow = false; - - while (*s != '\0') { - int32_t digit = convertfunc(*s++); - - if (result > max_q - || (result == max_q && digit > max_r)) { - overflow = true; - } - result = result * radix + digit; - } - - if (overflow) - warning(WARNING_LARGE_CONSTANT, "Integer constant '%s' is too large\n", - start); - } - - return result; -} - -uint32_t ParseFixedPoint(char *s, uint32_t size) -{ - uint32_t i; - uint32_t dot = 0; - - for (i = 0; i < size; i++) { - if (s[i] == '.') { - dot++; - - if (dot == 2) - break; - } - } - - yyskipbytes(i); - - yylval.nConstValue = (int32_t)(atof(s) * 65536); - - return 1; -} - -uint32_t ParseNumber(char *s, uint32_t size) -{ - char dest[256]; - - if (size > 255) - fatalerror("Number token too long\n"); - - strncpy(dest, s, size); - dest[size] = 0; - yylval.nConstValue = ascii2bin(dest); - - yyskipbytes(size); - - return 1; -} - -/* - * If the symbol name ends before the end of the macro arg, - * return a pointer to the rest of the macro arg. - * Otherwise, return NULL. 
- */ -char const *AppendMacroArg(char whichArg, char *dest, size_t *destIndex) -{ - char const *marg; - - if (whichArg == '@') - marg = macro_GetUniqueIDStr(); - else if (whichArg >= '1' && whichArg <= '9') - marg = macro_GetArg(whichArg - '0'); - else - fatalerror("Invalid macro argument '\\%c' in symbol\n", whichArg); - - if (!marg) - fatalerror("Macro argument '\\%c' not defined\n", whichArg); - - char ch; - - while ((ch = *marg) != 0) { - if ((ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || (ch >= '0' && ch <= '9') - || ch == '_' - || ch == '@' - || ch == '#' - || ch == '.') { - if (*destIndex >= MAXSYMLEN) - fatalerror("Symbol too long\n"); - - dest[*destIndex] = ch; - (*destIndex)++; - } else { - return marg; - } - - marg++; - } - - return NULL; -} - -uint32_t ParseSymbol(char *src, uint32_t size) -{ - char dest[MAXSYMLEN + 1]; - size_t srcIndex = 0; - size_t destIndex = 0; - char const *rest = NULL; - - while (srcIndex < size) { - char ch = src[srcIndex++]; - - if (ch == '\\') { - /* - * We don't check if srcIndex is still less than size, - * but that can only fail to be true when the - * following char is neither '@' nor a digit. - * In that case, AppendMacroArg() will catch the error. - */ - ch = src[srcIndex++]; - - rest = AppendMacroArg(ch, dest, &destIndex); - /* If the symbol's end was in the middle of the token */ - if (rest) - break; - } else { - if (destIndex >= MAXSYMLEN) - fatalerror("Symbol too long\n"); - dest[destIndex++] = ch; - } - } - - dest[destIndex] = 0; - - /* Tell the lexer we read all bytes that we did */ - yyskipbytes(srcIndex); - - /* - * If an escape's expansion left some chars after the symbol's end, - * such as the `::` in a `Backup\1` expanded to `BackupCamX::`, - * put those into the buffer. - * Note that this NEEDS to be done after the `yyskipbytes` above. 
- */ - if (rest) - yyunputstr(rest); - - /* If the symbol is an EQUS, expand it */ - if (!oDontExpandStrings) { - struct Symbol const *sym = sym_FindSymbol(dest); - - if (sym && sym->type == SYM_EQUS) { - char const *s; - - lex_BeginStringExpansion(dest); - - /* Feed the symbol's contents into the buffer */ - yyunputstr(s = sym_GetStringValue(sym)); - - /* Lines inserted this way shall not increase nLineNo */ - while (*s) { - if (*s++ == '\n') - nLineNo--; - } - return 0; - } - } - - strcpy(yylval.tzSym, dest); - return 1; -} - -uint32_t PutMacroArg(char *src, uint32_t size) -{ - char const *s; - - yyskipbytes(size); - if ((size == 2 && src[1] >= '1' && src[1] <= '9')) { - s = macro_GetArg(src[1] - '0'); - - if (s != NULL) - yyunputstr(s); - else - error("Macro argument '\\%c' not defined\n", src[1]); - } else { - error("Invalid macro argument '\\%c'\n", src[1]); - } - return 0; -} - -uint32_t PutUniqueID(char *src, uint32_t size) -{ - (void)src; - char const *s; - - yyskipbytes(size); - - s = macro_GetUniqueIDStr(); - - if (s != NULL) - yyunputstr(s); - else - error("Macro unique label string not defined\n"); - - return 0; -} - -enum { - T_LEX_MACROARG = 3000, - T_LEX_MACROUNIQUE -}; - -const struct sLexInitString lexer_strings[] = { - {"adc", T_Z80_ADC}, - {"add", T_Z80_ADD}, - {"and", T_Z80_AND}, - {"bit", T_Z80_BIT}, - {"call", T_Z80_CALL}, - {"ccf", T_Z80_CCF}, - {"cpl", T_Z80_CPL}, - {"cp", T_Z80_CP}, - {"daa", T_Z80_DAA}, - {"dec", T_Z80_DEC}, - {"di", T_Z80_DI}, - {"ei", T_Z80_EI}, - {"halt", T_Z80_HALT}, - {"inc", T_Z80_INC}, - {"jp", T_Z80_JP}, - {"jr", T_Z80_JR}, - {"ld", T_Z80_LD}, - {"ldi", T_Z80_LDI}, - {"ldd", T_Z80_LDD}, - {"ldio", T_Z80_LDIO}, - {"ldh", T_Z80_LDIO}, - {"nop", T_Z80_NOP}, - {"or", T_Z80_OR}, - {"pop", T_Z80_POP}, - {"push", T_Z80_PUSH}, - {"res", T_Z80_RES}, - {"reti", T_Z80_RETI}, - {"ret", T_Z80_RET}, - {"rlca", T_Z80_RLCA}, - {"rlc", T_Z80_RLC}, - {"rla", T_Z80_RLA}, - {"rl", T_Z80_RL}, - {"rrc", T_Z80_RRC}, - {"rrca", 
T_Z80_RRCA}, - {"rra", T_Z80_RRA}, - {"rr", T_Z80_RR}, - {"rst", T_Z80_RST}, - {"sbc", T_Z80_SBC}, - {"scf", T_Z80_SCF}, - {"set", T_POP_SET}, - {"sla", T_Z80_SLA}, - {"sra", T_Z80_SRA}, - {"srl", T_Z80_SRL}, - {"stop", T_Z80_STOP}, - {"sub", T_Z80_SUB}, - {"swap", T_Z80_SWAP}, - {"xor", T_Z80_XOR}, - - {"nz", T_CC_NZ}, - {"z", T_CC_Z}, - {"nc", T_CC_NC}, - /* Handled in list of registers */ - /* { "c", T_TOKEN_C }, */ - - {"hli", T_MODE_HL_INC}, - {"hld", T_MODE_HL_DEC}, - {"$ff00+c", T_MODE_HW_C}, - {"$ff00 + c", T_MODE_HW_C}, - {"af", T_MODE_AF}, - {"bc", T_MODE_BC}, - {"de", T_MODE_DE}, - {"hl", T_MODE_HL}, - {"sp", T_MODE_SP}, - - {"a", T_TOKEN_A}, - {"b", T_TOKEN_B}, - {"c", T_TOKEN_C}, - {"d", T_TOKEN_D}, - {"e", T_TOKEN_E}, - {"h", T_TOKEN_H}, - {"l", T_TOKEN_L}, - - {"||", T_OP_LOGICOR}, - {"&&", T_OP_LOGICAND}, - {"==", T_OP_LOGICEQU}, - {">", T_OP_LOGICGT}, - {"<", T_OP_LOGICLT}, - {">=", T_OP_LOGICGE}, - {"<=", T_OP_LOGICLE}, - {"!=", T_OP_LOGICNE}, - {"!", T_OP_LOGICNOT}, - {"|", T_OP_OR}, - {"^", T_OP_XOR}, - {"&", T_OP_AND}, - {"<<", T_OP_SHL}, - {">>", T_OP_SHR}, - {"+", T_OP_ADD}, - {"-", T_OP_SUB}, - {"*", T_OP_MUL}, - {"/", T_OP_DIV}, - {"%", T_OP_MOD}, - {"~", T_OP_NOT}, - - {"def", T_OP_DEF}, - - {"fragment", T_POP_FRAGMENT}, - {"bank", T_OP_BANK}, - {"align", T_OP_ALIGN}, - - {"round", T_OP_ROUND}, - {"ceil", T_OP_CEIL}, - {"floor", T_OP_FLOOR}, - {"div", T_OP_FDIV}, - {"mul", T_OP_FMUL}, - {"sin", T_OP_SIN}, - {"cos", T_OP_COS}, - {"tan", T_OP_TAN}, - {"asin", T_OP_ASIN}, - {"acos", T_OP_ACOS}, - {"atan", T_OP_ATAN}, - {"atan2", T_OP_ATAN2}, - - {"high", T_OP_HIGH}, - {"low", T_OP_LOW}, - {"isconst", T_OP_ISCONST}, - - {"strcmp", T_OP_STRCMP}, - {"strin", T_OP_STRIN}, - {"strsub", T_OP_STRSUB}, - {"strlen", T_OP_STRLEN}, - {"strcat", T_OP_STRCAT}, - {"strupr", T_OP_STRUPR}, - {"strlwr", T_OP_STRLWR}, - - {"include", T_POP_INCLUDE}, - {"printt", T_POP_PRINTT}, - {"printi", T_POP_PRINTI}, - {"printv", T_POP_PRINTV}, - {"printf", T_POP_PRINTF}, 
- {"export", T_POP_EXPORT}, - {"xdef", T_POP_XDEF}, - {"global", T_POP_GLOBAL}, - {"ds", T_POP_DS}, - {"db", T_POP_DB}, - {"dw", T_POP_DW}, - {"dl", T_POP_DL}, - {"section", T_POP_SECTION}, - {"purge", T_POP_PURGE}, - - {"rsreset", T_POP_RSRESET}, - {"rsset", T_POP_RSSET}, - - {"incbin", T_POP_INCBIN}, - {"charmap", T_POP_CHARMAP}, - {"newcharmap", T_POP_NEWCHARMAP}, - {"setcharmap", T_POP_SETCHARMAP}, - {"pushc", T_POP_PUSHC}, - {"popc", T_POP_POPC}, - - {"fail", T_POP_FAIL}, - {"warn", T_POP_WARN}, - {"fatal", T_POP_FATAL}, - {"assert", T_POP_ASSERT}, - {"static_assert", T_POP_STATIC_ASSERT}, - - {"macro", T_POP_MACRO}, - /* Not needed but we have it here just to protect the name */ - {"endm", T_POP_ENDM}, - {"shift", T_POP_SHIFT}, - - {"rept", T_POP_REPT}, - /* Not needed but we have it here just to protect the name */ - {"endr", T_POP_ENDR}, - - {"load", T_POP_LOAD}, - {"endl", T_POP_ENDL}, - - {"if", T_POP_IF}, - {"else", T_POP_ELSE}, - {"elif", T_POP_ELIF}, - {"endc", T_POP_ENDC}, - - {"union", T_POP_UNION}, - {"nextu", T_POP_NEXTU}, - {"endu", T_POP_ENDU}, - - {"wram0", T_SECT_WRAM0}, - {"vram", T_SECT_VRAM}, - {"romx", T_SECT_ROMX}, - {"rom0", T_SECT_ROM0}, - {"hram", T_SECT_HRAM}, - {"wramx", T_SECT_WRAMX}, - {"sram", T_SECT_SRAM}, - {"oam", T_SECT_OAM}, - - {"rb", T_POP_RB}, - {"rw", T_POP_RW}, - {"equ", T_POP_EQU}, - {"equs", T_POP_EQUS}, - - /* Handled before in list of CPU instructions */ - /* {"set", T_POP_SET}, */ - {"=", T_POP_EQUAL}, - - {"pushs", T_POP_PUSHS}, - {"pops", T_POP_POPS}, - {"pusho", T_POP_PUSHO}, - {"popo", T_POP_POPO}, - - {"opt", T_POP_OPT}, - - {NULL, 0} -}; - -const struct sLexFloat tNumberToken = { - ParseNumber, - T_NUMBER -}; - -const struct sLexFloat tFixedPointToken = { - ParseFixedPoint, - T_NUMBER -}; - -const struct sLexFloat tIDToken = { - ParseSymbol, - T_ID -}; - -const struct sLexFloat tMacroArgToken = { - PutMacroArg, - T_LEX_MACROARG -}; - -const struct sLexFloat tMacroUniqueToken = { - PutUniqueID, - 
T_LEX_MACROUNIQUE -}; - -void setup_lexer(void) -{ - uint32_t id; - - lex_Init(); - lex_AddStrings(lexer_strings); - - //Macro arguments - - id = lex_FloatAlloc(&tMacroArgToken); - lex_FloatAddFirstRange(id, '\\', '\\'); - lex_FloatAddSecondRange(id, '1', '9'); - id = lex_FloatAlloc(&tMacroUniqueToken); - lex_FloatAddFirstRange(id, '\\', '\\'); - lex_FloatAddSecondRange(id, '@', '@'); - - //Decimal constants - - id = lex_FloatAlloc(&tNumberToken); - lex_FloatAddFirstRange(id, '0', '9'); - lex_FloatAddSecondRange(id, '0', '9'); - lex_FloatAddRange(id, '0', '9'); - - //Binary constants - - id = lex_FloatAlloc(&tNumberToken); - nBinaryID = id; - lex_FloatAddFirstRange(id, '%', '%'); - lex_FloatAddSecondRange(id, CurrentOptions.binary[0], - CurrentOptions.binary[0]); - lex_FloatAddSecondRange(id, CurrentOptions.binary[1], - CurrentOptions.binary[1]); - lex_FloatAddRange(id, CurrentOptions.binary[0], - CurrentOptions.binary[0]); - lex_FloatAddRange(id, CurrentOptions.binary[1], - CurrentOptions.binary[1]); - - //Octal constants - - id = lex_FloatAlloc(&tNumberToken); - lex_FloatAddFirstRange(id, '&', '&'); - lex_FloatAddSecondRange(id, '0', '7'); - lex_FloatAddRange(id, '0', '7'); - - //Gameboy gfx constants - - id = lex_FloatAlloc(&tNumberToken); - nGBGfxID = id; - lex_FloatAddFirstRange(id, '`', '`'); - lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[0], - CurrentOptions.gbgfx[0]); - lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[1], - CurrentOptions.gbgfx[1]); - lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[2], - CurrentOptions.gbgfx[2]); - lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[3], - CurrentOptions.gbgfx[3]); - lex_FloatAddRange(id, CurrentOptions.gbgfx[0], CurrentOptions.gbgfx[0]); - lex_FloatAddRange(id, CurrentOptions.gbgfx[1], CurrentOptions.gbgfx[1]); - lex_FloatAddRange(id, CurrentOptions.gbgfx[2], CurrentOptions.gbgfx[2]); - lex_FloatAddRange(id, CurrentOptions.gbgfx[3], CurrentOptions.gbgfx[3]); - - //Hex constants - - id = 
lex_FloatAlloc(&tNumberToken); - lex_FloatAddFirstRange(id, '$', '$'); - lex_FloatAddSecondRange(id, '0', '9'); - lex_FloatAddSecondRange(id, 'A', 'F'); - lex_FloatAddSecondRange(id, 'a', 'f'); - lex_FloatAddRange(id, '0', '9'); - lex_FloatAddRange(id, 'A', 'F'); - lex_FloatAddRange(id, 'a', 'f'); - - //ID 's - - id = lex_FloatAlloc(&tIDToken); - lex_FloatAddFirstRange(id, 'a', 'z'); - lex_FloatAddFirstRange(id, 'A', 'Z'); - lex_FloatAddFirstRange(id, '_', '_'); - lex_FloatAddSecondRange(id, '.', '.'); - lex_FloatAddSecondRange(id, 'a', 'z'); - lex_FloatAddSecondRange(id, 'A', 'Z'); - lex_FloatAddSecondRange(id, '0', '9'); - lex_FloatAddSecondRange(id, '_', '_'); - lex_FloatAddSecondRange(id, '\\', '\\'); - lex_FloatAddSecondRange(id, '@', '@'); - lex_FloatAddSecondRange(id, '#', '#'); - lex_FloatAddRange(id, '.', '.'); - lex_FloatAddRange(id, 'a', 'z'); - lex_FloatAddRange(id, 'A', 'Z'); - lex_FloatAddRange(id, '0', '9'); - lex_FloatAddRange(id, '_', '_'); - lex_FloatAddRange(id, '\\', '\\'); - lex_FloatAddRange(id, '@', '@'); - lex_FloatAddRange(id, '#', '#'); - - //Local ID - - id = lex_FloatAlloc(&tIDToken); - lex_FloatAddFirstRange(id, '.', '.'); - lex_FloatAddSecondRange(id, 'a', 'z'); - lex_FloatAddSecondRange(id, 'A', 'Z'); - lex_FloatAddSecondRange(id, '_', '_'); - lex_FloatAddRange(id, 'a', 'z'); - lex_FloatAddRange(id, 'A', 'Z'); - lex_FloatAddRange(id, '0', '9'); - lex_FloatAddRange(id, '_', '_'); - lex_FloatAddRange(id, '\\', '\\'); - lex_FloatAddRange(id, '@', '@'); - lex_FloatAddRange(id, '#', '#'); - - // "@" - - id = lex_FloatAlloc(&tIDToken); - lex_FloatAddFirstRange(id, '@', '@'); - - //Fixed point constants - - id = lex_FloatAlloc(&tFixedPointToken); - lex_FloatAddFirstRange(id, '.', '.'); - lex_FloatAddFirstRange(id, '0', '9'); - lex_FloatAddSecondRange(id, '.', '.'); - lex_FloatAddSecondRange(id, '0', '9'); - lex_FloatAddRange(id, '.', '.'); - lex_FloatAddRange(id, '0', '9'); -} diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 
e9a15215..146dd2c5 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -1,1054 +1,2094 @@ /* * This file is part of RGBDS. * - * Copyright (c) 1997-2019, Carsten Sorensen and RGBDS contributors. + * Copyright (c) 2020, Eldred Habert and RGBDS contributors. * * SPDX-License-Identifier: MIT */ +#include +#include #include #include +#include +#include #include -#include +#include +#include #include +#include #include #include +#include + +#include "extern/utf8decoder.h" +#include "platform.h" /* For `ssize_t` */ #include "asm/asm.h" -#include "asm/fstack.h" #include "asm/lexer.h" +#include "asm/fstack.h" #include "asm/macro.h" #include "asm/main.h" #include "asm/rpn.h" -#include "asm/section.h" +#include "asm/symbol.h" +#include "asm/util.h" #include "asm/warning.h" +/* Include this last so it gets all type & constant definitions */ +#include "asmy.h" /* For token definitions, generated from asmy.y */ -#include "extern/err.h" +#ifdef LEXER_DEBUG + #define dbgPrint(...) fprintf(stderr, "[lexer] " __VA_ARGS__) +#else + #define dbgPrint(...) 
+#endif -#include "asmy.h" -#include "platform.h" // strncasecmp, strdup +/* Neither MSVC nor MinGW provide `mmap` */ +#if defined(_MSC_VER) || defined(__MINGW32__) +# include +# include +# include +# define MAP_FAILED NULL +# define mapFile(ptr, fd, path, size) do { \ + (ptr) = MAP_FAILED; \ + HANDLE file = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, \ + FILE_FLAG_POSIX_SEMANTICS | FILE_FLAG_RANDOM_ACCESS, NULL); \ + HANDLE mappingObj; \ + \ + if (file == INVALID_HANDLE_VALUE) \ + break; \ + mappingObj = CreateFileMappingA(file, NULL, PAGE_READONLY, 0, 0, NULL); \ + if (mappingObj != NULL) { /* CreateFileMappingA() reports failure with NULL, not INVALID_HANDLE_VALUE */ \ + (ptr) = MapViewOfFile(mappingObj, FILE_MAP_READ, 0, 0, 0); \ + CloseHandle(mappingObj); \ + } \ + CloseHandle(file); \ +} while (0) +# define munmap(ptr, size) UnmapViewOfFile((ptr)) -struct sLexString { - char *tzName; - uint32_t nToken; - uint32_t nNameLength; - struct sLexString *next; +#else /* defined(_MSC_VER) || defined(__MINGW32__) */ + +# include +# define mapFile(ptr, fd, path, size) do { \ + (ptr) = mmap(NULL, (size), PROT_READ, MAP_PRIVATE, (fd), 0); \ + \ + if ((ptr) == MAP_FAILED && errno == ENOTSUP) { \ + /* + * The implementation may not support MAP_PRIVATE; try again with MAP_SHARED + * instead, offering, I believe, weaker guarantees about external modifications to + * the file while reading it. That's still better than not opening it at all, though + */ \ + if (verbose) \ + printf("mmap(%s, MAP_PRIVATE) failed, retrying with MAP_SHARED\n", path); \ + (ptr) = mmap(NULL, (size), PROT_READ, MAP_SHARED, (fd), 0); \ + } \ +} while (0) +#endif /* !( defined(_MSC_VER) || defined(__MINGW32__) ) */ + +/* + * Identifiers that are also keywords are listed here. This ONLY applies to ones + * that would normally be matched as identifiers! Check out `yylex_NORMAL` to + * see how this is used. + * Tokens / keywords not handled here are handled in `yylex_NORMAL`'s switch. 
+ */ +static struct KeywordMapping { + char const *name; + int token; +} const keywords[] = { + /* + * CAUTION when editing this: adding keywords will probably require extra nodes in the + * `keywordDict` array. If you forget to, you will probably trip up an assertion, anyways. + * Also, all entries in this array must be in uppercase for the dict to build correctly. + */ + {"ADC", T_Z80_ADC}, + {"ADD", T_Z80_ADD}, + {"AND", T_Z80_AND}, + {"BIT", T_Z80_BIT}, + {"CALL", T_Z80_CALL}, + {"CCF", T_Z80_CCF}, + {"CPL", T_Z80_CPL}, + {"CP", T_Z80_CP}, + {"DAA", T_Z80_DAA}, + {"DEC", T_Z80_DEC}, + {"DI", T_Z80_DI}, + {"EI", T_Z80_EI}, + {"HALT", T_Z80_HALT}, + {"INC", T_Z80_INC}, + {"JP", T_Z80_JP}, + {"JR", T_Z80_JR}, + {"LD", T_Z80_LD}, + {"LDI", T_Z80_LDI}, + {"LDD", T_Z80_LDD}, + {"LDIO", T_Z80_LDIO}, + {"LDH", T_Z80_LDIO}, + {"NOP", T_Z80_NOP}, + {"OR", T_Z80_OR}, + {"POP", T_Z80_POP}, + {"PUSH", T_Z80_PUSH}, + {"RES", T_Z80_RES}, + {"RETI", T_Z80_RETI}, + {"RET", T_Z80_RET}, + {"RLCA", T_Z80_RLCA}, + {"RLC", T_Z80_RLC}, + {"RLA", T_Z80_RLA}, + {"RL", T_Z80_RL}, + {"RRC", T_Z80_RRC}, + {"RRCA", T_Z80_RRCA}, + {"RRA", T_Z80_RRA}, + {"RR", T_Z80_RR}, + {"RST", T_Z80_RST}, + {"SBC", T_Z80_SBC}, + {"SCF", T_Z80_SCF}, + {"SET", T_POP_SET}, + {"SLA", T_Z80_SLA}, + {"SRA", T_Z80_SRA}, + {"SRL", T_Z80_SRL}, + {"STOP", T_Z80_STOP}, + {"SUB", T_Z80_SUB}, + {"SWAP", T_Z80_SWAP}, + {"XOR", T_Z80_XOR}, + + {"NZ", T_CC_NZ}, + {"Z", T_CC_Z}, + {"NC", T_CC_NC}, + /* Handled in list of registers */ + /* { "C", T_CC_C }, */ + + {"AF", T_MODE_AF}, + {"BC", T_MODE_BC}, + {"DE", T_MODE_DE}, + {"HL", T_MODE_HL}, + {"SP", T_MODE_SP}, + {"HLD", T_MODE_HL_DEC}, + {"HLI", T_MODE_HL_INC}, + + {"A", T_TOKEN_A}, + {"B", T_TOKEN_B}, + {"C", T_TOKEN_C}, + {"D", T_TOKEN_D}, + {"E", T_TOKEN_E}, + {"H", T_TOKEN_H}, + {"L", T_TOKEN_L}, + + {"DEF", T_OP_DEF}, + + {"FRAGMENT", T_POP_FRAGMENT}, + {"BANK", T_OP_BANK}, + {"ALIGN", T_OP_ALIGN}, + + {"ROUND", T_OP_ROUND}, + {"CEIL", T_OP_CEIL}, + {"FLOOR", 
T_OP_FLOOR}, + {"DIV", T_OP_FDIV}, + {"MUL", T_OP_FMUL}, + {"SIN", T_OP_SIN}, + {"COS", T_OP_COS}, + {"TAN", T_OP_TAN}, + {"ASIN", T_OP_ASIN}, + {"ACOS", T_OP_ACOS}, + {"ATAN", T_OP_ATAN}, + {"ATAN2", T_OP_ATAN2}, + + {"HIGH", T_OP_HIGH}, + {"LOW", T_OP_LOW}, + {"ISCONST", T_OP_ISCONST}, + + {"STRCMP", T_OP_STRCMP}, + {"STRIN", T_OP_STRIN}, + {"STRSUB", T_OP_STRSUB}, + {"STRLEN", T_OP_STRLEN}, + {"STRCAT", T_OP_STRCAT}, + {"STRUPR", T_OP_STRUPR}, + {"STRLWR", T_OP_STRLWR}, + + {"INCLUDE", T_POP_INCLUDE}, + {"PRINTT", T_POP_PRINTT}, + {"PRINTI", T_POP_PRINTI}, + {"PRINTV", T_POP_PRINTV}, + {"PRINTF", T_POP_PRINTF}, + {"EXPORT", T_POP_EXPORT}, + {"XDEF", T_POP_XDEF}, + {"GLOBAL", T_POP_GLOBAL}, + {"DS", T_POP_DS}, + {"DB", T_POP_DB}, + {"DW", T_POP_DW}, + {"DL", T_POP_DL}, + {"SECTION", T_POP_SECTION}, + {"PURGE", T_POP_PURGE}, + + {"RSRESET", T_POP_RSRESET}, + {"RSSET", T_POP_RSSET}, + + {"INCBIN", T_POP_INCBIN}, + {"CHARMAP", T_POP_CHARMAP}, + {"NEWCHARMAP", T_POP_NEWCHARMAP}, + {"SETCHARMAP", T_POP_SETCHARMAP}, + {"PUSHC", T_POP_PUSHC}, + {"POPC", T_POP_POPC}, + + {"FAIL", T_POP_FAIL}, + {"WARN", T_POP_WARN}, + {"FATAL", T_POP_FATAL}, + {"ASSERT", T_POP_ASSERT}, + {"STATIC_ASSERT", T_POP_STATIC_ASSERT}, + + {"MACRO", T_POP_MACRO}, + {"ENDM", T_POP_ENDM}, + {"SHIFT", T_POP_SHIFT}, + + {"REPT", T_POP_REPT}, + {"ENDR", T_POP_ENDR}, + + {"LOAD", T_POP_LOAD}, + {"ENDL", T_POP_ENDL}, + + {"IF", T_POP_IF}, + {"ELSE", T_POP_ELSE}, + {"ELIF", T_POP_ELIF}, + {"ENDC", T_POP_ENDC}, + + {"UNION", T_POP_UNION}, + {"NEXTU", T_POP_NEXTU}, + {"ENDU", T_POP_ENDU}, + + {"WRAM0", T_SECT_WRAM0}, + {"VRAM", T_SECT_VRAM}, + {"ROMX", T_SECT_ROMX}, + {"ROM0", T_SECT_ROM0}, + {"HRAM", T_SECT_HRAM}, + {"WRAMX", T_SECT_WRAMX}, + {"SRAM", T_SECT_SRAM}, + {"OAM", T_SECT_OAM}, + + {"RB", T_POP_RB}, + {"RW", T_POP_RW}, + {"EQU", T_POP_EQU}, + {"EQUS", T_POP_EQUS}, + + /* Handled before in list of CPU instructions */ + /* {"SET", T_POP_SET}, */ + + {"PUSHS", T_POP_PUSHS}, + {"POPS", T_POP_POPS}, 
+ {"PUSHO", T_POP_PUSHO}, + {"POPO", T_POP_POPO}, + + {"OPT", T_POP_OPT} }; -#define pLexBufferRealStart (pCurrentBuffer->pBufferRealStart) -#define pLexBuffer (pCurrentBuffer->pBuffer) -#define AtLineStart (pCurrentBuffer->oAtLineStart) - -#define SAFETYMARGIN 1024 - -#define BOM_SIZE 3 - -struct sLexFloat tLexFloat[32]; -struct sLexString *tLexHash[LEXHASHSIZE]; -YY_BUFFER_STATE pCurrentBuffer; -uint32_t nLexMaxLength; // max length of all keywords and operators - -uint32_t tFloatingSecondChar[256]; -uint32_t tFloatingFirstChar[256]; -uint32_t tFloatingChars[256]; -uint32_t nFloating; -enum eLexerState lexerstate = LEX_STATE_NORMAL; - -struct sStringExpansionPos *pCurrentStringExpansion; -static unsigned int nNbStringExpansions; - -/* UTF-8 byte order mark */ -static const unsigned char bom[BOM_SIZE] = { 0xEF, 0xBB, 0xBF }; - -void upperstring(char *s) +static bool isWhitespace(int c) { - while (*s) { - *s = toupper(*s); - s++; + return c == ' ' || c == '\t'; +} + +#define LEXER_BUF_SIZE 42 /* TODO: determine a sane value for this */ +/* This caps the size of buffer reads, and according to POSIX, passing more than SSIZE_MAX is UB */ +static_assert(LEXER_BUF_SIZE <= SSIZE_MAX, "Lexer buffer size is too large"); + +struct Expansion { + struct Expansion *firstChild; + struct Expansion *next; + char *name; + char const *contents; + size_t len; + size_t totalLen; + size_t distance; /* Distance between the beginning of this expansion and of its parent */ + uint8_t skip; /* How many extra characters to skip after the expansion is over */ +}; + +struct LexerState { + char const *path; + + /* mmap()-dependent IO state */ + bool isMmapped; + union { + struct { /* If mmap()ed */ + char *ptr; /* Technically `const` during the lexer's execution */ + off_t size; + off_t offset; + bool isReferenced; /* If a macro in this file requires not unmapping it */ + }; + struct { /* Otherwise */ + int fd; + size_t index; /* Read index into the buffer */ + char buf[LEXER_BUF_SIZE]; /* 
Circular buffer */ + size_t nbChars; /* Number of "fresh" chars in the buffer */ + }; + }; + + /* Common state */ + bool isFile; + + enum LexerMode mode; + bool atLineStart; + uint32_t lineNo; + uint32_t colNo; + int lastToken; + + bool capturing; /* Whether the text being lexed should be captured */ + size_t captureSize; /* Amount of text captured */ + char *captureBuf; /* Buffer to send the captured text to if non-NULL */ + size_t captureCapacity; /* Size of the buffer above */ + + bool disableMacroArgs; + size_t macroArgScanDistance; /* Max distance already scanned for macro args */ + bool expandStrings; + struct Expansion *expansions; + size_t expansionOfs; /* Offset into the current top-level expansion (negative = before) */ +}; + +struct LexerState *lexerState = NULL; +struct LexerState *lexerStateEOL = NULL; + +static void initState(struct LexerState *state) +{ + state->mode = LEXER_NORMAL; + state->atLineStart = true; /* yylex() will init colNo due to this */ + state->lastToken = 0; + + state->capturing = false; + state->captureBuf = NULL; + + state->disableMacroArgs = false; + state->macroArgScanDistance = 0; + state->expandStrings = true; + state->expansions = NULL; + state->expansionOfs = 0; +} + +struct LexerState *lexer_OpenFile(char const *path) +{ + dbgPrint("Opening file \"%s\"\n", path); + + bool isStdin = !strcmp(path, "-"); + struct LexerState *state = malloc(sizeof(*state)); + struct stat fileInfo; + + /* Give stdin a nicer file name */ + if (isStdin) + path = ""; + if (!state) { + error("Failed to allocate memory for lexer state: %s\n", strerror(errno)); + return NULL; } -} - -void lowerstring(char *s) -{ - while (*s) { - *s = tolower(*s); - s++; + if (!isStdin && stat(path, &fileInfo) != 0) { + error("Failed to stat file \"%s\": %s\n", path, strerror(errno)); + free(state); + return NULL; } -} + state->path = path; + state->isFile = true; + state->fd = isStdin ? 
STDIN_FILENO : open(path, O_RDONLY); + if (state->fd == -1) { + /* stat() succeeding does not guarantee open() will (e.g. missing read permission) */ + error("Failed to open file \"%s\": %s\n", path, strerror(errno)); + free(state); + return NULL; + } + state->isMmapped = false; /* By default, assume it won't be mmap()ed */ + if (!isStdin && fileInfo.st_size > 0) { + /* Try using `mmap` for better performance */ -void yyskipbytes(uint32_t count) -{ - pLexBuffer += count; -} + /* + * Important: do NOT assign to `state->ptr` directly, to avoid a cast that may + * alter an eventual `MAP_FAILED` value. It would also invalidate `state->fd`, + * being on the other side of the union. + */ + void *mappingAddr; -void yyunputbytes(uint32_t count) -{ - pLexBuffer -= count; -} - -void yyunput(char c) -{ - if (pLexBuffer <= pLexBufferRealStart) - fatalerror("Buffer safety margin exceeded\n"); - - *(--pLexBuffer) = c; -} - -void yyunputstr(const char *s) -{ - int32_t len; - - len = strlen(s); - - /* - * It would be undefined behavior to subtract `len` from pLexBuffer and - * potentially have it point outside of pLexBufferRealStart's buffer, - * this is why the check is done this way. - * Refer to https://github.com/rednex/rgbds/pull/411#discussion_r319779797 - */ - if (pLexBuffer - pLexBufferRealStart < len) - fatalerror("Buffer safety margin exceeded\n"); - - pLexBuffer -= len; - - memcpy(pLexBuffer, s, len); -} - -/* - * Marks that a new string expansion with name `tzName` ends here - * Enforces recursion depth - */ -void lex_BeginStringExpansion(const char *tzName) -{ - if (++nNbStringExpansions > nMaxRecursionDepth) - fatalerror("Recursion limit (%u) exceeded\n", nMaxRecursionDepth); - - struct sStringExpansionPos *pNewStringExpansion = - malloc(sizeof(*pNewStringExpansion)); - char *tzNewExpansionName = strdup(tzName); - - if (!pNewStringExpansion || !tzNewExpansionName) - fatalerror("Could not allocate memory to expand '%s'\n", tzName); - - pNewStringExpansion->tzName = tzNewExpansionName; - pNewStringExpansion->pBuffer = pLexBufferRealStart; - pNewStringExpansion->pBufferPos = pLexBuffer; - pNewStringExpansion->pParent = pCurrentStringExpansion; - - pCurrentStringExpansion = 
pNewStringExpansion; -} - -void yy_switch_to_buffer(YY_BUFFER_STATE buf) -{ - pCurrentBuffer = buf; -} - -void yy_set_state(enum eLexerState i) -{ - lexerstate = i; -} - -void yy_delete_buffer(YY_BUFFER_STATE buf) -{ - free(buf->pBufferStart - SAFETYMARGIN); - free(buf); -} - -/* - * Maintains the following invariants: - * 1. nBufferSize < capacity - * 2. The buffer is terminated with 0 - * 3. nBufferSize is the size without the terminator - */ -static void yy_buffer_append(YY_BUFFER_STATE buf, size_t capacity, char c) -{ - assert(buf->pBufferStart[buf->nBufferSize] == 0); - assert(buf->nBufferSize + 1 < capacity); - - buf->pBufferStart[buf->nBufferSize++] = c; - buf->pBufferStart[buf->nBufferSize] = 0; -} - -static void yy_buffer_append_newlines(YY_BUFFER_STATE buf, size_t capacity) -{ - /* Add newline if file doesn't end with one */ - if (buf->nBufferSize == 0 - || buf->pBufferStart[buf->nBufferSize - 1] != '\n') - yy_buffer_append(buf, capacity, '\n'); - - /* Add newline if \ will eat the last newline */ - if (buf->nBufferSize >= 2) { - size_t pos = buf->nBufferSize - 2; - - /* Skip spaces and tabs */ - while (pos > 0 && (buf->pBufferStart[pos] == ' ' - || buf->pBufferStart[pos] == '\t')) - pos--; - - if (buf->pBufferStart[pos] == '\\') - yy_buffer_append(buf, capacity, '\n'); - } -} - -YY_BUFFER_STATE yy_scan_bytes(char const *mem, uint32_t size) -{ - YY_BUFFER_STATE pBuffer = malloc(sizeof(struct yy_buffer_state)); - - if (pBuffer == NULL) - fatalerror("%s: Out of memory!\n", __func__); - - size_t capacity = size + 3; /* space for 2 newlines and terminator */ - - pBuffer->pBufferRealStart = malloc(capacity + SAFETYMARGIN); - - if (pBuffer->pBufferRealStart == NULL) - fatalerror("%s: Out of memory for buffer!\n", __func__); - - pBuffer->pBufferStart = pBuffer->pBufferRealStart + SAFETYMARGIN; - pBuffer->pBuffer = pBuffer->pBufferRealStart + SAFETYMARGIN; - memcpy(pBuffer->pBuffer, mem, size); - pBuffer->pBuffer[size] = 0; - pBuffer->nBufferSize = size; - 
yy_buffer_append_newlines(pBuffer, capacity); - pBuffer->oAtLineStart = 1; - - return pBuffer; -} - -YY_BUFFER_STATE yy_create_buffer(FILE *f) -{ - YY_BUFFER_STATE pBuffer = malloc(sizeof(struct yy_buffer_state)); - - if (pBuffer == NULL) - fatalerror("%s: Out of memory!\n", __func__); - - size_t size = 0, capacity = -1; - char *buf = NULL; - - /* - * Check if we can get the file size without implementation-defined - * behavior: - * - * From ftell(3p): - * [On error], ftell() and ftello() shall return −1, and set errno to - * indicate the error. - * - * The ftell() and ftello() functions shall fail if: [...] - * ESPIPE The file descriptor underlying stream is associated with a - * pipe, FIFO, or socket. - * - * From fseek(3p): - * The behavior of fseek() on devices which are incapable of seeking - * is implementation-defined. - */ - if (ftell(f) != -1) { - fseek(f, 0, SEEK_END); - capacity = ftell(f); - rewind(f); - } - - // If ftell errored or the block above wasn't executed - if (capacity == -1) - capacity = 4096; - // Handle 0-byte files gracefully - else if (capacity == 0) - capacity = 1; - - do { - if (buf == NULL || size >= capacity) { - if (buf) - capacity *= 2; - /* Give extra room for 2 newlines and terminator */ - buf = realloc(buf, capacity + SAFETYMARGIN + 3); - - if (buf == NULL) - fatalerror("%s: Out of memory for buffer!\n", - __func__); - } - - char *bufpos = buf + SAFETYMARGIN + size; - size_t read_count = fread(bufpos, 1, capacity - size, f); - - if (read_count == 0 && !feof(f)) - fatalerror("%s: fread error\n", __func__); - - size += read_count; - } while (!feof(f)); - - pBuffer->pBufferRealStart = buf; - pBuffer->pBufferStart = buf + SAFETYMARGIN; - pBuffer->pBuffer = buf + SAFETYMARGIN; - pBuffer->pBuffer[size] = 0; - pBuffer->nBufferSize = size; - - /* This is added here to make the buffer scaling above easy to express, - * while taking the newline space into account - * for the yy_buffer_append_newlines() call below. 
- */ - capacity += 3; - - /* Skip UTF-8 byte order mark. */ - if (pBuffer->nBufferSize >= BOM_SIZE - && !memcmp(pBuffer->pBuffer, bom, BOM_SIZE)) - pBuffer->pBuffer += BOM_SIZE; - - /* Convert all line endings to LF and spaces */ - - char *mem = pBuffer->pBuffer; - int32_t lineCount = 0; - - while (*mem) { - if ((mem[0] == '\\') && (mem[1] == '\"' || mem[1] == '\\')) { - mem += 2; + mapFile(mappingAddr, state->fd, state->path, fileInfo.st_size); + if (mappingAddr == MAP_FAILED) { + /* If mmap()ing failed, try again using another method (below) */ + state->isMmapped = false; } else { - /* LF CR and CR LF */ - if (((mem[0] == '\n') && (mem[1] == '\r')) - || ((mem[0] == '\r') && (mem[1] == '\n'))) { - *mem++ = ' '; - *mem++ = '\n'; - lineCount++; - /* LF and CR */ - } else if ((mem[0] == '\n') || (mem[0] == '\r')) { - *mem++ = '\n'; - lineCount++; - } else { - mem++; - } + /* IMPORTANT: the `union` mandates this is accessed before other members! */ + close(state->fd); + + state->isMmapped = true; + /* `state` comes from malloc(); lexer_DeleteState() reads this flag, so give it a defined value */ + state->isReferenced = false; + state->ptr = mappingAddr; + state->size = fileInfo.st_size; + state->offset = 0; + + if (verbose) + printf("File %s successfully mmap()ped\n", path); } } - - if (mem != pBuffer->pBuffer + size) { - nLineNo = lineCount + 1; - fatalerror("Found null character\n"); + if (!state->isMmapped) { + /* Sometimes mmap() fails or isn't available, so have a fallback */ + if (verbose) + printf("File %s opened as regular, errno reports \"%s\"\n", + path, strerror(errno)); + state->index = 0; + state->nbChars = 0; } - /* Remove comments */ + initState(state); + state->lineNo = 0; /* Will be incremented at first line start */ + return state; +} - mem = pBuffer->pBuffer; - bool instring = false; +struct LexerState *lexer_OpenFileView(char *buf, size_t size, uint32_t lineNo) +{ + dbgPrint("Opening view on buffer \"%.*s\"[...]\n", size < 16 ? 
(int)size : 16, buf); - while (*mem) { - if (*mem == '\"') - instring = !instring; + struct LexerState *state = malloc(sizeof(*state)); - if ((mem[0] == '\\') && (mem[1] == '\"' || mem[1] == '\\')) { - mem += 2; - } else if (instring) { - mem++; - } else { - /* Comments that start with ; anywhere in a line */ - if (*mem == ';') { - while (!((*mem == '\n') || (*mem == '\0'))) - *mem++ = ' '; - /* Comments that start with * at the start of a line */ - } else if ((mem[0] == '\n') && (mem[1] == '*')) { - warning(WARNING_OBSOLETE, - "'*' is deprecated for comments, please use ';' instead\n"); - mem++; - while (!((*mem == '\n') || (*mem == '\0'))) - *mem++ = ' '; - } else { - mem++; - } - } + if (!state) { + error("Failed to allocate memory for lexer state: %s\n", strerror(errno)); + return NULL; } + // TODO: init `path` - yy_buffer_append_newlines(pBuffer, capacity); - pBuffer->oAtLineStart = 1; - return pBuffer; + state->isFile = false; + state->isMmapped = true; /* It's not *really* mmap()ed, but it behaves the same */ + state->ptr = buf; + state->size = size; + state->offset = 0; + + initState(state); + state->lineNo = lineNo; /* Will be incremented at first line start */ + return state; } -uint32_t lex_FloatAlloc(const struct sLexFloat *token) +void lexer_RestartRept(uint32_t lineNo) { - tLexFloat[nFloating] = *token; - - return (1 << (nFloating++)); + dbgPrint("Restarting REPT\n"); + lexerState->offset = 0; + initState(lexerState); + lexerState->lineNo = lineNo; } -/* - * Make sure that only non-zero ASCII characters are used. Also, check if the - * start is greater than the end of the range. 
- */ -bool lex_CheckCharacterRange(uint16_t start, uint16_t end) +void lexer_DeleteState(struct LexerState *state) { - if (start > end || start < 1 || end > 127) { - error("Invalid character range (start: %" PRIu16 ", end: %" PRIu16 ")\n", - start, end); - return false; - } - return true; + if (!state->isMmapped) + close(state->fd); + else if (state->isFile && !state->isReferenced) + munmap(state->ptr, state->size); + free(state); } -void lex_FloatDeleteRange(uint32_t id, uint16_t start, uint16_t end) +struct KeywordDictNode { + /* + * The identifier charset is (currently) 44 characters big. By storing entries for the + * entire printable ASCII charset, minus lower-case due to case-insensitivity, + * we only waste (0x60 - 0x20) - 44 = 20 entries per node, which should be acceptable. + * In turn, this allows greatly simplifying checking an index into this array, + * which should help speed up the lexer. + */ + uint16_t children[0x60 - ' ']; + struct KeywordMapping const *keyword; +/* Since the keyword structure is invariant, the min number of nodes is known at compile time */ +} keywordDict[338] = {0}; /* Make sure to keep this correct when adding keywords! 
*/ + +/* Convert a char into its index into the dict */ +static inline uint8_t dictIndex(char c) { - if (lex_CheckCharacterRange(start, end)) { - while (start <= end) { - tFloatingChars[start] &= ~id; - start++; - } - } + /* Translate lowercase to uppercase (roughly) */ + if (c > 0x60) + c = c - ('a' - 'A'); + return c - ' '; } -void lex_FloatAddRange(uint32_t id, uint16_t start, uint16_t end) -{ - if (lex_CheckCharacterRange(start, end)) { - while (start <= end) { - tFloatingChars[start] |= id; - start++; - } - } -} - -void lex_FloatDeleteFirstRange(uint32_t id, uint16_t start, uint16_t end) -{ - if (lex_CheckCharacterRange(start, end)) { - while (start <= end) { - tFloatingFirstChar[start] &= ~id; - start++; - } - } -} - -void lex_FloatAddFirstRange(uint32_t id, uint16_t start, uint16_t end) -{ - if (lex_CheckCharacterRange(start, end)) { - while (start <= end) { - tFloatingFirstChar[start] |= id; - start++; - } - } -} - -void lex_FloatDeleteSecondRange(uint32_t id, uint16_t start, uint16_t end) -{ - if (lex_CheckCharacterRange(start, end)) { - while (start <= end) { - tFloatingSecondChar[start] &= ~id; - start++; - } - } -} - -void lex_FloatAddSecondRange(uint32_t id, uint16_t start, uint16_t end) -{ - if (lex_CheckCharacterRange(start, end)) { - while (start <= end) { - tFloatingSecondChar[start] |= id; - start++; - } - } -} - -static struct sLexFloat *lexgetfloat(uint32_t nFloatMask) -{ - if (nFloatMask == 0) - fatalerror("Internal error in %s\n", __func__); - - int32_t i = 0; - - while ((nFloatMask & 1) == 0) { - nFloatMask >>= 1; - i++; - } - - return &tLexFloat[i]; -} - -static uint32_t lexcalchash(char *s) -{ - uint32_t hash = 0; - - while (*s) - hash = (hash * 283) ^ toupper(*s++); - - return hash % LEXHASHSIZE; -} - -void lex_Init(void) -{ - uint32_t i; - - for (i = 0; i < LEXHASHSIZE; i++) - tLexHash[i] = NULL; - - for (i = 0; i < 256; i++) { - tFloatingFirstChar[i] = 0; - tFloatingSecondChar[i] = 0; - tFloatingChars[i] = 0; - } - - nLexMaxLength = 0; - 
nFloating = 0; - - pCurrentStringExpansion = NULL; - nNbStringExpansions = 0; -} - -void lex_AddStrings(const struct sLexInitString *lex) -{ - while (lex->tzName) { - struct sLexString **ppHash; - uint32_t hash = lexcalchash(lex->tzName); - - ppHash = &tLexHash[hash]; - while (*ppHash) - ppHash = &((*ppHash)->next); - - *ppHash = malloc(sizeof(struct sLexString)); - if (*ppHash == NULL) - fatalerror("Out of memory!\n"); - - (*ppHash)->tzName = (char *)strdup(lex->tzName); - if ((*ppHash)->tzName == NULL) - fatalerror("Out of memory!\n"); - - (*ppHash)->nNameLength = strlen(lex->tzName); - (*ppHash)->nToken = lex->nToken; - (*ppHash)->next = NULL; - - upperstring((*ppHash)->tzName); - - if ((*ppHash)->nNameLength > nLexMaxLength) - nLexMaxLength = (*ppHash)->nNameLength; - - lex++; - } -} - -/* - * Gets the "float" mask and "float" length. - * "Float" refers to the token type of a token that is not a keyword. - * The character classes floatingFirstChar, floatingSecondChar, and - * floatingChars are defined separately for each token type. - * It uses bit masks to match against a set of simple regular expressions - * of the form /[floatingFirstChar]([floatingSecondChar][floatingChars]*)?/. - * The token types with the longest match from the current position in the - * buffer will have their bits set in the float mask. - */ -void yylex_GetFloatMaskAndFloatLen(uint32_t *pnFloatMask, uint32_t *pnFloatLen) +void lexer_Init(void) { /* - * Note that '\0' should always have a bit mask of 0 in the "floating" - * tables, so it doesn't need to be checked for separately. + * Build the dictionary of keywords. 
This could be done at compile time instead, however: + * - Doing so manually is a task nobody wants to undertake + * - It would be massively hard to read + * - Doing it within CC or CPP would be quite non-trivial + * - Doing it externally would require some extra work to use only POSIX tools + * - The startup overhead isn't much compared to the program's */ + uint16_t usedNodes = 1; - char *s = pLexBuffer; - uint32_t nOldFloatMask = 0; - uint32_t nFloatMask = tFloatingFirstChar[(uint8_t)*s]; + for (size_t i = 0; i < sizeof(keywords) / sizeof(*keywords); i++) { + uint16_t nodeID = 0; - if (nFloatMask != 0) { - s++; - nOldFloatMask = nFloatMask; - nFloatMask &= tFloatingSecondChar[(uint8_t)*s]; + /* Walk the dictionary, creating intermediate nodes for the keyword */ + for (char const *ptr = keywords[i].name; *ptr; ptr++) { + /* We should be able to assume all entries are well-formed */ + if (keywordDict[nodeID].children[*ptr - ' '] == 0) { + /* + * If this gets tripped up, set the size of keywordDict to + * something high, compile with `-DPRINT_NODE_COUNT` (see below), + * and set the size to that. + */ + assert(usedNodes < sizeof(keywordDict) / sizeof(*keywordDict)); - while (nFloatMask != 0) { - s++; - nOldFloatMask = nFloatMask; - nFloatMask &= tFloatingChars[(uint8_t)*s]; - } - } - - *pnFloatMask = nOldFloatMask; - *pnFloatLen = (uint32_t)(s - pLexBuffer); -} - -/* - * Gets the longest keyword/operator from the current position in the buffer. 
- */ -struct sLexString *yylex_GetLongestFixed(void) -{ - struct sLexString *pLongestFixed = NULL; - char *s = pLexBuffer; - uint32_t hash = 0; - uint32_t length = 0; - - while (length < nLexMaxLength && *s) { - hash = (hash * 283) ^ toupper(*s); - s++; - length++; - - struct sLexString *lex = tLexHash[hash % LEXHASHSIZE]; - - while (lex) { - if (lex->nNameLength == length - && strncasecmp(pLexBuffer, lex->tzName, length) == 0) { - pLongestFixed = lex; - break; + /* There is no node at that location, grab one from the pool */ + keywordDict[nodeID].children[*ptr - ' '] = usedNodes; + usedNodes++; } - lex = lex->next; + nodeID = keywordDict[nodeID].children[*ptr - ' ']; } + + /* This assumes that no two keywords have the same name */ + keywordDict[nodeID].keyword = &keywords[i]; } - return pLongestFixed; +#ifdef PRINT_NODE_COUNT /* For the maintainer to check how many nodes are needed */ + printf("Lexer keyword dictionary: %zu keywords in %u nodes (pool size %zu)\n", + sizeof(keywords) / sizeof(*keywords), usedNodes, + sizeof(keywordDict) / sizeof(*keywordDict)); +#endif } -size_t CopyMacroArg(char *dest, size_t maxLength, char c) +void lexer_SetMode(enum LexerMode mode) { - size_t i; - char const *s; + lexerState->mode = mode; +} - if (c == '@') - s = macro_GetUniqueIDStr(); - else if (c >= '1' && c <= '9') - s = macro_GetArg(c - '0'); +void lexer_ToggleStringExpansion(bool enable) +{ + lexerState->expandStrings = enable; +} + +/* Functions for the actual lexer to obtain characters */ + +static void reallocCaptureBuf(void) +{ + if (lexerState->captureCapacity == SIZE_MAX) + fatalerror("Cannot grow capture buffer past %zu bytes\n", SIZE_MAX); + else if (lexerState->captureCapacity > SIZE_MAX / 2) + lexerState->captureCapacity = SIZE_MAX; else - return 0; - - if (s == NULL) - fatalerror("Macro argument '\\%c' not defined\n", c); - - // TODO: `strncpy`, nay? 
- for (i = 0; s[i] != 0; i++) { - if (i >= maxLength) - fatalerror("Macro argument too long to fit buffer\n"); - - dest[i] = s[i]; - } - - return i; -} - -static inline void yylex_StringWriteChar(char *s, size_t index, char c) -{ - if (index >= MAXSTRLEN) - fatalerror("String too long\n"); - - s[index] = c; -} - -static inline void yylex_SymbolWriteChar(char *s, size_t index, char c) -{ - if (index >= MAXSYMLEN) - fatalerror("Symbol too long\n"); - - s[index] = c; + lexerState->captureCapacity *= 2; + lexerState->captureBuf = realloc(lexerState->captureBuf, lexerState->captureCapacity); + if (!lexerState->captureBuf) + fatalerror("realloc error while resizing capture buffer: %s\n", strerror(errno)); } /* - * Trims white space at the end of a string. - * The index parameter is the index of the 0 at the end of the string. + * The multiple evaluations of `retvar` causing side effects is INTENTIONAL, and + * required for example by `lexer_dumpStringExpansions`. It is however only + * evaluated once per level, and only then. + * + * This uses the concept of "X macros": you must #define LOOKUP_PRE_NEST and + * LOOKUP_POST_NEST before invoking this (and #undef them right after), and + * those macros will be expanded at the corresponding points in the loop. 
+ * This is necessary because there are at least 3 places which need to iterate + * through iterations while performing custom actions */ -void yylex_TrimEnd(char *s, size_t index) -{ - int32_t i = (int32_t)index - 1; +#define lookupExpansion(retvar, dist) do { \ + struct Expansion *exp = lexerState->expansions; \ + \ + for (;;) { \ + /* Find the closest expansion whose end is after the target */ \ + while (exp && exp->totalLen + exp->distance <= (dist)) { \ + (dist) -= exp->totalLen + exp->skip; \ + exp = exp->next; \ + } \ + \ + /* If there is none, or it begins after the target, return the previous level */ \ + if (!exp || exp->distance > (dist)) \ + break; \ + \ + /* We know we are inside of that expansion */ \ + (dist) -= exp->distance; /* Distances are relative to their parent */ \ + \ + /* Otherwise, register this expansion and repeat the process */ \ + LOOKUP_PRE_NEST(exp); \ + (retvar) = exp; \ + if (!exp->firstChild) /* If there are no children, this is it */ \ + break; \ + exp = exp->firstChild; \ + \ + LOOKUP_POST_NEST(exp); \ + } \ +} while (0) - while ((i >= 0) && (s[i] == ' ' || s[i] == '\t')) { - s[i] = 0; +static struct Expansion *getExpansionAtDistance(size_t *distance) +{ + struct Expansion *expansion = NULL; /* Top level has no "previous" level */ + +#define LOOKUP_PRE_NEST(exp) +#define LOOKUP_POST_NEST(exp) + struct Expansion *exp = lexerState->expansions; + + for (;;) { + /* Find the closest expansion whose end is after the target */ + while (exp && exp->totalLen + exp->distance <= *distance) { + *distance -= exp->totalLen - exp->skip; + exp = exp->next; + } + + /* If there is none, or it begins after the target, return the previous level */ + if (!exp || exp->distance > *distance) + break; + + /* We know we are inside of that expansion */ + *distance -= exp->distance; /* Distances are relative to their parent */ + + /* Otherwise, register this expansion and repeat the process */ + LOOKUP_PRE_NEST(exp); + expansion = exp; + if 
(!exp->firstChild) /* If there are no children, this is it */ + break; + exp = exp->firstChild; + + LOOKUP_POST_NEST(exp); + } +#undef LOOKUP_PRE_NEST +#undef LOOKUP_POST_NEST + + return expansion; +} + +static void beginExpansion(size_t distance, uint8_t skip, + char const *str, size_t size, char const *name) +{ + distance += lexerState->expansionOfs; /* Distance argument is relative to read offset! */ + /* Increase the total length of all parents, and return the topmost one */ + struct Expansion *parent = NULL; + unsigned int depth = 0; + +#define LOOKUP_PRE_NEST(exp) (exp)->totalLen += size - skip +#define LOOKUP_POST_NEST(exp) do { \ + if (name && ++depth >= nMaxRecursionDepth) \ + fatalerror("Recursion limit (%u) exceeded\n", nMaxRecursionDepth); \ +} while (0) + lookupExpansion(parent, distance); +#undef LOOKUP_PRE_NEST +#undef LOOKUP_POST_NEST + struct Expansion **insertPoint = parent ? &parent->firstChild : &lexerState->expansions; + + /* We know we are in none of the children expansions: add ourselves, keeping it sorted */ + while (*insertPoint && (*insertPoint)->distance < distance) + insertPoint = &(*insertPoint)->next; + + *insertPoint = malloc(sizeof(**insertPoint)); + if (!*insertPoint) + fatalerror("Unable to allocate new expansion: %s\n", strerror(errno)); + (*insertPoint)->firstChild = NULL; + (*insertPoint)->next = NULL; /* Expansions are always performed left to right */ + (*insertPoint)->name = name ? 
strdup(name) : NULL; + (*insertPoint)->contents = str; + (*insertPoint)->len = size; + (*insertPoint)->totalLen = size; + (*insertPoint)->distance = distance; + (*insertPoint)->skip = skip; + + /* If expansion is the new closest one, update offset */ + if (insertPoint == &lexerState->expansions) + lexerState->expansionOfs = 0; +} + +static void freeExpansion(struct Expansion *expansion) +{ + struct Expansion *child = expansion->firstChild; + + while (child) { + struct Expansion *next = child->next; + + freeExpansion(child); + child = next; + } + free(expansion->name); + free(expansion); +} + +static char const *expandMacroArg(char name, size_t distance) +{ + char const *str; + + if (name == '@') + str = macro_GetUniqueIDStr(); + else if (name == '0') + fatalerror("Invalid macro argument '\\0'\n"); + else + str = macro_GetArg(name - '0'); + if (!str) + fatalerror("Macro argument '\\%c' not defined\n", name); + + beginExpansion(distance, 2, str, strlen(str), NULL); + return str; +} + +/* If at any point we need more than 255 characters of lookahead, something went VERY wrong. 
*/ +static int peekInternal(uint8_t distance) +{ + if (distance >= LEXER_BUF_SIZE) + fatalerror("Internal lexer error: buffer has insufficient size for peeking (%" + PRIu8 " >= %u)\n", distance, LEXER_BUF_SIZE); + + size_t ofs = lexerState->expansionOfs + distance; + struct Expansion const *expansion = getExpansionAtDistance(&ofs); + + if (expansion) { + assert(ofs < expansion->len); + return expansion->contents[ofs]; + } + + distance = ofs; + + if (lexerState->isMmapped) { + if (lexerState->offset + distance >= lexerState->size) + return EOF; + + return (unsigned char)lexerState->ptr[lexerState->offset + distance]; + } + + if (lexerState->nbChars <= distance) { + /* Buffer isn't full enough, read some chars in */ + size_t target = LEXER_BUF_SIZE - lexerState->nbChars; /* Aim: making the buf full */ + + /* Compute the index we'll start writing to */ + size_t writeIndex = (lexerState->index + lexerState->nbChars) % LEXER_BUF_SIZE; + ssize_t nbCharsRead = 0, totalCharsRead = 0; + +#define readChars(size) do { \ + /* This buffer overflow made me lose WEEKS of my life. Never again. 
*/ \ + assert(writeIndex + (size) <= LEXER_BUF_SIZE); \ + nbCharsRead = read(lexerState->fd, &lexerState->buf[writeIndex], (size)); \ + if (nbCharsRead == -1) \ + fatalerror("Error while reading \"%s\": %s\n", lexerState->path, errno); \ + totalCharsRead += nbCharsRead; \ + writeIndex += nbCharsRead; \ + if (writeIndex == LEXER_BUF_SIZE) \ + writeIndex = 0; \ + target -= nbCharsRead; \ +} while (0) + + /* If the range to fill passes over the buffer wrapping point, we need two reads */ + if (writeIndex + target > LEXER_BUF_SIZE) { + size_t nbExpectedChars = LEXER_BUF_SIZE - writeIndex; + + readChars(nbExpectedChars); + /* If the read was incomplete, don't perform a second read */ + if (nbCharsRead < nbExpectedChars) + target = 0; + } + if (target != 0) + readChars(target); + +#undef readChars + + lexerState->nbChars += totalCharsRead; + + /* If there aren't enough chars even after refilling, give up */ + if (lexerState->nbChars <= distance) + return EOF; + } + return (unsigned char)lexerState->buf[(lexerState->index + distance) % LEXER_BUF_SIZE]; +} + +static int peek(uint8_t distance) +{ + int c = peekInternal(distance); + + if (distance >= lexerState->macroArgScanDistance) { + lexerState->macroArgScanDistance = distance + 1; /* Do not consider again */ + /* If enabled and character is a backslash, check for a macro arg */ + if (!lexerState->disableMacroArgs && c == '\\') { + distance++; + lexerState->macroArgScanDistance++; + c = peekInternal(distance); + if (c == '@' || (c >= '0' && c <= '9')) { + /* Expand the argument and return its first character */ + char const *str = expandMacroArg(c, distance - 1); + + /* + * Assuming macro args can't be recursive (I'll be damned if a way + * is found...), then we mark the entire macro arg as scanned; + * however, the two macro arg characters (\1) will be ignored, + * so they shouldn't be counted in the scan distance! 
+ */ + lexerState->macroArgScanDistance += strlen(str) - 2; + /* WARNING: this assumes macro args can't be empty!! */ + c = str[0]; + } else { + c = '\\'; + } + } + } + return c; +} + +static void shiftChars(uint8_t distance) +{ + if (lexerState->capturing) { + if (lexerState->captureBuf) { + if (lexerState->captureSize + distance >= lexerState->captureCapacity) + reallocCaptureBuf(); + /* TODO: improve this? */ + for (uint8_t i = 0; i < distance; i++) + lexerState->captureBuf[lexerState->captureSize++] = peek(i); + } else { + lexerState->captureSize += distance; + } + } + + lexerState->macroArgScanDistance -= distance; + + /* FIXME: this may not be too great, as only the top level is considered... */ + + /* + * The logic is as follows: + * - Any characters up to the expansion need to be consumed in the file + * - If some remain after that, advance the offset within the expansion + * - If that goes *past* the expansion, then leftovers shall be consumed in the file + * - If we went past the expansion, we're back to square one, and should re-do all + */ +nextExpansion: + if (lexerState->expansions) { + /* If the read cursor reaches into the expansion, update offset */ + if (distance > lexerState->expansions->distance) { + /* distance = + */ + lexerState->expansionOfs += distance - lexerState->expansions->distance; + distance = lexerState->expansions->distance; /* Nb chars to read in file */ + /* Now, check if the expansion finished being read */ + if (lexerState->expansionOfs >= lexerState->expansions->totalLen) { + /* Add the leftovers to the distance */ + distance += lexerState->expansionOfs; + distance -= lexerState->expansions->totalLen; + /* Also add in the post-expansion skip */ + distance += lexerState->expansions->skip; + /* Move on to the next expansion */ + struct Expansion *next = lexerState->expansions->next; + + freeExpansion(lexerState->expansions); + lexerState->expansions = next; + /* Reset the offset for the next expansion */ + 
lexerState->expansionOfs = 0; + /* And repeat, in case we also go into or over the next expansion */ + goto nextExpansion; + } + } + /* Getting closer to the expansion */ + lexerState->expansions->distance -= distance; + /* Now, `distance` is how many bytes to move forward **in the file** */ + } + + if (lexerState->isMmapped) { + lexerState->offset += distance; + } else { + lexerState->index += distance; + lexerState->colNo += distance; + /* Wrap around if necessary */ + if (lexerState->index >= LEXER_BUF_SIZE) + lexerState->index %= LEXER_BUF_SIZE; + assert(lexerState->nbChars >= distance); + lexerState->nbChars -= distance; + } +} + +static int nextChar(void) +{ + int c = peek(0); + + /* If not at EOF, advance read position */ + if (c != EOF) + shiftChars(1); + return c; +} + +/* "Services" provided by the lexer to the rest of the program */ + +char const *lexer_GetFileName(void) +{ + return lexerState ? lexerState->path : NULL; +} + +uint32_t lexer_GetLineNo(void) +{ + return lexerState->lineNo; +} + +uint32_t lexer_GetColNo(void) +{ + return lexerState->colNo; +} + +void lexer_DumpStringExpansions(void) +{ + if (!lexerState) + return; + struct Expansion *stack[nMaxRecursionDepth + 1]; + struct Expansion *expansion; /* Temp var for `lookupExpansion` */ + unsigned int depth = 0; + size_t distance = lexerState->expansionOfs; + +#define LOOKUP_PRE_NEST(exp) do { \ + /* Only register EQUS expansions, not string args */ \ + if ((exp)->name) \ + stack[depth++] = (exp); \ +} while (0) +#define LOOKUP_POST_NEST(exp) + lookupExpansion(expansion, distance); + (void)expansion; +#undef LOOKUP_PRE_NEST +#undef LOOKUP_POST_NEST + + while (depth--) + fprintf(stderr, "while expanding symbol \"%s\"\n", stack[depth]->name); +} + +/* Function to discard all of a line's comments */ + +static void discardComment(void) +{ + dbgPrint("Discarding comment\n"); + lexerState->disableMacroArgs = true; + for (;;) { + int c = peek(0); + + if (c == EOF || c == '\r' || c == '\n') + break; + 
shiftChars(1); + } + lexerState->disableMacroArgs = false; +} + +/* Function to read a line continuation */ + +static void readLineContinuation(void) +{ + dbgPrint("Beginning line continuation\n"); + for (;;) { + int c = peek(0); + + if (isWhitespace(c)) { + shiftChars(1); + } else if (c == '\r' || c == '\n') { + shiftChars(1); + if (c == '\r' && peek(0) == '\n') + shiftChars(1); + if (!lexerState->expansions + || lexerState->expansions->distance) + lexerState->lineNo++; + return; + } else if (c == ';') { + discardComment(); + } else { + error("Begun line continuation, but encountered character '%s'\n", + print(c)); + return; + } + } +} + +/* Functions to lex numbers of various radixes */ + +static void readNumber(int radix, int32_t baseValue) +{ + uint32_t value = baseValue; + + for (;;) { + int c = peek(0); + + if (c < '0' || c > '0' + radix - 1) + break; + if (value > (UINT32_MAX - (c - '0')) / radix) + warning(WARNING_LARGE_CONSTANT, "Integer constant is too large\n"); + value = value * radix + (c - '0'); + + shiftChars(1); + } + + yylval.nConstValue = value; +} + +static void readFractionalPart(void) +{ + uint32_t value = 0, divisor = 1; + + dbgPrint("Reading fractional part\n"); + for (;;) { + int c = peek(0); + + if (c < '0' || c > '9') + break; + shiftChars(1); + if (divisor > (UINT32_MAX - (c - '0')) / 10) { + warning(WARNING_LARGE_CONSTANT, + "Precision of fixed-point constant is too large\n"); + /* Discard any additional digits */ + while (c = peek(0), c >= '0' && c <= '9') + shiftChars(1); + break; + } + value = value * 10 + (c - '0'); + divisor *= 10; + } + + if (yylval.nConstValue > INT16_MAX || yylval.nConstValue < INT16_MIN) + warning(WARNING_LARGE_CONSTANT, "Magnitude of fixed-point constant is too large\n"); + + /* Cast to unsigned avoids UB if shifting discards bits */ + yylval.nConstValue = (uint32_t)yylval.nConstValue << 16; + /* Cast to unsigned avoids undefined overflow behavior */ + uint16_t fractional = value * 65536 / divisor; + + 
yylval.nConstValue |= fractional * (yylval.nConstValue >= 0 ? 1 : -1); +} + +char const *binDigits; + +static void readBinaryNumber(void) +{ + uint32_t value = 0; + + dbgPrint("Reading binary number with digits [%c,%c]\n", binDigits[0], binDigits[1]); + for (;;) { + int c = peek(0); + int bit; + + if (c == binDigits[0]) + bit = 0; + else if (c == binDigits[1]) + bit = 1; + else + break; + if (value > (UINT32_MAX - bit) / 2) + warning(WARNING_LARGE_CONSTANT, "Integer constant is too large\n"); + value = value * 2 + bit; + + shiftChars(1); + } + + yylval.nConstValue = value; +} + +static void readHexNumber(void) +{ + uint32_t value = 0; + bool empty = true; + + dbgPrint("Reading hex number\n"); + for (;;) { + int c = peek(0); + + if (c >= 'a' && c <= 'f') /* Convert letters to right after digits */ + c = c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + c = c - 'A' + 10; + else if (c >= '0' && c <= '9') + c = c - '0'; + else + break; + + if (value > (UINT32_MAX - c) / 16) + warning(WARNING_LARGE_CONSTANT, "Integer constant is too large\n"); + value = value * 16 + c; + + shiftChars(1); + empty = false; + } + + if (empty) + error("Invalid integer constant, no digits after '$'\n"); + + yylval.nConstValue = value; +} + +char const *gfxDigits; + +static void readGfxConstant(void) +{ + uint32_t bp0 = 0, bp1 = 0; + uint8_t width = 0; + + dbgPrint("Reading gfx constant with digits [%c,%c,%c,%c]\n", + gfxDigits[0], gfxDigits[1], gfxDigits[2], gfxDigits[3]); + for (;;) { + int c = peek(0); + uint32_t pixel; + + if (c == gfxDigits[0]) + pixel = 0; + else if (c == gfxDigits[1]) + pixel = 1; + else if (c == gfxDigits[2]) + pixel = 2; + else if (c == gfxDigits[3]) + pixel = 3; + else + break; + + if (width < 8) { + bp0 = bp0 << 1 | (pixel & 1); + bp1 = bp1 << 1 | (pixel >> 1); + } + if (width < 9) + width++; + shiftChars(1); + } + + if (width == 0) + error("Invalid graphics constant, no digits after '`'\n"); + else if (width == 9) + warning(WARNING_LARGE_CONSTANT, + "Graphics 
constant is too long, only 8 first pixels considered\n"); + + yylval.nConstValue = bp1 << 8 | bp0; +} + +/* Function to read identifiers & keywords */ + +static bool startsIdentifier(int c) +{ + return (c <= 'Z' && c >= 'A') || (c <= 'z' && c >= 'a') || c == '.' || c == '_'; +} + +static int readIdentifier(char firstChar) +{ + dbgPrint("Reading identifier or keyword\n"); + /* Lex while checking for a keyword */ + yylval.tzSym[0] = firstChar; + uint16_t nodeID = keywordDict[0].children[dictIndex(firstChar)]; + int tokenType = firstChar == '.' ? T_LOCAL_ID : T_ID; + size_t i; + + for (i = 1; ; i++) { + int c = peek(0); + + /* If that char isn't in the symbol charset, end */ + if ((c > '9' || c < '0') + && (c > 'Z' || c < 'A') + && (c > 'z' || c < 'a') + && c != '#' && c != '.' && c != '@' && c != '_') + break; + shiftChars(1); + + /* Write the char to the identifier's name */ + if (i < sizeof(yylval.tzSym) - 1) + yylval.tzSym[i] = c; + + /* If the char was a dot, mark the identifier as local */ + if (c == '.') + tokenType = T_LOCAL_ID; + + /* Attempt to traverse the tree to check for a keyword */ + if (nodeID) /* Do nothing if matching already failed */ + nodeID = keywordDict[nodeID].children[dictIndex(c)]; + } + + if (i > sizeof(yylval.tzSym) - 1) { + warning(WARNING_LONG_STR, "Symbol name too long, got truncated\n"); + i = sizeof(yylval.tzSym) - 1; + } + yylval.tzSym[i] = '\0'; /* Terminate the string */ + dbgPrint("Ident/keyword = \"%s\"\n", yylval.tzSym); + + if (keywordDict[nodeID].keyword) + return keywordDict[nodeID].keyword->token; + + return tokenType; +} + +/* Functions to read strings */ + +enum PrintType { + TYPE_NONE, + TYPE_DECIMAL, /* d */ + TYPE_UPPERHEX, /* X */ + TYPE_LOWERHEX, /* x */ + TYPE_BINARY, /* b */ +}; + +static void intToString(char *dest, size_t bufSize, struct Symbol const *sym, enum PrintType type) +{ + uint32_t value = sym_GetConstantSymValue(sym); + int fullLength; + + /* Special cheat for binary */ + if (type == TYPE_BINARY) { + 
char binary[33]; /* 32 bits + 1 terminator */ + char *write_ptr = binary + 32; + + fullLength = 0; + binary[32] = 0; + do { + *(--write_ptr) = (value & 1) + '0'; + value >>= 1; + fullLength++; + } while (value); + strncpy(dest, write_ptr, bufSize - 1); + } else { + static char const * const formats[] = { + [TYPE_NONE] = "$%" PRIX32, + [TYPE_DECIMAL] = "%" PRId32, + [TYPE_UPPERHEX] = "%" PRIX32, + [TYPE_LOWERHEX] = "%" PRIx32 + }; + + fullLength = snprintf(dest, bufSize, formats[type], value); + if (fullLength < 0) { + error("snprintf encoding error: %s\n", strerror(errno)); + dest[0] = '\0'; + } + } + + if ((size_t)fullLength >= bufSize) + warning(WARNING_LONG_STR, "Interpolated symbol %s too long to fit buffer\n", + sym->name); +} + +static char const *readInterpolation(void) +{ + char symName[MAXSYMLEN + 1]; + size_t i = 0; + enum PrintType type = TYPE_NONE; + + for (;;) { + int c = peek(0); + + if (c == '{') { /* Nested interpolation */ + shiftChars(1); + char const *inner = readInterpolation(); + + if (inner) { + while (*inner) { + if (i == sizeof(symName)) + break; + symName[i++] = *inner++; + } + } + } else if (c == EOF || c == '\r' || c == '\n' || c == '"') { + error("Missing }\n"); + break; + } else if (c == '}') { + shiftChars(1); + break; + } else if (c == ':' && type == TYPE_NONE) { /* Print type, only once */ + if (i != 1) { + error("Print types are exactly 1 character long\n"); + } else { + switch (symName[0]) { + case 'b': + type = TYPE_BINARY; + break; + case 'd': + type = TYPE_DECIMAL; + break; + case 'X': + type = TYPE_UPPERHEX; + break; + case 'x': + type = TYPE_LOWERHEX; + break; + default: + error("Invalid print type '%s'\n", print(symName[0])); + } + } + i = 0; /* Now that type has been set, restart at beginning of string */ + shiftChars(1); + } else { + if (i < sizeof(symName)) /* Allow writing an extra char to flag overflow */ + symName[i++] = c; + shiftChars(1); + } + } + + if (i == sizeof(symName)) { + warning(WARNING_LONG_STR, "Symbol name 
too long\n"); i--; } -} + symName[i] = '\0'; -size_t yylex_ReadBracketedSymbol(char *dest, size_t index) -{ - char sym[MAXSYMLEN + 1]; - char ch; - size_t i = 0; - size_t length, maxLength; - const char *mode = NULL; + struct Symbol const *sym = sym_FindSymbol(symName); - for (ch = *pLexBuffer; - ch != '}' && ch != '"' && ch != '\n'; - ch = *(++pLexBuffer)) { - if (ch == '\\') { - ch = *(++pLexBuffer); - maxLength = MAXSYMLEN - i; - length = CopyMacroArg(&sym[i], maxLength, ch); + if (!sym) { + error("Interpolated symbol \"%s\" does not exist\n", symName); + } else if (sym->type == SYM_EQUS) { + if (type != TYPE_NONE) + error("Print types are only allowed for numbers\n"); + return sym_GetStringValue(sym); + } else if (sym_IsNumeric(sym)) { + static char buf[33]; /* Worst case of 32 digits + terminator */ - if (length != 0) - i += length; - else - fatalerror("Illegal character escape '%c'\n", ch); - } else if (ch == '{') { - /* Handle nested symbols */ - ++pLexBuffer; - i += yylex_ReadBracketedSymbol(sym, i); - --pLexBuffer; - } else if (ch == ':' && !mode) { /* Only grab 1st colon */ - /* Use a whitelist of modes, which does prevent the - * use of some features such as precision, - * but also avoids a security flaw - */ - const char *acceptedModes = "bxXd"; - /* Binary isn't natively supported, - * so it's handled differently - */ - static const char * const formatSpecifiers[] = { - "", "%" PRIx32, "%" PRIX32, "%" PRId32 - }; - /* Prevent reading out of bounds! 
*/ - const char *designatedMode; - - if (i != 1) - fatalerror("Print types are exactly 1 character long\n"); - - designatedMode = strchr(acceptedModes, sym[i - 1]); - if (!designatedMode) - fatalerror("Illegal print type '%c'\n", sym[i - 1]); - mode = formatSpecifiers[designatedMode - acceptedModes]; - /* Begin writing the symbol again */ - i = 0; - } else { - yylex_SymbolWriteChar(sym, i++, ch); - } + intToString(buf, sizeof(buf), sym, type); + return buf; + } else { + error("Only numerical and string symbols can be interpolated\n"); } - - /* Properly terminate the string */ - yylex_SymbolWriteChar(sym, i, 0); - - /* It's assumed we're writing to a T_STRING */ - maxLength = MAXSTRLEN - index; - length = symvaluetostring(&dest[index], maxLength, sym, mode); - - if (*pLexBuffer == '}') - pLexBuffer++; - else - fatalerror("Missing }\n"); - - return length; + return NULL; } -static void yylex_ReadQuotedString(void) +static void readString(void) { - size_t index = 0; - size_t length, maxLength; + size_t i = 0; - while (*pLexBuffer != '"' && *pLexBuffer != '\n') { - char ch = *pLexBuffer++; + dbgPrint("Reading string\n"); + for (;;) { + int c = peek(0); - if (ch == '\\') { - ch = *pLexBuffer++; + switch (c) { + case '"': + shiftChars(1); + if (i == sizeof(yylval.tzString)) { + i--; + warning(WARNING_LONG_STR, "String constant too long\n"); + } + yylval.tzString[i] = '\0'; + dbgPrint("Read string \"%s\"\n", yylval.tzString); + return; + case '\r': + case '\n': /* Do not shift these! 
*/ + case EOF: + if (i == sizeof(yylval.tzString)) { + i--; + warning(WARNING_LONG_STR, "String constant too long\n"); + } + yylval.tzString[i] = '\0'; + error("Unterminated string\n"); + dbgPrint("Read string \"%s\"\n", yylval.tzString); + return; - switch (ch) { + case '\\': /* Character escape */ + c = peek(1); + switch (c) { + case '\\': /* Return that character unchanged */ + case '"': + case '{': + case '}': + shiftChars(1); + break; case 'n': - ch = '\n'; + c = '\n'; + shiftChars(1); break; case 'r': - ch = '\r'; + c = '\r'; + shiftChars(1); break; case 't': - ch = '\t'; + c = '\t'; + shiftChars(1); break; - case '\\': - ch = '\\'; - break; - case '"': - ch = '"'; - break; - case ',': - ch = ','; - break; - case '{': - ch = '{'; - break; - case '}': - ch = '}'; + + case ' ': + case '\r': + case '\n': + shiftChars(1); /* Shift the backslash */ + readLineContinuation(); + continue; + + case EOF: /* Can't really print that one */ + error("Illegal character escape at end of input\n"); + c = '\\'; break; default: - maxLength = MAXSTRLEN - index; - length = CopyMacroArg(&yylval.tzString[index], - maxLength, ch); - - if (length != 0) - index += length; - else - fatalerror("Illegal character escape '%c'\n", ch); - - ch = 0; + error("Illegal character escape '%s'\n", print(c)); + c = '\\'; break; } - } else if (ch == '{') { - // Get bracketed symbol within string. 
- index += yylex_ReadBracketedSymbol(yylval.tzString, - index); - ch = 0; - } + break; - if (ch) - yylex_StringWriteChar(yylval.tzString, index++, ch); - } + case '{': /* Symbol interpolation */ + shiftChars(1); + char const *ptr = readInterpolation(); - yylex_StringWriteChar(yylval.tzString, index, 0); - - if (*pLexBuffer == '"') - pLexBuffer++; - else - fatalerror("Unterminated string\n"); -} - -static uint32_t yylex_NORMAL(void) -{ - struct sLexString *pLongestFixed = NULL; - uint32_t nFloatMask, nFloatLen; - uint32_t linestart = AtLineStart; - - AtLineStart = 0; - -scanagain: - while (*pLexBuffer == ' ' || *pLexBuffer == '\t') { - linestart = 0; - pLexBuffer++; - } - - if (*pLexBuffer == 0) { - // Reached the end of a file, macro, or rept. - if (yywrap() == 0) { - linestart = AtLineStart; - AtLineStart = 0; - goto scanagain; - } - } - - /* Check for line continuation character */ - if (*pLexBuffer == '\\') { - /* - * Look for line continuation character after a series of - * spaces. This is also useful for files that use Windows line - * endings: "\r\n" is replaced by " \n" before the lexer has the - * opportunity to see it. - */ - if (pLexBuffer[1] == ' ' || pLexBuffer[1] == '\t') { - pLexBuffer += 2; - while (1) { - if (*pLexBuffer == ' ' || *pLexBuffer == '\t') { - pLexBuffer++; - } else if (*pLexBuffer == '\n') { - pLexBuffer++; - nLineNo++; - goto scanagain; - } else { - error("Expected a new line after the continuation character.\n"); - pLexBuffer++; + if (ptr) { + while (*ptr) { + if (i == sizeof(yylval.tzString)) + break; + yylval.tzString[i++] = *ptr++; } } - } + continue; /* Do not copy an additional character */ - /* Line continuation character */ - if (pLexBuffer[1] == '\n') { - pLexBuffer += 2; - nLineNo++; - goto scanagain; + /* Regular characters will just get copied */ } - - /* - * If there isn't a newline character or a space, ignore the - * character '\'. It will eventually be handled by other - * functions like PutMacroArg(). 
- */ + if (i < sizeof(yylval.tzString)) /* Copy one extra to flag overflow */ + yylval.tzString[i++] = c; + shiftChars(1); } - - /* - * Try to match an identifier, macro argument (e.g. \1), - * or numeric literal. - */ - yylex_GetFloatMaskAndFloatLen(&nFloatMask, &nFloatLen); - - /* Try to match a keyword or operator. */ - pLongestFixed = yylex_GetLongestFixed(); - - if (nFloatLen == 0 && pLongestFixed == NULL) { - /* - * No keyword, identifier, operator, or numerical literal - * matches. - */ - - if (*pLexBuffer == '"') { - pLexBuffer++; - yylex_ReadQuotedString(); - return T_STRING; - } else if (*pLexBuffer == '{') { - pLexBuffer++; - size_t len = yylex_ReadBracketedSymbol(yylval.tzString, - 0); - yylval.tzString[len] = 0; - return T_STRING; - } - - /* - * It's not a keyword, operator, identifier, macro argument, - * numeric literal, string, or bracketed symbol, so just return - * the ASCII character. - */ - unsigned char ch = *pLexBuffer++; - - if (ch == '\n') - AtLineStart = 1; - - /* - * Check for invalid unprintable characters. - * They may not be readily apparent in a text editor, - * so this is useful for identifying encoding problems. - */ - if (ch != 0 - && ch != '\n' - && !(ch >= 0x20 && ch <= 0x7E)) - fatalerror("Found garbage character: 0x%02X\n", ch); - - return ch; - } - - if (pLongestFixed == NULL || nFloatLen > pLongestFixed->nNameLength) { - /* - * Longest match was an identifier, macro argument, or numeric - * literal. - */ - struct sLexFloat *token = lexgetfloat(nFloatMask); - - if (token->Callback) { - int32_t done = token->Callback(pLexBuffer, nFloatLen); - - if (!done) - goto scanagain; - } - - uint32_t type = token->nToken; - - if (type == T_ID && strchr(yylval.tzSym, '.')) - type = T_LOCAL_ID; - - if (linestart && type == T_ID) - return T_LABEL; - return type; - } - - /* Longest match was a keyword or operator. 
*/ - pLexBuffer += pLongestFixed->nNameLength; - yylval.nConstValue = pLongestFixed->nToken; - return pLongestFixed->nToken; } -static uint32_t yylex_MACROARGS(void) +/* Function to report one character's worth of garbage bytes */ + +static char const *reportGarbageChar(unsigned char firstByte) { - size_t index = 0; - size_t length, maxLength; + static char bytes[6 + 2 + 1]; /* Max size of a UTF-8 encoded code point, plus "''\0" */ + /* First, attempt UTF-8 decoding */ + uint32_t state = 0; /* UTF8_ACCEPT */ + uint32_t codepoint; + uint8_t size = 0; /* Number of additional bytes to shift */ - while ((*pLexBuffer == ' ') || (*pLexBuffer == '\t')) - pLexBuffer++; + bytes[1] = firstByte; /* No need to init the rest of the array */ + decode(&state, &codepoint, firstByte); + while (state != 0 && state != 1 /* UTF8_REJECT */) { + int c = peek(size++); - while ((*pLexBuffer != ',') && (*pLexBuffer != '\n')) { - char ch = *pLexBuffer++; + if (c == EOF) + break; + bytes[size + 1] = c; + decode(&state, &codepoint, c); + } - if (ch == '\\') { - ch = *pLexBuffer++; + if (state == 0 && (codepoint > UCHAR_MAX || isprint((unsigned char)codepoint))) { + /* Character is valid, printable UTF-8! */ + shiftChars(size); + bytes[0] = '\''; + bytes[size + 2] = '\''; + bytes[size + 3] = '\0'; + return bytes; + } - switch (ch) { - case 'n': - ch = '\n'; - break; - case 't': - ch = '\t'; - break; - case '\\': - ch = '\\'; - break; - case '"': - ch = '\"'; - break; - case ',': - ch = ','; - break; - case '{': - ch = '{'; - break; - case '}': - ch = '}'; - break; - case ' ': - case '\t': - /* - * Look for line continuation character after a - * series of spaces. This is also useful for - * files that use Windows line endings: "\r\n" - * is replaced by " \n" before the lexer has the - * opportunity to see it. 
- */ - while (1) { - if (*pLexBuffer == ' ' - || *pLexBuffer == '\t') { - pLexBuffer++; - } else if (*pLexBuffer == '\n') { - pLexBuffer++; - nLineNo++; - ch = 0; - break; - } else { - error("Expected a new line after the continuation character.\n"); + /* The character isn't valid UTF-8, so we'll only print that first byte */ + if (isprint(firstByte)) { + /* bytes[1] = firstByte; */ + bytes[0] = '\''; + bytes[2] = '\''; + bytes[3] = '\0'; + return bytes; + } + /* Well then, print its hex value */ + static char const hexChars[16] = "0123456789ABCDEF"; + + bytes[0] = '0'; + bytes[1] = 'x'; + bytes[2] = hexChars[firstByte >> 4]; + bytes[3] = hexChars[firstByte & 0x0f]; + bytes[4] = '\0'; + return bytes; +} + +/* Lexer core */ + +static int yylex_NORMAL(void) +{ + dbgPrint("Lexing in normal mode, line=%" PRIu32 ", col=%" PRIu32 "\n", + lexer_GetLineNo(), lexer_GetColNo()); + for (;;) { + int c = nextChar(); + + switch (c) { + /* Ignore whitespace and comments */ + + case '*': + if (!lexerState->atLineStart) + return T_OP_MUL; + warning(WARNING_OBSOLETE, + "'*' is deprecated for comments, please use ';' instead\n"); + /* fallthrough */ + case ';': + discardComment(); + /* fallthrough */ + case ' ': + case '\t': + break; + + /* Handle unambiguous single-char tokens */ + + case '^': + return T_OP_XOR; + case '+': + return T_OP_ADD; + case '-': + return T_OP_SUB; + case '/': + return T_OP_DIV; + case '~': + return T_OP_NOT; + + case '@': + yylval.tzSym[0] = '@'; + yylval.tzSym[1] = '\0'; + return T_ID; + + /* Handle accepted single chars */ + + case '[': + case ']': + case '(': + case ')': + case ',': + case ':': + return c; + + /* Handle ambiguous 1- or 2-char tokens */ + char secondChar; + + case '|': /* Either binary or logical OR */ + secondChar = peek(0); + if (secondChar == '|') { + shiftChars(1); + return T_OP_LOGICOR; + } + return T_OP_OR; + + case '=': /* Either SET alias, or EQ */ + secondChar = peek(0); + if (secondChar == '=') { + shiftChars(1); + return 
T_OP_LOGICEQU; + } + return T_POP_EQUAL; + + case '<': /* Either a LT, LTE, or left shift */ + secondChar = peek(0); + if (secondChar == '=') { + shiftChars(1); + return T_OP_LOGICLE; + } else if (secondChar == '<') { + shiftChars(1); + return T_OP_SHL; + } + return T_OP_LOGICLT; + + case '>': /* Either a GT, GTE, or right shift */ + secondChar = peek(0); + if (secondChar == '=') { + shiftChars(1); + return T_OP_LOGICGE; + } else if (secondChar == '>') { + shiftChars(1); + return T_OP_SHR; + } + return T_OP_LOGICGT; + + case '!': /* Either a NEQ, or negation */ + secondChar = peek(0); + if (secondChar == '=') { + shiftChars(1); + return T_OP_LOGICNE; + } + return T_OP_LOGICNOT; + + /* Handle numbers */ + + case '$': + yylval.nConstValue = 0; + readHexNumber(); + /* Attempt to match `$ff00+c` */ + if (yylval.nConstValue == 0xff00) { + /* Whitespace is ignored anyways */ + while (isWhitespace(c = peek(0))) + shiftChars(1); + if (c == '+') { + /* FIXME: not great due to large lookahead */ + uint8_t distance = 1; + + do { + c = peek(distance++); + } while (isWhitespace(c)); + + if (c == 'c' || c == 'C') { + shiftChars(distance); + return T_MODE_HW_C; } } - break; + } + return T_NUMBER; + + case '0': /* Decimal number */ + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + readNumber(10, c - '0'); + if (peek(0) == '.') { + shiftChars(1); + readFractionalPart(); + } + return T_NUMBER; + + case '&': + secondChar = peek(0); + if (secondChar == '&') { + shiftChars(1); + return T_OP_LOGICAND; + } else if (secondChar >= '0' && secondChar <= '7') { + readNumber(8, 0); + return T_NUMBER; + } + return T_OP_AND; + + case '%': /* Either a modulo, or a binary constant */ + secondChar = peek(0); + if (secondChar != binDigits[0] && secondChar != binDigits[1]) + return T_OP_MOD; + + yylval.nConstValue = 0; + readBinaryNumber(); + return T_NUMBER; + + case '`': /* Gfx constant */ + readGfxConstant(); + return T_NUMBER; + + /* 
Handle strings */ + + case '"': + readString(); + return T_STRING; + + /* Handle newlines and EOF */ + + case '\r': + return '\r'; + case '\n': + return '\n'; + + case EOF: + return 0; + + /* Handle escapes */ + + case '\\': + c = peek(0); + + switch (c) { + case ' ': + case '\r': case '\n': - /* Line continuation character */ - nLineNo++; - ch = 0; + readLineContinuation(); + break; + + case EOF: + error("Illegal character escape at end of input\n"); break; default: - maxLength = MAXSTRLEN - index; - length = CopyMacroArg(&yylval.tzString[index], - maxLength, ch); + shiftChars(1); + error("Illegal character escape '%s'\n", print(c)); + } + break; - if (length != 0) - index += length; - else - fatalerror("Illegal character escape '%c'\n", ch); + /* Handle identifiers and escapes... or error out */ - ch = 0; + default: + if (startsIdentifier(c)) { + int tokenType = readIdentifier(c); + + /* If a keyword, don't try to expand */ + if (tokenType != T_ID && tokenType != T_LOCAL_ID) + return tokenType; + + if (lexerState->expandStrings) { + /* Attempt string expansion */ + struct Symbol const *sym = sym_FindSymbol(yylval.tzSym); + + if (sym && sym->type == SYM_EQUS) { + char const *s = sym_GetStringValue(sym); + + beginExpansion(0, 0, s, strlen(s), sym->name); + continue; /* Restart, reading from the new buffer */ + } + } + + if (tokenType == T_ID && lexerState->atLineStart) + return T_LABEL; + + return tokenType; + } + + /* Do not report weird characters when capturing, it'll be done later */ + if (!lexerState->capturing) { + /* TODO: try to group reportings */ + error("Unknown character %s\n", reportGarbageChar(c)); + } + } + lexerState->atLineStart = false; + } +} + +static int yylex_RAW(void) +{ + dbgPrint("Lexing in raw mode, line=%" PRIu32 ", col=%" PRIu32 "\n", + lexer_GetLineNo(), lexer_GetColNo()); + + /* This is essentially a modified `readString` */ + size_t i = 0; + bool insideString = false; + + /* Trim left of string... 
*/ + while (isWhitespace(peek(0))) + shiftChars(1); + + for (;;) { + int c = peek(0); + + switch (c) { + case '"': + insideString = !insideString; + /* Other than that, just process quotes normally */ + break; + + case ';': /* Comments inside macro args */ + if (insideString) + break; + discardComment(); + c = peek(0); + /* fallthrough */ + case ',': + case '\r': + case '\n': + case EOF: + if (i == sizeof(yylval.tzString)) { + i--; + warning(WARNING_LONG_STR, "Macro argument too long\n"); + } + /* Trim whitespace */ + while (i && isWhitespace(yylval.tzString[i - 1])) + i--; + /* Empty macro args break their expansion, so prevent that */ + if (i == 0) { + /* Return the EOF token, and don't shift a non-existent char! */ + if (c == EOF) + return 0; + shiftChars(1); + return c; + } + yylval.tzString[i] = '\0'; + dbgPrint("Read raw string \"%s\"\n", yylval.tzString); + return T_STRING; + + case '\\': /* Character escape */ + c = peek(1); + switch (c) { + case ',': + shiftChars(1); + break; + + case ' ': + case '\r': + case '\n': + shiftChars(1); /* Shift the backslash */ + readLineContinuation(); + continue; + + case EOF: /* Can't really print that one */ + error("Illegal character escape at end of input\n"); + c = '\\'; + break; + default: /* Pass the rest as-is */ + c = '\\'; break; } - } else if (ch == '{') { - index += yylex_ReadBracketedSymbol(yylval.tzString, - index); - ch = 0; + break; + + case '{': /* Symbol interpolation */ + shiftChars(1); + char const *ptr = readInterpolation(); + + if (ptr) { + while (*ptr) { + if (i == sizeof(yylval.tzString)) + break; + yylval.tzString[i++] = *ptr++; + } + } + continue; /* Do not copy an additional character */ + + /* Regular characters will just get copied */ } - if (ch) - yylex_StringWriteChar(yylval.tzString, index++, ch); + if (i < sizeof(yylval.tzString)) /* Copy one extra to flag overflow */ + yylval.tzString[i++] = c; + shiftChars(1); } +} - if (index) { - yylex_StringWriteChar(yylval.tzString, index, 0); +/* + * 
This function uses the fact that `if`, etc. constructs are only valid when + * there's nothing before them on their lines. This enables filtering + * "meaningful" (= at line start) vs. "meaningless" (everything else) tokens. + * It's especially important due to macro args not being handled in this + * state, and lexing them in "normal" mode potentially producing such tokens. + */ +static int skipIfBlock(bool toEndc) +{ + dbgPrint("Skipping IF block (toEndc = %s)\n", toEndc ? "true" : "false"); + lexer_SetMode(LEXER_NORMAL); + int startingDepth = nIFDepth; + int token; + bool atLineStart = lexerState->atLineStart; - /* trim trailing white space at the end of the line */ - if (*pLexBuffer == '\n') - yylex_TrimEnd(yylval.tzString, index); + /* Prevent expanding macro args in this state */ + lexerState->disableMacroArgs = true; - return T_STRING; - } else if (*pLexBuffer == '\n') { - pLexBuffer++; - AtLineStart = 1; - return '\n'; - } else if (*pLexBuffer == ',') { - pLexBuffer++; - return ','; + for (;;) { + if (atLineStart) { + int c; + + for (;;) { + c = peek(0); + if (!isWhitespace(c)) + break; + shiftChars(1); + } + + if (startsIdentifier(c)) { + shiftChars(1); + token = readIdentifier(c); + switch (token) { + case T_POP_IF: + nIFDepth++; + break; + + case T_POP_ELIF: + case T_POP_ELSE: + if (toEndc) /* Ignore ELIF and ELSE, go to ENDC */ + break; + /* fallthrough */ + case T_POP_ENDC: + if (nIFDepth == startingDepth) + goto finish; + if (token == T_POP_ENDC) + nIFDepth--; + } + } + atLineStart = false; + } + + /* Read chars until EOL */ + do { + int c = nextChar(); + + if (c == EOF) { + token = 0; + goto finish; + } else if (c == '\\') { + /* Unconditionally skip the next char, including line conts */ + c = nextChar(); + } else if (c == '\r' || c == '\n') { + atLineStart = true; + } + + if (c == '\r' || c == '\n') + /* Do this both on line continuations and plain EOLs */ + lexerState->lineNo++; + /* Handle CRLF */ + if (c == '\r' && peek(0) == '\n') + 
shiftChars(1); + } while (!atLineStart); } +finish: - fatalerror("Internal error in %s\n", __func__); + lexerState->disableMacroArgs = false; + lexerState->atLineStart = false; + + return token; +} + +static int yylex_SKIP_TO_ELIF(void) +{ + return skipIfBlock(false); +} + +static int yylex_SKIP_TO_ENDC(void) +{ + return skipIfBlock(true); } int yylex(void) { - int returnedChar; - - switch (lexerstate) { - case LEX_STATE_NORMAL: - returnedChar = yylex_NORMAL(); - break; - case LEX_STATE_MACROARGS: - returnedChar = yylex_MACROARGS(); - break; - default: - fatalerror("%s: Internal error.\n", __func__); +restart: + if (lexerState->atLineStart && lexerStateEOL) { + lexer_SetState(lexerStateEOL); + lexerStateEOL = NULL; + } + if (lexerState->atLineStart) { + /* Newlines read within an expansion should not increase the line count */ + if (!lexerState->expansions || lexerState->expansions->distance) { + lexerState->lineNo++; + lexerState->colNo = 0; + } } - /* Check if string expansions were fully read */ - while (pCurrentStringExpansion - && pCurrentStringExpansion->pBuffer == pLexBufferRealStart - && pCurrentStringExpansion->pBufferPos <= pLexBuffer) { - struct sStringExpansionPos *pParent = - pCurrentStringExpansion->pParent; - free(pCurrentStringExpansion->tzName); - free(pCurrentStringExpansion); + static int (* const lexerModeFuncs[])(void) = { + [LEXER_NORMAL] = yylex_NORMAL, + [LEXER_RAW] = yylex_RAW, + [LEXER_SKIP_TO_ELIF] = yylex_SKIP_TO_ELIF, + [LEXER_SKIP_TO_ENDC] = yylex_SKIP_TO_ENDC + }; + int token = lexerModeFuncs[lexerState->mode](); - pCurrentStringExpansion = pParent; - nNbStringExpansions--; + /* Make sure to terminate files with a line feed */ + if (token == 0) { + if (lexerState->lastToken != '\n') { + dbgPrint("Forcing EOL at EOF\n"); + token = '\n'; + } else { /* Try to switch to new buffer; if it succeeds, scan again */ + dbgPrint("Reached EOF!\n"); + /* Captures end at their buffer's boundary no matter what */ + if (!lexerState->capturing) { + if 
(!yywrap()) + goto restart; + dbgPrint("Reached end of input."); + return 0; + } + } + } else if (token == '\r') { /* Handle CR and CRLF line endings */ + token = '\n'; /* We universally use '\n' as the value for line ending tokens */ + if (peek(0) == '\n') + shiftChars(1); /* Shift the CRLF's LF */ } + lexerState->lastToken = token; - return returnedChar; + lexerState->atLineStart = false; + if (token == '\n') + lexerState->atLineStart = true; + + return token; +} + +static char *startCapture(void) +{ + assert(!lexerState->expansions); + + lexerState->capturing = true; + lexerState->captureSize = 0; + lexerState->disableMacroArgs = true; + + if (lexerState->isMmapped) { + return &lexerState->ptr[lexerState->offset]; + } else { + lexerState->captureCapacity = 128; /* The initial size will be twice that */ + reallocCaptureBuf(); + return lexerState->captureBuf; + } +} + +void lexer_CaptureRept(char **capture, size_t *size) +{ + char *captureStart = startCapture(); + unsigned int level = 0; + int c; + + /* + * Due to parser internals, it reads the EOL after the expression before calling this. + * Thus, we don't need to keep one in the buffer afterwards. + * The following assertion checks that. 
+ */ + assert(lexerState->atLineStart); + for (;;) { + lexerState->lineNo++; + /* We're at line start, so attempt to match a `REPT` or `ENDR` token */ + do { /* Discard initial whitespace */ + c = nextChar(); + } while (isWhitespace(c)); + /* Now, try to match either `REPT` or `ENDR` as a **whole** identifier */ + if (startsIdentifier(c)) { + switch (readIdentifier(c)) { + case T_POP_REPT: + level++; + /* Ignore the rest of that line */ + break; + + case T_POP_ENDR: + if (!level) { + /* Read (but don't capture) until EOL or EOF */ + lexerState->capturing = false; + do { + c = nextChar(); + } while (c != EOF && c != '\r' && c != '\n'); + /* Handle Windows CRLF */ + if (c == '\r' && peek(0) == '\n') + shiftChars(1); + goto finish; + } + level--; + } + } + + /* Just consume characters until EOL or EOF */ + for (;;) { + if (c == EOF) { + error("Unterminated REPT block\n"); + lexerState->capturing = false; + goto finish; + } else if (c == '\n') { + break; + } else if (c == '\r') { + if (peek(0) == '\n') + shiftChars(1); + break; + } + c = nextChar(); + } + } + +finish: + assert(!lexerState->capturing); + *capture = captureStart; + *size = lexerState->captureSize - strlen("ENDR"); + lexerState->captureBuf = NULL; + lexerState->disableMacroArgs = false; +} + +void lexer_CaptureMacroBody(char **capture, size_t *size) +{ + char *captureStart = startCapture(); + unsigned int level = 0; + int c = peek(0); + + /* If the file is `mmap`ed, we need not to unmap it to keep access to the macro */ + if (lexerState->isMmapped) + lexerState->isReferenced = true; + + /* + * Due to parser internals, it does not read the EOL after the T_POP_MACRO before calling + * this. Thus, we need to keep one in the buffer afterwards. + * (Note that this also means the captured buffer begins with a newline and maybe comment) + * The following assertion checks that. 
+ */ + assert(!lexerState->atLineStart); + for (;;) { + /* Just consume characters until EOL or EOF */ + for (;;) { + if (c == EOF) { + error("Unterminated macro definition\n"); + lexerState->capturing = false; + goto finish; + } else if (c == '\n') { + break; + } else if (c == '\r') { + if (peek(0) == '\n') + shiftChars(1); + break; + } + c = nextChar(); + } + + /* We're at line start, attempt to match a `label: MACRO` line or `ENDM` token */ + do { /* Discard initial whitespace */ + c = nextChar(); + } while (isWhitespace(c)); + /* Now, try to match either `REPT` or `ENDR` as a **whole** identifier */ + if (startsIdentifier(c)) { + switch (readIdentifier(c)) { + case T_ID: + /* We have an initial label, look for a single colon */ + do { + c = nextChar(); + } while (isWhitespace(c)); + if (c != ':') /* If not a colon, give up */ + break; + /* And finally, a `MACRO` token */ + do { + c = nextChar(); + } while (isWhitespace(c)); + if (!startsIdentifier(c)) + break; + if (readIdentifier(c) != T_POP_MACRO) + break; + level++; + break; + + case T_POP_ENDM: + if (!level) { + /* Read (but don't capture) until EOL or EOF */ + lexerState->capturing = false; + do { + c = peek(0); + if (c == EOF || c == '\r' || c == '\n') + break; + shiftChars(1); + } while (c != EOF && c != '\r' && c != '\n'); + /* Handle Windows CRLF */ + if (c == '\r' && peek(1) == '\n') + shiftChars(1); + goto finish; + } + level--; + } + } + lexerState->lineNo++; + } + +finish: + assert(!lexerState->capturing); + *capture = captureStart; + *size = lexerState->captureSize - strlen("ENDM"); + lexerState->captureBuf = NULL; + lexerState->disableMacroArgs = false; } diff --git a/src/asm/macro.c b/src/asm/macro.c index 852a4bbd..31037284 100644 --- a/src/asm/macro.c +++ b/src/asm/macro.c @@ -29,7 +29,8 @@ struct MacroArgs { sizeof(((struct MacroArgs){0}).args[0]) * (nbArgs)) static struct MacroArgs *macroArgs = NULL; -static uint32_t uniqueID = -1; +static uint32_t uniqueID = 0; +static uint32_t maxUniqueID 
= 0; /* * The initialization is somewhat harmful, since it is never used, but it * guarantees the size of the buffer will be correct. I was unable to find a @@ -61,7 +62,7 @@ void macro_AppendArg(struct MacroArgs **argPtr, char *s) #define macArgs (*argPtr) if (macArgs->nbArgs == MAXMACROARGS) error("A maximum of " EXPAND_AND_STR(MAXMACROARGS) - " arguments is allowed\n"); + " arguments is allowed\n"); if (macArgs->nbArgs >= macArgs->capacity) { macArgs->capacity *= 2; /* Check that overflow didn't roll us back */ @@ -88,6 +89,9 @@ void macro_FreeArgs(struct MacroArgs *args) char const *macro_GetArg(uint32_t i) { + if (!macroArgs) + return NULL; + uint32_t realIndex = i + macroArgs->shift - 1; return realIndex >= macroArgs->nbArgs ? NULL @@ -107,15 +111,23 @@ char const *macro_GetUniqueIDStr(void) void macro_SetUniqueID(uint32_t id) { uniqueID = id; - if (id == -1) { + if (id == 0) { uniqueIDPtr = NULL; } else { + if (uniqueID > maxUniqueID) + maxUniqueID = uniqueID; /* The buffer is guaranteed to be the correct size */ sprintf(uniqueIDBuf, "_%" PRIu32, id); uniqueIDPtr = uniqueIDBuf; } } +uint32_t macro_UseNewUniqueID(void) +{ + macro_SetUniqueID(++maxUniqueID); + return maxUniqueID; +} + void macro_ShiftCurrentArgs(void) { if (macroArgs->shift != macroArgs->nbArgs) diff --git a/src/asm/main.c b/src/asm/main.c index 2e94fe4c..c57b7fba 100644 --- a/src/asm/main.c +++ b/src/asm/main.c @@ -6,6 +6,7 @@ * SPDX-License-Identifier: MIT */ +#include #include #include #include @@ -22,8 +23,10 @@ #include "asm/lexer.h" #include "asm/main.h" #include "asm/output.h" +#include "asm/rpn.h" #include "asm/symbol.h" #include "asm/warning.h" +#include "asmy.h" #include "extern/err.h" #include "extern/getopt.h" @@ -31,8 +34,6 @@ #include "helpers.h" #include "version.h" -extern int yyparse(void); - size_t cldefines_index; size_t cldefines_numindices; size_t cldefines_bufsize; @@ -41,10 +42,6 @@ char **cldefines; clock_t nStartClock, nEndClock; uint32_t nTotalLines, nIFDepth; -bool 
skipElif; -uint32_t unionStart[128], unionSize[128]; - -int32_t nLineNo; #if defined(YYDEBUG) && YYDEBUG extern int yydebug; @@ -74,66 +71,11 @@ struct sOptionStackEntry { struct sOptionStackEntry *pOptionStack; -void opt_SetCurrentOptions(struct sOptions *pOpt) +void opt_SetCurrentOptions(struct sOptions *opt) { - if (nGBGfxID != -1) { - lex_FloatDeleteRange(nGBGfxID, CurrentOptions.gbgfx[0], - CurrentOptions.gbgfx[0]); - lex_FloatDeleteRange(nGBGfxID, CurrentOptions.gbgfx[1], - CurrentOptions.gbgfx[1]); - lex_FloatDeleteRange(nGBGfxID, CurrentOptions.gbgfx[2], - CurrentOptions.gbgfx[2]); - lex_FloatDeleteRange(nGBGfxID, CurrentOptions.gbgfx[3], - CurrentOptions.gbgfx[3]); - lex_FloatDeleteSecondRange(nGBGfxID, CurrentOptions.gbgfx[0], - CurrentOptions.gbgfx[0]); - lex_FloatDeleteSecondRange(nGBGfxID, CurrentOptions.gbgfx[1], - CurrentOptions.gbgfx[1]); - lex_FloatDeleteSecondRange(nGBGfxID, CurrentOptions.gbgfx[2], - CurrentOptions.gbgfx[2]); - lex_FloatDeleteSecondRange(nGBGfxID, CurrentOptions.gbgfx[3], - CurrentOptions.gbgfx[3]); - } - if (nBinaryID != -1) { - lex_FloatDeleteRange(nBinaryID, CurrentOptions.binary[0], - CurrentOptions.binary[0]); - lex_FloatDeleteRange(nBinaryID, CurrentOptions.binary[1], - CurrentOptions.binary[1]); - lex_FloatDeleteSecondRange(nBinaryID, CurrentOptions.binary[0], - CurrentOptions.binary[0]); - lex_FloatDeleteSecondRange(nBinaryID, CurrentOptions.binary[1], - CurrentOptions.binary[1]); - } - CurrentOptions = *pOpt; - - if (nGBGfxID != -1) { - lex_FloatAddRange(nGBGfxID, CurrentOptions.gbgfx[0], - CurrentOptions.gbgfx[0]); - lex_FloatAddRange(nGBGfxID, CurrentOptions.gbgfx[1], - CurrentOptions.gbgfx[1]); - lex_FloatAddRange(nGBGfxID, CurrentOptions.gbgfx[2], - CurrentOptions.gbgfx[2]); - lex_FloatAddRange(nGBGfxID, CurrentOptions.gbgfx[3], - CurrentOptions.gbgfx[3]); - lex_FloatAddSecondRange(nGBGfxID, CurrentOptions.gbgfx[0], - CurrentOptions.gbgfx[0]); - lex_FloatAddSecondRange(nGBGfxID, CurrentOptions.gbgfx[1], - 
CurrentOptions.gbgfx[1]); - lex_FloatAddSecondRange(nGBGfxID, CurrentOptions.gbgfx[2], - CurrentOptions.gbgfx[2]); - lex_FloatAddSecondRange(nGBGfxID, CurrentOptions.gbgfx[3], - CurrentOptions.gbgfx[3]); - } - if (nBinaryID != -1) { - lex_FloatAddRange(nBinaryID, CurrentOptions.binary[0], - CurrentOptions.binary[0]); - lex_FloatAddRange(nBinaryID, CurrentOptions.binary[1], - CurrentOptions.binary[1]); - lex_FloatAddSecondRange(nBinaryID, CurrentOptions.binary[0], - CurrentOptions.binary[0]); - lex_FloatAddSecondRange(nBinaryID, CurrentOptions.binary[1], - CurrentOptions.binary[1]); - } + CurrentOptions = *opt; + lexer_SetGfxDigits(CurrentOptions.gbgfx); + lexer_SetBinDigits(CurrentOptions.binary); } void opt_Parse(char *s) @@ -251,6 +193,22 @@ static void opt_ParseDefines(void) sym_AddString(cldefines[i], cldefines[i + 1]); } +void upperstring(char *s) +{ + while (*s) { + *s = toupper(*s); + s++; + } +} + +void lowerstring(char *s) +{ + while (*s) { + *s = tolower(*s); + s++; + } +} + /* Escapes Make-special chars from a string */ static char *make_escape(const char *str) { @@ -350,11 +308,11 @@ int main(int argc, char *argv[]) yydebug = 1; #endif - nMaxRecursionDepth = 64; oGeneratePhonyDeps = false; oGeneratedMissingIncludes = false; oFailedOnMissingInclude = false; tzTargetFileName = NULL; + uint32_t maxRecursionDepth = 64; size_t nTargetFileNameLen = 0; DefaultOptions.gbgfx[0] = '0'; @@ -433,7 +391,7 @@ int main(int argc, char *argv[]) break; case 'r': - nMaxRecursionDepth = strtoul(optarg, &ep, 0); + maxRecursionDepth = strtoul(optarg, &ep, 0); if (optarg[0] == '\0' || *ep != '\0') errx(1, "Invalid argument for option 'r'"); @@ -516,8 +474,6 @@ int main(int argc, char *argv[]) tzMainfile = argv[argc - 1]; - setup_lexer(); - if (verbose) printf("Assembling %s\n", tzMainfile); @@ -528,19 +484,20 @@ int main(int argc, char *argv[]) fprintf(dependfile, "%s: %s\n", tzTargetFileName, tzMainfile); } + /* Init file stack; important to do first, since it provides the 
file name, line, etc */ + lexer_Init(); + fstk_Init(tzMainfile, maxRecursionDepth); + nStartClock = clock(); - nLineNo = 1; nTotalLines = 0; nIFDepth = 0; - skipElif = true; sym_Init(); sym_SetExportAll(exportall); - fstk_Init(tzMainfile); + opt_ParseDefines(); charmap_New("main", NULL); - yy_set_state(LEX_STATE_NORMAL); opt_SetCurrentOptions(&DefaultOptions); if (yyparse() != 0 || nbErrors != 0) diff --git a/src/asm/output.c b/src/asm/output.c index 45f2f1e1..a5168338 100644 --- a/src/asm/output.c +++ b/src/asm/output.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -33,7 +34,8 @@ #include "platform.h" // strdup struct Patch { - char tzFilename[_MAX_PATH + 1]; + struct FileStackNode const *src; + uint32_t lineNo; uint32_t nOffset; struct Section *pcSection; uint32_t pcOffset; @@ -62,19 +64,17 @@ static uint32_t nbSymbols = 0; /* Length of the above list */ static struct Assertion *assertions = NULL; +static struct FileStackNode *fileStackNodes = NULL; + /* * Count the number of sections used in this object */ static uint32_t countsections(void) { - struct Section *sect; uint32_t count = 0; - sect = pSectionList; - while (sect) { + for (struct Section const *sect = pSectionList; sect; sect = sect->next) count++; - sect = sect->next; - } return count; } @@ -129,16 +129,60 @@ static void fputstring(char const *s, FILE *f) fputc(0, f); } +static uint32_t getNbFileStackNodes(void) +{ + return fileStackNodes ? 
fileStackNodes->ID + 1 : 0; +} + +void out_RegisterNode(struct FileStackNode *node) +{ + /* If node is not already registered, register it (and parents), and give it a unique ID */ + while (node->ID == -1) { + node->ID = getNbFileStackNodes(); + if (node->ID == -1) + fatalerror("Reached too many file stack nodes; try splitting the file up\n"); + node->next = fileStackNodes; + fileStackNodes = node; + + /* Also register the node's parents */ + node = node->parent; + if (!node) + break; + } +} + +void out_ReplaceNode(struct FileStackNode *node) +{ + (void)node; +#if 0 +This is code intended to replace a node, which is pretty useless until ref counting is added... + + struct FileStackNode **ptr = &fileStackNodes; + + /* + * The linked list is supposed to have decrementing IDs, so iterate with less memory reads, + * to hopefully hit the cache less. A debug check is added after, in case a change is made + * that breaks this assumption. + */ + for (uint32_t i = fileStackNodes->ID; i != node->ID; i--) + ptr = &(*ptr)->next; + assert((*ptr)->ID == node->ID); + + node->next = (*ptr)->next; + assert(!node->next || node->next->ID == node->ID - 1); /* Catch inconsistencies early */ + /* TODO: unreference the node */ + *ptr = node; +#endif +} + /* * Return a section's ID */ static uint32_t getsectid(struct Section const *sect) { - struct Section const *sec; + struct Section const *sec = pSectionList; uint32_t ID = 0; - sec = pSectionList; - while (sec) { if (sec == sect) return ID; @@ -159,7 +203,10 @@ static uint32_t getSectIDIfAny(struct Section const *sect) */ static void writepatch(struct Patch const *patch, FILE *f) { - fputstring(patch->tzFilename, f); + assert(patch->src->ID != -1); + + fputlong(patch->src->ID, f); + fputlong(patch->lineNo, f); fputlong(patch->nOffset, f); fputlong(getSectIDIfAny(patch->pcSection), f); fputlong(patch->pcOffset, f); @@ -206,26 +253,35 @@ static void writesymbol(struct Symbol const *sym, FILE *f) if (!sym_IsDefined(sym)) { 
fputc(SYMTYPE_IMPORT, f); } else { + assert(sym->src->ID != -1); + fputc(sym->isExported ? SYMTYPE_EXPORT : SYMTYPE_LOCAL, f); - fputstring(sym->fileName, f); + fputlong(sym->src->ID, f); fputlong(sym->fileLine, f); fputlong(getSectIDIfAny(sym_GetSection(sym)), f); fputlong(sym->value, f); } } +static void registerSymbol(struct Symbol *sym) +{ + *objectSymbolsTail = sym; + objectSymbolsTail = &sym->next; + out_RegisterNode(sym->src); + if (nbSymbols == -1) + fatalerror("Registered too many symbols (%" PRIu32 + "); try splitting up your files\n", (uint32_t)-1); + sym->ID = nbSymbols++; +} + /* * Returns a symbol's ID within the object file * If the symbol does not have one, one is assigned by registering the symbol */ static uint32_t getSymbolID(struct Symbol *sym) { - if (sym->ID == -1) { - sym->ID = nbSymbols++; - - *objectSymbolsTail = sym; - objectSymbolsTail = &sym->next; - } + if (sym->ID == -1 && !sym_IsPC(sym)) + registerSymbol(sym); return sym->ID; } @@ -303,22 +359,25 @@ static void writerpn(uint8_t *rpnexpr, uint32_t *rpnptr, uint8_t *rpn, /* * Allocate a new patch structure and link it into the list + * WARNING: all patches are assumed to eventually be written, so the file stack node is registered */ -static struct Patch *allocpatch(uint32_t type, struct Expression const *expr, - uint32_t ofs) +static struct Patch *allocpatch(uint32_t type, struct Expression const *expr, uint32_t ofs) { struct Patch *patch = malloc(sizeof(struct Patch)); uint32_t rpnSize = expr->isKnown ? 
5 : expr->nRPNPatchSize; + struct FileStackNode *node = fstk_GetFileStack(); if (!patch) fatalerror("No memory for patch: %s\n", strerror(errno)); - patch->pRPN = malloc(sizeof(*patch->pRPN) * rpnSize); + patch->pRPN = malloc(sizeof(*patch->pRPN) * rpnSize); if (!patch->pRPN) fatalerror("No memory for patch's RPN expression: %s\n", strerror(errno)); patch->type = type; - fstk_DumpToStr(patch->tzFilename, sizeof(patch->tzFilename)); + patch->src = node; + out_RegisterNode(node); + patch->lineNo = lexer_GetLineNo(); patch->nOffset = ofs; patch->pcSection = sect_GetSymbolSection(); patch->pcOffset = sect_GetSymbolOffset(); @@ -382,13 +441,28 @@ static void writeassert(struct Assertion *assert, FILE *f) fputstring(assert->message, f); } +static void writeFileStackNode(struct FileStackNode const *node, FILE *f) +{ + fputlong(node->parent ? node->parent->ID : -1, f); + fputlong(node->lineNo, f); + fputc(node->type, f); + if (node->type != NODE_REPT) { + fputstring(((struct FileStackNamedNode const *)node)->name, f); + } else { + struct FileStackReptNode const *reptNode = (struct FileStackReptNode const *)node; + + fputlong(reptNode->reptDepth, f); + /* Iters are stored by decreasing depth, so reverse the order for output */ + for (uint32_t i = reptNode->reptDepth; i--; ) + fputlong(reptNode->iters[i], f); + } +} + static void registerExportedSymbol(struct Symbol *symbol, void *arg) { (void)arg; if (sym_IsExported(symbol) && symbol->ID == -1) { - *objectSymbolsTail = symbol; - objectSymbolsTail = &symbol->next; - nbSymbols++; + registerSymbol(symbol); } } @@ -411,6 +485,15 @@ void out_WriteObject(void) fputlong(nbSymbols, f); fputlong(countsections(), f); + fputlong(getNbFileStackNodes(), f); + for (struct FileStackNode const *node = fileStackNodes; node; node = node->next) { + writeFileStackNode(node, f); + if (node->next && node->next->ID != node->ID - 1) + fatalerror("Internal error: fstack node #%" PRIu32 " follows #%" PRIu32 + ". 
Please report this to the developers!\n", + node->next->ID, node->ID); + } + for (struct Symbol const *sym = objectSymbols; sym; sym = sym->next) writesymbol(sym, f); diff --git a/src/asm/rpn.c b/src/asm/rpn.c index 991dbb3f..bb7aa871 100644 --- a/src/asm/rpn.c +++ b/src/asm/rpn.c @@ -258,8 +258,8 @@ static int32_t shift(int32_t shiftee, int32_t amount) if (amount >= 0) { // Left shift if (amount >= 32) { - warning(WARNING_SHIFT_AMOUNT, "Shifting left by large amount %" PRId32 "\n", - amount); + warning(WARNING_SHIFT_AMOUNT, "Shifting left by large amount %" + PRId32 "\n", amount); return 0; } else { diff --git a/src/asm/section.c b/src/asm/section.c index 3658dd67..ee5d5144 100644 --- a/src/asm/section.c +++ b/src/asm/section.c @@ -656,9 +656,15 @@ void out_BinaryFile(char const *s, int32_t startPos) startPos = 0; } - FILE *f = fstk_FindFile(s, NULL); + char *fullPath = NULL; + size_t size = 0; + FILE *f = NULL; + + if (fstk_FindFile(s, &fullPath, &size)) + f = fopen(fullPath, "rb"); if (!f) { + free(fullPath); if (oGeneratedMissingIncludes) { oFailedOnMissingInclude = true; return; @@ -699,6 +705,7 @@ void out_BinaryFile(char const *s, int32_t startPos) error("Error reading INCBIN file '%s': %s\n", s, strerror(errno)); fclose(f); + free(fullPath); } void out_BinaryFileSlice(char const *s, int32_t start_pos, int32_t length) @@ -715,9 +722,15 @@ void out_BinaryFileSlice(char const *s, int32_t start_pos, int32_t length) if (length == 0) /* Don't even bother with 0-byte slices */ return; - FILE *f = fstk_FindFile(s, NULL); + char *fullPath = NULL; + size_t size = 0; + FILE *f = NULL; + + if (fstk_FindFile(s, &fullPath, &size)) + f = fopen(fullPath, "rb"); if (!f) { + free(fullPath); if (oGeneratedMissingIncludes) { oFailedOnMissingInclude = true; return; @@ -767,6 +780,7 @@ void out_BinaryFileSlice(char const *s, int32_t start_pos, int32_t length) } fclose(f); + free(fullPath); } /* diff --git a/src/asm/symbol.c b/src/asm/symbol.c index d7d219e6..59c77541 100644 --- 
a/src/asm/symbol.c +++ b/src/asm/symbol.c @@ -23,6 +23,7 @@ #include "asm/macro.h" #include "asm/main.h" #include "asm/mymath.h" +#include "asm/output.h" #include "asm/section.h" #include "asm/symbol.h" #include "asm/util.h" @@ -77,12 +78,55 @@ void sym_ForEach(void (*func)(struct Symbol *, void *), void *arg) static int32_t Callback_NARG(void) { + if (!macro_GetCurrentArgs()) { + error("_NARG does not make sense outside of a macro\n"); + return 0; + } return macro_NbArgs(); } static int32_t Callback__LINE__(void) { - return nLineNo; + return lexer_GetLineNo(); +} + +static char const *Callback__FILE__(void) +{ + /* + * FIXME: this is dangerous, and here's why this is CURRENTLY okay. It's still bad, fix it. + * There are only two call sites for this; one copies the contents directly, the other is + * EQUS expansions, which cannot straddle file boundaries. So this should be fine. + */ + static char *buf = NULL; + static size_t bufsize = 0; + char const *fileName = fstk_GetFileName(); + size_t j = 1; + + /* TODO: is there a way for a file name to be empty? */ + assert(fileName[0]); + /* The assertion above ensures the loop runs at least once */ + for (size_t i = 0; fileName[i]; i++, j++) { + /* Account for the extra backslash inserted below */ + if (fileName[i] == '"') + j++; + /* Ensure there will be enough room; DO NOT PRINT ANYTHING ABOVE THIS!! */ + if (j + 2 >= bufsize) { /* Always keep room for 2 tail chars */ + bufsize = bufsize ? 
bufsize * 2 : 64; + buf = realloc(buf, bufsize); + if (!buf) + fatalerror("Failed to grow buffer for file name: %s\n", + strerror(errno)); + } + /* Escape quotes, since we're returning a string */ + if (fileName[i] == '"') + buf[j - 1] = '\\'; + buf[j] = fileName[i]; + } + /* Write everything after the loop, to ensure the buffer has been allocated */ + buf[0] = '"'; + buf[j++] = '"'; + buf[j] = '\0'; + return buf; } static int32_t CallbackPC(void) @@ -97,8 +141,8 @@ static int32_t CallbackPC(void) */ int32_t sym_GetValue(struct Symbol const *sym) { - if (sym_IsNumeric(sym) && sym->callback) - return sym->callback(); + if (sym_IsNumeric(sym) && sym->hasCallback) + return sym->numCallback(); if (sym->type == SYM_LABEL) /* TODO: do not use section's org directly */ @@ -107,15 +151,35 @@ int32_t sym_GetValue(struct Symbol const *sym) return sym->value; } +static void dumpFilename(struct Symbol const *sym) +{ + if (!sym->src) + fputs("", stderr); + else + fstk_Dump(sym->src, sym->fileLine); +} + +/* + * Set a symbol's definition filename and line + */ +static void setSymbolFilename(struct Symbol *sym) +{ + sym->src = fstk_GetFileStack(); + sym->fileLine = lexer_GetLineNo(); +} + /* * Update a symbol's definition filename and line */ static void updateSymbolFilename(struct Symbol *sym) { - if (snprintf(sym->fileName, _MAX_PATH + 1, "%s", - tzCurrentFileName) > _MAX_PATH) - fatalerror("%s: File name is too long: '%s'\n", __func__, tzCurrentFileName); - sym->fileLine = fstk_GetLine(); + struct FileStackNode *oldSrc = sym->src; + + setSymbolFilename(sym); + /* If the old node was referenced, ensure the new one is */ + if (oldSrc->referenced && oldSrc->ID != -1) + out_RegisterNode(sym->src); + /* TODO: unref the old node, and use `out_ReplaceNode` instead if deleting it */ } /* @@ -133,8 +197,9 @@ static struct Symbol *createsymbol(char const *s) symbol->isExported = false; symbol->isBuiltin = false; + symbol->hasCallback = false; symbol->section = NULL; - 
updateSymbolFilename(symbol); + setSymbolFilename(symbol); symbol->ID = -1; symbol->next = NULL; @@ -209,8 +274,7 @@ void sym_Purge(char const *symName) labelScope = NULL; hash_RemoveElement(symbols, symbol->name); - if (symbol->type == SYM_MACRO) - free(symbol->macro); + /* TODO: ideally, also unref the file stack nodes */ free(symbol); } } @@ -229,7 +293,22 @@ uint32_t sym_GetPCValue(void) } /* - * Return a constant symbols value + * Return a constant symbol's value, assuming it's defined + */ +uint32_t sym_GetConstantSymValue(struct Symbol const *sym) +{ + if (sym == PCSymbol) + return sym_GetPCValue(); + else if (!sym_IsConstant(sym)) + error("\"%s\" does not have a constant value\n", sym->name); + else + return sym_GetValue(sym); + + return 0; +} + +/* + * Return a constant symbol's value */ uint32_t sym_GetConstantValue(char const *s) { @@ -237,12 +316,8 @@ uint32_t sym_GetConstantValue(char const *s) if (sym == NULL) error("'%s' not defined\n", s); - else if (sym == PCSymbol) - return sym_GetPCValue(); - else if (!sym_IsConstant(sym)) - error("\"%s\" does not have a constant value\n", s); else - return sym_GetValue(sym); + return sym_GetConstantSymValue(sym); return 0; } @@ -285,9 +360,11 @@ static struct Symbol *createNonrelocSymbol(char const *symbolName) if (!symbol) symbol = createsymbol(symbolName); - else if (sym_IsDefined(symbol)) - error("'%s' already defined at %s(%" PRIu32 ")\n", symbolName, - symbol->fileName, symbol->fileLine); + else if (sym_IsDefined(symbol)) { + error("'%s' already defined at ", symbolName); + dumpFilename(symbol); + putc('\n', stderr); + } return symbol; } @@ -300,7 +377,6 @@ struct Symbol *sym_AddEqu(char const *symName, int32_t value) struct Symbol *sym = createNonrelocSymbol(symName); sym->type = SYM_EQU; - sym->callback = NULL; sym->value = value; return sym; @@ -343,18 +419,19 @@ struct Symbol *sym_AddSet(char const *symName, int32_t value) { struct Symbol *sym = findsymbol(symName, NULL); - if (sym == NULL) + if (sym == 
NULL) { sym = createsymbol(symName); - else if (sym_IsDefined(sym) && sym->type != SYM_SET) - error("'%s' already defined as %s at %s(%" PRIu32 ")\n", - symName, sym->type == SYM_LABEL ? "label" : "constant", - sym->fileName, sym->fileLine); - else - /* TODO: can the scope be incorrect when talking over refs? */ + } else if (sym_IsDefined(sym) && sym->type != SYM_SET) { + error("'%s' already defined as %s at ", + symName, sym->type == SYM_LABEL ? "label" : "constant"); + dumpFilename(sym); + putc('\n', stderr); + } else { + /* TODO: can the scope be incorrect when taking over refs? */ updateSymbolFilename(sym); + } sym->type = SYM_SET; - sym->callback = NULL; sym->value = value; return sym; @@ -365,7 +442,7 @@ struct Symbol *sym_AddSet(char const *symName, int32_t value) * @param name The label's full name (so `.name` is invalid) * @return The created symbol */ -static struct Symbol *addSectionlessLabel(char const *name) +static struct Symbol *addLabel(char const *name) { assert(name[0] != '.'); /* The symbol name must have been expanded prior */ struct Symbol *sym = findsymbol(name, NULL); /* Due to this, don't look for expansions */ @@ -373,25 +450,19 @@ static struct Symbol *addSectionlessLabel(char const *name) if (!sym) { sym = createsymbol(name); } else if (sym_IsDefined(sym)) { - error("'%s' already defined in %s(%" PRIu32 ")\n", - name, sym->fileName, sym->fileLine); + error("'%s' already defined at ", name); + dumpFilename(sym); + putc('\n', stderr); return NULL; + } else { + updateSymbolFilename(sym); } /* If the symbol already exists as a ref, just "take over" it */ sym->type = SYM_LABEL; - sym->callback = NULL; sym->value = sect_GetSymbolOffset(); if (exportall) sym->isExported = true; sym->section = sect_GetSymbolSection(); - updateSymbolFilename(sym); - - return sym; -} - -static struct Symbol *addLabel(char const *name) -{ - struct Symbol *sym = addSectionlessLabel(name); if (sym && !sym->section) error("Label \"%s\" created outside of a SECTION\n", 
name); @@ -467,14 +538,14 @@ void sym_Export(char const *symName) /* * Add a macro definition */ -struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo) +struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char *body, size_t size) { struct Symbol *sym = createNonrelocSymbol(symName); sym->type = SYM_MACRO; - sym->macroSize = ulNewMacroSize; - sym->macro = tzNewMacro; - updateSymbolFilename(sym); + sym->macroSize = size; + sym->macro = body; + setSymbolFilename(sym); /* TODO: is this really necessary? */ /* * The symbol is created at the line after the `endm`, * override this with the actual definition line @@ -528,21 +599,36 @@ static inline char const *removeLeadingZeros(char const *ptr) return ptr; } +static inline struct Symbol *createBuiltinSymbol(char const *name) +{ + struct Symbol *sym = createsymbol(name); + + sym->isBuiltin = true; + sym->hasCallback = true; + sym->src = NULL; + sym->fileLine = 0; + return sym; +} + /* * Initialize the symboltable */ void sym_Init(void) { - struct Symbol *_NARGSymbol = sym_AddEqu("_NARG", 0); - struct Symbol *__LINE__Symbol = sym_AddEqu("__LINE__", 0); + PCSymbol = createBuiltinSymbol("@"); + struct Symbol *_NARGSymbol = createBuiltinSymbol("_NARG"); + struct Symbol *__LINE__Symbol = createBuiltinSymbol("__LINE__"); + struct Symbol *__FILE__Symbol = createBuiltinSymbol("__FILE__"); - PCSymbol = addSectionlessLabel("@"); - PCSymbol->isBuiltin = true; - PCSymbol->callback = CallbackPC; - _NARGSymbol->isBuiltin = true; - _NARGSymbol->callback = Callback_NARG; - __LINE__Symbol->isBuiltin = true; - __LINE__Symbol->callback = Callback__LINE__; + PCSymbol->type = SYM_LABEL; + PCSymbol->section = NULL; + PCSymbol->numCallback = CallbackPC; + _NARGSymbol->type = SYM_EQU; + _NARGSymbol->numCallback = Callback_NARG; + __LINE__Symbol->type = SYM_EQU; + __LINE__Symbol->numCallback = Callback__LINE__; + __FILE__Symbol->type = SYM_EQUS; + __FILE__Symbol->strCallback = Callback__FILE__; sym_AddSet("_RS", 
0)->isBuiltin = true; sym_AddEqu("__RGBDS_MAJOR__", PACKAGE_VERSION_MAJOR)->isBuiltin = true; diff --git a/src/asm/util.c b/src/asm/util.c index 51f88ba8..9af73ec0 100644 --- a/src/asm/util.c +++ b/src/asm/util.c @@ -6,6 +6,7 @@ * SPDX-License-Identifier: MIT */ +#include #include #include "asm/main.h" @@ -27,6 +28,40 @@ uint32_t calchash(const char *s) return hash; } +char const *print(int c) +{ + static char buf[5]; /* '\xNN' + '\0' */ + + if (c == EOF) + return "EOF"; + + if (isprint(c)) { + buf[0] = c; + buf[1] = '\0'; + return buf; + } + + buf[0] = '\\'; + switch (c) { + case '\n': + buf[1] = 'n'; + break; + case '\r': + buf[1] = 'r'; + break; + case '\t': + buf[1] = 't'; + break; + + default: /* Print as hex */ + buf[1] = 'x'; + sprintf(&buf[2], "%02hhx", c); + return buf; + } + buf[2] = '\0'; + return buf; +} + size_t readUTF8Char(uint8_t *dest, char const *src) { uint32_t state = 0; diff --git a/src/asm/warning.c b/src/asm/warning.c index f1fe00a1..f8ea7cb9 100644 --- a/src/asm/warning.c +++ b/src/asm/warning.c @@ -198,14 +198,14 @@ void processWarningFlag(char const *flag) warnx("Unknown warning `%s`", flag); } -void verror(const char *fmt, va_list args, char const *flag) +void printDiag(const char *fmt, va_list args, char const *type, + char const *flagfmt, char const *flag) { - fputs("ERROR: ", stderr); - fstk_Dump(); - fprintf(stderr, flag ? ": [-Werror=%s]\n " : ":\n ", flag); + fputs(type, stderr); + fstk_DumpCurrent(); + fprintf(stderr, flagfmt, flag); vfprintf(stderr, fmt, args); - fstk_DumpStringExpansions(); - nbErrors++; + lexer_DumpStringExpansions(); } void error(const char *fmt, ...) @@ -213,8 +213,9 @@ void error(const char *fmt, ...) va_list args; va_start(args, fmt); - verror(fmt, args, NULL); + printDiag(fmt, args, "ERROR: ", ":\n ", NULL); va_end(args); + nbErrors++; } noreturn_ void fatalerror(const char *fmt, ...) @@ -222,7 +223,7 @@ noreturn_ void fatalerror(const char *fmt, ...) 
va_list args; va_start(args, fmt); - verror(fmt, args, NULL); + printDiag(fmt, args, "FATAL: ", ":\n ", NULL); va_end(args); exit(1); @@ -240,7 +241,7 @@ void warning(enum WarningID id, char const *fmt, ...) return; case WARNING_ERROR: - verror(fmt, args, flag); + printDiag(fmt, args, "ERROR: ", ": [-Werror=%s]\n ", flag); va_end(args); return; @@ -252,11 +253,7 @@ void warning(enum WarningID id, char const *fmt, ...) break; } - fputs("warning: ", stderr); - fstk_Dump(); - fprintf(stderr, ": [-W%s]\n ", flag); - vfprintf(stderr, fmt, args); - fstk_DumpStringExpansions(); + printDiag(fmt, args, "warning: ", ": [-W%s]\n ", flag); va_end(args); } diff --git a/src/link/assign.c b/src/link/assign.c index c209ed9c..64244f01 100644 --- a/src/link/assign.c +++ b/src/link/assign.c @@ -81,14 +81,14 @@ static void processLinkerScript(void) /* Check if this doesn't conflict with what the code says */ if (section->isBankFixed && placement->bank != section->bank) - error("Linker script contradicts \"%s\"'s bank placement", + error(NULL, 0, "Linker script contradicts \"%s\"'s bank placement", section->name); if (section->isAddressFixed && placement->org != section->org) - error("Linker script contradicts \"%s\"'s address placement", + error(NULL, 0, "Linker script contradicts \"%s\"'s address placement", section->name); if (section->isAlignFixed && (placement->org & section->alignMask) != 0) - error("Linker script contradicts \"%s\"'s alignment", + error(NULL, 0, "Linker script contradicts \"%s\"'s alignment", section->name); section->isAddressFixed = true; diff --git a/src/link/main.c b/src/link/main.c index d88bc054..8d2b5a58 100644 --- a/src/link/main.c +++ b/src/link/main.c @@ -6,8 +6,10 @@ * SPDX-License-Identifier: MIT */ +#include #include #include +#include #include #include #include @@ -39,11 +41,55 @@ bool disablePadding; /* -x */ static uint32_t nbErrors = 0; -void error(char const *fmt, ...) 
+/***** Helper function to dump a file stack to stderr *****/ + +char const *dumpFileStack(struct FileStackNode const *node) +{ + char const *lastName; + + if (node->parent) { + lastName = dumpFileStack(node->parent); + /* REPT nodes use their parent's name */ + if (node->type != NODE_REPT) + lastName = node->name; + fprintf(stderr, "(%" PRIu32 ") -> %s", node->lineNo, lastName); + if (node->type == NODE_REPT) { + for (uint32_t i = 0; i < node->reptDepth; i++) + fprintf(stderr, "::REPT~%" PRIu32, node->iters[i]); + } + } else { + assert(node->type != NODE_REPT); + lastName = node->name; + fputs(lastName, stderr); + } + + return lastName; +} + +void warning(struct FileStackNode const *where, uint32_t lineNo, char const *fmt, ...) { va_list ap; - fprintf(stderr, "error: "); + fputs("warning: ", stderr); + if (where) { + dumpFileStack(where); + fprintf(stderr, "(%" PRIu32 "): ", lineNo); + } + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + putc('\n', stderr); +} + +void error(struct FileStackNode const *where, uint32_t lineNo, char const *fmt, ...) +{ + va_list ap; + + fputs("error: ", stderr); + if (where) { + dumpFileStack(where); + fprintf(stderr, "(%" PRIu32 "): ", lineNo); + } va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); @@ -53,11 +99,15 @@ void error(char const *fmt, ...) nbErrors++; } -noreturn_ void fatal(char const *fmt, ...) +noreturn_ void fatal(struct FileStackNode const *where, uint32_t lineNo, char const *fmt, ...) 
{ va_list ap; - fprintf(stderr, "fatal: "); + fputs("fatal: ", stderr); + if (where) { + dumpFileStack(where); + fprintf(stderr, "(%" PRIu32 "): ", lineNo); + } va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); @@ -177,11 +227,11 @@ int main(int argc, char *argv[]) case 'p': value = strtoul(optarg, &endptr, 0); if (optarg[0] == '\0' || *endptr != '\0') { - error("Invalid argument for option 'p'"); + error(NULL, 0, "Invalid argument for option 'p'"); value = 0xFF; } if (value > 0xFF) { - error("Argument for 'p' must be a byte (between 0 and 0xFF)"); + error(NULL, 0, "Argument for 'p' must be a byte (between 0 and 0xFF)"); value = 0xFF; } padValue = value; @@ -189,7 +239,7 @@ int main(int argc, char *argv[]) case 's': /* FIXME: nobody knows what this does, figure it out */ (void)optarg; - warnx("Nobody has any idea what `-s` does"); + warning(NULL, 0, "Nobody has any idea what `-s` does"); break; case 't': is32kMode = true; @@ -234,8 +284,8 @@ int main(int argc, char *argv[]) bankranges[SECTTYPE_VRAM][1] = BANK_MIN_VRAM; /* Read all object files first, */ - while (curArgIndex < argc) - obj_ReadFile(argv[curArgIndex++]); + for (obj_Setup(argc - curArgIndex); curArgIndex < argc; curArgIndex++) + obj_ReadFile(argv[curArgIndex], argc - curArgIndex - 1); /* then process them, */ obj_DoSanityChecks(); diff --git a/src/link/object.c b/src/link/object.c index bd3c05ab..8e63bd2a 100644 --- a/src/link/object.c +++ b/src/link/object.c @@ -31,6 +31,11 @@ static struct SymbolList { struct SymbolList *next; } *symbolLists; +unsigned int nbObjFiles; +static struct { + struct FileStackNode *nodes; + uint32_t nbNodes; +} *nodes; static struct Assertion *assertions; /***** Helper functions for reading object files *****/ @@ -170,12 +175,56 @@ static char *readstr(FILE *file) /***** Functions to parse object files *****/ /** - * Reads a RGB6 symbol from a file. + * Reads a file stack node from a file.
+ * @param file The file to read from + * @param fileNodes The file's array of nodes + * @param i The ID of the node in the array + * @param fileName The filename to report in errors + */ +static void readFileStackNode(FILE *file, struct FileStackNode fileNodes[], uint32_t i, + char const *fileName) +{ + uint32_t parentID; + + tryReadlong(parentID, file, + "%s: Cannot read node #%" PRIu32 "'s parent ID: %s", fileName, i); + fileNodes[i].parent = parentID == -1 ? NULL : &fileNodes[parentID]; + tryReadlong(fileNodes[i].lineNo, file, + "%s: Cannot read node #%" PRIu32 "'s line number: %s", fileName, i); + tryGetc(fileNodes[i].type, file, "%s: Cannot read node #%" PRIu32 "'s type: %s", + fileName, i); + switch (fileNodes[i].type) { + case NODE_FILE: + case NODE_MACRO: + tryReadstr(fileNodes[i].name, file, + "%s: Cannot read node #%" PRIu32 "'s file name: %s", fileName, i); + break; + + case NODE_REPT: + tryReadlong(fileNodes[i].reptDepth, file, + "%s: Cannot read node #%" PRIu32 "'s rept depth: %s", fileName, i); + fileNodes[i].iters = malloc(sizeof(*fileNodes[i].iters) * fileNodes[i].reptDepth); + if (!fileNodes[i].iters) + fatal(NULL, 0, "%s: Failed to alloc node #%" PRIu32 "'s iters: %s", + fileName, i, strerror(errno)); + for (uint32_t k = 0; k < fileNodes[i].reptDepth; k++) + tryReadlong(fileNodes[i].iters[k], file, + "%s: Cannot read node #%" PRIu32 "'s iter #%" PRIu32 ": %s", + fileName, i, k); + if (!fileNodes[i].parent) + fatal(NULL, 0, "%s is not a valid object file: root node (#%" + PRIu32 ") may not be REPT", fileName, i); + } +} + +/** + * Reads a symbol from a file.
* @param file The file to read from * @param symbol The struct to fill * @param fileName The filename to report in errors */ -static void readSymbol(FILE *file, struct Symbol *symbol, char const *fileName) +static void readSymbol(FILE *file, struct Symbol *symbol, + char const *fileName, struct FileStackNode fileNodes[]) { tryReadstr(symbol->name, file, "%s: Cannot read symbol name: %s", fileName); @@ -184,9 +233,12 @@ static void readSymbol(FILE *file, struct Symbol *symbol, char const *fileName) /* If the symbol is defined in this file, read its definition */ if (symbol->type != SYMTYPE_IMPORT) { symbol->objFileName = fileName; - tryReadstr(symbol->fileName, file, - "%s: Cannot read \"%s\"'s file name: %s", + uint32_t nodeID; + + tryReadlong(nodeID, file, + "%s: Cannot read \"%s\"'s node ID: %s", fileName, symbol->name); + symbol->src = &fileNodes[nodeID]; tryReadlong(symbol->lineNo, file, "%s: Cannot read \"%s\"'s line number: %s", fileName, symbol->name); @@ -202,7 +254,7 @@ static void readSymbol(FILE *file, struct Symbol *symbol, char const *fileName) } /** - * Reads a RGB6 patch from a file. + * Reads a patch from a file. 
* @param file The file to read from * @param patch The struct to fill * @param fileName The filename to report in errors @@ -210,20 +262,25 @@ static void readSymbol(FILE *file, struct Symbol *symbol, char const *fileName) */ static void readPatch(FILE *file, struct Patch *patch, char const *fileName, char const *sectName, uint32_t i, - struct Section *fileSections[]) + struct Section *fileSections[], struct FileStackNode fileNodes[]) { - tryReadstr(patch->fileName, file, - "%s: Unable to read \"%s\"'s patch #%" PRIu32 "'s name: %s", + uint32_t nodeID; + + tryReadlong(nodeID, file, + "%s: Unable to read \"%s\"'s patch #%" PRIu32 "'s node ID: %s", fileName, sectName, i); + patch->src = &fileNodes[nodeID]; + tryReadlong(patch->lineNo, file, + "%s: Unable to read \"%s\"'s patch #%" PRIu32 "'s line number: %s", + fileName, sectName, i); tryReadlong(patch->offset, file, "%s: Unable to read \"%s\"'s patch #%" PRIu32 "'s offset: %s", fileName, sectName, i); tryReadlong(patch->pcSectionID, file, "%s: Unable to read \"%s\"'s patch #%" PRIu32 "'s PC offset: %s", fileName, sectName, i); - patch->pcSection = patch->pcSectionID == -1 - ? NULL - : fileSections[patch->pcSectionID]; + patch->pcSection = patch->pcSectionID == -1 ? 
NULL + : fileSections[patch->pcSectionID]; tryReadlong(patch->pcOffset, file, "%s: Unable to read \"%s\"'s patch #%" PRIu32 "'s PC offset: %s", fileName, sectName, i); @@ -234,16 +291,17 @@ static void readPatch(FILE *file, struct Patch *patch, char const *fileName, "%s: Unable to read \"%s\"'s patch #%" PRIu32 "'s RPN size: %s", fileName, sectName, i); - uint8_t *rpnExpression = - malloc(sizeof(*rpnExpression) * patch->rpnSize); - size_t nbElementsRead = fread(rpnExpression, sizeof(*rpnExpression), + patch->rpnExpression = malloc(sizeof(*patch->rpnExpression) * patch->rpnSize); + if (!patch->rpnExpression) + err(1, "%s: Failed to alloc \"%s\"'s patch #%" PRIu32 "'s RPN expression", + fileName, sectName, i); + size_t nbElementsRead = fread(patch->rpnExpression, sizeof(*patch->rpnExpression), patch->rpnSize, file); if (nbElementsRead != patch->rpnSize) errx(1, "%s: Cannot read \"%s\"'s patch #%" PRIu32 "'s RPN expression: %s", fileName, sectName, i, feof(file) ? "Unexpected end of file" : strerror(errno)); - patch->rpnExpression = rpnExpression; } /** @@ -252,8 +310,8 @@ static void readPatch(FILE *file, struct Patch *patch, char const *fileName, * @param section The struct to fill * @param fileName The filename to report in errors */ -static void readSection(FILE *file, struct Section *section, - char const *fileName, struct Section *fileSections[]) +static void readSection(FILE *file, struct Section *section, char const *fileName, + struct Section *fileSections[], struct FileStackNode fileNodes[]) { int32_t tmp; uint8_t byte; @@ -280,7 +338,7 @@ static void readSection(FILE *file, struct Section *section, fileName, section->name); section->isAddressFixed = tmp >= 0; if (tmp > UINT16_MAX) { - error("\"%s\"'s org is too large (%" PRId32 ")", + error(NULL, 0, "\"%s\"'s org is too large (%" PRId32 ")", section->name, tmp); tmp = UINT16_MAX; } @@ -296,7 +354,7 @@ static void readSection(FILE *file, struct Section *section, tryReadlong(tmp, file, "%s: Cannot read 
\"%s\"'s alignment offset: %s", fileName, section->name); if (tmp > UINT16_MAX) { - error("\"%s\"'s alignment offset is too large (%" PRId32 ")", + error(NULL, 0, "\"%s\"'s alignment offset is too large (%" PRId32 ")", section->name, tmp); tmp = UINT16_MAX; } @@ -332,7 +390,7 @@ static void readSection(FILE *file, struct Section *section, section->name); for (uint32_t i = 0; i < section->nbPatches; i++) { readPatch(file, &patches[i], fileName, section->name, - i, fileSections); + i, fileSections, fileNodes); } section->patches = patches; } @@ -375,13 +433,13 @@ static void linkSymToSect(struct Symbol const *symbol, struct Section *section) */ static void readAssertion(FILE *file, struct Assertion *assert, char const *fileName, uint32_t i, - struct Section *fileSections[]) + struct Section *fileSections[], struct FileStackNode fileNodes[]) { char assertName[sizeof("Assertion #" EXPAND_AND_STR(UINT32_MAX))]; snprintf(assertName, sizeof(assertName), "Assertion #%" PRIu32, i); - readPatch(file, &assert->patch, fileName, assertName, 0, fileSections); + readPatch(file, &assert->patch, fileName, assertName, 0, fileSections, fileNodes); tryReadstr(assert->message, file, "%s: Cannot read assertion's message: %s", fileName); } @@ -394,11 +452,7 @@ static inline struct Section *getMainSection(struct Section *section) return section; } -/** - * Reads an object file of any supported format - * @param fileName The filename to report for errors - */ -void obj_ReadFile(char const *fileName) +void obj_ReadFile(char const *fileName, unsigned int fileID) { FILE *file = strcmp("-", fileName) ? 
fopen(fileName, "rb") : stdin; @@ -438,6 +492,14 @@ void obj_ReadFile(char const *fileName) nbSectionsToAssign += nbSections; + tryReadlong(nodes[fileID].nbNodes, file, "%s: Cannot read number of nodes: %s", fileName); + nodes[fileID].nodes = calloc(nodes[fileID].nbNodes, sizeof(nodes[fileID].nodes[0])); + if (!nodes[fileID].nodes) + err(1, "Failed to get memory for %s's nodes", fileName); + verbosePrint("Reading %u nodes...\n", nodes[fileID].nbNodes); + for (uint32_t i = 0; i < nodes[fileID].nbNodes; i++) + readFileStackNode(file, nodes[fileID].nodes, i, fileName); + /* This file's symbols, kept to link sections to them */ struct Symbol **fileSymbols = malloc(sizeof(*fileSymbols) * nbSymbols + 1); @@ -464,7 +526,7 @@ void obj_ReadFile(char const *fileName) if (!symbol) err(1, "%s: Couldn't create new symbol", fileName); - readSymbol(file, symbol, fileName); + readSymbol(file, symbol, fileName, nodes[fileID].nodes); fileSymbols[i] = symbol; if (symbol->type == SYMTYPE_EXPORT) @@ -485,7 +547,7 @@ void obj_ReadFile(char const *fileName) err(1, "%s: Couldn't create new section", fileName); fileSections[i]->nextu = NULL; - readSection(file, fileSections[i], fileName, fileSections); + readSection(file, fileSections[i], fileName, fileSections, nodes[fileID].nodes); fileSections[i]->fileSymbols = fileSymbols; if (nbSymPerSect[i]) { fileSections[i]->symbols = malloc(nbSymPerSect[i] @@ -535,7 +597,7 @@ void obj_ReadFile(char const *fileName) if (!assertion) err(1, "%s: Couldn't create new assertion", fileName); - readAssertion(file, assertion, fileName, i, fileSections); + readAssertion(file, assertion, fileName, i, fileSections, nodes[fileID].nodes); assertion->fileSymbols = fileSymbols; assertion->next = assertions; assertions = assertion; @@ -555,6 +617,15 @@ void obj_CheckAssertions(void) patch_CheckAssertions(assertions); } +void obj_Setup(unsigned int nbFiles) +{ + nbObjFiles = nbFiles; + + if (nbFiles > SIZE_MAX / sizeof(*nodes)) + fatal(NULL, 0, "Impossible to link 
more than %zu files!", SIZE_MAX / sizeof(*nodes)); + nodes = malloc(sizeof(*nodes) * nbFiles); +} + static void freeSection(struct Section *section, void *arg) { (void)arg; @@ -562,12 +633,8 @@ static void freeSection(struct Section *section, void *arg) free(section->name); if (sect_HasData(section->type)) { free(section->data); - for (int32_t i = 0; i < section->nbPatches; i++) { - struct Patch *patch = §ion->patches[i]; - - free(patch->fileName); - free(patch->rpnExpression); - } + for (int32_t i = 0; i < section->nbPatches; i++) + free(section->patches[i].rpnExpression); free(section->patches); } free(section->symbols); @@ -577,13 +644,20 @@ static void freeSection(struct Section *section, void *arg) static void freeSymbol(struct Symbol *symbol) { free(symbol->name); - if (symbol->type != SYMTYPE_IMPORT) - free(symbol->fileName); free(symbol); } void obj_Cleanup(void) { + for (unsigned int i = 0; i < nbObjFiles; i++) { + for (uint32_t j = 0; j < nodes[i].nbNodes; j++) { + if (nodes[i].nodes[j].type == NODE_REPT) + free(nodes[i].nodes[j].iters); + } + free(nodes[i].nodes); + } + free(nodes); + sym_CleanupSymbols(); sect_ForEach(freeSection, NULL); diff --git a/src/link/patch.c b/src/link/patch.c index 20b31c6e..97dbb762 100644 --- a/src/link/patch.c +++ b/src/link/patch.c @@ -6,11 +6,13 @@ * SPDX-License-Identifier: MIT */ +#include #include #include #include #include +#include "link/object.h" #include "link/patch.h" #include "link/section.h" #include "link/symbol.h" @@ -104,10 +106,10 @@ static void pushRPN(int32_t value) stack.size++; } -static int32_t popRPN(char const *fileName) +static int32_t popRPN(struct FileStackNode const *node, uint32_t lineNo) { if (stack.size == 0) - errx(1, "%s: Internal error, RPN stack empty", fileName); + fatal(node, lineNo, "Internal error, RPN stack empty"); stack.size--; return stack.buf[stack.size]; @@ -121,16 +123,18 @@ static inline void freeRPNStack(void) /* RPN operators */ static uint32_t getRPNByte(uint8_t const 
**expression, int32_t *size, - char const *fileName) + struct FileStackNode const *node, uint32_t lineNo) { if (!(*size)--) - errx(1, "%s: RPN expression overread", fileName); + fatal(node, lineNo, "Internal error, RPN expression overread"); + return *(*expression)++; } static struct Symbol const *getSymbol(struct Symbol const * const *symbolList, uint32_t index) { + assert(index != -1); /* PC needs to be handled specially, not here */ struct Symbol const *symbol = symbolList[index]; /* If the symbol is defined elsewhere... */ @@ -150,7 +154,7 @@ static int32_t computeRPNExpr(struct Patch const *patch, struct Symbol const * const *fileSymbols) { /* Small shortcut to avoid a lot of repetition */ -#define popRPN() popRPN(patch->fileName) +#define popRPN() popRPN(patch->src, patch->lineNo) uint8_t const *expression = patch->rpnExpression; int32_t size = patch->rpnSize; @@ -159,7 +163,7 @@ static int32_t computeRPNExpr(struct Patch const *patch, while (size > 0) { enum RPNCommand command = getRPNByte(&expression, &size, - patch->fileName); + patch->src, patch->lineNo); int32_t value; /* @@ -187,7 +191,7 @@ static int32_t computeRPNExpr(struct Patch const *patch, case RPN_DIV: value = popRPN(); if (value == 0) { - error("%s: Division by 0", patch->fileName); + error(patch->src, patch->lineNo, "Division by 0"); popRPN(); value = INT32_MAX; } else { @@ -197,7 +201,7 @@ static int32_t computeRPNExpr(struct Patch const *patch, case RPN_MOD: value = popRPN(); if (value == 0) { - error("%s: Modulo by 0", patch->fileName); + error(patch->src, patch->lineNo, "Modulo by 0"); popRPN(); value = 0; } else { @@ -269,17 +273,17 @@ static int32_t computeRPNExpr(struct Patch const *patch, value = 0; for (uint8_t shift = 0; shift < 32; shift += 8) value |= getRPNByte(&expression, &size, - patch->fileName) << shift; + patch->src, patch->lineNo) << shift; symbol = getSymbol(fileSymbols, value); if (!symbol) { - error("%s: Requested BANK() of symbol \"%s\", which was not found", - 
patch->fileName, + error(patch->src, patch->lineNo, + "Requested BANK() of symbol \"%s\", which was not found", fileSymbols[value]->name); value = 1; } else if (!symbol->section) { - error("%s: Requested BANK() of non-label symbol \"%s\"", - patch->fileName, + error(patch->src, patch->lineNo, + "Requested BANK() of non-label symbol \"%s\"", fileSymbols[value]->name); value = 1; } else { @@ -289,14 +293,15 @@ static int32_t computeRPNExpr(struct Patch const *patch, case RPN_BANK_SECT: name = (char const *)expression; - while (getRPNByte(&expression, &size, patch->fileName)) + while (getRPNByte(&expression, &size, patch->src, patch->lineNo)) ; sect = sect_GetSection(name); if (!sect) { - error("%s: Requested BANK() of section \"%s\", which was not found", - patch->fileName, name); + error(patch->src, patch->lineNo, + "Requested BANK() of section \"%s\", which was not found", + name); value = 1; } else { value = sect->bank; @@ -305,7 +310,8 @@ static int32_t computeRPNExpr(struct Patch const *patch, case RPN_BANK_SELF: if (!patch->pcSection) { - error("%s: PC has no bank outside a section"); + error(patch->src, patch->lineNo, + "PC has no bank outside a section"); value = 1; } else { value = patch->pcSection->bank; @@ -317,8 +323,8 @@ static int32_t computeRPNExpr(struct Patch const *patch, if (value < 0 || (value > 0xFF && value < 0xFF00) || value > 0xFFFF) - error("%s: Value %" PRId32 " is not in HRAM range", - patch->fileName, value); + error(patch->src, patch->lineNo, + "Value %" PRId32 " is not in HRAM range", value); value &= 0xFF; break; @@ -328,8 +334,8 @@ static int32_t computeRPNExpr(struct Patch const *patch, * They can be easily checked with a bitmask */ if (value & ~0x38) - error("%s: Value %" PRId32 " is not a RST vector", - patch->fileName, value); + error(patch->src, patch->lineNo, + "Value %" PRId32 " is not a RST vector", value); value |= 0xC7; break; @@ -337,32 +343,35 @@ static int32_t computeRPNExpr(struct Patch const *patch, value = 0; for 
(uint8_t shift = 0; shift < 32; shift += 8) value |= getRPNByte(&expression, &size, - patch->fileName) << shift; + patch->src, patch->lineNo) << shift; break; case RPN_SYM: value = 0; for (uint8_t shift = 0; shift < 32; shift += 8) value |= getRPNByte(&expression, &size, - patch->fileName) << shift; + patch->src, patch->lineNo) << shift; - symbol = getSymbol(fileSymbols, value); - - if (!symbol) { - error("%s: Unknown symbol \"%s\"", - patch->fileName, - fileSymbols[value]->name); - } else if (strcmp(symbol->name, "@")) { - value = symbol->value; - /* Symbols attached to sections have offsets */ - if (symbol->section) - value += symbol->section->org; - } else if (!patch->pcSection) { - error("%s: PC has no value outside a section", - patch->fileName); - value = 0; + if (value == -1) { /* PC */ + if (!patch->pcSection) { + error(patch->src, patch->lineNo, + "PC has no value outside a section"); + value = 0; + } else { + value = patch->pcOffset + patch->pcSection->org; + } } else { - value = patch->pcOffset + patch->pcSection->org; + symbol = getSymbol(fileSymbols, value); + + if (!symbol) { + error(patch->src, patch->lineNo, + "Unknown symbol \"%s\"", fileSymbols[value]->name); + } else { + value = symbol->value; + /* Symbols attached to sections have offsets */ + if (symbol->section) + value += symbol->section->org; + } } break; } @@ -371,8 +380,8 @@ static int32_t computeRPNExpr(struct Patch const *patch, } if (stack.size > 1) - error("%s: RPN stack has %zu entries on exit, not 1", - patch->fileName, stack.size); + error(patch->src, patch->lineNo, + "RPN stack has %zu entries on exit, not 1", stack.size); return popRPN(); @@ -390,20 +399,20 @@ void patch_CheckAssertions(struct Assertion *assert) assert->fileSymbols)) { switch ((enum AssertionType)assert->patch.type) { case ASSERT_FATAL: - fatal("%s: %s", assert->patch.fileName, + fatal(assert->patch.src, assert->patch.lineNo, "%s", assert->message[0] ? 
assert->message : "assert failure"); /* Not reached */ break; /* Here so checkpatch doesn't complain */ case ASSERT_ERROR: - error("%s: %s", assert->patch.fileName, + error(assert->patch.src, assert->patch.lineNo, "%s", assert->message[0] ? assert->message : "assert failure"); break; case ASSERT_WARN: - warnx("%s: %s", assert->patch.fileName, - assert->message[0] ? assert->message - : "assert failure"); + warning(assert->patch.src, assert->patch.lineNo, "%s", + assert->message[0] ? assert->message + : "assert failure"); break; } } @@ -442,8 +451,9 @@ static void applyFilePatches(struct Section *section, struct Section *dataSectio int16_t jumpOffset = value - address; if (jumpOffset < -128 || jumpOffset > 127) - error("%s: jr target out of reach (expected -129 < %" PRId16 " < 128)", - patch->fileName, jumpOffset); + error(patch->src, patch->lineNo, + "jr target out of reach (expected -129 < %" PRId16 " < 128)", + jumpOffset); dataSection->data[offset] = jumpOffset & 0xFF; } else { /* Patch a certain number of bytes */ @@ -459,9 +469,9 @@ static void applyFilePatches(struct Section *section, struct Section *dataSectio if (value < types[patch->type].min || value > types[patch->type].max) - error("%s: Value %#" PRIx32 "%s is not %u-bit", - patch->fileName, value, - value < 0 ? " (maybe negative?)" : "", + error(patch->src, patch->lineNo, + "Value %#" PRIx32 "%s is not %u-bit", + value, value < 0 ? 
" (maybe negative?)" : "", types[patch->type].size * 8U); for (uint8_t i = 0; i < types[patch->type].size; i++) { dataSection->data[offset + i] = value & 0xFF; diff --git a/src/link/symbol.c b/src/link/symbol.c index 051a7db9..483c2c04 100644 --- a/src/link/symbol.c +++ b/src/link/symbol.c @@ -8,9 +8,12 @@ #include #include +#include +#include "link/object.h" #include "link/symbol.h" #include "link/main.h" + #include "extern/err.h" #include "hashmap.h" @@ -40,11 +43,15 @@ void sym_AddSymbol(struct Symbol *symbol) /* Check if the symbol already exists */ struct Symbol *other = hash_GetElement(symbols, symbol->name); - if (other) - errx(1, "\"%s\" both in %s from %s(%" PRId32 ") and in %s from %s(%" PRId32 ")", - symbol->name, - symbol->objFileName, symbol->fileName, symbol->lineNo, - other->objFileName, other->fileName, other->lineNo); + if (other) { + fprintf(stderr, "error: \"%s\" both in %s from ", symbol->name, symbol->objFileName); + dumpFileStack(symbol->src); + fprintf(stderr, "(%" PRIu32 ") and in %s from ", + symbol->lineNo, other->objFileName); + dumpFileStack(other->src); + fprintf(stderr, "(%" PRIu32 ")\n", other->lineNo); + exit(1); + } /* If not, add it */ bool collided = hash_AddElement(symbols, symbol->name, symbol); diff --git a/src/rgbds.5 b/src/rgbds.5 index 70de8e94..9c4b5530 100644 --- a/src/rgbds.5 +++ b/src/rgbds.5 @@ -16,7 +16,7 @@ This is the description of the object files used by .Xr rgbasm 1 and .Xr rgblink 1 . -.Em Please note that the specifications may change. +.Em Please note that the specifications may change . This toolchain is in development and new features may require adding more information to the current format, or modifying some fields, which would break compatibility with older versions. 
.Pp .Sh FILE STRUCTURE @@ -34,34 +34,67 @@ is a 0‐terminated string of ; Header BYTE ID[4] ; "RGB9" -LONG RevisionNumber ; The format's revision number this file uses -LONG NumberOfSymbols ; The number of symbols used in this file -LONG NumberOfSections ; The number of sections used in this file +LONG RevisionNumber ; The format's revision number this file uses. +LONG NumberOfSymbols ; The number of symbols used in this file. +LONG NumberOfSections ; The number of sections used in this file. + +; File info + +LONG NumberOfNodes ; The number of nodes contained in this file. + +REPT NumberOfNodes ; IMPORTANT NOTE: the nodes are actually written in + ; **reverse** order, meaning the node with ID 0 is + ; the last one in the file! + + LONG ParentID ; ID of the parent node, -1 means this is the root. + + LONG ParentLineNo ; Line at which the parent context was exited. + ; Meaningless on the root node. + + BYTE Type ; 0 = REPT node + ; 1 = File node + ; 2 = Macro node + + IF Type != 0 ; If the node is not a REPT... + + STRING Name ; The node's name: either a file name, or macro name + ; prefixed by its definition file name. + + ELSE ; If the node is a REPT, it also contains the iter + ; counts of all the parent REPTs. + + LONG Depth ; Size of the array below. + + LONG Iter[Depth] ; The number of REPT iterations by increasing depth. + + ENDC + +ENDR ; Symbols -REPT NumberOfSymbols ; Number of symbols defined in this object file. +REPT NumberOfSymbols ; Number of symbols defined in this object file. - STRING Name ; The name of this symbol. Local symbols are stored - ; as "Scope.Symbol". + STRING Name ; The name of this symbol. Local symbols are stored + ; as "Scope.Symbol". - BYTE Type ; 0 = LOCAL symbol only used in this file. - ; 1 = IMPORT this symbol from elsewhere - ; 2 = EXPORT this symbol to other objects. + BYTE Type ; 0 = LOCAL symbol only used in this file. + ; 1 = IMPORT this symbol from elsewhere + ; 2 = EXPORT this symbol to other objects. 
- IF (Type & 0x7F) != 1 ; If symbol is defined in this object file. + IF (Type & 0x7F) != 1 ; If symbol is defined in this object file. - STRING FileName ; File where the symbol is defined. + LONG SourceFile ; ID of the file node where the symbol is defined. - LONG LineNum ; Line number in the file where the symbol is defined. + LONG LineNum ; Line number in the file where the symbol is defined. - LONG SectionID ; The section number (of this object file) in which - ; this symbol is defined. If it doesn't belong to any - ; specific section (like a constant), this field has - ; the value -1. + LONG SectionID ; The section number (of this object file) in which + ; this symbol is defined. If it doesn't belong to any + ; specific section (like a constant), this field has + ; the value -1. - LONG Value ; The symbols value. It's the offset into that - ; symbol's section. + LONG Value ; The symbol's value. It's the offset into that + ; symbol's section. ENDC @@ -107,8 +140,10 @@ REPT NumberOfSections REPT NumberOfPatches - STRING SourceFile ; Name of the source file (for printing error - ; messages). + LONG SourceFile ; ID of the source file node (for printing + ; error messages). + + LONG LineNo ; Line at which the patch was created. LONG Offset ; Offset into the section where patch should ; be applied (in bytes). @@ -145,7 +180,9 @@ LONG NumberOfAssertions REPT NumberOfAssertions - STRING SourceFile ; Name of the source file (for printing the failure). + LONG SourceFile ; ID of the source file node (for printing the failure). + + LONG LineNo ; Line at which the assertion was created. LONG Offset ; Offset into the section where the assertion is located. @@ -209,7 +246,7 @@ with some bytes being special prefixes for integers and symbols. .It Li $50 Ta Li BANK(symbol) , a .Ar LONG -Symbol ID follows. +Symbol ID follows, where -1 means PC. .It Li $51 Ta Li BANK(section_name) , a null-terminated string follows. 
.It Li $52 Ta Li Current BANK() diff --git a/test/asm/assert.err b/test/asm/assert.err index e4ef868d..8ce4402f 100644 --- a/test/asm/assert.err +++ b/test/asm/assert.err @@ -6,5 +6,5 @@ ERROR: assert.asm(18): Expected constant expression: 'FloatingBase' is not constant at assembly time ERROR: assert.asm(18): Assertion failed -ERROR: assert.asm(21): +FATAL: assert.asm(21): Assertion failed diff --git a/test/asm/divzero-instr.err b/test/asm/divzero-instr.err index e1e0e3e2..6a7ed04c 100644 --- a/test/asm/divzero-instr.err +++ b/test/asm/divzero-instr.err @@ -1,2 +1,2 @@ -ERROR: divzero-instr.asm(2): +FATAL: divzero-instr.asm(2): Division by zero diff --git a/test/asm/divzero-section-bank.err b/test/asm/divzero-section-bank.err index b382e48f..31044dc0 100644 --- a/test/asm/divzero-section-bank.err +++ b/test/asm/divzero-section-bank.err @@ -1,2 +1,2 @@ -ERROR: divzero-section-bank.asm(1): +FATAL: divzero-section-bank.asm(1): Division by zero diff --git a/test/asm/equs-nest.asm b/test/asm/equs-nest.asm new file mode 100644 index 00000000..f2f23a8a --- /dev/null +++ b/test/asm/equs-nest.asm @@ -0,0 +1,4 @@ +X1 equs "Y1 equs \"\\\"Success!\\\\n\\\"\"" +Y1 equs "Z1" +X1 + PRINTT Z1 diff --git a/test/asm/equs-nest.err b/test/asm/equs-nest.err new file mode 100644 index 00000000..e69de29b diff --git a/test/asm/equs-nest.out b/test/asm/equs-nest.out new file mode 100644 index 00000000..f985b46a --- /dev/null +++ b/test/asm/equs-nest.out @@ -0,0 +1 @@ +Success! 
diff --git a/test/asm/equs-newline.asm b/test/asm/equs-newline.asm new file mode 100644 index 00000000..571577ec --- /dev/null +++ b/test/asm/equs-newline.asm @@ -0,0 +1,4 @@ + +ACT equs "WARN \"First\"\nWARN \"Second\"" + ACT + WARN "Third" diff --git a/test/asm/equs-newline.err b/test/asm/equs-newline.err new file mode 100644 index 00000000..04ceb7be --- /dev/null +++ b/test/asm/equs-newline.err @@ -0,0 +1,7 @@ +warning: equs-newline.asm(3): [-Wuser] + First +while expanding symbol "ACT" +warning: equs-newline.asm(3): [-Wuser] + Second +warning: equs-newline.asm(4): [-Wuser] + Third diff --git a/test/asm/equs-newline.out b/test/asm/equs-newline.out new file mode 100644 index 00000000..e69de29b diff --git a/test/asm/equs-purge.asm b/test/asm/equs-purge.asm new file mode 100644 index 00000000..2cd7659c --- /dev/null +++ b/test/asm/equs-purge.asm @@ -0,0 +1,2 @@ +BYE equs "PURGE BYE\nWARN \"Crash?\"\n \n" +BYE diff --git a/test/asm/equs-purge.err b/test/asm/equs-purge.err new file mode 100644 index 00000000..c42c2bac --- /dev/null +++ b/test/asm/equs-purge.err @@ -0,0 +1,3 @@ +warning: equs-purge.asm(2): [-Wuser] + Crash? 
+while expanding symbol "BYE" diff --git a/test/asm/equs-purge.out b/test/asm/equs-purge.out new file mode 100644 index 00000000..e69de29b diff --git a/test/asm/equs-recursion.asm b/test/asm/equs-recursion.asm index 794f4960..3aae9053 100644 --- a/test/asm/equs-recursion.asm +++ b/test/asm/equs-recursion.asm @@ -1,2 +1,6 @@ -recurse EQUS "recurse" -recurse \ No newline at end of file +recurse EQUS "recurse " +recurse + +; FIXME: also handle the following: +; recurse EQUS "recurse" +; recurse diff --git a/test/asm/equs-recursion.err b/test/asm/equs-recursion.err index c6180ed3..002d0490 100644 --- a/test/asm/equs-recursion.err +++ b/test/asm/equs-recursion.err @@ -1,4 +1,4 @@ -ERROR: equs-recursion.asm(2): +FATAL: equs-recursion.asm(2): Recursion limit (64) exceeded while expanding symbol "recurse" while expanding symbol "recurse" @@ -64,3 +64,4 @@ while expanding symbol "recurse" while expanding symbol "recurse" while expanding symbol "recurse" while expanding symbol "recurse" +while expanding symbol "recurse" diff --git a/test/asm/file-sym.asm b/test/asm/file-sym.asm new file mode 100644 index 00000000..25782859 --- /dev/null +++ b/test/asm/file-sym.asm @@ -0,0 +1 @@ +PRINTT "{__FILE__}\n" diff --git a/test/asm/file-sym.err b/test/asm/file-sym.err new file mode 100644 index 00000000..e69de29b diff --git a/test/asm/file-sym.out b/test/asm/file-sym.out new file mode 100644 index 00000000..c3259a8b --- /dev/null +++ b/test/asm/file-sym.out @@ -0,0 +1 @@ +"file-sym.asm" diff --git a/test/asm/garbage_char.asm b/test/asm/garbage_char.asm index ca5f132c..29e181eb 100644 --- a/test/asm/garbage_char.asm +++ b/test/asm/garbage_char.asm @@ -1 +1 @@ -x \ No newline at end of file + diff --git a/test/asm/garbage_char.err b/test/asm/garbage_char.err index b2a30cb7..7f51a3e4 100644 --- a/test/asm/garbage_char.err +++ b/test/asm/garbage_char.err @@ -1,2 +1,3 @@ ERROR: garbage_char.asm(1): - Found garbage character: 0xFF + Unknown character 0xFF +error: Assembly aborted (1 
errors)! diff --git a/test/asm/if-macro.asm b/test/asm/if-macro.asm new file mode 100644 index 00000000..427e5eb3 --- /dev/null +++ b/test/asm/if-macro.asm @@ -0,0 +1,13 @@ +m: macro + if 0 + WARN "3" + else + WARN "5" + endc +endm + +if 1 + m +else + WARN "12" +endc diff --git a/test/asm/if-macro.err b/test/asm/if-macro.err new file mode 100644 index 00000000..23463fe9 --- /dev/null +++ b/test/asm/if-macro.err @@ -0,0 +1,2 @@ +warning: if-macro.asm(10) -> if-macro.asm::m(5): [-Wuser] + 5 diff --git a/test/asm/if-macro.out b/test/asm/if-macro.out new file mode 100644 index 00000000..e69de29b diff --git a/test/asm/if@-no-sect.asm b/test/asm/if@-no-sect.asm index 7c6072d9..5ea733f7 100644 --- a/test/asm/if@-no-sect.asm +++ b/test/asm/if@-no-sect.asm @@ -1,2 +1,2 @@ -if {@} +if "{@}" endc diff --git a/test/asm/include-recursion.err b/test/asm/include-recursion.err index 8a256e6b..1d598df3 100644 --- a/test/asm/include-recursion.err +++ b/test/asm/include-recursion.err @@ -1,2 +1,2 @@ -ERROR: include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> 
include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1): +FATAL: include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> 
include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1): Recursion limit (64) exceeded diff --git a/test/asm/label-redefinition.err b/test/asm/label-redefinition.err index 09eb79b3..59664cf5 100644 --- a/test/asm/label-redefinition.err +++ b/test/asm/label-redefinition.err @@ -1,3 +1,3 @@ ERROR: label-redefinition.asm(7): - 'Sym' already defined in label-redefinition.asm::m(6) + 'Sym' already defined at label-redefinition.asm(6) -> label-redefinition.asm::m(4) error: Assembly aborted (1 errors)! 
diff --git a/test/asm/line-continuation-macro.asm b/test/asm/line-continuation-macro.asm index b6b40907..4ea1f6bf 100644 --- a/test/asm/line-continuation-macro.asm +++ b/test/asm/line-continuation-macro.asm @@ -2,6 +2,7 @@ m: MACRO ENDM m2: MACRO - m \ ENDM + m \ +ENDM m2 diff --git a/test/asm/line-continuation-rept.asm b/test/asm/line-continuation-rept.asm index 90a354ad..e62fddef 100644 --- a/test/asm/line-continuation-rept.asm +++ b/test/asm/line-continuation-rept.asm @@ -2,7 +2,9 @@ m: MACRO ENDM REPT 1 - m ENDR + m +ENDR REPT 1 - m \ ENDR + m \ +ENDR diff --git a/test/asm/line-continuation-whitespace.asm b/test/asm/line-continuation-whitespace.asm index 43b1fa33..404e536a 100644 --- a/test/asm/line-continuation-whitespace.asm +++ b/test/asm/line-continuation-whitespace.asm @@ -4,4 +4,4 @@ bar: MACRO ENDM -foo: bar baz\ +foo: bar baz\ diff --git a/test/asm/load-overflow.err b/test/asm/load-overflow.err index be3e50b9..68bcfc5a 100644 --- a/test/asm/load-overflow.err +++ b/test/asm/load-overflow.err @@ -1,2 +1,2 @@ -ERROR: load-overflow.asm(4): +FATAL: load-overflow.asm(4): Section 'Overflow' grew too big (max size = 0x8000 bytes, reached 0x8001). diff --git a/test/asm/local-purge.err b/test/asm/local-purge.err index 4fb44b6e..3daacd32 100644 --- a/test/asm/local-purge.err +++ b/test/asm/local-purge.err @@ -1,3 +1,3 @@ ERROR: local-purge.asm(8): - '.loc' not defined + Interpolated symbol ".loc" does not exist error: Assembly aborted (1 errors)! 
diff --git a/test/asm/local-purge.out b/test/asm/local-purge.out index ebfabbe4..8b137891 100644 --- a/test/asm/local-purge.out +++ b/test/asm/local-purge.out @@ -1 +1 @@ -$0 + diff --git a/test/asm/local-ref-without-parent.err b/test/asm/local-ref-without-parent.err index fd784ef3..d332bdf9 100644 --- a/test/asm/local-ref-without-parent.err +++ b/test/asm/local-ref-without-parent.err @@ -1,2 +1,2 @@ -ERROR: local-ref-without-parent.asm(3): +FATAL: local-ref-without-parent.asm(3): Local label reference '.test' in main scope diff --git a/test/asm/macro-line-no.asm b/test/asm/macro-line-no.asm new file mode 100644 index 00000000..89adebbb --- /dev/null +++ b/test/asm/macro-line-no.asm @@ -0,0 +1,8 @@ + +WARN "Line 2" +m: macro + WARN "Line 4" +endm +WARN "Line 6" + m +WARN "Line 8" diff --git a/test/asm/macro-line-no.err b/test/asm/macro-line-no.err new file mode 100644 index 00000000..947639e5 --- /dev/null +++ b/test/asm/macro-line-no.err @@ -0,0 +1,8 @@ +warning: macro-line-no.asm(2): [-Wuser] + Line 2 +warning: macro-line-no.asm(6): [-Wuser] + Line 6 +warning: macro-line-no.asm(7) -> macro-line-no.asm::m(4): [-Wuser] + Line 4 +warning: macro-line-no.asm(8): [-Wuser] + Line 8 diff --git a/test/asm/macro-line-no.out b/test/asm/macro-line-no.out new file mode 100644 index 00000000..e69de29b diff --git a/test/asm/macro-recursion.err b/test/asm/macro-recursion.err index e69c4c5f..d515ed61 100644 --- a/test/asm/macro-recursion.err +++ b/test/asm/macro-recursion.err @@ -1,2 +1,2 @@ -ERROR: macro-recursion.asm(4) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> 
macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2): +FATAL: macro-recursion.asm(4) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> 
macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> 
macro-recursion.asm::recurse(2): Recursion limit (64) exceeded diff --git a/test/asm/narg-nosect.asm b/test/asm/narg-nosect.asm new file mode 100644 index 00000000..54099434 --- /dev/null +++ b/test/asm/narg-nosect.asm @@ -0,0 +1 @@ + PRINTT "{_NARG}\n" diff --git a/test/asm/narg-nosect.err b/test/asm/narg-nosect.err new file mode 100644 index 00000000..918c3b11 --- /dev/null +++ b/test/asm/narg-nosect.err @@ -0,0 +1,3 @@ +ERROR: narg-nosect.asm(1): + _NARG does not make sense outside of a macro +error: Assembly aborted (1 errors)! diff --git a/test/asm/narg-nosect.out b/test/asm/narg-nosect.out new file mode 100644 index 00000000..ebfabbe4 --- /dev/null +++ b/test/asm/narg-nosect.out @@ -0,0 +1 @@ +$0 diff --git a/test/asm/nested-brackets.asm b/test/asm/nested-brackets.asm index 0823286a..344d9f8c 100644 --- a/test/asm/nested-brackets.asm +++ b/test/asm/nested-brackets.asm @@ -3,3 +3,4 @@ WRAPPER equs "TRIN" PRINTT "{S{WRAPPER}G}\n" PRINTT "{S{WRAPPER}G" + PRINTT "\n" diff --git a/test/asm/nested-brackets.err b/test/asm/nested-brackets.err index b3e00643..24035a4f 100644 --- a/test/asm/nested-brackets.err +++ b/test/asm/nested-brackets.err @@ -1,2 +1,3 @@ ERROR: nested-brackets.asm(5): Missing } +error: Assembly aborted (1 errors)! diff --git a/test/asm/nested-brackets.out b/test/asm/nested-brackets.out index d86bac9d..2c94e483 100644 --- a/test/asm/nested-brackets.out +++ b/test/asm/nested-brackets.out @@ -1 +1,2 @@ OK +OK diff --git a/test/asm/null-in-macro.asm b/test/asm/null-in-macro.asm index c9f87a3d..944922e8 100644 Binary files a/test/asm/null-in-macro.asm and b/test/asm/null-in-macro.asm differ diff --git a/test/asm/null-in-macro.err b/test/asm/null-in-macro.err index 90943ee0..4c11181a 100644 --- a/test/asm/null-in-macro.err +++ b/test/asm/null-in-macro.err @@ -1,2 +1,3 @@ -ERROR: null-in-macro.asm(2): - Found null character +ERROR: null-in-macro.asm(4) -> null-in-macro.asm::foo(2): + Unknown character 0x00 +error: Assembly aborted (1 errors)! 
diff --git a/test/asm/opt-b.asm b/test/asm/opt-b.asm new file mode 100644 index 00000000..293421ed --- /dev/null +++ b/test/asm/opt-b.asm @@ -0,0 +1,3 @@ +OPT b.X +PRINTV %..X.X.X. +PRINTT "\n" diff --git a/test/asm/opt-b.err b/test/asm/opt-b.err new file mode 100644 index 00000000..e69de29b diff --git a/test/asm/opt-b.out b/test/asm/opt-b.out new file mode 100644 index 00000000..72555870 --- /dev/null +++ b/test/asm/opt-b.out @@ -0,0 +1 @@ +$2A diff --git a/test/asm/opt-g.asm b/test/asm/opt-g.asm new file mode 100644 index 00000000..a4cd3a38 --- /dev/null +++ b/test/asm/opt-g.asm @@ -0,0 +1,3 @@ +OPT g.x0X +PRINTV `.x.x0X0X +PRINTT "\n" diff --git a/test/asm/opt-g.err b/test/asm/opt-g.err new file mode 100644 index 00000000..e69de29b diff --git a/test/asm/opt-g.out b/test/asm/opt-g.out new file mode 100644 index 00000000..78f6c0a7 --- /dev/null +++ b/test/asm/opt-g.out @@ -0,0 +1 @@ +$F55 diff --git a/test/asm/overflow.err b/test/asm/overflow.err index b51baa5e..a3b44942 100644 --- a/test/asm/overflow.err +++ b/test/asm/overflow.err @@ -3,6 +3,6 @@ warning: overflow.asm(24): [-Wdiv] warning: overflow.asm(25): [-Wdiv] Division of -2147483648 by -1 yields -2147483648 warning: overflow.asm(39): [-Wlarge-constant] - Integer constant '4294967296' is too large + Integer constant is too large warning: overflow.asm(42): [-Wlarge-constant] - Graphics constant '`333333333' is too long + Graphics constant is too long, only 8 first pixels considered diff --git a/test/asm/pops-no-pushed-sections.err b/test/asm/pops-no-pushed-sections.err index 4f8c06c1..7dfcea48 100644 --- a/test/asm/pops-no-pushed-sections.err +++ b/test/asm/pops-no-pushed-sections.err @@ -1,2 +1,2 @@ -ERROR: pops-no-pushed-sections.asm(1): +FATAL: pops-no-pushed-sections.asm(1): No entries in the section stack diff --git a/test/asm/pops-restore-no-section.err b/test/asm/pops-restore-no-section.err index 4eac4442..c00efd30 100644 --- a/test/asm/pops-restore-no-section.err +++ 
b/test/asm/pops-restore-no-section.err @@ -1,4 +1,4 @@ ERROR: pops-restore-no-section.asm(9): Label "DisallowedContent" created outside of a SECTION -ERROR: pops-restore-no-section.asm(10): +FATAL: pops-restore-no-section.asm(10): Code generation before SECTION directive diff --git a/test/asm/remote-local-noexist.err b/test/asm/remote-local-noexist.err index f1fbac28..3193664b 100644 --- a/test/asm/remote-local-noexist.err +++ b/test/asm/remote-local-noexist.err @@ -1,2 +1,2 @@ -ERROR: remote-local-noexist.asm(7): +FATAL: remote-local-noexist.asm(7): 'Parent.child.fail' is a nonsensical reference to a nested local symbol diff --git a/test/asm/rept-0.asm b/test/asm/rept-0.asm new file mode 100644 index 00000000..13a52a69 --- /dev/null +++ b/test/asm/rept-0.asm @@ -0,0 +1,3 @@ +REPT 0 + WARN "2" +ENDR diff --git a/test/asm/rept-0.err b/test/asm/rept-0.err new file mode 100644 index 00000000..e69de29b diff --git a/test/asm/rept-0.out b/test/asm/rept-0.out new file mode 100644 index 00000000..e69de29b diff --git a/test/asm/rept-shift.err b/test/asm/rept-shift.err index 531d3cd3..30fd3490 100644 --- a/test/asm/rept-shift.err +++ b/test/asm/rept-shift.err @@ -1,2 +1,2 @@ -ERROR: rept-shift.asm(17) -> rept-shift.asm::m(14): +FATAL: rept-shift.asm(17) -> rept-shift.asm::m(14): Macro argument '\1' not defined diff --git a/test/asm/section-union.err b/test/asm/section-union.err index e50f5ceb..c0c2921f 100644 --- a/test/asm/section-union.err +++ b/test/asm/section-union.err @@ -6,5 +6,5 @@ ERROR: section-union.asm(37): Section "test" already declared as fixed at $c000 ERROR: section-union.asm(37): Section "test" already declared as aligned to 256 bytes -ERROR: section-union.asm(37): +FATAL: section-union.asm(37): Cannot create section "test" (3 errors) diff --git a/test/asm/sym-collision.err b/test/asm/sym-collision.err index 53a25652..29a82c43 100644 --- a/test/asm/sym-collision.err +++ b/test/asm/sym-collision.err @@ -1,3 +1,3 @@ ERROR: sym-collision.asm(26): - 'dork' not 
defined + Interpolated symbol "dork" does not exist error: Assembly aborted (1 errors)! diff --git a/test/asm/sym-collision.out b/test/asm/sym-collision.out index 5f7cb010..cab13a55 100644 --- a/test/asm/sym-collision.out +++ b/test/asm/sym-collision.out @@ -1,7 +1,7 @@ aqfj: $FE00 cxje: $FE01 dgsd: $FE02 -dork: $0 +dork: lxok: $FE04 psgp: $FE05 sfly: $FE06 diff --git a/test/asm/symbol-invalid-macro-arg.err b/test/asm/symbol-invalid-macro-arg.err index ff94c3c2..2e03eab6 100644 --- a/test/asm/symbol-invalid-macro-arg.err +++ b/test/asm/symbol-invalid-macro-arg.err @@ -1,2 +1,2 @@ -ERROR: symbol-invalid-macro-arg.asm(1): - Invalid macro argument '\0' in symbol +FATAL: symbol-invalid-macro-arg.asm(1): + Invalid macro argument '\0' diff --git a/test/asm/test.sh b/test/asm/test.sh index a4ad6a23..678bb0db 100755 --- a/test/asm/test.sh +++ b/test/asm/test.sh @@ -12,6 +12,7 @@ rc=0 bold=$(tput bold) resbold=$(tput sgr0) red=$(tput setaf 1) +green=$(tput setaf 2) rescolors=$(tput op) tryDiff () { diff -u --strip-trailing-cr $1 $2 || (echo "${bold}${red}${i%.asm}${variant}.$3 mismatch!${rescolors}${resbold}"; false) @@ -36,6 +37,7 @@ fi for i in *.asm; do for variant in '' '.pipe'; do + echo -e "${bold}${green}${i%.asm}${variant}...${rescolors}${resbold}" if [ -z "$variant" ]; then ../../rgbasm -Weverything -o $o $i > $output 2> $errput desired_output=${i%.asm}.out @@ -59,8 +61,8 @@ for i in *.asm; do # Escape regex metacharacters subst="$(printf '%s\n' "$i" | sed 's:[][\/.^$*]:\\&:g')" # Replace the file name with a dash to match changed output - sed "s/$subst/-/g" ${i%.asm}.out > $desired_output - sed "s/$subst/-/g" ${i%.asm}.err > $desired_errput + sed "s/$subst//g" ${i%.asm}.out > $desired_output + sed "s/$subst//g" ${i%.asm}.err > $desired_errput fi tryDiff $desired_output $output out diff --git a/test/asm/unique-id.err b/test/asm/unique-id.err index a39a3fa8..9c5fdd47 100644 --- a/test/asm/unique-id.err +++ b/test/asm/unique-id.err @@ -1,19 +1,19 @@ warning: 
unique-id.asm(12) -> unique-id.asm::m(4): [-Wuser] - _0 -warning: unique-id.asm(12) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~1(6): [-Wuser] _1 -warning: unique-id.asm(12) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~2(6): [-Wuser] +warning: unique-id.asm(12) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~1(6): [-Wuser] _2 +warning: unique-id.asm(12) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~2(6): [-Wuser] + _3 warning: unique-id.asm(12) -> unique-id.asm::m(8): [-Wuser] - _0 + _1 warning: unique-id.asm(14) -> unique-id.asm::m(4): [-Wuser] - _3 -warning: unique-id.asm(14) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~1(6): [-Wuser] _4 -warning: unique-id.asm(14) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~2(6): [-Wuser] +warning: unique-id.asm(14) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~1(6): [-Wuser] _5 +warning: unique-id.asm(14) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~2(6): [-Wuser] + _6 warning: unique-id.asm(14) -> unique-id.asm::m(8): [-Wuser] - _3 -ERROR: unique-id.asm(15): + _4 +FATAL: unique-id.asm(15): Macro argument '\@' not defined while expanding symbol "print" diff --git a/test/link/section-union/align-conflict.out b/test/link/section-union/align-conflict.out index d29b7b97..0a9a98eb 100644 --- a/test/link/section-union/align-conflict.out +++ b/test/link/section-union/align-conflict.out @@ -1,6 +1,6 @@ error: Section "conflicting alignment" is defined with conflicting 4-byte alignment (offset 0) and address $cafe --- -ERROR: -(18): +ERROR: (18): Section "conflicting alignment" already declared as aligned to 4 bytes (offset 0) -ERROR: -(18): +FATAL: (18): Cannot create section "conflicting alignment" (1 errors) diff --git a/test/link/section-union/align-ofs-conflict.out b/test/link/section-union/align-ofs-conflict.out index 08cc5929..6a545f34 100644 --- a/test/link/section-union/align-ofs-conflict.out +++ b/test/link/section-union/align-ofs-conflict.out @@ -1,6 +1,6 @@ error: Section "conflicting alignment" 
is defined with conflicting 8-byte alignment (offset 7) and 16-byte alignment (offset 14) --- -ERROR: -(18): +ERROR: (18): Section "conflicting alignment" already declared with incompatible 3-byte alignment (offset 7) -ERROR: -(18): +FATAL: (18): Cannot create section "conflicting alignment" (1 errors) diff --git a/test/link/section-union/assert.out b/test/link/section-union/assert.out index c50272d9..fa3c90e5 100644 --- a/test/link/section-union/assert.out +++ b/test/link/section-union/assert.out @@ -1,6 +1,6 @@ error: section-union/assert.asm(11): Force failing the build Linking failed with 1 error --- -ERROR: -(30): +ERROR: (30): Assertion failed: Force failing the build error: Assembly aborted (1 errors)! diff --git a/test/link/section-union/bad-types.out b/test/link/section-union/bad-types.out index bd5fdfcf..1fcd01a1 100644 --- a/test/link/section-union/bad-types.out +++ b/test/link/section-union/bad-types.out @@ -1,6 +1,6 @@ error: Section "conflicting types" is defined with conflicting types HRAM and WRAM0 --- -ERROR: -(18): +ERROR: (18): Section "conflicting types" already exists but with type HRAM -ERROR: -(18): +FATAL: (18): Cannot create section "conflicting types" (1 errors) diff --git a/test/link/section-union/bank-conflict.out b/test/link/section-union/bank-conflict.out index c806274f..2d16cff8 100644 --- a/test/link/section-union/bank-conflict.out +++ b/test/link/section-union/bank-conflict.out @@ -1,6 +1,6 @@ error: Section "conflicting banks" is defined with conflicting banks 4 and 1 --- -ERROR: -(14): +ERROR: (14): Section "conflicting banks" already declared with different bank 4 -ERROR: -(14): +FATAL: (14): Cannot create section "conflicting banks" (1 errors) diff --git a/test/link/section-union/data-overlay.out b/test/link/section-union/data-overlay.out index b2cfb039..d504ce95 100644 --- a/test/link/section-union/data-overlay.out +++ b/test/link/section-union/data-overlay.out @@ -1,6 +1,6 @@ error: Section "overlaid data" is of type ROM0, 
which cannot be unionized --- -ERROR: -(18): +ERROR: (18): Cannot declare ROM sections as UNION -ERROR: -(18): +FATAL: (18): Cannot create section "overlaid data" (1 errors) diff --git a/test/link/section-union/different-data.out b/test/link/section-union/different-data.out index 46619e1c..4357cd59 100644 --- a/test/link/section-union/different-data.out +++ b/test/link/section-union/different-data.out @@ -1,6 +1,6 @@ error: Section "different data" is of type ROM0, which cannot be unionized --- -ERROR: -(16): +ERROR: (16): Cannot declare ROM sections as UNION -ERROR: -(16): +FATAL: (16): Cannot create section "different data" (1 errors) diff --git a/test/link/section-union/different-ofs.out b/test/link/section-union/different-ofs.out index c897549f..ee86d285 100644 --- a/test/link/section-union/different-ofs.out +++ b/test/link/section-union/different-ofs.out @@ -1,6 +1,6 @@ error: Section "conflicting alignment" is defined with conflicting 8-byte alignment (offset 7) and 8-byte alignment (offset 6) --- -ERROR: -(18): +ERROR: (18): Section "conflicting alignment" already declared with incompatible 3-byte alignment (offset 7) -ERROR: -(18): +FATAL: (18): Cannot create section "conflicting alignment" (1 errors) diff --git a/test/link/section-union/different-size.out b/test/link/section-union/different-size.out index bc6131f6..16089d5e 100644 --- a/test/link/section-union/different-size.out +++ b/test/link/section-union/different-size.out @@ -1,6 +1,6 @@ error: Section "different section sizes" is of type ROM0, which cannot be unionized --- -ERROR: -(16): +ERROR: (16): Cannot declare ROM sections as UNION -ERROR: -(16): +FATAL: (16): Cannot create section "different section sizes" (1 errors) diff --git a/test/link/section-union/different-syntaxes.out b/test/link/section-union/different-syntaxes.out index 52dd7709..8e5b8084 100644 --- a/test/link/section-union/different-syntaxes.out +++ b/test/link/section-union/different-syntaxes.out @@ -1,6 +1,6 @@ error: Section 
"different syntaxes" is of type ROM0, which cannot be unionized --- -ERROR: -(18): +ERROR: (18): Cannot declare ROM sections as UNION -ERROR: -(18): +FATAL: (18): Cannot create section "different syntaxes" (1 errors) diff --git a/test/link/section-union/org-conflict.out b/test/link/section-union/org-conflict.out index b5a332ec..7c0140da 100644 --- a/test/link/section-union/org-conflict.out +++ b/test/link/section-union/org-conflict.out @@ -1,6 +1,6 @@ error: Section "conflicting address" is defined with conflicting addresses $beef and $babe --- -ERROR: -(16): +ERROR: (16): Section "conflicting address" already declared as fixed at different address $beef -ERROR: -(16): +FATAL: (16): Cannot create section "conflicting address" (1 errors) diff --git a/test/link/section-union/split-data.out b/test/link/section-union/split-data.out index 36db397f..8bd67193 100644 --- a/test/link/section-union/split-data.out +++ b/test/link/section-union/split-data.out @@ -1,6 +1,6 @@ error: Section "mutually-overlaid data" is of type ROM0, which cannot be unionized --- -ERROR: -(18): +ERROR: (18): Cannot declare ROM sections as UNION -ERROR: -(18): +FATAL: (18): Cannot create section "mutually-overlaid data" (1 errors)