diff --git a/include/asm/lexer.h b/include/asm/lexer.h index e4fcd844..32df75d3 100644 --- a/include/asm/lexer.h +++ b/include/asm/lexer.h @@ -43,10 +43,11 @@ void lexer_SetMode(enum LexerMode mode); void lexer_ToggleStringExpansion(bool enable); char const *lexer_GetFileName(void); -unsigned int lexer_GetLineNo(void); +uint32_t lexer_GetLineNo(void); +uint32_t lexer_GetColNo(void); void lexer_DumpStringExpansions(void); int yylex(void); void lexer_SkipToBlockEnd(int blockStartToken, int blockEndToken, int endToken, - char **capture, size_t *size, char const *name); + char const **capture, size_t *size, char const *name); #endif /* RGBDS_ASM_LEXER_H */ diff --git a/src/asm/asmy.y b/src/asm/asmy.y index 9f3bc873..a8ec303a 100644 --- a/src/asm/asmy.y +++ b/src/asm/asmy.y @@ -604,7 +604,7 @@ load : T_POP_LOAD string ',' sectiontype sectorg sectattrs { rept : T_POP_REPT uconst { uint32_t nDefinitionLineNo = lexer_GetLineNo(); - char *body; + char const *body; size_t size; lexer_SkipToBlockEnd(T_POP_REPT, T_POP_ENDR, T_POP_ENDR, &body, &size, "REPT block"); @@ -614,7 +614,7 @@ rept : T_POP_REPT uconst { macrodef : T_LABEL ':' T_POP_MACRO { int32_t nDefinitionLineNo = lexer_GetLineNo(); - char *body; + char const *body; size_t size; lexer_SkipToBlockEnd(T_POP_MACRO, T_POP_ENDM, T_POP_ENDM, &body, &size, "macro definition"); diff --git a/src/asm/fstack.c b/src/asm/fstack.c index b094f14b..c2893780 100644 --- a/src/asm/fstack.c +++ b/src/asm/fstack.c @@ -250,7 +250,8 @@ void fstk_Dump(void) pLastFile = pLastFile->next; } - fprintf(stderr, "%s(%" PRId32 ")", lexer_GetFileName(), lexer_GetLineNo()); + fprintf(stderr, "%s(%" PRId32 ",%" PRId32 ")", + lexer_GetFileName(), lexer_GetLineNo(), lexer_GetColNo()); } void fstk_DumpToStr(char *buf, size_t buflen) diff --git a/src/asm/globlex.c b/src/asm/globlex.c deleted file mode 100644 index 89d1556f..00000000 --- a/src/asm/globlex.c +++ /dev/null @@ -1,698 +0,0 @@ -/* - * This file is part of RGBDS. - * - * Copyright (c) 1997-2018, Carsten Sorensen and RGBDS contributors. - * - * SPDX-License-Identifier: MIT - */ - -#include -#include -#include -#include -#include -#include - -#include "asm/asm.h" -#include "asm/lexer.h" -#include "asm/macro.h" -#include "asm/main.h" -#include "asm/rpn.h" -#include "asm/section.h" -#include "asm/warning.h" - -#include "helpers.h" - -#include "asmy.h" - -bool oDontExpandStrings; -int32_t nGBGfxID = -1; -int32_t nBinaryID = -1; - -static int32_t gbgfx2bin(char ch) -{ - int32_t i; - - for (i = 0; i <= 3; i++) { - if (CurrentOptions.gbgfx[i] == ch) - return i; - } - - return 0; -} - -static int32_t binary2bin(char ch) -{ - int32_t i; - - for (i = 0; i <= 1; i++) { - if (CurrentOptions.binary[i] == ch) - return i; - } - - return 0; -} - -static int32_t char2bin(char ch) -{ - if (ch >= 'a' && ch <= 'f') - return (ch - 'a' + 10); - - if (ch >= 'A' && ch <= 'F') - return (ch - 'A' + 10); - - if (ch >= '0' && ch <= '9') - return (ch - '0'); - - return 0; -} - -typedef int32_t(*x2bin) (char ch); - -static int32_t ascii2bin(char *s) -{ - char *start = s; - uint32_t radix = 10; - uint32_t result = 0; - x2bin convertfunc = char2bin; - - switch (*s) { - case '$': - radix = 16; - s++; - convertfunc = char2bin; - break; - case '&': - radix = 8; - s++; - convertfunc = char2bin; - break; - case '`': - radix = 4; - s++; - convertfunc = gbgfx2bin; - break; - case '%': - radix = 2; - s++; - convertfunc = binary2bin; - break; - default: - /* Handle below */ - break; - } - - const uint32_t max_q = UINT32_MAX / radix; - const uint32_t max_r = UINT32_MAX % radix; - - if (*s == '\0') { - /* - * There are no digits after the radix prefix - * (or the string is empty, which shouldn't happen). - */ - error("Invalid integer constant\n"); - } else if (radix == 4) { - int32_t size = 0; - int32_t c; - - while (*s != '\0') { - c = convertfunc(*s++); - result = result * 2 + ((c & 2) << 7) + (c & 1); - size++; - } - - /* - * Extending a graphics constant longer than 8 pixels, - * the Game Boy tile width, produces a nonsensical result. - */ - if (size > 8) { - warning(WARNING_LARGE_CONSTANT, "Graphics constant '%s' is too long\n", - start); - } - } else { - bool overflow = false; - - while (*s != '\0') { - int32_t digit = convertfunc(*s++); - - if (result > max_q - || (result == max_q && digit > max_r)) { - overflow = true; - } - result = result * radix + digit; - } - - if (overflow) - warning(WARNING_LARGE_CONSTANT, "Integer constant '%s' is too large\n", - start); - } - - return result; -} - -uint32_t ParseFixedPoint(char *s, uint32_t size) -{ - uint32_t i; - uint32_t dot = 0; - - for (i = 0; i < size; i++) { - if (s[i] == '.') { - dot++; - - if (dot == 2) - break; - } - } - - yyskipbytes(i); - - yylval.nConstValue = (int32_t)(atof(s) * 65536); - - return 1; -} - -uint32_t ParseNumber(char *s, uint32_t size) -{ - char dest[256]; - - if (size > 255) - fatalerror("Number token too long\n"); - - strncpy(dest, s, size); - dest[size] = 0; - yylval.nConstValue = ascii2bin(dest); - - yyskipbytes(size); - - return 1; -} - -/* - * If the symbol name ends before the end of the macro arg, - * return a pointer to the rest of the macro arg. - * Otherwise, return NULL. - */ -char const *AppendMacroArg(char whichArg, char *dest, size_t *destIndex) -{ - char const *marg; - - if (whichArg == '@') - marg = macro_GetUniqueIDStr(); - else if (whichArg >= '1' && whichArg <= '9') - marg = macro_GetArg(whichArg - '0'); - else - fatalerror("Invalid macro argument '\\%c' in symbol\n", whichArg); - - if (!marg) - fatalerror("Macro argument '\\%c' not defined\n", whichArg); - - char ch; - - while ((ch = *marg) != 0) { - if ((ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || (ch >= '0' && ch <= '9') - || ch == '_' - || ch == '@' - || ch == '#' - || ch == '.') { - if (*destIndex >= MAXSYMLEN) - fatalerror("Symbol too long\n"); - - dest[*destIndex] = ch; - (*destIndex)++; - } else { - return marg; - } - - marg++; - } - - return NULL; -} - -uint32_t ParseSymbol(char *src, uint32_t size) -{ - char dest[MAXSYMLEN + 1]; - size_t srcIndex = 0; - size_t destIndex = 0; - char const *rest = NULL; - - while (srcIndex < size) { - char ch = src[srcIndex++]; - - if (ch == '\\') { - /* - * We don't check if srcIndex is still less than size, - * but that can only fail to be true when the - * following char is neither '@' nor a digit. - * In that case, AppendMacroArg() will catch the error. - */ - ch = src[srcIndex++]; - - rest = AppendMacroArg(ch, dest, &destIndex); - /* If the symbol's end was in the middle of the token */ - if (rest) - break; - } else { - if (destIndex >= MAXSYMLEN) - fatalerror("Symbol too long\n"); - dest[destIndex++] = ch; - } - } - - dest[destIndex] = 0; - - /* Tell the lexer we read all bytes that we did */ - yyskipbytes(srcIndex); - - /* - * If an escape's expansion left some chars after the symbol's end, - * such as the `::` in a `Backup\1` expanded to `BackupCamX::`, - * put those into the buffer. - * Note that this NEEDS to be done after the `yyskipbytes` above. - */ - if (rest) - yyunputstr(rest); - - /* If the symbol is an EQUS, expand it */ - if (!oDontExpandStrings) { - struct Symbol const *sym = sym_FindSymbol(dest); - - if (sym && sym->type == SYM_EQUS) { - char const *s; - - lex_BeginStringExpansion(dest); - - /* Feed the symbol's contents into the buffer */ - yyunputstr(s = sym_GetStringValue(sym)); - - /* Lines inserted this way shall not increase lexer_GetLineNo() */ - while (*s) { - if (*s++ == '\n') - lexer_GetLineNo()--; - } - return 0; - } - } - - strcpy(yylval.tzSym, dest); - return 1; -} - -uint32_t PutMacroArg(char *src, uint32_t size) -{ - char const *s; - - yyskipbytes(size); - if ((size == 2 && src[1] >= '1' && src[1] <= '9')) { - s = macro_GetArg(src[1] - '0'); - - if (s != NULL) - yyunputstr(s); - else - error("Macro argument '\\%c' not defined\n", src[1]); - } else { - error("Invalid macro argument '\\%c'\n", src[1]); - } - return 0; -} - -uint32_t PutUniqueID(char *src, uint32_t size) -{ - (void)src; - char const *s; - - yyskipbytes(size); - - s = macro_GetUniqueIDStr(); - - if (s != NULL) - yyunputstr(s); - else - error("Macro unique label string not defined\n"); - - return 0; -} - -enum { - T_LEX_MACROARG = 3000, - T_LEX_MACROUNIQUE -}; - -const struct sLexInitString lexer_strings[] = { - {"adc", T_Z80_ADC}, - {"add", T_Z80_ADD}, - {"and", T_Z80_AND}, - {"bit", T_Z80_BIT}, - {"call", T_Z80_CALL}, - {"ccf", T_Z80_CCF}, - {"cpl", T_Z80_CPL}, - {"cp", T_Z80_CP}, - {"daa", T_Z80_DAA}, - {"dec", T_Z80_DEC}, - {"di", T_Z80_DI}, - {"ei", T_Z80_EI}, - {"halt", T_Z80_HALT}, - {"inc", T_Z80_INC}, - {"jp", T_Z80_JP}, - {"jr", T_Z80_JR}, - {"ld", T_Z80_LD}, - {"ldi", T_Z80_LDI}, - {"ldd", T_Z80_LDD}, - {"ldio", T_Z80_LDIO}, - {"ldh", T_Z80_LDIO}, - {"nop", T_Z80_NOP}, - {"or", T_Z80_OR}, - {"pop", T_Z80_POP}, - {"push", T_Z80_PUSH}, - {"res", T_Z80_RES}, - {"reti", T_Z80_RETI}, - {"ret", T_Z80_RET}, - {"rlca", T_Z80_RLCA}, - {"rlc", T_Z80_RLC}, - {"rla", T_Z80_RLA}, - {"rl", T_Z80_RL}, - {"rrc", T_Z80_RRC}, - {"rrca", T_Z80_RRCA}, - {"rra", T_Z80_RRA}, - {"rr", T_Z80_RR}, - {"rst", T_Z80_RST}, - {"sbc", T_Z80_SBC}, - {"scf", T_Z80_SCF}, - {"set", T_POP_SET}, - {"sla", T_Z80_SLA}, - {"sra", T_Z80_SRA}, - {"srl", T_Z80_SRL}, - {"stop", T_Z80_STOP}, - {"sub", T_Z80_SUB}, - {"swap", T_Z80_SWAP}, - {"xor", T_Z80_XOR}, - - {"nz", T_CC_NZ}, - {"z", T_CC_Z}, - {"nc", T_CC_NC}, - /* Handled in list of registers */ - /* { "c", T_TOKEN_C }, */ - - {"hli", T_MODE_HL_INC}, - {"hld", T_MODE_HL_DEC}, - {"$ff00+c", T_MODE_HW_C}, - {"$ff00 + c", T_MODE_HW_C}, - {"af", T_MODE_AF}, - {"bc", T_MODE_BC}, - {"de", T_MODE_DE}, - {"hl", T_MODE_HL}, - {"sp", T_MODE_SP}, - - {"a", T_TOKEN_A}, - {"b", T_TOKEN_B}, - {"c", T_TOKEN_C}, - {"d", T_TOKEN_D}, - {"e", T_TOKEN_E}, - {"h", T_TOKEN_H}, - {"l", T_TOKEN_L}, - - {"||", T_OP_LOGICOR}, - {"&&", T_OP_LOGICAND}, - {"==", T_OP_LOGICEQU}, - {">", T_OP_LOGICGT}, - {"<", T_OP_LOGICLT}, - {">=", T_OP_LOGICGE}, - {"<=", T_OP_LOGICLE}, - {"!=", T_OP_LOGICNE}, - {"!", T_OP_LOGICNOT}, - {"|", T_OP_OR}, - {"^", T_OP_XOR}, - {"&", T_OP_AND}, - {"<<", T_OP_SHL}, - {">>", T_OP_SHR}, - {"+", T_OP_ADD}, - {"-", T_OP_SUB}, - {"*", T_OP_MUL}, - {"/", T_OP_DIV}, - {"%", T_OP_MOD}, - {"~", T_OP_NOT}, - - {"def", T_OP_DEF}, - - {"fragment", T_POP_FRAGMENT}, - {"bank", T_OP_BANK}, - {"align", T_OP_ALIGN}, - - {"round", T_OP_ROUND}, - {"ceil", T_OP_CEIL}, - {"floor", T_OP_FLOOR}, - {"div", T_OP_FDIV}, - {"mul", T_OP_FMUL}, - {"sin", T_OP_SIN}, - {"cos", T_OP_COS}, - {"tan", T_OP_TAN}, - {"asin", T_OP_ASIN}, - {"acos", T_OP_ACOS}, - {"atan", T_OP_ATAN}, - {"atan2", T_OP_ATAN2}, - - {"high", T_OP_HIGH}, - {"low", T_OP_LOW}, - {"isconst", T_OP_ISCONST}, - - {"strcmp", T_OP_STRCMP}, - {"strin", T_OP_STRIN}, - {"strsub", T_OP_STRSUB}, - {"strlen", T_OP_STRLEN}, - {"strcat", T_OP_STRCAT}, - {"strupr", T_OP_STRUPR}, - {"strlwr", T_OP_STRLWR}, - - {"include", T_POP_INCLUDE}, - {"printt", T_POP_PRINTT}, - {"printi", T_POP_PRINTI}, - {"printv", T_POP_PRINTV}, - {"printf", T_POP_PRINTF}, - {"export", T_POP_EXPORT}, - {"xdef", T_POP_XDEF}, - {"global", T_POP_GLOBAL}, - {"ds", T_POP_DS}, - {"db", T_POP_DB}, - {"dw", T_POP_DW}, - {"dl", T_POP_DL}, - {"section", T_POP_SECTION}, - {"purge", T_POP_PURGE}, - - {"rsreset", T_POP_RSRESET}, - {"rsset", T_POP_RSSET}, - - {"incbin", T_POP_INCBIN}, - {"charmap", T_POP_CHARMAP}, - {"newcharmap", T_POP_NEWCHARMAP}, - {"setcharmap", T_POP_SETCHARMAP}, - {"pushc", T_POP_PUSHC}, - {"popc", T_POP_POPC}, - - {"fail", T_POP_FAIL}, - {"warn", T_POP_WARN}, - {"fatal", T_POP_FATAL}, - {"assert", T_POP_ASSERT}, - {"static_assert", T_POP_STATIC_ASSERT}, - - {"macro", T_POP_MACRO}, - /* Not needed but we have it here just to protect the name */ - {"endm", T_POP_ENDM}, - {"shift", T_POP_SHIFT}, - - {"rept", T_POP_REPT}, - /* Not needed but we have it here just to protect the name */ - {"endr", T_POP_ENDR}, - - {"load", T_POP_LOAD}, - {"endl", T_POP_ENDL}, - - {"if", T_POP_IF}, - {"else", T_POP_ELSE}, - {"elif", T_POP_ELIF}, - {"endc", T_POP_ENDC}, - - {"union", T_POP_UNION}, - {"nextu", T_POP_NEXTU}, - {"endu", T_POP_ENDU}, - - {"wram0", T_SECT_WRAM0}, - {"vram", T_SECT_VRAM}, - {"romx", T_SECT_ROMX}, - {"rom0", T_SECT_ROM0}, - {"hram", T_SECT_HRAM}, - {"wramx", T_SECT_WRAMX}, - {"sram", T_SECT_SRAM}, - {"oam", T_SECT_OAM}, - - {"rb", T_POP_RB}, - {"rw", T_POP_RW}, - {"equ", T_POP_EQU}, - {"equs", T_POP_EQUS}, - - /* Handled before in list of CPU instructions */ - /* {"set", T_POP_SET}, */ - {"=", T_POP_EQUAL}, - - {"pushs", T_POP_PUSHS}, - {"pops", T_POP_POPS}, - {"pusho", T_POP_PUSHO}, - {"popo", T_POP_POPO}, - - {"opt", T_POP_OPT}, - - {NULL, 0} -}; - -const struct sLexFloat tNumberToken = { - ParseNumber, - T_NUMBER -}; - -const struct sLexFloat tFixedPointToken = { - ParseFixedPoint, - T_NUMBER -}; - -const struct sLexFloat tIDToken = { - ParseSymbol, - T_ID -}; - -const struct sLexFloat tMacroArgToken = { - PutMacroArg, - T_LEX_MACROARG -}; - -const struct sLexFloat tMacroUniqueToken = { - PutUniqueID, - T_LEX_MACROUNIQUE -}; - -void setup_lexer(void) -{ - uint32_t id; - - lex_Init(); - lex_AddStrings(lexer_strings); - - //Macro arguments - - id = lex_FloatAlloc(&tMacroArgToken); - lex_FloatAddFirstRange(id, '\\', '\\'); - lex_FloatAddSecondRange(id, '1', '9'); - id = lex_FloatAlloc(&tMacroUniqueToken); - lex_FloatAddFirstRange(id, '\\', '\\'); - lex_FloatAddSecondRange(id, '@', '@'); - - //Decimal constants - - id = lex_FloatAlloc(&tNumberToken); - lex_FloatAddFirstRange(id, '0', '9'); - lex_FloatAddSecondRange(id, '0', '9'); - lex_FloatAddRange(id, '0', '9'); - - //Binary constants - - id = lex_FloatAlloc(&tNumberToken); - nBinaryID = id; - lex_FloatAddFirstRange(id, '%', '%'); - lex_FloatAddSecondRange(id, CurrentOptions.binary[0], - CurrentOptions.binary[0]); - lex_FloatAddSecondRange(id, CurrentOptions.binary[1], - CurrentOptions.binary[1]); - lex_FloatAddRange(id, CurrentOptions.binary[0], - CurrentOptions.binary[0]); - lex_FloatAddRange(id, CurrentOptions.binary[1], - CurrentOptions.binary[1]); - - //Octal constants - - id = lex_FloatAlloc(&tNumberToken); - lex_FloatAddFirstRange(id, '&', '&'); - lex_FloatAddSecondRange(id, '0', '7'); - lex_FloatAddRange(id, '0', '7'); - - //Gameboy gfx constants - - id = lex_FloatAlloc(&tNumberToken); - nGBGfxID = id; - lex_FloatAddFirstRange(id, '`', '`'); - lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[0], - CurrentOptions.gbgfx[0]); - lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[1], - CurrentOptions.gbgfx[1]); - lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[2], - CurrentOptions.gbgfx[2]); - lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[3], - CurrentOptions.gbgfx[3]); - lex_FloatAddRange(id, CurrentOptions.gbgfx[0], CurrentOptions.gbgfx[0]); - lex_FloatAddRange(id, CurrentOptions.gbgfx[1], CurrentOptions.gbgfx[1]); - lex_FloatAddRange(id, CurrentOptions.gbgfx[2], CurrentOptions.gbgfx[2]); - lex_FloatAddRange(id, CurrentOptions.gbgfx[3], CurrentOptions.gbgfx[3]); - - //Hex constants - - id = lex_FloatAlloc(&tNumberToken); - lex_FloatAddFirstRange(id, '$', '$'); - lex_FloatAddSecondRange(id, '0', '9'); - lex_FloatAddSecondRange(id, 'A', 'F'); - lex_FloatAddSecondRange(id, 'a', 'f'); - lex_FloatAddRange(id, '0', '9'); - lex_FloatAddRange(id, 'A', 'F'); - lex_FloatAddRange(id, 'a', 'f'); - - //ID 's - - id = lex_FloatAlloc(&tIDToken); - lex_FloatAddFirstRange(id, 'a', 'z'); - lex_FloatAddFirstRange(id, 'A', 'Z'); - lex_FloatAddFirstRange(id, '_', '_'); - lex_FloatAddSecondRange(id, '.', '.'); - lex_FloatAddSecondRange(id, 'a', 'z'); - lex_FloatAddSecondRange(id, 'A', 'Z'); - lex_FloatAddSecondRange(id, '0', '9'); - lex_FloatAddSecondRange(id, '_', '_'); - lex_FloatAddSecondRange(id, '\\', '\\'); - lex_FloatAddSecondRange(id, '@', '@'); - lex_FloatAddSecondRange(id, '#', '#'); - lex_FloatAddRange(id, '.', '.'); - lex_FloatAddRange(id, 'a', 'z'); - lex_FloatAddRange(id, 'A', 'Z'); - lex_FloatAddRange(id, '0', '9'); - lex_FloatAddRange(id, '_', '_'); - lex_FloatAddRange(id, '\\', '\\'); - lex_FloatAddRange(id, '@', '@'); - lex_FloatAddRange(id, '#', '#'); - - //Local ID - - id = lex_FloatAlloc(&tIDToken); - lex_FloatAddFirstRange(id, '.', '.'); - lex_FloatAddSecondRange(id, 'a', 'z'); - lex_FloatAddSecondRange(id, 'A', 'Z'); - lex_FloatAddSecondRange(id, '_', '_'); - lex_FloatAddRange(id, 'a', 'z'); - lex_FloatAddRange(id, 'A', 'Z'); - lex_FloatAddRange(id, '0', '9'); - lex_FloatAddRange(id, '_', '_'); - lex_FloatAddRange(id, '\\', '\\'); - lex_FloatAddRange(id, '@', '@'); - lex_FloatAddRange(id, '#', '#'); - - // "@" - - id = lex_FloatAlloc(&tIDToken); - lex_FloatAddFirstRange(id, '@', '@'); - - //Fixed point constants - - id = lex_FloatAlloc(&tFixedPointToken); - lex_FloatAddFirstRange(id, '.', '.'); - lex_FloatAddFirstRange(id, '0', '9'); - lex_FloatAddSecondRange(id, '.', '.'); - lex_FloatAddSecondRange(id, '0', '9'); - lex_FloatAddRange(id, '.', '.'); - lex_FloatAddRange(id, '0', '9'); -} diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 937b8250..902e0c83 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -30,6 +30,13 @@ /* This caps the size of buffer reads, and according to POSIX, passing more than SSIZE_MAX is UB */ static_assert(LEXER_BUF_SIZE <= SSIZE_MAX); +struct Expansion { + uint8_t distance; /* How far the expansion's beginning is from the current position */ + char const *contents; + size_t len; + struct Expansion *parent; +}; + struct LexerState { char const *path; @@ -37,14 +44,13 @@ struct LexerState { bool isMmapped; union { struct { /* If mmap()ed */ - char *ptr; + char *ptr; /* Technically `const` during the lexer's execution */ off_t size; off_t offset; }; struct { /* Otherwise */ int fd; size_t index; /* Read index into the buffer */ - size_t nbChars; /* Number of chars in front of the buffer */ char buf[LEXER_BUF_SIZE]; /* Circular buffer */ }; }; @@ -52,12 +58,17 @@ struct LexerState { /* Common state */ enum LexerMode mode; bool atLineStart; - unsigned int lineNo; + uint32_t lineNo; + uint32_t colNo; + bool capturing; /* Whether the text being lexed should be captured */ size_t captureSize; /* Amount of text captured */ char *captureBuf; /* Buffer to send the captured text to if non-NULL */ size_t captureCapacity; /* Size of the buffer above */ + + size_t nbChars; /* Number of chars of lookahead, for processing expansions */ bool expandStrings; + struct Expansion *expansion; }; struct LexerState *lexerState = NULL; @@ -116,14 +127,18 @@ struct LexerState *lexer_OpenFile(char const *path) /* Sometimes mmap() fails or isn't available, so have a fallback */ lseek(state->fd, 0, SEEK_SET); state->index = 0; - state->nbChars = 0; } state->mode = LEXER_NORMAL; - state->atLineStart = true; + state->atLineStart = true; /* yylex() will init colNo due to this */ state->lineNo = 0; + state->capturing = false; state->captureBuf = NULL; + + state->nbChars = 0; + state->expandStrings = true; + state->expansion = NULL; return state; } @@ -164,28 +179,50 @@ static void reallocCaptureBuf(void) /* If at any point we need more than 255 characters of lookahead, something went VERY wrong. */ static int peek(uint8_t distance) { + if (distance >= LEXER_BUF_SIZE) + fatalerror("Internal lexer error: buffer has insufficient size for peeking (%u >= %u)\n", + distance, LEXER_BUF_SIZE); + if (lexerState->isMmapped) { if (lexerState->offset + distance >= lexerState->size) return EOF; + + if (!lexerState->capturing) { + bool escaped = false; + + while (lexerState->nbChars < distance && !escaped) { + char c = lexerState->ptr[lexerState->offset + + lexerState->nbChars++]; + + if (escaped) { + escaped = false; + if ((c >= '1' && c <= '9') || c == '@') + fatalerror("Macro arg expansion is not implemented yet\n"); + } else if (c == '\\') { + escaped = true; + } + } + } + return lexerState->ptr[lexerState->offset + distance]; } if (lexerState->nbChars <= distance) { /* Buffer isn't full enough, read some chars in */ + size_t target = LEXER_BUF_SIZE - lexerState->nbChars; /* Aim: making the buf full */ /* Compute the index we'll start writing to */ size_t writeIndex = (lexerState->index + lexerState->nbChars) % LEXER_BUF_SIZE; - size_t target = LEXER_BUF_SIZE - lexerState->nbChars; /* Aim: making the buf full */ - ssize_t nbCharsRead = 0; + ssize_t nbCharsRead = 0, totalCharsRead = 0; #define readChars(size) do { \ nbCharsRead = read(lexerState->fd, &lexerState->buf[writeIndex], (size)); \ if (nbCharsRead == -1) \ fatalerror("Error while reading \"%s\": %s\n", lexerState->path, errno); \ + totalCharsRead += nbCharsRead; \ writeIndex += nbCharsRead; \ if (writeIndex == LEXER_BUF_SIZE) \ writeIndex = 0; \ - lexerState->nbChars += nbCharsRead; /* Count all those chars in */ \ target -= nbCharsRead; \ } while (0) @@ -201,6 +238,40 @@ static int peek(uint8_t distance) #undef readChars + /* Do not perform expansions when capturing */ + if (!lexerState->capturing) { + /* Scan the newly-inserted chars for any expansions */ + bool escaped = false; + size_t index = (lexerState->index + lexerState->nbChars) % LEXER_BUF_SIZE; + + for (ssize_t i = 0; i < totalCharsRead; i++) { + char c = lexerState->buf[index++]; + + if (escaped) { + escaped = false; + if ((c >= '1' && c <= '9') || c == '@') + fatalerror("Macro arg expansion is not implemented yet\n"); + } else if (c == '\\') { + escaped = true; + } + if (index == LEXER_BUF_SIZE) /* Wrap around buffer */ + index = 0; + } + + /* + * If last char read was a backslash, pretend we didn't read it; this is + * important, otherwise we may miss an expansion that straddles refills + */ + if (escaped) { + totalCharsRead--; + /* However, if that prevents having enough characters, error out */ + if (lexerState->nbChars + totalCharsRead <= distance) + fatalerror("Internal lexer error: cannot read far enough due to backslash\n"); + } + } + + lexerState->nbChars += totalCharsRead; + /* If there aren't enough chars even after refilling, give up */ if (lexerState->nbChars <= distance) return EOF; @@ -231,6 +302,8 @@ static void shiftChars(uint8_t distance) if (lexerState->index >= LEXER_BUF_SIZE) lexerState->index %= LEXER_BUF_SIZE; } + + lexerState->colNo += distance; } static int nextChar(void) @@ -250,11 +323,16 @@ char const *lexer_GetFileName(void) return lexerState->path; } -unsigned int lexer_GetLineNo(void) +uint32_t lexer_GetLineNo(void) { return lexerState->lineNo; } +uint32_t lexer_GetColNo(void) +{ + return lexerState->colNo; +} + void lexer_DumpStringExpansions(void) { /* TODO */ @@ -278,6 +356,20 @@ static int yylex_NORMAL(void) case '\t': break; + /* Handle single-char tokens */ + case '+': + return T_OP_ADD; + case '-': + return T_OP_SUB; + + /* Handle accepted single chars */ + case '[': + case ']': + case '(': + case ')': + case ',': + return c; + case EOF: /* Captures end at their buffer's boundary no matter what */ if (!lexerState->capturing) { @@ -288,6 +380,7 @@ static int yylex_NORMAL(void) default: error("Unknown character '%c'\n"); } + lexerState->atLineStart = false; } } @@ -298,8 +391,10 @@ static int yylex_RAW(void) int yylex(void) { - if (lexerState->atLineStart) + if (lexerState->atLineStart) { lexerState->lineNo++; + lexerState->colNo = 0; + } static int (* const lexerModeFuncs[])(void) = { [LEXER_NORMAL] = yylex_NORMAL, @@ -316,7 +411,7 @@ int yylex(void) } void lexer_SkipToBlockEnd(int blockStartToken, int blockEndToken, int endToken, - char **capture, size_t *size, char const *name) + char const **capture, size_t *size, char const *name) { lexerState->capturing = true; lexerState->captureSize = 0; diff --git a/src/asm/main.c b/src/asm/main.c index 2f44f453..9f522d3a 100644 --- a/src/asm/main.c +++ b/src/asm/main.c @@ -483,6 +483,13 @@ int main(int argc, char *argv[]) fprintf(dependfile, "%s: %s\n", tzTargetFileName, tzMainfile); } + /* Init lexer; important to do first, since that's what provides the file name, line, etc */ + struct LexerState *state = lexer_OpenFile(tzMainfile); + + if (!state) + fatalerror("Failed to open main file!\n"); + lexer_SetState(state); + nStartClock = clock(); nTotalLines = 0; @@ -490,11 +497,6 @@ int main(int argc, char *argv[]) sym_Init(); sym_SetExportAll(exportall); fstk_Init(tzMainfile); - struct LexerState *state = lexer_OpenFile(tzMainfile); - - if (!state) - fatalerror("Failed to open main file!"); - lexer_SetState(state); opt_ParseDefines(); charmap_New("main", NULL);