mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-20 18:22:07 +00:00
Implement more functionality
Macro arg detection, first emitted tokens, primitive (bad) column counting
This commit is contained in:
@@ -43,10 +43,11 @@ void lexer_SetMode(enum LexerMode mode);
|
||||
void lexer_ToggleStringExpansion(bool enable);
|
||||
|
||||
char const *lexer_GetFileName(void);
|
||||
unsigned int lexer_GetLineNo(void);
|
||||
uint32_t lexer_GetLineNo(void);
|
||||
uint32_t lexer_GetColNo(void);
|
||||
void lexer_DumpStringExpansions(void);
|
||||
int yylex(void);
|
||||
void lexer_SkipToBlockEnd(int blockStartToken, int blockEndToken, int endToken,
|
||||
char **capture, size_t *size, char const *name);
|
||||
char const **capture, size_t *size, char const *name);
|
||||
|
||||
#endif /* RGBDS_ASM_LEXER_H */
|
||||
|
||||
@@ -604,7 +604,7 @@ load : T_POP_LOAD string ',' sectiontype sectorg sectattrs {
|
||||
|
||||
rept : T_POP_REPT uconst {
|
||||
uint32_t nDefinitionLineNo = lexer_GetLineNo();
|
||||
char *body;
|
||||
char const *body;
|
||||
size_t size;
|
||||
lexer_SkipToBlockEnd(T_POP_REPT, T_POP_ENDR, T_POP_ENDR,
|
||||
&body, &size, "REPT block");
|
||||
@@ -614,7 +614,7 @@ rept : T_POP_REPT uconst {
|
||||
|
||||
macrodef : T_LABEL ':' T_POP_MACRO {
|
||||
int32_t nDefinitionLineNo = lexer_GetLineNo();
|
||||
char *body;
|
||||
char const *body;
|
||||
size_t size;
|
||||
lexer_SkipToBlockEnd(T_POP_MACRO, T_POP_ENDM, T_POP_ENDM,
|
||||
&body, &size, "macro definition");
|
||||
|
||||
@@ -250,7 +250,8 @@ void fstk_Dump(void)
|
||||
pLastFile = pLastFile->next;
|
||||
}
|
||||
|
||||
fprintf(stderr, "%s(%" PRId32 ")", lexer_GetFileName(), lexer_GetLineNo());
|
||||
fprintf(stderr, "%s(%" PRId32 ",%" PRId32 ")",
|
||||
lexer_GetFileName(), lexer_GetLineNo(), lexer_GetColNo());
|
||||
}
|
||||
|
||||
void fstk_DumpToStr(char *buf, size_t buflen)
|
||||
|
||||
@@ -1,698 +0,0 @@
|
||||
/*
|
||||
* This file is part of RGBDS.
|
||||
*
|
||||
* Copyright (c) 1997-2018, Carsten Sorensen and RGBDS contributors.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "asm/asm.h"
|
||||
#include "asm/lexer.h"
|
||||
#include "asm/macro.h"
|
||||
#include "asm/main.h"
|
||||
#include "asm/rpn.h"
|
||||
#include "asm/section.h"
|
||||
#include "asm/warning.h"
|
||||
|
||||
#include "helpers.h"
|
||||
|
||||
#include "asmy.h"
|
||||
|
||||
bool oDontExpandStrings;
|
||||
int32_t nGBGfxID = -1;
|
||||
int32_t nBinaryID = -1;
|
||||
|
||||
static int32_t gbgfx2bin(char ch)
|
||||
{
|
||||
int32_t i;
|
||||
|
||||
for (i = 0; i <= 3; i++) {
|
||||
if (CurrentOptions.gbgfx[i] == ch)
|
||||
return i;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int32_t binary2bin(char ch)
|
||||
{
|
||||
int32_t i;
|
||||
|
||||
for (i = 0; i <= 1; i++) {
|
||||
if (CurrentOptions.binary[i] == ch)
|
||||
return i;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Convert one digit character to its numeric value.
 * '0'-'9' give 0-9, 'a'-'f' and 'A'-'F' give 10-15; any other character gives 0.
 */
static int32_t char2bin(char ch)
{
	int32_t value = 0;

	if (ch >= '0' && ch <= '9')
		value = ch - '0';
	else if (ch >= 'A' && ch <= 'F')
		value = 10 + (ch - 'A');
	else if (ch >= 'a' && ch <= 'f')
		value = 10 + (ch - 'a');

	return value;
}
|
||||
|
||||
typedef int32_t(*x2bin) (char ch);
|
||||
|
||||
static int32_t ascii2bin(char *s)
|
||||
{
|
||||
char *start = s;
|
||||
uint32_t radix = 10;
|
||||
uint32_t result = 0;
|
||||
x2bin convertfunc = char2bin;
|
||||
|
||||
switch (*s) {
|
||||
case '$':
|
||||
radix = 16;
|
||||
s++;
|
||||
convertfunc = char2bin;
|
||||
break;
|
||||
case '&':
|
||||
radix = 8;
|
||||
s++;
|
||||
convertfunc = char2bin;
|
||||
break;
|
||||
case '`':
|
||||
radix = 4;
|
||||
s++;
|
||||
convertfunc = gbgfx2bin;
|
||||
break;
|
||||
case '%':
|
||||
radix = 2;
|
||||
s++;
|
||||
convertfunc = binary2bin;
|
||||
break;
|
||||
default:
|
||||
/* Handle below */
|
||||
break;
|
||||
}
|
||||
|
||||
const uint32_t max_q = UINT32_MAX / radix;
|
||||
const uint32_t max_r = UINT32_MAX % radix;
|
||||
|
||||
if (*s == '\0') {
|
||||
/*
|
||||
* There are no digits after the radix prefix
|
||||
* (or the string is empty, which shouldn't happen).
|
||||
*/
|
||||
error("Invalid integer constant\n");
|
||||
} else if (radix == 4) {
|
||||
int32_t size = 0;
|
||||
int32_t c;
|
||||
|
||||
while (*s != '\0') {
|
||||
c = convertfunc(*s++);
|
||||
result = result * 2 + ((c & 2) << 7) + (c & 1);
|
||||
size++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Extending a graphics constant longer than 8 pixels,
|
||||
* the Game Boy tile width, produces a nonsensical result.
|
||||
*/
|
||||
if (size > 8) {
|
||||
warning(WARNING_LARGE_CONSTANT, "Graphics constant '%s' is too long\n",
|
||||
start);
|
||||
}
|
||||
} else {
|
||||
bool overflow = false;
|
||||
|
||||
while (*s != '\0') {
|
||||
int32_t digit = convertfunc(*s++);
|
||||
|
||||
if (result > max_q
|
||||
|| (result == max_q && digit > max_r)) {
|
||||
overflow = true;
|
||||
}
|
||||
result = result * radix + digit;
|
||||
}
|
||||
|
||||
if (overflow)
|
||||
warning(WARNING_LARGE_CONSTANT, "Integer constant '%s' is too large\n",
|
||||
start);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
uint32_t ParseFixedPoint(char *s, uint32_t size)
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t dot = 0;
|
||||
|
||||
for (i = 0; i < size; i++) {
|
||||
if (s[i] == '.') {
|
||||
dot++;
|
||||
|
||||
if (dot == 2)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
yyskipbytes(i);
|
||||
|
||||
yylval.nConstValue = (int32_t)(atof(s) * 65536);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
uint32_t ParseNumber(char *s, uint32_t size)
|
||||
{
|
||||
char dest[256];
|
||||
|
||||
if (size > 255)
|
||||
fatalerror("Number token too long\n");
|
||||
|
||||
strncpy(dest, s, size);
|
||||
dest[size] = 0;
|
||||
yylval.nConstValue = ascii2bin(dest);
|
||||
|
||||
yyskipbytes(size);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the symbol name ends before the end of the macro arg,
|
||||
* return a pointer to the rest of the macro arg.
|
||||
* Otherwise, return NULL.
|
||||
*/
|
||||
char const *AppendMacroArg(char whichArg, char *dest, size_t *destIndex)
|
||||
{
|
||||
char const *marg;
|
||||
|
||||
if (whichArg == '@')
|
||||
marg = macro_GetUniqueIDStr();
|
||||
else if (whichArg >= '1' && whichArg <= '9')
|
||||
marg = macro_GetArg(whichArg - '0');
|
||||
else
|
||||
fatalerror("Invalid macro argument '\\%c' in symbol\n", whichArg);
|
||||
|
||||
if (!marg)
|
||||
fatalerror("Macro argument '\\%c' not defined\n", whichArg);
|
||||
|
||||
char ch;
|
||||
|
||||
while ((ch = *marg) != 0) {
|
||||
if ((ch >= 'a' && ch <= 'z')
|
||||
|| (ch >= 'A' && ch <= 'Z')
|
||||
|| (ch >= '0' && ch <= '9')
|
||||
|| ch == '_'
|
||||
|| ch == '@'
|
||||
|| ch == '#'
|
||||
|| ch == '.') {
|
||||
if (*destIndex >= MAXSYMLEN)
|
||||
fatalerror("Symbol too long\n");
|
||||
|
||||
dest[*destIndex] = ch;
|
||||
(*destIndex)++;
|
||||
} else {
|
||||
return marg;
|
||||
}
|
||||
|
||||
marg++;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
uint32_t ParseSymbol(char *src, uint32_t size)
|
||||
{
|
||||
char dest[MAXSYMLEN + 1];
|
||||
size_t srcIndex = 0;
|
||||
size_t destIndex = 0;
|
||||
char const *rest = NULL;
|
||||
|
||||
while (srcIndex < size) {
|
||||
char ch = src[srcIndex++];
|
||||
|
||||
if (ch == '\\') {
|
||||
/*
|
||||
* We don't check if srcIndex is still less than size,
|
||||
* but that can only fail to be true when the
|
||||
* following char is neither '@' nor a digit.
|
||||
* In that case, AppendMacroArg() will catch the error.
|
||||
*/
|
||||
ch = src[srcIndex++];
|
||||
|
||||
rest = AppendMacroArg(ch, dest, &destIndex);
|
||||
/* If the symbol's end was in the middle of the token */
|
||||
if (rest)
|
||||
break;
|
||||
} else {
|
||||
if (destIndex >= MAXSYMLEN)
|
||||
fatalerror("Symbol too long\n");
|
||||
dest[destIndex++] = ch;
|
||||
}
|
||||
}
|
||||
|
||||
dest[destIndex] = 0;
|
||||
|
||||
/* Tell the lexer we read all bytes that we did */
|
||||
yyskipbytes(srcIndex);
|
||||
|
||||
/*
|
||||
* If an escape's expansion left some chars after the symbol's end,
|
||||
* such as the `::` in a `Backup\1` expanded to `BackupCamX::`,
|
||||
* put those into the buffer.
|
||||
* Note that this NEEDS to be done after the `yyskipbytes` above.
|
||||
*/
|
||||
if (rest)
|
||||
yyunputstr(rest);
|
||||
|
||||
/* If the symbol is an EQUS, expand it */
|
||||
if (!oDontExpandStrings) {
|
||||
struct Symbol const *sym = sym_FindSymbol(dest);
|
||||
|
||||
if (sym && sym->type == SYM_EQUS) {
|
||||
char const *s;
|
||||
|
||||
lex_BeginStringExpansion(dest);
|
||||
|
||||
/* Feed the symbol's contents into the buffer */
|
||||
yyunputstr(s = sym_GetStringValue(sym));
|
||||
|
||||
/* Lines inserted this way shall not increase lexer_GetLineNo() */
|
||||
while (*s) {
|
||||
if (*s++ == '\n')
|
||||
lexer_GetLineNo()--;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
strcpy(yylval.tzSym, dest);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Handle a standalone macro argument escape (`\1` to `\9`): consume the token, then
 * push the argument's text back into the lexer so it gets lexed in its place.
 * Reports an error if the escape is malformed or the argument is not defined.
 * Always returns 0.
 */
uint32_t PutMacroArg(char *src, uint32_t size)
{
	yyskipbytes(size);

	bool isArgEscape = size == 2 && src[1] >= '1' && src[1] <= '9';

	if (!isArgEscape) {
		error("Invalid macro argument '\\%c'\n", src[1]);
		return 0;
	}

	char const *arg = macro_GetArg(src[1] - '0');

	if (arg == NULL)
		error("Macro argument '\\%c' not defined\n", src[1]);
	else
		yyunputstr(arg);

	return 0;
}
|
||||
|
||||
/*
 * Handle the `\@` escape: consume the token, then push the string returned by
 * macro_GetUniqueIDStr() back into the lexer. Reports an error when that string
 * is NULL. `src` is unused. Always returns 0.
 */
uint32_t PutUniqueID(char *src, uint32_t size)
{
	(void)src;

	yyskipbytes(size);

	char const *uniqueId = macro_GetUniqueIDStr();

	if (uniqueId == NULL)
		error("Macro unique label string not defined\n");
	else
		yyunputstr(uniqueId);

	return 0;
}
|
||||
|
||||
enum {
|
||||
T_LEX_MACROARG = 3000,
|
||||
T_LEX_MACROUNIQUE
|
||||
};
|
||||
|
||||
const struct sLexInitString lexer_strings[] = {
|
||||
{"adc", T_Z80_ADC},
|
||||
{"add", T_Z80_ADD},
|
||||
{"and", T_Z80_AND},
|
||||
{"bit", T_Z80_BIT},
|
||||
{"call", T_Z80_CALL},
|
||||
{"ccf", T_Z80_CCF},
|
||||
{"cpl", T_Z80_CPL},
|
||||
{"cp", T_Z80_CP},
|
||||
{"daa", T_Z80_DAA},
|
||||
{"dec", T_Z80_DEC},
|
||||
{"di", T_Z80_DI},
|
||||
{"ei", T_Z80_EI},
|
||||
{"halt", T_Z80_HALT},
|
||||
{"inc", T_Z80_INC},
|
||||
{"jp", T_Z80_JP},
|
||||
{"jr", T_Z80_JR},
|
||||
{"ld", T_Z80_LD},
|
||||
{"ldi", T_Z80_LDI},
|
||||
{"ldd", T_Z80_LDD},
|
||||
{"ldio", T_Z80_LDIO},
|
||||
{"ldh", T_Z80_LDIO},
|
||||
{"nop", T_Z80_NOP},
|
||||
{"or", T_Z80_OR},
|
||||
{"pop", T_Z80_POP},
|
||||
{"push", T_Z80_PUSH},
|
||||
{"res", T_Z80_RES},
|
||||
{"reti", T_Z80_RETI},
|
||||
{"ret", T_Z80_RET},
|
||||
{"rlca", T_Z80_RLCA},
|
||||
{"rlc", T_Z80_RLC},
|
||||
{"rla", T_Z80_RLA},
|
||||
{"rl", T_Z80_RL},
|
||||
{"rrc", T_Z80_RRC},
|
||||
{"rrca", T_Z80_RRCA},
|
||||
{"rra", T_Z80_RRA},
|
||||
{"rr", T_Z80_RR},
|
||||
{"rst", T_Z80_RST},
|
||||
{"sbc", T_Z80_SBC},
|
||||
{"scf", T_Z80_SCF},
|
||||
{"set", T_POP_SET},
|
||||
{"sla", T_Z80_SLA},
|
||||
{"sra", T_Z80_SRA},
|
||||
{"srl", T_Z80_SRL},
|
||||
{"stop", T_Z80_STOP},
|
||||
{"sub", T_Z80_SUB},
|
||||
{"swap", T_Z80_SWAP},
|
||||
{"xor", T_Z80_XOR},
|
||||
|
||||
{"nz", T_CC_NZ},
|
||||
{"z", T_CC_Z},
|
||||
{"nc", T_CC_NC},
|
||||
/* Handled in list of registers */
|
||||
/* { "c", T_TOKEN_C }, */
|
||||
|
||||
{"hli", T_MODE_HL_INC},
|
||||
{"hld", T_MODE_HL_DEC},
|
||||
{"$ff00+c", T_MODE_HW_C},
|
||||
{"$ff00 + c", T_MODE_HW_C},
|
||||
{"af", T_MODE_AF},
|
||||
{"bc", T_MODE_BC},
|
||||
{"de", T_MODE_DE},
|
||||
{"hl", T_MODE_HL},
|
||||
{"sp", T_MODE_SP},
|
||||
|
||||
{"a", T_TOKEN_A},
|
||||
{"b", T_TOKEN_B},
|
||||
{"c", T_TOKEN_C},
|
||||
{"d", T_TOKEN_D},
|
||||
{"e", T_TOKEN_E},
|
||||
{"h", T_TOKEN_H},
|
||||
{"l", T_TOKEN_L},
|
||||
|
||||
{"||", T_OP_LOGICOR},
|
||||
{"&&", T_OP_LOGICAND},
|
||||
{"==", T_OP_LOGICEQU},
|
||||
{">", T_OP_LOGICGT},
|
||||
{"<", T_OP_LOGICLT},
|
||||
{">=", T_OP_LOGICGE},
|
||||
{"<=", T_OP_LOGICLE},
|
||||
{"!=", T_OP_LOGICNE},
|
||||
{"!", T_OP_LOGICNOT},
|
||||
{"|", T_OP_OR},
|
||||
{"^", T_OP_XOR},
|
||||
{"&", T_OP_AND},
|
||||
{"<<", T_OP_SHL},
|
||||
{">>", T_OP_SHR},
|
||||
{"+", T_OP_ADD},
|
||||
{"-", T_OP_SUB},
|
||||
{"*", T_OP_MUL},
|
||||
{"/", T_OP_DIV},
|
||||
{"%", T_OP_MOD},
|
||||
{"~", T_OP_NOT},
|
||||
|
||||
{"def", T_OP_DEF},
|
||||
|
||||
{"fragment", T_POP_FRAGMENT},
|
||||
{"bank", T_OP_BANK},
|
||||
{"align", T_OP_ALIGN},
|
||||
|
||||
{"round", T_OP_ROUND},
|
||||
{"ceil", T_OP_CEIL},
|
||||
{"floor", T_OP_FLOOR},
|
||||
{"div", T_OP_FDIV},
|
||||
{"mul", T_OP_FMUL},
|
||||
{"sin", T_OP_SIN},
|
||||
{"cos", T_OP_COS},
|
||||
{"tan", T_OP_TAN},
|
||||
{"asin", T_OP_ASIN},
|
||||
{"acos", T_OP_ACOS},
|
||||
{"atan", T_OP_ATAN},
|
||||
{"atan2", T_OP_ATAN2},
|
||||
|
||||
{"high", T_OP_HIGH},
|
||||
{"low", T_OP_LOW},
|
||||
{"isconst", T_OP_ISCONST},
|
||||
|
||||
{"strcmp", T_OP_STRCMP},
|
||||
{"strin", T_OP_STRIN},
|
||||
{"strsub", T_OP_STRSUB},
|
||||
{"strlen", T_OP_STRLEN},
|
||||
{"strcat", T_OP_STRCAT},
|
||||
{"strupr", T_OP_STRUPR},
|
||||
{"strlwr", T_OP_STRLWR},
|
||||
|
||||
{"include", T_POP_INCLUDE},
|
||||
{"printt", T_POP_PRINTT},
|
||||
{"printi", T_POP_PRINTI},
|
||||
{"printv", T_POP_PRINTV},
|
||||
{"printf", T_POP_PRINTF},
|
||||
{"export", T_POP_EXPORT},
|
||||
{"xdef", T_POP_XDEF},
|
||||
{"global", T_POP_GLOBAL},
|
||||
{"ds", T_POP_DS},
|
||||
{"db", T_POP_DB},
|
||||
{"dw", T_POP_DW},
|
||||
{"dl", T_POP_DL},
|
||||
{"section", T_POP_SECTION},
|
||||
{"purge", T_POP_PURGE},
|
||||
|
||||
{"rsreset", T_POP_RSRESET},
|
||||
{"rsset", T_POP_RSSET},
|
||||
|
||||
{"incbin", T_POP_INCBIN},
|
||||
{"charmap", T_POP_CHARMAP},
|
||||
{"newcharmap", T_POP_NEWCHARMAP},
|
||||
{"setcharmap", T_POP_SETCHARMAP},
|
||||
{"pushc", T_POP_PUSHC},
|
||||
{"popc", T_POP_POPC},
|
||||
|
||||
{"fail", T_POP_FAIL},
|
||||
{"warn", T_POP_WARN},
|
||||
{"fatal", T_POP_FATAL},
|
||||
{"assert", T_POP_ASSERT},
|
||||
{"static_assert", T_POP_STATIC_ASSERT},
|
||||
|
||||
{"macro", T_POP_MACRO},
|
||||
/* Not needed but we have it here just to protect the name */
|
||||
{"endm", T_POP_ENDM},
|
||||
{"shift", T_POP_SHIFT},
|
||||
|
||||
{"rept", T_POP_REPT},
|
||||
/* Not needed but we have it here just to protect the name */
|
||||
{"endr", T_POP_ENDR},
|
||||
|
||||
{"load", T_POP_LOAD},
|
||||
{"endl", T_POP_ENDL},
|
||||
|
||||
{"if", T_POP_IF},
|
||||
{"else", T_POP_ELSE},
|
||||
{"elif", T_POP_ELIF},
|
||||
{"endc", T_POP_ENDC},
|
||||
|
||||
{"union", T_POP_UNION},
|
||||
{"nextu", T_POP_NEXTU},
|
||||
{"endu", T_POP_ENDU},
|
||||
|
||||
{"wram0", T_SECT_WRAM0},
|
||||
{"vram", T_SECT_VRAM},
|
||||
{"romx", T_SECT_ROMX},
|
||||
{"rom0", T_SECT_ROM0},
|
||||
{"hram", T_SECT_HRAM},
|
||||
{"wramx", T_SECT_WRAMX},
|
||||
{"sram", T_SECT_SRAM},
|
||||
{"oam", T_SECT_OAM},
|
||||
|
||||
{"rb", T_POP_RB},
|
||||
{"rw", T_POP_RW},
|
||||
{"equ", T_POP_EQU},
|
||||
{"equs", T_POP_EQUS},
|
||||
|
||||
/* Handled before in list of CPU instructions */
|
||||
/* {"set", T_POP_SET}, */
|
||||
{"=", T_POP_EQUAL},
|
||||
|
||||
{"pushs", T_POP_PUSHS},
|
||||
{"pops", T_POP_POPS},
|
||||
{"pusho", T_POP_PUSHO},
|
||||
{"popo", T_POP_POPO},
|
||||
|
||||
{"opt", T_POP_OPT},
|
||||
|
||||
{NULL, 0}
|
||||
};
|
||||
|
||||
const struct sLexFloat tNumberToken = {
|
||||
ParseNumber,
|
||||
T_NUMBER
|
||||
};
|
||||
|
||||
const struct sLexFloat tFixedPointToken = {
|
||||
ParseFixedPoint,
|
||||
T_NUMBER
|
||||
};
|
||||
|
||||
const struct sLexFloat tIDToken = {
|
||||
ParseSymbol,
|
||||
T_ID
|
||||
};
|
||||
|
||||
const struct sLexFloat tMacroArgToken = {
|
||||
PutMacroArg,
|
||||
T_LEX_MACROARG
|
||||
};
|
||||
|
||||
const struct sLexFloat tMacroUniqueToken = {
|
||||
PutUniqueID,
|
||||
T_LEX_MACROUNIQUE
|
||||
};
|
||||
|
||||
void setup_lexer(void)
|
||||
{
|
||||
uint32_t id;
|
||||
|
||||
lex_Init();
|
||||
lex_AddStrings(lexer_strings);
|
||||
|
||||
//Macro arguments
|
||||
|
||||
id = lex_FloatAlloc(&tMacroArgToken);
|
||||
lex_FloatAddFirstRange(id, '\\', '\\');
|
||||
lex_FloatAddSecondRange(id, '1', '9');
|
||||
id = lex_FloatAlloc(&tMacroUniqueToken);
|
||||
lex_FloatAddFirstRange(id, '\\', '\\');
|
||||
lex_FloatAddSecondRange(id, '@', '@');
|
||||
|
||||
//Decimal constants
|
||||
|
||||
id = lex_FloatAlloc(&tNumberToken);
|
||||
lex_FloatAddFirstRange(id, '0', '9');
|
||||
lex_FloatAddSecondRange(id, '0', '9');
|
||||
lex_FloatAddRange(id, '0', '9');
|
||||
|
||||
//Binary constants
|
||||
|
||||
id = lex_FloatAlloc(&tNumberToken);
|
||||
nBinaryID = id;
|
||||
lex_FloatAddFirstRange(id, '%', '%');
|
||||
lex_FloatAddSecondRange(id, CurrentOptions.binary[0],
|
||||
CurrentOptions.binary[0]);
|
||||
lex_FloatAddSecondRange(id, CurrentOptions.binary[1],
|
||||
CurrentOptions.binary[1]);
|
||||
lex_FloatAddRange(id, CurrentOptions.binary[0],
|
||||
CurrentOptions.binary[0]);
|
||||
lex_FloatAddRange(id, CurrentOptions.binary[1],
|
||||
CurrentOptions.binary[1]);
|
||||
|
||||
//Octal constants
|
||||
|
||||
id = lex_FloatAlloc(&tNumberToken);
|
||||
lex_FloatAddFirstRange(id, '&', '&');
|
||||
lex_FloatAddSecondRange(id, '0', '7');
|
||||
lex_FloatAddRange(id, '0', '7');
|
||||
|
||||
//Gameboy gfx constants
|
||||
|
||||
id = lex_FloatAlloc(&tNumberToken);
|
||||
nGBGfxID = id;
|
||||
lex_FloatAddFirstRange(id, '`', '`');
|
||||
lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[0],
|
||||
CurrentOptions.gbgfx[0]);
|
||||
lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[1],
|
||||
CurrentOptions.gbgfx[1]);
|
||||
lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[2],
|
||||
CurrentOptions.gbgfx[2]);
|
||||
lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[3],
|
||||
CurrentOptions.gbgfx[3]);
|
||||
lex_FloatAddRange(id, CurrentOptions.gbgfx[0], CurrentOptions.gbgfx[0]);
|
||||
lex_FloatAddRange(id, CurrentOptions.gbgfx[1], CurrentOptions.gbgfx[1]);
|
||||
lex_FloatAddRange(id, CurrentOptions.gbgfx[2], CurrentOptions.gbgfx[2]);
|
||||
lex_FloatAddRange(id, CurrentOptions.gbgfx[3], CurrentOptions.gbgfx[3]);
|
||||
|
||||
//Hex constants
|
||||
|
||||
id = lex_FloatAlloc(&tNumberToken);
|
||||
lex_FloatAddFirstRange(id, '$', '$');
|
||||
lex_FloatAddSecondRange(id, '0', '9');
|
||||
lex_FloatAddSecondRange(id, 'A', 'F');
|
||||
lex_FloatAddSecondRange(id, 'a', 'f');
|
||||
lex_FloatAddRange(id, '0', '9');
|
||||
lex_FloatAddRange(id, 'A', 'F');
|
||||
lex_FloatAddRange(id, 'a', 'f');
|
||||
|
||||
//ID 's
|
||||
|
||||
id = lex_FloatAlloc(&tIDToken);
|
||||
lex_FloatAddFirstRange(id, 'a', 'z');
|
||||
lex_FloatAddFirstRange(id, 'A', 'Z');
|
||||
lex_FloatAddFirstRange(id, '_', '_');
|
||||
lex_FloatAddSecondRange(id, '.', '.');
|
||||
lex_FloatAddSecondRange(id, 'a', 'z');
|
||||
lex_FloatAddSecondRange(id, 'A', 'Z');
|
||||
lex_FloatAddSecondRange(id, '0', '9');
|
||||
lex_FloatAddSecondRange(id, '_', '_');
|
||||
lex_FloatAddSecondRange(id, '\\', '\\');
|
||||
lex_FloatAddSecondRange(id, '@', '@');
|
||||
lex_FloatAddSecondRange(id, '#', '#');
|
||||
lex_FloatAddRange(id, '.', '.');
|
||||
lex_FloatAddRange(id, 'a', 'z');
|
||||
lex_FloatAddRange(id, 'A', 'Z');
|
||||
lex_FloatAddRange(id, '0', '9');
|
||||
lex_FloatAddRange(id, '_', '_');
|
||||
lex_FloatAddRange(id, '\\', '\\');
|
||||
lex_FloatAddRange(id, '@', '@');
|
||||
lex_FloatAddRange(id, '#', '#');
|
||||
|
||||
//Local ID
|
||||
|
||||
id = lex_FloatAlloc(&tIDToken);
|
||||
lex_FloatAddFirstRange(id, '.', '.');
|
||||
lex_FloatAddSecondRange(id, 'a', 'z');
|
||||
lex_FloatAddSecondRange(id, 'A', 'Z');
|
||||
lex_FloatAddSecondRange(id, '_', '_');
|
||||
lex_FloatAddRange(id, 'a', 'z');
|
||||
lex_FloatAddRange(id, 'A', 'Z');
|
||||
lex_FloatAddRange(id, '0', '9');
|
||||
lex_FloatAddRange(id, '_', '_');
|
||||
lex_FloatAddRange(id, '\\', '\\');
|
||||
lex_FloatAddRange(id, '@', '@');
|
||||
lex_FloatAddRange(id, '#', '#');
|
||||
|
||||
// "@"
|
||||
|
||||
id = lex_FloatAlloc(&tIDToken);
|
||||
lex_FloatAddFirstRange(id, '@', '@');
|
||||
|
||||
//Fixed point constants
|
||||
|
||||
id = lex_FloatAlloc(&tFixedPointToken);
|
||||
lex_FloatAddFirstRange(id, '.', '.');
|
||||
lex_FloatAddFirstRange(id, '0', '9');
|
||||
lex_FloatAddSecondRange(id, '.', '.');
|
||||
lex_FloatAddSecondRange(id, '0', '9');
|
||||
lex_FloatAddRange(id, '.', '.');
|
||||
lex_FloatAddRange(id, '0', '9');
|
||||
}
|
||||
117
src/asm/lexer.c
117
src/asm/lexer.c
@@ -30,6 +30,13 @@
|
||||
/* This caps the size of buffer reads, and according to POSIX, passing more than SSIZE_MAX is UB */
|
||||
static_assert(LEXER_BUF_SIZE <= SSIZE_MAX);
|
||||
|
||||
/*
 * One entry in a chain of expansions, linked through `parent`.
 * NOTE(review): nothing visible here reads these fields yet; the notes on
 * `contents`, `len` and `parent` below are assumptions to confirm.
 */
struct Expansion {
|
||||
uint8_t distance; /* How far the expansion's beginning is from the current position */
|
||||
char const *contents; /* NOTE(review): presumably the text being expanded -- confirm */
|
||||
size_t len; /* NOTE(review): presumably the length of `contents` -- confirm */
|
||||
struct Expansion *parent; /* NOTE(review): presumably the enclosing expansion, if any -- confirm */
|
||||
};
|
||||
|
||||
struct LexerState {
|
||||
char const *path;
|
||||
|
||||
@@ -37,14 +44,13 @@ struct LexerState {
|
||||
bool isMmapped;
|
||||
union {
|
||||
struct { /* If mmap()ed */
|
||||
char *ptr;
|
||||
char *ptr; /* Technically `const` during the lexer's execution */
|
||||
off_t size;
|
||||
off_t offset;
|
||||
};
|
||||
struct { /* Otherwise */
|
||||
int fd;
|
||||
size_t index; /* Read index into the buffer */
|
||||
size_t nbChars; /* Number of chars in front of the buffer */
|
||||
char buf[LEXER_BUF_SIZE]; /* Circular buffer */
|
||||
};
|
||||
};
|
||||
@@ -52,12 +58,17 @@ struct LexerState {
|
||||
/* Common state */
|
||||
enum LexerMode mode;
|
||||
bool atLineStart;
|
||||
unsigned int lineNo;
|
||||
uint32_t lineNo;
|
||||
uint32_t colNo;
|
||||
|
||||
bool capturing; /* Whether the text being lexed should be captured */
|
||||
size_t captureSize; /* Amount of text captured */
|
||||
char *captureBuf; /* Buffer to send the captured text to if non-NULL */
|
||||
size_t captureCapacity; /* Size of the buffer above */
|
||||
|
||||
size_t nbChars; /* Number of chars of lookahead, for processing expansions */
|
||||
bool expandStrings;
|
||||
struct Expansion *expansion;
|
||||
};
|
||||
|
||||
struct LexerState *lexerState = NULL;
|
||||
@@ -116,14 +127,18 @@ struct LexerState *lexer_OpenFile(char const *path)
|
||||
/* Sometimes mmap() fails or isn't available, so have a fallback */
|
||||
lseek(state->fd, 0, SEEK_SET);
|
||||
state->index = 0;
|
||||
state->nbChars = 0;
|
||||
}
|
||||
|
||||
state->mode = LEXER_NORMAL;
|
||||
state->atLineStart = true;
|
||||
state->atLineStart = true; /* yylex() will init colNo due to this */
|
||||
state->lineNo = 0;
|
||||
|
||||
state->capturing = false;
|
||||
state->captureBuf = NULL;
|
||||
|
||||
state->nbChars = 0;
|
||||
state->expandStrings = true;
|
||||
state->expansion = NULL;
|
||||
return state;
|
||||
}
|
||||
|
||||
@@ -164,28 +179,50 @@ static void reallocCaptureBuf(void)
|
||||
/* If at any point we need more than 255 characters of lookahead, something went VERY wrong. */
|
||||
static int peek(uint8_t distance)
|
||||
{
|
||||
if (distance >= LEXER_BUF_SIZE)
|
||||
fatalerror("Internal lexer error: buffer has insufficient size for peeking (%u >= %u)\n",
|
||||
distance, LEXER_BUF_SIZE);
|
||||
|
||||
if (lexerState->isMmapped) {
|
||||
if (lexerState->offset + distance >= lexerState->size)
|
||||
return EOF;
|
||||
|
||||
if (!lexerState->capturing) {
|
||||
bool escaped = false;
|
||||
|
||||
while (lexerState->nbChars < distance && !escaped) {
|
||||
char c = lexerState->ptr[lexerState->offset
|
||||
+ lexerState->nbChars++];
|
||||
|
||||
if (escaped) {
|
||||
escaped = false;
|
||||
if ((c >= '1' && c <= '9') || c == '@')
|
||||
fatalerror("Macro arg expansion is not implemented yet\n");
|
||||
} else if (c == '\\') {
|
||||
escaped = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return lexerState->ptr[lexerState->offset + distance];
|
||||
}
|
||||
|
||||
if (lexerState->nbChars <= distance) {
|
||||
/* Buffer isn't full enough, read some chars in */
|
||||
size_t target = LEXER_BUF_SIZE - lexerState->nbChars; /* Aim: making the buf full */
|
||||
|
||||
/* Compute the index we'll start writing to */
|
||||
size_t writeIndex = (lexerState->index + lexerState->nbChars) % LEXER_BUF_SIZE;
|
||||
size_t target = LEXER_BUF_SIZE - lexerState->nbChars; /* Aim: making the buf full */
|
||||
ssize_t nbCharsRead = 0;
|
||||
ssize_t nbCharsRead = 0, totalCharsRead = 0;
|
||||
|
||||
#define readChars(size) do { \
|
||||
nbCharsRead = read(lexerState->fd, &lexerState->buf[writeIndex], (size)); \
|
||||
if (nbCharsRead == -1) \
|
||||
fatalerror("Error while reading \"%s\": %s\n", lexerState->path, errno); \
|
||||
totalCharsRead += nbCharsRead; \
|
||||
writeIndex += nbCharsRead; \
|
||||
if (writeIndex == LEXER_BUF_SIZE) \
|
||||
writeIndex = 0; \
|
||||
lexerState->nbChars += nbCharsRead; /* Count all those chars in */ \
|
||||
target -= nbCharsRead; \
|
||||
} while (0)
|
||||
|
||||
@@ -201,6 +238,40 @@ static int peek(uint8_t distance)
|
||||
|
||||
#undef readChars
|
||||
|
||||
/* Do not perform expansions when capturing */
|
||||
if (!lexerState->capturing) {
|
||||
/* Scan the newly-inserted chars for any expansions */
|
||||
bool escaped = false;
|
||||
size_t index = (lexerState->index + lexerState->nbChars) % LEXER_BUF_SIZE;
|
||||
|
||||
for (ssize_t i = 0; i < totalCharsRead; i++) {
|
||||
char c = lexerState->buf[index++];
|
||||
|
||||
if (escaped) {
|
||||
escaped = false;
|
||||
if ((c >= '1' && c <= '9') || c == '@')
|
||||
fatalerror("Macro arg expansion is not implemented yet\n");
|
||||
} else if (c == '\\') {
|
||||
escaped = true;
|
||||
}
|
||||
if (index == LEXER_BUF_SIZE) /* Wrap around buffer */
|
||||
index = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* If last char read was a backslash, pretend we didn't read it; this is
|
||||
* important, otherwise we may miss an expansion that straddles refills
|
||||
*/
|
||||
if (escaped) {
|
||||
totalCharsRead--;
|
||||
/* However, if that prevents having enough characters, error out */
|
||||
if (lexerState->nbChars + totalCharsRead <= distance)
|
||||
fatalerror("Internal lexer error: cannot read far enough due to backslash\n");
|
||||
}
|
||||
}
|
||||
|
||||
lexerState->nbChars += totalCharsRead;
|
||||
|
||||
/* If there aren't enough chars even after refilling, give up */
|
||||
if (lexerState->nbChars <= distance)
|
||||
return EOF;
|
||||
@@ -231,6 +302,8 @@ static void shiftChars(uint8_t distance)
|
||||
if (lexerState->index >= LEXER_BUF_SIZE)
|
||||
lexerState->index %= LEXER_BUF_SIZE;
|
||||
}
|
||||
|
||||
lexerState->colNo += distance;
|
||||
}
|
||||
|
||||
static int nextChar(void)
|
||||
@@ -250,11 +323,16 @@ char const *lexer_GetFileName(void)
|
||||
return lexerState->path;
|
||||
}
|
||||
|
||||
unsigned int lexer_GetLineNo(void)
|
||||
uint32_t lexer_GetLineNo(void)
|
||||
{
|
||||
return lexerState->lineNo;
|
||||
}
|
||||
|
||||
/* Returns the column counter (`colNo`) of the current lexer state. */
uint32_t lexer_GetColNo(void)
|
||||
{
|
||||
return lexerState->colNo;
|
||||
}
|
||||
|
||||
void lexer_DumpStringExpansions(void)
|
||||
{
|
||||
/* TODO */
|
||||
@@ -278,6 +356,20 @@ static int yylex_NORMAL(void)
|
||||
case '\t':
|
||||
break;
|
||||
|
||||
/* Handle single-char tokens */
|
||||
case '+':
|
||||
return T_OP_ADD;
|
||||
case '-':
|
||||
return T_OP_SUB;
|
||||
|
||||
/* Handle accepted single chars */
|
||||
case '[':
|
||||
case ']':
|
||||
case '(':
|
||||
case ')':
|
||||
case ',':
|
||||
return c;
|
||||
|
||||
case EOF:
|
||||
/* Captures end at their buffer's boundary no matter what */
|
||||
if (!lexerState->capturing) {
|
||||
@@ -288,6 +380,7 @@ static int yylex_NORMAL(void)
|
||||
default:
|
||||
error("Unknown character '%c'\n");
|
||||
}
|
||||
lexerState->atLineStart = false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -298,8 +391,10 @@ static int yylex_RAW(void)
|
||||
|
||||
int yylex(void)
|
||||
{
|
||||
if (lexerState->atLineStart)
|
||||
if (lexerState->atLineStart) {
|
||||
lexerState->lineNo++;
|
||||
lexerState->colNo = 0;
|
||||
}
|
||||
|
||||
static int (* const lexerModeFuncs[])(void) = {
|
||||
[LEXER_NORMAL] = yylex_NORMAL,
|
||||
@@ -316,7 +411,7 @@ int yylex(void)
|
||||
}
|
||||
|
||||
void lexer_SkipToBlockEnd(int blockStartToken, int blockEndToken, int endToken,
|
||||
char **capture, size_t *size, char const *name)
|
||||
char const **capture, size_t *size, char const *name)
|
||||
{
|
||||
lexerState->capturing = true;
|
||||
lexerState->captureSize = 0;
|
||||
|
||||
@@ -483,6 +483,13 @@ int main(int argc, char *argv[])
|
||||
fprintf(dependfile, "%s: %s\n", tzTargetFileName, tzMainfile);
|
||||
}
|
||||
|
||||
/* Init lexer; important to do first, since that's what provides the file name, line, etc */
|
||||
struct LexerState *state = lexer_OpenFile(tzMainfile);
|
||||
|
||||
if (!state)
|
||||
fatalerror("Failed to open main file!\n");
|
||||
lexer_SetState(state);
|
||||
|
||||
nStartClock = clock();
|
||||
|
||||
nTotalLines = 0;
|
||||
@@ -490,11 +497,6 @@ int main(int argc, char *argv[])
|
||||
sym_Init();
|
||||
sym_SetExportAll(exportall);
|
||||
fstk_Init(tzMainfile);
|
||||
struct LexerState *state = lexer_OpenFile(tzMainfile);
|
||||
|
||||
if (!state)
|
||||
fatalerror("Failed to open main file!");
|
||||
lexer_SetState(state);
|
||||
|
||||
opt_ParseDefines();
|
||||
charmap_New("main", NULL);
|
||||
|
||||
Reference in New Issue
Block a user