Implement almost all functionality

Add keywords and identifiers
Add comments
Add number literals
Add strings
Add a lot of new tokens
Add (and clean up) IF etc.
Improve reporting of unexpected chars / garbage bytes
Fix a bug when failing to open a file, and improve the error messages for that case
Add verbose-level messages about how files are opened
Enforce that files finish with a newline
Fix returned chars not being cast to unsigned char (negative values may conflict with EOF)
Return null path when no file is open, rather than crash
Unify and improve error printing slightly

Known to be missing: macro expansion, REPT blocks, EQUS expansions
This commit is contained in:
ISSOtm
2020-07-28 22:06:03 +02:00
parent 71f8871702
commit 4c9a929a14
12 changed files with 1139 additions and 235 deletions

View File

@@ -27,9 +27,5 @@
extern uint32_t nTotalLines; extern uint32_t nTotalLines;
extern uint32_t nIFDepth; extern uint32_t nIFDepth;
extern struct Section *pCurrentSection; extern struct Section *pCurrentSection;
extern bool oDontExpandStrings;
size_t symvaluetostring(char *dest, size_t maxLength, char *sym,
const char *mode);
#endif /* RGBDS_ASM_ASM_H */ #endif /* RGBDS_ASM_ASM_H */

View File

@@ -32,7 +32,7 @@ struct sContext {
uint32_t uniqueID; uint32_t uniqueID;
int32_t nLine; int32_t nLine;
uint32_t nStatus; uint32_t nStatus;
char *pREPTBlock; char const *pREPTBlock;
uint32_t nREPTBlockCount; uint32_t nREPTBlockCount;
uint32_t nREPTBlockSize; uint32_t nREPTBlockSize;
int32_t nREPTBodyFirstLine; int32_t nREPTBodyFirstLine;
@@ -47,7 +47,7 @@ void fstk_Dump(void);
void fstk_DumpToStr(char *buf, size_t len); void fstk_DumpToStr(char *buf, size_t len);
void fstk_AddIncludePath(char *s); void fstk_AddIncludePath(char *s);
void fstk_RunMacro(char *s, struct MacroArgs *args); void fstk_RunMacro(char *s, struct MacroArgs *args);
void fstk_RunRept(uint32_t count, int32_t nReptLineNo); void fstk_RunRept(uint32_t count, int32_t nReptLineNo, char const *body, size_t size);
/** /**
* @param path The user-provided file name * @param path The user-provided file name
* @param fullPath The address of a pointer, which will be made to point at the full path * @param fullPath The address of a pointer, which will be made to point at the full path

View File

@@ -33,10 +33,13 @@ static inline void lexer_SetStateAtEOL(struct LexerState *state)
struct LexerState *lexer_OpenFile(char const *path); struct LexerState *lexer_OpenFile(char const *path);
struct LexerState *lexer_OpenFileView(void); struct LexerState *lexer_OpenFileView(void);
void lexer_DeleteState(struct LexerState *state); void lexer_DeleteState(struct LexerState *state);
void lexer_Init(void);
enum LexerMode { enum LexerMode {
LEXER_NORMAL, LEXER_NORMAL,
LEXER_RAW LEXER_RAW,
LEXER_SKIP_TO_ELIF,
LEXER_SKIP_TO_ENDC
}; };
void lexer_SetMode(enum LexerMode mode); void lexer_SetMode(enum LexerMode mode);
@@ -47,7 +50,7 @@ uint32_t lexer_GetLineNo(void);
uint32_t lexer_GetColNo(void); uint32_t lexer_GetColNo(void);
void lexer_DumpStringExpansions(void); void lexer_DumpStringExpansions(void);
int yylex(void); int yylex(void);
void lexer_SkipToBlockEnd(int blockStartToken, int blockEndToken, int endToken, void lexer_CaptureBlock(int blockStartToken, int blockEndToken, char const **capture, size_t *size,
char const **capture, size_t *size, char const *name); char const *name);
#endif /* RGBDS_ASM_LEXER_H */ #endif /* RGBDS_ASM_LEXER_H */

View File

@@ -44,8 +44,8 @@ struct Symbol {
int32_t (*callback)(void); int32_t (*callback)(void);
}; };
struct { /* For SYM_MACRO */ struct { /* For SYM_MACRO */
uint32_t macroSize; size_t macroSize;
char *macro; char const *macro;
}; };
}; };
@@ -114,9 +114,10 @@ void sym_Export(char const *symName);
struct Symbol *sym_AddEqu(char const *symName, int32_t value); struct Symbol *sym_AddEqu(char const *symName, int32_t value);
struct Symbol *sym_AddSet(char const *symName, int32_t value); struct Symbol *sym_AddSet(char const *symName, int32_t value);
uint32_t sym_GetPCValue(void); uint32_t sym_GetPCValue(void);
uint32_t sym_GetConstantSymValue(struct Symbol const *sym);
uint32_t sym_GetConstantValue(char const *s); uint32_t sym_GetConstantValue(char const *s);
struct Symbol *sym_FindSymbol(char const *symName); struct Symbol *sym_FindSymbol(char const *symName);
struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo); struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char const *body, size_t size);
struct Symbol *sym_Ref(char const *symName); struct Symbol *sym_Ref(char const *symName);
struct Symbol *sym_AddString(char const *symName, char const *value); struct Symbol *sym_AddString(char const *symName, char const *value);
uint32_t sym_GetDefinedValue(char const *s); uint32_t sym_GetDefinedValue(char const *s);

View File

@@ -12,6 +12,7 @@
#include <stdint.h> #include <stdint.h>
uint32_t calchash(const char *s); uint32_t calchash(const char *s);
char const *print(char c);
size_t readUTF8Char(uint8_t *dest, char const *src); size_t readUTF8Char(uint8_t *dest, char const *src);
#endif /* RGBDS_UTIL_H */ #endif /* RGBDS_UTIL_H */

View File

@@ -39,63 +39,7 @@ uint32_t nListCountEmpty;
char *tzNewMacro; char *tzNewMacro;
uint32_t ulNewMacroSize; uint32_t ulNewMacroSize;
int32_t nPCOffset; int32_t nPCOffset;
bool skipElifs; /* If this is set, ELIFs cannot be executed anymore */ bool executedIfBlock; /* If this is set, ELIFs cannot be executed anymore */
/*
 * Write the textual value of the symbol named `symName` into `dest`.
 *
 * - If the symbol exists and is of type SYM_EQUS, its string value is copied
 *   as-is; passing a non-NULL `mode` is reported as an error in that case.
 * - Otherwise the symbol's constant value (via sym_GetConstantValue) is
 *   formatted: `mode` is used as the printf-style format if non-NULL,
 *   "$%X" (PRIX32) is used if `mode` is NULL, and an empty `mode` string
 *   selects binary output.
 *
 * @param dest      Output buffer; the numeric branch writes up to
 *                  maxLength + 1 bytes into it
 * @param maxLength Maximum number of characters the value may occupy
 * @param symName   Name of the symbol to print
 * @param mode      NULL, an empty string (binary), or a format string
 * @return The number of characters of the value (terminator not counted)
 *
 * A value longer than `maxLength` is a fatal error.
 */
size_t symvaluetostring(char *dest, size_t maxLength, char *symName,
const char *mode)
{
size_t length;
struct Symbol *sym = sym_FindSymbol(symName);
if (sym && sym->type == SYM_EQUS) {
char const *src = sym_GetStringValue(sym);
size_t i;
if (mode)
error("Print types are only allowed for numbers\n");
/* Copy the characters only: no terminating NUL is written in this branch */
for (i = 0; src[i] != 0; i++) {
if (i >= maxLength)
fatalerror("Symbol value too long to fit buffer\n");
dest[i] = src[i];
}
length = i;
} else {
uint32_t value = sym_GetConstantValue(symName);
int32_t fullLength;
/* Special cheat for binary */
if (mode && !mode[0]) {
char binary[33]; /* 32 bits + 1 terminator */
char *write_ptr = binary + 32;
fullLength = 0;
binary[32] = 0;
/* Emit digits right-to-left; do-while so that 0 still yields "0" */
do {
*(--write_ptr) = (value & 1) + '0';
value >>= 1;
fullLength++;
} while(value);
/* strncpy zero-fills the remainder of the maxLength + 1 bytes */
strncpy(dest, write_ptr, maxLength + 1);
} else {
/* NOTE(review): `mode` is used directly as the format string; presumably validated by the caller -- confirm */
fullLength = snprintf(dest, maxLength + 1,
mode ? mode : "$%" PRIX32,
value);
}
if (fullLength < 0) {
fatalerror("snprintf encoding error\n");
} else {
/* fullLength is the untruncated length, so this detects truncation after the fact */
length = (size_t)fullLength;
if (length > maxLength)
fatalerror("Symbol value too long to fit buffer\n");
}
}
return length;
}
static uint32_t str2int2(uint8_t *s, int32_t length) static uint32_t str2int2(uint8_t *s, int32_t length)
{ {
@@ -388,16 +332,69 @@ lines : /* empty */
| lines { | lines {
nListCountEmpty = 0; nListCountEmpty = 0;
nPCOffset = 0; nPCOffset = 0;
} line '\n' { } line {
nTotalLines++; nTotalLines++;
} }
; ;
line : label line : label '\n'
| label cpu_command | label cpu_command '\n'
| label macro | label macro '\n'
| label simple_pseudoop | label simple_pseudoop '\n'
| pseudoop | pseudoop '\n'
| conditional /* May not necessarily be followed by a newline, see below */
;
/*
* For "logistical" reasons, conditionals must manage newlines themselves.
* This is because we need to switch the lexer's mode *after* the newline has been read,
* and to avoid causing some grammar conflicts (token reducing is finicky).
* This is DEFINITELY one of the more FRAGILE parts of the codebase, handle with care.
*/
/* A conditional directive: exactly one of the `if`, `elif`, `else` or `endc` rules. */
conditional : if
/* It's important that all of these require being at line start for `skipIfBlock` */
| elif
| else
| endc
;
/*
 * IF <const> followed by a newline: opens one more nesting level and records
 * whether this first branch is the one being executed.
 * NOTE(review): executedIfBlock is a single flag while nIFDepth allows
 * nesting; confirm that a nested IF cannot clobber the state of the
 * enclosing construct.
 */
if : T_POP_IF const '\n' {
nIFDepth++;
/* Normalize the condition to a bool */
executedIfBlock = !!$2;
/* False condition: switch the lexer to LEXER_SKIP_TO_ELIF (presumably skips up to the next branch or ENDC -- confirm in the lexer) */
if (!executedIfBlock)
lexer_SetMode(LEXER_SKIP_TO_ELIF);
}
;
/*
 * ELIF <const> followed by a newline: fatal outside of an IF construct.
 * The condition is part of the rule, so it is parsed even when its value
 * ends up being ignored.
 */
elif : T_POP_ELIF const '\n' {
/* nIFDepth is unsigned, so this is effectively a check for zero */
if (nIFDepth <= 0)
fatalerror("Found ELIF outside an IF construct\n");
/* A branch has already been executed: no further branch may run */
if (executedIfBlock) {
lexer_SetMode(LEXER_SKIP_TO_ENDC);
} else {
/* No branch has run yet, so this condition decides */
executedIfBlock = !!$2;
if (!executedIfBlock)
lexer_SetMode(LEXER_SKIP_TO_ELIF);
}
}
;
/*
 * ELSE followed by a newline: fatal outside of an IF construct.
 * The ELSE body is only skipped when a previous branch was executed;
 * otherwise the lexer mode is left unchanged.
 */
else : T_POP_ELSE '\n' {
/* nIFDepth is unsigned, so this is effectively a check for zero */
if (nIFDepth <= 0)
fatalerror("Found ELSE outside an IF construct\n");
if (executedIfBlock)
lexer_SetMode(LEXER_SKIP_TO_ENDC);
}
;
/* ENDC followed by a newline: closes one nesting level; fatal outside of an IF construct. */
endc : T_POP_ENDC '\n' {
/* Checked before decrementing: nIFDepth is unsigned and must not wrap around */
if (nIFDepth <= 0)
fatalerror("Found ENDC outside an IF construct\n");
nIFDepth--;
}
; ;
scoped_id : T_ID | T_LOCAL_ID ; scoped_id : T_ID | T_LOCAL_ID ;
@@ -460,10 +457,6 @@ simple_pseudoop : include
| printt | printt
| printv | printv
| printi | printi
| if
| elif
| else
| endc
| export | export
| db | db
| dw | dw
@@ -606,9 +599,9 @@ rept : T_POP_REPT uconst {
uint32_t nDefinitionLineNo = lexer_GetLineNo(); uint32_t nDefinitionLineNo = lexer_GetLineNo();
char const *body; char const *body;
size_t size; size_t size;
lexer_SkipToBlockEnd(T_POP_REPT, T_POP_ENDR, T_POP_ENDR, lexer_CaptureBlock(T_POP_REPT, T_POP_ENDR, &body, &size,
&body, &size, "REPT block"); "REPT block");
fstk_RunRept($2, nDefinitionLineNo); fstk_RunRept($2, nDefinitionLineNo, body, size);
} }
; ;
@@ -616,9 +609,9 @@ macrodef : T_LABEL ':' T_POP_MACRO {
int32_t nDefinitionLineNo = lexer_GetLineNo(); int32_t nDefinitionLineNo = lexer_GetLineNo();
char const *body; char const *body;
size_t size; size_t size;
lexer_SkipToBlockEnd(T_POP_MACRO, T_POP_ENDM, T_POP_ENDM, lexer_CaptureBlock(T_POP_MACRO, T_POP_ENDM, &body, &size,
&body, &size, "macro definition"); "macro definition");
sym_AddMacro($1, nDefinitionLineNo); sym_AddMacro($1, nDefinitionLineNo, body, size);
} }
; ;
@@ -786,72 +779,6 @@ printi : T_POP_PRINTI const { printf("%" PRId32, $2); }
printf : T_POP_PRINTF const { math_Print($2); } printf : T_POP_PRINTF const { math_Print($2); }
; ;
/*
 * IF <const>: opens one more nesting level. A false condition skips the
 * block immediately; skipElifs records whether later ELIFs may still run.
 */
if : T_POP_IF const {
nIFDepth++;
if (!$2) {
/* The function is hardcoded to also stop on T_POP_ELSE and ENDC */
lexer_SkipToBlockEnd(T_POP_IF, T_POP_ENDC, T_POP_ELIF,
NULL, NULL, "if block");
/* Nothing has been executed yet, so a following ELIF is still eligible */
skipElifs = false;
} else {
skipElifs = true;
}
}
;
/*
 * ELIF <const>: fatal outside of an IF construct. Depending on skipElifs,
 * either skips to ENDC or evaluates its own condition.
 */
elif : T_POP_ELIF const {
/* nIFDepth is unsigned, so this is effectively a check for zero */
if (nIFDepth <= 0)
fatalerror("Found ELIF outside an IF construct\n");
if (skipElifs) {
/*
* Executed when ELIF is reached at the end of
* an IF or ELIF block for which the condition
* was true.
*
* Continue parsing at ENDC keyword
*/
lexer_SkipToBlockEnd(T_POP_IF, T_POP_ENDC, T_POP_ENDC,
NULL, NULL, "elif block");
} else {
/*
* Executed when ELIF is skipped to because the
* condition of the previous IF or ELIF block
* was false.
*/
if (!$2) {
/*
* Continue parsing after ELSE, or at
* ELIF or ENDC keyword.
*/
lexer_SkipToBlockEnd(T_POP_IF, T_POP_ENDC, T_POP_ELIF,
NULL, NULL, "elif block");
} else {
/* This branch runs, so later ELIFs must be skipped */
skipElifs = true;
}
}
}
;
/*
 * ELSE: fatal outside of an IF construct; otherwise always skips to ENDC.
 * NOTE(review): presumably this rule is only reached at the end of a branch
 * that was executed -- confirm against lexer_SkipToBlockEnd.
 */
else : T_POP_ELSE {
if (nIFDepth <= 0)
fatalerror("Found ELSE outside an IF construct\n");
/* Continue parsing at ENDC keyword */
lexer_SkipToBlockEnd(T_POP_IF, T_POP_ENDC, T_POP_ENDC,
NULL, NULL, "else block");
}
;
/* ENDC: closes one nesting level; fatal outside of an IF construct. */
endc : T_POP_ENDC {
/* Checked before decrementing: nIFDepth is unsigned and must not wrap around */
if (nIFDepth <= 0)
fatalerror("Found ENDC outside an IF construct\n");
nIFDepth--;
}
;
const_3bit : const { const_3bit : const {
int32_t value = $1; int32_t value = $1;

View File

@@ -41,7 +41,7 @@ static char IncludePaths[MAXINCPATHS][_MAX_PATH + 1];
static int32_t NextIncPath; static int32_t NextIncPath;
static uint32_t nMacroCount; static uint32_t nMacroCount;
static char *pCurrentREPTBlock; static char const *pCurrentREPTBlock;
static uint32_t nCurrentREPTBlockSize; static uint32_t nCurrentREPTBlockSize;
static uint32_t nCurrentREPTBlockCount; static uint32_t nCurrentREPTBlockCount;
static int32_t nCurrentREPTBodyFirstLine; static int32_t nCurrentREPTBodyFirstLine;
@@ -249,9 +249,11 @@ void fstk_Dump(void)
pLastFile->nLine); pLastFile->nLine);
pLastFile = pLastFile->next; pLastFile = pLastFile->next;
} }
char const *fileName = lexer_GetFileName();
fprintf(stderr, "%s(%" PRId32 ",%" PRId32 ")", if (fileName)
lexer_GetFileName(), lexer_GetLineNo(), lexer_GetColNo()); fprintf(stderr, "%s(%" PRId32 ",%" PRId32 "): ",
fileName, lexer_GetLineNo(), lexer_GetColNo());
} }
void fstk_DumpToStr(char *buf, size_t buflen) void fstk_DumpToStr(char *buf, size_t buflen)
@@ -425,15 +427,15 @@ void fstk_RunMacro(char *s, struct MacroArgs *args)
/* /*
* Set up a repeat block for parsing * Set up a repeat block for parsing
*/ */
void fstk_RunRept(uint32_t count, int32_t nReptLineNo) void fstk_RunRept(uint32_t count, int32_t nReptLineNo, char const *body, size_t size)
{ {
if (count) { if (count) {
pushcontext(); pushcontext();
macro_SetUniqueID(nMacroCount++); macro_SetUniqueID(nMacroCount++);
nCurrentREPTBlockCount = count; nCurrentREPTBlockCount = count;
nCurrentStatus = STAT_isREPTBlock; nCurrentStatus = STAT_isREPTBlock;
nCurrentREPTBlockSize = ulNewMacroSize; nCurrentREPTBlockSize = size;
pCurrentREPTBlock = tzNewMacro; pCurrentREPTBlock = body;
nCurrentREPTBodyFirstLine = nReptLineNo + 1; nCurrentREPTBodyFirstLine = nReptLineNo + 1;
} }
} }

File diff suppressed because it is too large Load Diff

View File

@@ -488,6 +488,7 @@ int main(int argc, char *argv[])
if (!state) if (!state)
fatalerror("Failed to open main file!\n"); fatalerror("Failed to open main file!\n");
lexer_Init();
lexer_SetState(state); lexer_SetState(state);
nStartClock = clock(); nStartClock = clock();

View File

@@ -210,8 +210,6 @@ void sym_Purge(char const *symName)
labelScope = NULL; labelScope = NULL;
hash_RemoveElement(symbols, symbol->name); hash_RemoveElement(symbols, symbol->name);
if (symbol->type == SYM_MACRO)
free(symbol->macro);
free(symbol); free(symbol);
} }
} }
@@ -230,7 +228,22 @@ uint32_t sym_GetPCValue(void)
} }
/* /*
* Return a constant symbols value * Return a constant symbol's value, assuming it's defined
*/
uint32_t sym_GetConstantSymValue(struct Symbol const *sym)
{
	/* PC is looked up through its dedicated accessor, before any constness check */
	if (sym == PCSymbol)
		return sym_GetPCValue();

	/* Any other constant symbol simply yields its value */
	if (sym_IsConstant(sym))
		return sym_GetValue(sym);

	/* Not constant: report the problem and fall back to 0 */
	error("\"%s\" does not have a constant value\n", sym->name);
	return 0;
}
/*
* Return a constant symbol's value
*/ */
uint32_t sym_GetConstantValue(char const *s) uint32_t sym_GetConstantValue(char const *s)
{ {
@@ -238,12 +251,8 @@ uint32_t sym_GetConstantValue(char const *s)
if (sym == NULL) if (sym == NULL)
error("'%s' not defined\n", s); error("'%s' not defined\n", s);
else if (sym == PCSymbol)
return sym_GetPCValue();
else if (!sym_IsConstant(sym))
error("\"%s\" does not have a constant value\n", s);
else else
return sym_GetValue(sym); return sym_GetConstantSymValue(sym);
return 0; return 0;
} }
@@ -468,13 +477,13 @@ void sym_Export(char const *symName)
/* /*
* Add a macro definition * Add a macro definition
*/ */
struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo) struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char const *body, size_t size)
{ {
struct Symbol *sym = createNonrelocSymbol(symName); struct Symbol *sym = createNonrelocSymbol(symName);
sym->type = SYM_MACRO; sym->type = SYM_MACRO;
sym->macroSize = ulNewMacroSize; sym->macroSize = size;
sym->macro = tzNewMacro; sym->macro = body;
updateSymbolFilename(sym); updateSymbolFilename(sym);
/* /*
* The symbol is created at the line after the `endm`, * The symbol is created at the line after the `endm`,

View File

@@ -6,6 +6,7 @@
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
*/ */
#include <ctype.h>
#include <stdint.h> #include <stdint.h>
#include "asm/main.h" #include "asm/main.h"
@@ -27,6 +28,37 @@ uint32_t calchash(const char *s)
return hash; return hash;
} }
/*
 * Return a printable, NUL-terminated representation of `c`.
 *
 * Printable characters are returned unchanged; newline, carriage return and
 * tab are returned as the escapes "\n", "\r" and "\t"; any other byte is
 * returned as "\xNN" with two lowercase hex digits.
 *
 * @param c The character to represent
 * @return A pointer to a static buffer, overwritten by the next call
 */
char const *print(char c)
{
	static char const hexDigits[] = "0123456789abcdef";
	static char buf[5]; /* '\xNN' + '\0' */
	/*
	 * <ctype.h> functions require a value representable as unsigned char
	 * (or EOF); passing a negative plain char is undefined behaviour.
	 */
	unsigned char byte = (unsigned char)c;

	if (isprint(byte)) {
		buf[0] = c;
		buf[1] = '\0';
		return buf;
	}

	buf[0] = '\\';
	switch (c) {
	case '\n':
		buf[1] = 'n';
		break;
	case '\r':
		buf[1] = 'r';
		break;
	case '\t':
		buf[1] = 't';
		break;
	default: /* Print as hex */
		buf[1] = 'x';
		buf[2] = hexDigits[byte >> 4];
		buf[3] = hexDigits[byte & 0x0F];
		buf[4] = '\0';
		return buf;
	}
	buf[2] = '\0';
	return buf;
}
size_t readUTF8Char(uint8_t *dest, char const *src) size_t readUTF8Char(uint8_t *dest, char const *src)
{ {
uint32_t state = 0; uint32_t state = 0;

View File

@@ -198,14 +198,14 @@ void processWarningFlag(char const *flag)
warnx("Unknown warning `%s`", flag); warnx("Unknown warning `%s`", flag);
} }
void verror(const char *fmt, va_list args, char const *flag) void printDiag(const char *fmt, va_list args, char const *type,
char const *flagfmt, char const *flag)
{ {
fputs("ERROR: ", stderr); fputs(type, stderr);
fstk_Dump(); fstk_Dump();
fprintf(stderr, flag ? ": [-Werror=%s]\n " : ":\n ", flag); fprintf(stderr, flagfmt, flag);
vfprintf(stderr, fmt, args); vfprintf(stderr, fmt, args);
lexer_DumpStringExpansions(); lexer_DumpStringExpansions();
nbErrors++;
} }
void error(const char *fmt, ...) void error(const char *fmt, ...)
@@ -213,8 +213,9 @@ void error(const char *fmt, ...)
va_list args; va_list args;
va_start(args, fmt); va_start(args, fmt);
verror(fmt, args, NULL); printDiag(fmt, args, "ERROR: ", "\n ", NULL);
va_end(args); va_end(args);
nbErrors++;
} }
noreturn_ void fatalerror(const char *fmt, ...) noreturn_ void fatalerror(const char *fmt, ...)
@@ -222,7 +223,7 @@ noreturn_ void fatalerror(const char *fmt, ...)
va_list args; va_list args;
va_start(args, fmt); va_start(args, fmt);
verror(fmt, args, NULL); printDiag(fmt, args, "FATAL: ", "\n ", NULL);
va_end(args); va_end(args);
exit(1); exit(1);
@@ -240,7 +241,7 @@ void warning(enum WarningID id, char const *fmt, ...)
return; return;
case WARNING_ERROR: case WARNING_ERROR:
verror(fmt, args, flag); printDiag(fmt, args, "ERROR: ", "[-Werror=%s]\n ", flag);
va_end(args); va_end(args);
return; return;
@@ -252,11 +253,7 @@ void warning(enum WarningID id, char const *fmt, ...)
break; break;
} }
fputs("warning: ", stderr); printDiag(fmt, args, "warning: ", "[-W%s]\n ", flag);
fstk_Dump();
fprintf(stderr, ": [-W%s]\n ", flag);
vfprintf(stderr, fmt, args);
lexer_DumpStringExpansions();
va_end(args); va_end(args);
} }