Implement almost all functionality

Add keywords and identifiers
Add comments
Add number literals
Add strings
Add a lot of new tokens
Add (and clean up) IF etc.
Improve reporting of unexpected chars / garbage bytes
Fix a bug with, and improve the error messages for, failing to open a file
Add verbose-level messages about how files are opened
Enforce that files finish with a newline
Fix chars returned not being cast to unsigned char (may conflict w/ EOF)
Return null path when no file is open, rather than crash
Unify and improve error printing slightly

Known to be missing: macro expansion, REPT blocks, EQUS expansions
This commit is contained in:
ISSOtm
2020-07-28 22:06:03 +02:00
parent 71f8871702
commit 4c9a929a14
12 changed files with 1139 additions and 235 deletions

View File

@@ -27,9 +27,5 @@
extern uint32_t nTotalLines;
extern uint32_t nIFDepth;
extern struct Section *pCurrentSection;
extern bool oDontExpandStrings;
size_t symvaluetostring(char *dest, size_t maxLength, char *sym,
const char *mode);
#endif /* RGBDS_ASM_ASM_H */

View File

@@ -32,7 +32,7 @@ struct sContext {
uint32_t uniqueID;
int32_t nLine;
uint32_t nStatus;
char *pREPTBlock;
char const *pREPTBlock;
uint32_t nREPTBlockCount;
uint32_t nREPTBlockSize;
int32_t nREPTBodyFirstLine;
@@ -47,7 +47,7 @@ void fstk_Dump(void);
void fstk_DumpToStr(char *buf, size_t len);
void fstk_AddIncludePath(char *s);
void fstk_RunMacro(char *s, struct MacroArgs *args);
void fstk_RunRept(uint32_t count, int32_t nReptLineNo);
void fstk_RunRept(uint32_t count, int32_t nReptLineNo, char const *body, size_t size);
/**
* @param path The user-provided file name
* @param fullPath The address of a pointer, which will be made to point at the full path

View File

@@ -33,10 +33,13 @@ static inline void lexer_SetStateAtEOL(struct LexerState *state)
struct LexerState *lexer_OpenFile(char const *path);
struct LexerState *lexer_OpenFileView(void);
void lexer_DeleteState(struct LexerState *state);
void lexer_Init(void);
enum LexerMode {
LEXER_NORMAL,
LEXER_RAW
LEXER_RAW,
LEXER_SKIP_TO_ELIF,
LEXER_SKIP_TO_ENDC
};
void lexer_SetMode(enum LexerMode mode);
@@ -47,7 +50,7 @@ uint32_t lexer_GetLineNo(void);
uint32_t lexer_GetColNo(void);
void lexer_DumpStringExpansions(void);
int yylex(void);
void lexer_SkipToBlockEnd(int blockStartToken, int blockEndToken, int endToken,
char const **capture, size_t *size, char const *name);
void lexer_CaptureBlock(int blockStartToken, int blockEndToken, char const **capture, size_t *size,
char const *name);
#endif /* RGBDS_ASM_LEXER_H */

View File

@@ -44,8 +44,8 @@ struct Symbol {
int32_t (*callback)(void);
};
struct { /* For SYM_MACRO */
uint32_t macroSize;
char *macro;
size_t macroSize;
char const *macro;
};
};
@@ -114,9 +114,10 @@ void sym_Export(char const *symName);
struct Symbol *sym_AddEqu(char const *symName, int32_t value);
struct Symbol *sym_AddSet(char const *symName, int32_t value);
uint32_t sym_GetPCValue(void);
uint32_t sym_GetConstantSymValue(struct Symbol const *sym);
uint32_t sym_GetConstantValue(char const *s);
struct Symbol *sym_FindSymbol(char const *symName);
struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo);
struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char const *body, size_t size);
struct Symbol *sym_Ref(char const *symName);
struct Symbol *sym_AddString(char const *symName, char const *value);
uint32_t sym_GetDefinedValue(char const *s);

View File

@@ -12,6 +12,7 @@
#include <stdint.h>
uint32_t calchash(const char *s);
char const *print(char c);
size_t readUTF8Char(uint8_t *dest, char const *src);
#endif /* RGBDS_UTIL_H */

View File

@@ -39,63 +39,7 @@ uint32_t nListCountEmpty;
char *tzNewMacro;
uint32_t ulNewMacroSize;
int32_t nPCOffset;
bool skipElifs; /* If this is set, ELIFs cannot be executed anymore */
/*
 * Write the textual value of the symbol named `symName` into `dest`.
 *
 * - EQUS symbols: the string value is copied verbatim, without a terminator;
 *   at most `maxLength` characters are accepted. A non-NULL `mode` is reported
 *   as an error, but the copy still takes place.
 * - Anything else: the symbol's constant value is formatted. An empty `mode`
 *   string selects binary output, a NULL `mode` selects "$" followed by
 *   uppercase hex, and any other `mode` is used as the printf format itself.
 *   Up to `maxLength + 1` bytes of `dest` may be written in this case.
 *
 * Returns the number of characters produced; aborts through fatalerror() when
 * the value does not fit in `maxLength` characters.
 */
size_t symvaluetostring(char *dest, size_t maxLength, char *symName,
			const char *mode)
{
	struct Symbol *sym = sym_FindSymbol(symName);

	if (sym && sym->type == SYM_EQUS) {
		char const *str = sym_GetStringValue(sym);
		size_t copied = 0;

		if (mode)
			error("Print types are only allowed for numbers\n");

		/* Plain character copy; the bound is checked before each write */
		while (str[copied] != 0) {
			if (copied >= maxLength)
				fatalerror("Symbol value too long to fit buffer\n");
			dest[copied] = str[copied];
			copied++;
		}

		return copied;
	}

	uint32_t value = sym_GetConstantValue(symName);
	int32_t written;

	if (mode && !mode[0]) {
		/* printf has no binary conversion, so the digits are built by hand */
		char digits[33]; /* 32 bits + 1 terminator */
		char *cursor = &digits[32];

		*cursor = 0;
		written = 0;

		/* Fill from the least significant bit backwards; always emits one digit */
		do {
			*(--cursor) = (value & 1) + '0';
			value >>= 1;
			written++;
		} while (value);

		strncpy(dest, cursor, maxLength + 1);
	} else {
		written = snprintf(dest, maxLength + 1,
				   mode ? mode : "$%" PRIX32,
				   value);
	}

	if (written < 0)
		fatalerror("snprintf encoding error\n");

	/* fatalerror() does not return, so `written` is non-negative from here on */
	if ((size_t)written > maxLength)
		fatalerror("Symbol value too long to fit buffer\n");

	return (size_t)written;
}
bool executedIfBlock; /* If this is set, ELIFs cannot be executed anymore */
static uint32_t str2int2(uint8_t *s, int32_t length)
{
@@ -388,16 +332,69 @@ lines : /* empty */
| lines {
nListCountEmpty = 0;
nPCOffset = 0;
} line '\n' {
} line {
nTotalLines++;
}
;
line : label
| label cpu_command
| label macro
| label simple_pseudoop
| pseudoop
line : label '\n'
| label cpu_command '\n'
| label macro '\n'
| label simple_pseudoop '\n'
| pseudoop '\n'
| conditional /* May not necessarily be followed by a newline, see below */
;
/*
* For "logistical" reasons, conditionals must manage newlines themselves.
* This is because we need to switch the lexer's mode *after* the newline has been read,
* and to avoid causing some grammar conflicts (token reducing is finicky).
* This is DEFINITELY one of the more FRAGILE parts of the codebase, handle with care.
*/
/*
 * A conditional directive: exactly one of IF, ELIF, ELSE or ENDC.
 * Unlike the other `line` alternatives, these rules are not followed by a
 * '\n' in `line`; each of them matches the newline itself.
 */
conditional : if
/* It's important that all of these require being at line start for `skipIfBlock` */
| elif
| else
| endc
;
/*
 * IF <const>, including its terminating newline.
 * Opens one more nesting level and records in `executedIfBlock` whether the
 * condition was non-zero. When it is zero, the lexer is switched to
 * LEXER_SKIP_TO_ELIF (presumably skipping input up to the next branch of this
 * IF -- the lexer side is not visible here).
 * NOTE(review): `executedIfBlock` is a single flag that every IF overwrites,
 * nested ones included, and nothing here restores it; confirm that an inner IF
 * cannot change how the enclosing IF's later ELIF/ELSE are treated.
 */
if : T_POP_IF const '\n' {
nIFDepth++;
/* `!!` normalises any non-zero condition to true */
executedIfBlock = !!$2;
if (!executedIfBlock)
lexer_SetMode(LEXER_SKIP_TO_ELIF);
}
;
/*
 * ELIF <const>, including its terminating newline.
 * Fatal when no IF is open. If a previous branch has already been executed,
 * the lexer is switched to LEXER_SKIP_TO_ENDC; otherwise the condition is
 * evaluated exactly like the one of an IF.
 */
elif : T_POP_ELIF const '\n' {
if (nIFDepth <= 0)
fatalerror("Found ELIF outside an IF construct\n");
if (executedIfBlock) {
/* A branch already ran: nothing else in this IF may run */
lexer_SetMode(LEXER_SKIP_TO_ENDC);
} else {
/* No branch ran so far: decide whether this one does */
executedIfBlock = !!$2;
if (!executedIfBlock)
lexer_SetMode(LEXER_SKIP_TO_ELIF);
}
}
;
/*
 * ELSE, including its terminating newline.
 * Fatal when no IF is open. If a previous branch has already been executed,
 * the lexer is switched to LEXER_SKIP_TO_ENDC; otherwise no mode change is
 * made and parsing simply continues with the lines after the ELSE.
 */
else : T_POP_ELSE '\n' {
if (nIFDepth <= 0)
fatalerror("Found ELSE outside an IF construct\n");
if (executedIfBlock)
lexer_SetMode(LEXER_SKIP_TO_ENDC);
}
;
/*
 * ENDC, including its terminating newline.
 * Fatal when no IF is open; otherwise closes one nesting level.
 * `executedIfBlock` is left untouched.
 */
endc : T_POP_ENDC '\n' {
if (nIFDepth <= 0)
fatalerror("Found ENDC outside an IF construct\n");
nIFDepth--;
}
;
scoped_id : T_ID | T_LOCAL_ID ;
@@ -460,10 +457,6 @@ simple_pseudoop : include
| printt
| printv
| printi
| if
| elif
| else
| endc
| export
| db
| dw
@@ -606,9 +599,9 @@ rept : T_POP_REPT uconst {
uint32_t nDefinitionLineNo = lexer_GetLineNo();
char const *body;
size_t size;
lexer_SkipToBlockEnd(T_POP_REPT, T_POP_ENDR, T_POP_ENDR,
&body, &size, "REPT block");
fstk_RunRept($2, nDefinitionLineNo);
lexer_CaptureBlock(T_POP_REPT, T_POP_ENDR, &body, &size,
"REPT block");
fstk_RunRept($2, nDefinitionLineNo, body, size);
}
;
@@ -616,9 +609,9 @@ macrodef : T_LABEL ':' T_POP_MACRO {
int32_t nDefinitionLineNo = lexer_GetLineNo();
char const *body;
size_t size;
lexer_SkipToBlockEnd(T_POP_MACRO, T_POP_ENDM, T_POP_ENDM,
&body, &size, "macro definition");
sym_AddMacro($1, nDefinitionLineNo);
lexer_CaptureBlock(T_POP_MACRO, T_POP_ENDM, &body, &size,
"macro definition");
sym_AddMacro($1, nDefinitionLineNo, body, size);
}
;
@@ -786,72 +779,6 @@ printi : T_POP_PRINTI const { printf("%" PRId32, $2); }
printf : T_POP_PRINTF const { math_Print($2); }
;
/*
 * IF <const> (this form does not match the newline).
 * Opens one more nesting level. When the condition is zero, the block is
 * skipped through lexer_SkipToBlockEnd() and `skipElifs` is cleared so that a
 * following ELIF is still evaluated; when it is non-zero, `skipElifs` is set.
 */
if : T_POP_IF const {
nIFDepth++;
if (!$2) {
/* The function is hardcoded to also stop on T_POP_ELSE and ENDC */
lexer_SkipToBlockEnd(T_POP_IF, T_POP_ENDC, T_POP_ELIF,
NULL, NULL, "if block");
skipElifs = false;
} else {
skipElifs = true;
}
}
;
/*
 * ELIF <const> (this form does not match the newline).
 * Fatal when no IF is open. Depending on `skipElifs`, either the rest of the
 * construct is skipped, or the condition decides whether this branch is
 * skipped or executed.
 */
elif : T_POP_ELIF const {
if (nIFDepth <= 0)
fatalerror("Found ELIF outside an IF construct\n");
if (skipElifs) {
/*
* Executed when ELIF is reached at the end of
* an IF or ELIF block for which the condition
* was true.
*
* Continue parsing at ENDC keyword
*/
lexer_SkipToBlockEnd(T_POP_IF, T_POP_ENDC, T_POP_ENDC,
NULL, NULL, "elif block");
} else {
/*
* Executed when ELIF is skipped to because the
* condition of the previous IF or ELIF block
* was false.
*/
if (!$2) {
/*
* Continue parsing after ELSE, or at
* ELIF or ENDC keyword.
*/
lexer_SkipToBlockEnd(T_POP_IF, T_POP_ENDC, T_POP_ELIF,
NULL, NULL, "elif block");
} else {
/* This branch runs, so later ELIFs of the same IF must not */
skipElifs = true;
}
}
}
;
/*
 * ELSE (this form does not match the newline).
 * Fatal when no IF is open; otherwise always skips through
 * lexer_SkipToBlockEnd() with T_POP_ENDC as the stop token.
 */
else : T_POP_ELSE {
if (nIFDepth <= 0)
fatalerror("Found ELSE outside an IF construct\n");
/* Continue parsing at ENDC keyword */
lexer_SkipToBlockEnd(T_POP_IF, T_POP_ENDC, T_POP_ENDC,
NULL, NULL, "else block");
}
;
/*
 * ENDC (this form does not match the newline).
 * Fatal when no IF is open; otherwise closes one nesting level.
 */
endc : T_POP_ENDC {
if (nIFDepth <= 0)
fatalerror("Found ENDC outside an IF construct\n");
nIFDepth--;
}
;
const_3bit : const {
int32_t value = $1;

View File

@@ -41,7 +41,7 @@ static char IncludePaths[MAXINCPATHS][_MAX_PATH + 1];
static int32_t NextIncPath;
static uint32_t nMacroCount;
static char *pCurrentREPTBlock;
static char const *pCurrentREPTBlock;
static uint32_t nCurrentREPTBlockSize;
static uint32_t nCurrentREPTBlockCount;
static int32_t nCurrentREPTBodyFirstLine;
@@ -249,9 +249,11 @@ void fstk_Dump(void)
pLastFile->nLine);
pLastFile = pLastFile->next;
}
char const *fileName = lexer_GetFileName();
fprintf(stderr, "%s(%" PRId32 ",%" PRId32 ")",
lexer_GetFileName(), lexer_GetLineNo(), lexer_GetColNo());
if (fileName)
fprintf(stderr, "%s(%" PRId32 ",%" PRId32 "): ",
fileName, lexer_GetLineNo(), lexer_GetColNo());
}
void fstk_DumpToStr(char *buf, size_t buflen)
@@ -425,15 +427,15 @@ void fstk_RunMacro(char *s, struct MacroArgs *args)
/*
* Set up a repeat block for parsing
*/
void fstk_RunRept(uint32_t count, int32_t nReptLineNo)
void fstk_RunRept(uint32_t count, int32_t nReptLineNo, char const *body, size_t size)
{
if (count) {
pushcontext();
macro_SetUniqueID(nMacroCount++);
nCurrentREPTBlockCount = count;
nCurrentStatus = STAT_isREPTBlock;
nCurrentREPTBlockSize = ulNewMacroSize;
pCurrentREPTBlock = tzNewMacro;
nCurrentREPTBlockSize = size;
pCurrentREPTBlock = body;
nCurrentREPTBodyFirstLine = nReptLineNo + 1;
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -488,6 +488,7 @@ int main(int argc, char *argv[])
if (!state)
fatalerror("Failed to open main file!\n");
lexer_Init();
lexer_SetState(state);
nStartClock = clock();

View File

@@ -210,8 +210,6 @@ void sym_Purge(char const *symName)
labelScope = NULL;
hash_RemoveElement(symbols, symbol->name);
if (symbol->type == SYM_MACRO)
free(symbol->macro);
free(symbol);
}
}
@@ -230,7 +228,22 @@ uint32_t sym_GetPCValue(void)
}
/*
* Return a constant symbols value
* Return a constant symbol's value, assuming it's defined
*/
uint32_t sym_GetConstantSymValue(struct Symbol const *sym)
{
	/* The PC symbol gets its value from sym_GetPCValue() */
	if (sym == PCSymbol)
		return sym_GetPCValue();

	if (sym_IsConstant(sym))
		return sym_GetValue(sym);

	/* Non-constant symbol: report it and return 0 as the value */
	error("\"%s\" does not have a constant value\n", sym->name);
	return 0;
}
/*
* Return a constant symbol's value
*/
uint32_t sym_GetConstantValue(char const *s)
{
@@ -238,12 +251,8 @@ uint32_t sym_GetConstantValue(char const *s)
if (sym == NULL)
error("'%s' not defined\n", s);
else if (sym == PCSymbol)
return sym_GetPCValue();
else if (!sym_IsConstant(sym))
error("\"%s\" does not have a constant value\n", s);
else
return sym_GetValue(sym);
return sym_GetConstantSymValue(sym);
return 0;
}
@@ -468,13 +477,13 @@ void sym_Export(char const *symName)
/*
* Add a macro definition
*/
struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo)
struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char const *body, size_t size)
{
struct Symbol *sym = createNonrelocSymbol(symName);
sym->type = SYM_MACRO;
sym->macroSize = ulNewMacroSize;
sym->macro = tzNewMacro;
sym->macroSize = size;
sym->macro = body;
updateSymbolFilename(sym);
/*
* The symbol is created at the line after the `endm`,

View File

@@ -6,6 +6,7 @@
* SPDX-License-Identifier: MIT
*/
#include <ctype.h>
#include <stdint.h>
#include "asm/main.h"
@@ -27,6 +28,37 @@ uint32_t calchash(const char *s)
return hash;
}
/*
 * Return a printable representation of the character `c`.
 *
 * Printable characters are returned as-is; newline, carriage return and tab
 * come back as the escapes \n, \r and \t; every other byte is rendered as
 * \xNN with two lowercase hex digits.
 *
 * The result lives in a static buffer: it is overwritten by the next call,
 * so it must be consumed (or copied) before calling print() again.
 */
char const *print(char c)
{
	static char buf[5]; /* '\xNN' + '\0' */
	/*
	 * <ctype.h> functions are only defined for EOF and values representable
	 * as unsigned char; a plain `char` may be negative, so convert first.
	 */
	unsigned char byte = (unsigned char)c;

	if (isprint(byte)) {
		buf[0] = c;
		buf[1] = '\0';
		return buf;
	}

	buf[0] = '\\';
	switch (c) {
	case '\n':
		buf[1] = 'n';
		break;
	case '\r':
		buf[1] = 'r';
		break;
	case '\t':
		buf[1] = 't';
		break;
	default: /* Print as hex */
		buf[1] = 'x';
		/* Bounded write: two hex digits plus the terminator fill the buffer exactly */
		snprintf(&buf[2], sizeof(buf) - 2, "%02x", (unsigned int)byte);
		return buf;
	}
	buf[2] = '\0';
	return buf;
}
size_t readUTF8Char(uint8_t *dest, char const *src)
{
uint32_t state = 0;

View File

@@ -198,14 +198,14 @@ void processWarningFlag(char const *flag)
warnx("Unknown warning `%s`", flag);
}
void verror(const char *fmt, va_list args, char const *flag)
void printDiag(const char *fmt, va_list args, char const *type,
char const *flagfmt, char const *flag)
{
fputs("ERROR: ", stderr);
fputs(type, stderr);
fstk_Dump();
fprintf(stderr, flag ? ": [-Werror=%s]\n " : ":\n ", flag);
fprintf(stderr, flagfmt, flag);
vfprintf(stderr, fmt, args);
lexer_DumpStringExpansions();
nbErrors++;
}
void error(const char *fmt, ...)
@@ -213,8 +213,9 @@ void error(const char *fmt, ...)
va_list args;
va_start(args, fmt);
verror(fmt, args, NULL);
printDiag(fmt, args, "ERROR: ", "\n ", NULL);
va_end(args);
nbErrors++;
}
noreturn_ void fatalerror(const char *fmt, ...)
@@ -222,7 +223,7 @@ noreturn_ void fatalerror(const char *fmt, ...)
va_list args;
va_start(args, fmt);
verror(fmt, args, NULL);
printDiag(fmt, args, "FATAL: ", "\n ", NULL);
va_end(args);
exit(1);
@@ -240,7 +241,7 @@ void warning(enum WarningID id, char const *fmt, ...)
return;
case WARNING_ERROR:
verror(fmt, args, flag);
printDiag(fmt, args, "ERROR: ", "[-Werror=%s]\n ", flag);
va_end(args);
return;
@@ -252,11 +253,7 @@ void warning(enum WarningID id, char const *fmt, ...)
break;
}
fputs("warning: ", stderr);
fstk_Dump();
fprintf(stderr, ": [-W%s]\n ", flag);
vfprintf(stderr, fmt, args);
lexer_DumpStringExpansions();
printDiag(fmt, args, "warning: ", "[-W%s]\n ", flag);
va_end(args);
}