Hack in new register syntax

Oh my god I want to die x_x
This commit is contained in:
ISSOtm
2022-04-01 12:31:35 +02:00
parent c814a616d6
commit cd454d2e9a
3 changed files with 73 additions and 38 deletions

View File

@@ -148,24 +148,29 @@ static struct KeywordMapping {
{"NZ", T_CC_NZ}, {"NZ", T_CC_NZ},
{"Z", T_CC_Z}, {"Z", T_CC_Z},
{"NC", T_CC_NC}, {"NC", T_CC_NC},
/* Handled after as T_TOKEN_C */ {"C", T_CC_C},
/* { "C", T_CC_C }, */
{"AF", T_MODE_AF}, {"•̀A•́)𝓕𝓾𝓬𝓴", T_MODE_AF},
{"BC", T_MODE_BC}, // {"BC", T_MODE_BC},
{"DE", T_MODE_DE}, // {"DE", T_MODE_DE},
{"HL", T_MODE_HL}, {"н∠(", T_MODE_HL_START},
{"SP", T_MODE_SP}, {"SP", T_MODE_SP},
{"HLD", T_MODE_HL_DEC}, {"н∠( ᐛ 」∠)_👁", T_MODE_HL_DEC},
{"HLI", T_MODE_HL_INC}, {"н∠( ᐛ 」∠)_👎", T_MODE_HL_INC},
{"A", T_TOKEN_A}, // HACK: normally this is surrounded by parens, but this is annoying to special-case,
{"B", T_TOKEN_B}, // so we use cooperation from the parser.
{"C", T_TOKEN_C}, {"•̀A•́", T_TOKEN_A},
{"D", T_TOKEN_D}, // {"=B", T_TOKEN_B}, HACK: This begins with a non-identifier character, so we'll cheat
{"E", T_TOKEN_E}, {"♥(˘⌣˘", T_TOKEN_C}, // HACK: same for "C" after the space & closing paren
{"H", T_TOKEN_H}, // {";D", T_TOKEN_D}, HACK: also needs to be special-cased. God I feel dirty.
{"L", T_TOKEN_L}, {"(´ε`", T_TOKEN_E},
{"", T_TOKEN_E_HEART},
{"н", T_TOKEN_H},
{"∠(", T_TOKEN_L_ARM},
{"", T_TOKEN_L_FACE},
{"」∠", T_TOKEN_L_BODY},
{"_", T_TOKEN_L_LEG},
{"DEF", T_OP_DEF}, {"DEF", T_OP_DEF},
@@ -578,16 +583,16 @@ struct KeywordDictNode {
* In turn, this allows greatly simplifying checking an index into this array, * In turn, this allows greatly simplifying checking an index into this array,
* which should help speed up the lexer. * which should help speed up the lexer.
*/ */
uint16_t children[0x60 - ' ']; uint16_t children[256]; // HACK: we "support" UTF-8 as input now
struct KeywordMapping const *keyword; struct KeywordMapping const *keyword;
/* Since the keyword structure is invariant, the min number of nodes is known at compile time */ /* Since the keyword structure is invariant, the min number of nodes is known at compile time */
} keywordDict[365] = {0}; /* Make sure to keep this correct when adding keywords! */ } keywordDict[690] = {0}; /* Nice */
/*
 * Convert a char into its index into a dict node's `children` array.
 *
 * The byte is reinterpreted as `unsigned char` first, so the result does not
 * depend on whether plain `char` is signed on the target: bytes 0x80-0xFF
 * (which is what UTF-8 multi-byte sequences are made of) always map to 0x60-0xDF.
 *
 * @param c The raw byte to look up
 * @return The byte's offset from ' ', with 7-bit bytes above 0x60 folded down by
 *         0x20 (so 'a'-'z' share the slots of 'A'-'Z'); bytes below ' ' wrap
 *         around to 0xE0-0xFF
 */
static uint8_t dictIndex(char c)
{
	unsigned char byte = (unsigned char)c;

	/* Fold lowercase onto uppercase (roughly); bytes with the high bit set are left alone */
	if (byte > 0x60 && byte < 0x80)
		byte -= 'a' - 'A';
	/* Make the modulo-256 reduction explicit instead of relying on the return conversion */
	return (uint8_t)(byte - ' ');
}
@@ -609,8 +614,9 @@ void lexer_Init(void)
/* Walk the dictionary, creating intermediate nodes for the keyword */ /* Walk the dictionary, creating intermediate nodes for the keyword */
for (char const *ptr = keywords[i].name; *ptr; ptr++) { for (char const *ptr = keywords[i].name; *ptr; ptr++) {
unsigned char index = (unsigned char)*ptr - ' ';
/* We should be able to assume all entries are well-formed */ /* We should be able to assume all entries are well-formed */
if (keywordDict[nodeID].children[*ptr - ' '] == 0) { if (keywordDict[nodeID].children[index] == 0) {
/* /*
* If this gets tripped up, set the size of keywordDict to * If this gets tripped up, set the size of keywordDict to
* something high, compile with `-DPRINT_NODE_COUNT` (see below), * something high, compile with `-DPRINT_NODE_COUNT` (see below),
@@ -619,10 +625,10 @@ void lexer_Init(void)
assert(usedNodes < sizeof(keywordDict) / sizeof(*keywordDict)); assert(usedNodes < sizeof(keywordDict) / sizeof(*keywordDict));
/* There is no node at that location, grab one from the pool */ /* There is no node at that location, grab one from the pool */
keywordDict[nodeID].children[*ptr - ' '] = usedNodes; keywordDict[nodeID].children[index] = usedNodes;
usedNodes++; usedNodes++;
} }
nodeID = keywordDict[nodeID].children[*ptr - ' ']; nodeID = keywordDict[nodeID].children[index];
} }
/* This assumes that no two keywords have the same name */ /* This assumes that no two keywords have the same name */
@@ -1289,11 +1295,15 @@ static uint32_t readGfxConstant(void)
/*
 * Tells whether `c` may be the first character of an identifier.
 *
 * @param c The character to test, as an int; negative values (such as EOF) never match
 * @return true for ASCII letters, '.', '_', '(' and any value >= 0x80
 */
static bool startsIdentifier(int c)
{
	// Anonymous labels internally start with '!'
	// April Fools HACK: values >= 0x80 (the bytes UTF-8 multi-byte sequences are made of)
	// and '(' are accepted too, since some of the new register keywords begin with them
	return (c <= 'Z' && c >= 'A') || (c <= 'z' && c >= 'a') || c == '.' || c == '_'
		|| c >= 0x80 || c == '(';
}
/*
 * Tells whether `c` may appear in an identifier after its first character.
 *
 * @param c The character to test, as an int
 * @return true for anything `startsIdentifier` accepts, plus ASCII digits, '#' and '@'
 */
static bool continuesIdentifier(int c)
{
	// April Fools HACK: allow UTF-8 :D
	// This would normally be quite unsafe (hello, RTL control codes?),
	// but since this is for a joke I'll also make the code a joke
	// Also, hi if you're reading this!
	return startsIdentifier(c) || (c <= '9' && c >= '0') || c == '#' || c == '@';
}
@@ -1774,6 +1784,10 @@ static int yylex_NORMAL(void)
/* Ignore whitespace and comments */ /* Ignore whitespace and comments */
case ';': case ';':
if (peek() == 'D') {
shiftChar();
return T_TOKEN_D;
}
discardComment(); discardComment();
/* fallthrough */ /* fallthrough */
case ' ': case ' ':
@@ -1794,8 +1808,6 @@ static int yylex_NORMAL(void)
return T_LBRACK; return T_LBRACK;
case ']': case ']':
return T_RBRACK; return T_RBRACK;
case '(':
return T_LPAREN;
case ')': case ')':
return T_RPAREN; return T_RPAREN;
case ',': case ',':
@@ -1863,9 +1875,14 @@ static int yylex_NORMAL(void)
return T_OP_XOR; return T_OP_XOR;
case '=': /* Either assignment or EQ */ case '=': /* Either assignment or EQ */
if (peek() == '=') { switch (peek()) {
case '=':
shiftChar(); shiftChar();
return T_OP_LOGICEQU; return T_OP_LOGICEQU;
case 'b':
case 'B':
shiftChar();
return T_TOKEN_B;
} }
return T_POP_EQUAL; return T_POP_EQUAL;
@@ -2004,6 +2021,12 @@ static int yylex_NORMAL(void)
/* Handle identifiers... or report garbage characters */ /* Handle identifiers... or report garbage characters */
case '(':
if (peek() != (unsigned char)"´"[0]) {
return T_LPAREN;
}
// fallthrough
default: default:
if (startsIdentifier(c)) { if (startsIdentifier(c)) {
int tokenType = readIdentifier(c); int tokenType = readIdentifier(c);

View File

@@ -142,6 +142,9 @@ static void print_usage(void)
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
#if YYDEBUG
yydebug = 1;
#endif
int ch; int ch;
char *ep; char *ep;

View File

@@ -664,13 +664,13 @@ enum {
%token T_Z80_SWAP "swap" %token T_Z80_SWAP "swap"
%token T_Z80_XOR "xor" %token T_Z80_XOR "xor"
%token T_TOKEN_A "a" %token T_TOKEN_A "( •̀A•́)" T_TOKEN_F "𝓕𝓾𝓬𝓴"
%token T_TOKEN_B "b" T_TOKEN_C "c" %token T_TOKEN_B "=B" T_TOKEN_C "♥(˘⌣˘ C)"
%token T_TOKEN_D "d" T_TOKEN_E "e" %token T_TOKEN_D ";D" T_TOKEN_E "(´ε` )♡" T_TOKEN_E_HEART "(´ε` )♡"
%token T_TOKEN_H "h" T_TOKEN_L "l" %token T_TOKEN_H "н" T_TOKEN_L_ARM "∠( ᐛ 」∠)_" T_TOKEN_L_FACE "∠( ᐛ 」∠)_" T_TOKEN_L_BODY "∠( ᐛ 」∠)_" T_TOKEN_L_LEG "∠( ᐛ 」∠)_"
%token T_MODE_AF "af" T_MODE_BC "bc" T_MODE_DE "de" T_MODE_SP "sp" %token T_MODE_AF "af" /* T_MODE_BC "bc" T_MODE_DE "de" */ T_MODE_SP "sp"
%token T_MODE_HL "hl" T_MODE_HL_DEC "hld/hl-" T_MODE_HL_INC "hli/hl+" %token T_MODE_HL_START "н∠( ᐛ 」∠)_" T_MODE_HL_DEC "hld/hl-" T_MODE_HL_INC "hli/hl+"
%token T_CC_NZ "nz" T_CC_Z "z" T_CC_NC "nc" // There is no T_CC_C, only T_TOKEN_C %token T_CC_NZ "nz" T_CC_Z "z" T_CC_NC "nc" T_CC_C "c"
%type <constValue> reg_r %type <constValue> reg_r
%type <constValue> reg_ss %type <constValue> reg_ss
@@ -2177,7 +2177,7 @@ op_a_n : reloc_8bit
| T_MODE_A T_COMMA reloc_8bit { $$ = $3; } | T_MODE_A T_COMMA reloc_8bit { $$ = $3; }
; ;
/* Register A: T_TOKEN_A wrapped in explicit parentheses, or HIGH(af) */
T_MODE_A	: T_LPAREN T_TOKEN_A T_RPAREN
		| T_OP_HIGH T_LPAREN T_MODE_AF T_RPAREN
;
@@ -2185,7 +2185,7 @@ T_MODE_B : T_TOKEN_B
| T_OP_HIGH T_LPAREN T_MODE_BC T_RPAREN | T_OP_HIGH T_LPAREN T_MODE_BC T_RPAREN
; ;
/* Register C: T_TOKEN_C followed by the "C" keyword (T_CC_C) and a closing paren, or LOW(bc) */
T_MODE_C	: T_TOKEN_C T_CC_C T_RPAREN
		| T_OP_LOW T_LPAREN T_MODE_BC T_RPAREN
;
@@ -2193,7 +2193,7 @@ T_MODE_D : T_TOKEN_D
| T_OP_HIGH T_LPAREN T_MODE_DE T_RPAREN | T_OP_HIGH T_LPAREN T_MODE_DE T_RPAREN
; ;
/* Register E: T_TOKEN_E, a closing paren, then T_TOKEN_E_HEART; or LOW(de) */
T_MODE_E	: T_TOKEN_E T_RPAREN T_TOKEN_E_HEART
		| T_OP_LOW T_LPAREN T_MODE_DE T_RPAREN
;
@@ -2201,10 +2201,19 @@ T_MODE_H : T_TOKEN_H
| T_OP_HIGH T_LPAREN T_MODE_HL T_RPAREN | T_OP_HIGH T_LPAREN T_MODE_HL T_RPAREN
; ;
/*
 * Register L: spelled as arm, face, body, a closing paren, then leg tokens
 * (the closing paren cannot be part of a keyword, so it is matched here); or LOW(hl)
 */
T_MODE_L	: T_TOKEN_L_ARM T_TOKEN_L_FACE T_TOKEN_L_BODY T_RPAREN T_TOKEN_L_LEG
		| T_OP_LOW T_LPAREN T_MODE_HL T_RPAREN
;
/*
 * The 16-bit register pairs BC, DE and HL are nonterminals here: each one is
 * recognized as the token sequence of its high register followed by the token
 * sequence of its low register.
 */

/* BC: T_TOKEN_B (returned by the lexer for `=B`), then the same tokens as register C */
T_MODE_BC : T_TOKEN_B T_TOKEN_C T_CC_C T_RPAREN
;
/* DE: T_TOKEN_D (returned by the lexer for `;D`), then the same tokens as register E */
T_MODE_DE : T_TOKEN_D T_TOKEN_E T_RPAREN T_TOKEN_E_HEART
;
/* HL: T_MODE_HL_START stands in for H plus L's "arm" token, then the rest of register L */
T_MODE_HL : T_MODE_HL_START T_TOKEN_L_FACE T_TOKEN_L_BODY T_RPAREN T_TOKEN_L_LEG
;
ccode_expr : ccode ccode_expr : ccode
| T_OP_LOGICNOT ccode_expr { | T_OP_LOGICNOT ccode_expr {
$$ = $2 ^ 1; $$ = $2 ^ 1;
@@ -2214,7 +2223,7 @@ ccode_expr : ccode
/* Condition codes; the carry condition has its own T_CC_C token instead of reusing T_TOKEN_C */
ccode		: T_CC_NZ { $$ = CC_NZ; }
		| T_CC_Z { $$ = CC_Z; }
		| T_CC_NC { $$ = CC_NC; }
		| T_CC_C { $$ = CC_C; }
;
reg_r : T_MODE_B { $$ = REG_B; } reg_r : T_MODE_B { $$ = REG_B; }
@@ -2230,7 +2239,7 @@ reg_r : T_MODE_B { $$ = REG_B; }
/* 16-bit register pairs including AF; AF is a parenthesized T_TOKEN_A followed by T_TOKEN_F */
reg_tt		: T_MODE_BC { $$ = REG_BC; }
		| T_MODE_DE { $$ = REG_DE; }
		| T_MODE_HL { $$ = REG_HL; }
		| T_LPAREN T_TOKEN_A T_RPAREN T_TOKEN_F { $$ = REG_AF; }
;
reg_ss : T_MODE_BC { $$ = REG_BC; } reg_ss : T_MODE_BC { $$ = REG_BC; }