From cd454d2e9aa7ced938a7457b85498757c54d12e7 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Fri, 1 Apr 2022 12:31:35 +0200 Subject: [PATCH] Hack in new register syntax Oh my god I want to die x_x --- src/asm/lexer.c | 73 +++++++++++++++++++++++++++++++----------------- src/asm/main.c | 3 ++ src/asm/parser.y | 35 ++++++++++++++--------- 3 files changed, 73 insertions(+), 38 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 19c98edd..36c0cf7a 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -148,24 +148,29 @@ static struct KeywordMapping { {"NZ", T_CC_NZ}, {"Z", T_CC_Z}, {"NC", T_CC_NC}, - /* Handled after as T_TOKEN_C */ - /* { "C", T_CC_C }, */ + {"C", T_CC_C}, - {"AF", T_MODE_AF}, - {"BC", T_MODE_BC}, - {"DE", T_MODE_DE}, - {"HL", T_MODE_HL}, + {"•̀A•́)𝓕𝓾𝓬𝓴", T_MODE_AF}, + // {"BC", T_MODE_BC}, + // {"DE", T_MODE_DE}, + {"н∠(", T_MODE_HL_START}, {"SP", T_MODE_SP}, - {"HLD", T_MODE_HL_DEC}, - {"HLI", T_MODE_HL_INC}, + {"н∠( ᐛ 」∠)_👁", T_MODE_HL_DEC}, + {"н∠( ᐛ 」∠)_👎", T_MODE_HL_INC}, - {"A", T_TOKEN_A}, - {"B", T_TOKEN_B}, - {"C", T_TOKEN_C}, - {"D", T_TOKEN_D}, - {"E", T_TOKEN_E}, - {"H", T_TOKEN_H}, - {"L", T_TOKEN_L}, + // HACK: normally this is surrounded by parens, but this is annoying to special-case, + // so we use cooperation from the parser. + {"•̀A•́", T_TOKEN_A}, + // {"=B", T_TOKEN_B}, HACK: This begins with a non-identifier character, so we'll cheat + {"♥(˘⌣˘", T_TOKEN_C}, // HACK: same for "C" after the space & closing paren + // {";D", T_TOKEN_D}, HACK: also needs to be special-cased. God I feel dirty. + {"(´ε`", T_TOKEN_E}, + {"♡", T_TOKEN_E_HEART}, + {"н", T_TOKEN_H}, + {"∠(", T_TOKEN_L_ARM}, + {"ᐛ", T_TOKEN_L_FACE}, + {"」∠", T_TOKEN_L_BODY}, + {"_", T_TOKEN_L_LEG}, {"DEF", T_OP_DEF}, @@ -578,16 +583,16 @@ struct KeywordDictNode { * In turn, this allows greatly simplifying checking an index into this array, * which should help speed up the lexer. */ - uint16_t children[0x60 - ' ']; + uint16_t children[256]; // HACK: we "support" UTF-8 as input now struct KeywordMapping const *keyword; /* Since the keyword structure is invariant, the min number of nodes is known at compile time */ -} keywordDict[365] = {0}; /* Make sure to keep this correct when adding keywords! */ +} keywordDict[690] = {0}; /* Nice */ /* Convert a char into its index into the dict */ static uint8_t dictIndex(char c) { /* Translate uppercase to lowercase (roughly) */ - if (c > 0x60) + if (c > 0x60 && c < 0x80) c = c - ('a' - 'A'); return c - ' '; } @@ -609,8 +614,9 @@ void lexer_Init(void) /* Walk the dictionary, creating intermediate nodes for the keyword */ for (char const *ptr = keywords[i].name; *ptr; ptr++) { + unsigned char index = (unsigned char)*ptr - ' '; /* We should be able to assume all entries are well-formed */ - if (keywordDict[nodeID].children[*ptr - ' '] == 0) { + if (keywordDict[nodeID].children[index] == 0) { /* * If this gets tripped up, set the size of keywordDict to * something high, compile with `-DPRINT_NODE_COUNT` (see below), @@ -619,10 +625,10 @@ void lexer_Init(void) assert(usedNodes < sizeof(keywordDict) / sizeof(*keywordDict)); /* There is no node at that location, grab one from the pool */ - keywordDict[nodeID].children[*ptr - ' '] = usedNodes; + keywordDict[nodeID].children[index] = usedNodes; usedNodes++; } - nodeID = keywordDict[nodeID].children[*ptr - ' ']; + nodeID = keywordDict[nodeID].children[index]; } /* This assumes that no two keywords have the same name */ @@ -1289,11 +1295,15 @@ static uint32_t readGfxConstant(void) static bool startsIdentifier(int c) { // Anonymous labels internally start with '!' - return (c <= 'Z' && c >= 'A') || (c <= 'z' && c >= 'a') || c == '.' || c == '_'; + return (c <= 'Z' && c >= 'A') || (c <= 'z' && c >= 'a') || c == '.' || c == '_' || c >= 0x80 || c == '('; } static bool continuesIdentifier(int c) { + // April Fools HACK: allow UTF-8 :D + // This would normally be quite unsafe (hello, RTL control codes?), + // but since this is for a joke I'll also make the code a joke + // Also, hi if you're reading this! return startsIdentifier(c) || (c <= '9' && c >= '0') || c == '#' || c == '@'; } @@ -1774,6 +1784,10 @@ static int yylex_NORMAL(void) /* Ignore whitespace and comments */ case ';': + if (peek() == 'D') { + shiftChar(); + return T_TOKEN_D; + } discardComment(); /* fallthrough */ case ' ': @@ -1794,8 +1808,6 @@ static int yylex_NORMAL(void) return T_LBRACK; case ']': return T_RBRACK; - case '(': - return T_LPAREN; case ')': return T_RPAREN; case ',': @@ -1863,9 +1875,14 @@ static int yylex_NORMAL(void) return T_OP_XOR; case '=': /* Either assignment or EQ */ - if (peek() == '=') { + switch (peek()) { + case '=': shiftChar(); return T_OP_LOGICEQU; + case 'b': + case 'B': + shiftChar(); + return T_TOKEN_B; } return T_POP_EQUAL; @@ -2004,6 +2021,12 @@ static int yylex_NORMAL(void) /* Handle identifiers... or report garbage characters */ + case '(': + if (peek() != (unsigned char)"´"[0]) { + return T_LPAREN; + } + // fallthrough + default: if (startsIdentifier(c)) { int tokenType = readIdentifier(c); diff --git a/src/asm/main.c b/src/asm/main.c index 5d2bf9c4..c23af55d 100644 --- a/src/asm/main.c +++ b/src/asm/main.c @@ -142,6 +142,9 @@ static void print_usage(void) int main(int argc, char *argv[]) { + #if YYDEBUG + yydebug = 1; + #endif int ch; char *ep; diff --git a/src/asm/parser.y b/src/asm/parser.y index 2acddcc1..065cc366 100644 --- a/src/asm/parser.y +++ b/src/asm/parser.y @@ -664,13 +664,13 @@ enum { %token T_Z80_SWAP "swap" %token T_Z80_XOR "xor" -%token T_TOKEN_A "a" -%token T_TOKEN_B "b" T_TOKEN_C "c" -%token T_TOKEN_D "d" T_TOKEN_E "e" -%token T_TOKEN_H "h" T_TOKEN_L "l" -%token T_MODE_AF "af" T_MODE_BC "bc" T_MODE_DE "de" T_MODE_SP "sp" -%token T_MODE_HL "hl" T_MODE_HL_DEC "hld/hl-" T_MODE_HL_INC "hli/hl+" -%token T_CC_NZ "nz" T_CC_Z "z" T_CC_NC "nc" // There is no T_CC_C, only T_TOKEN_C +%token T_TOKEN_A "( •̀A•́)" T_TOKEN_F "𝓕𝓾𝓬𝓴" +%token T_TOKEN_B "=B" T_TOKEN_C "♥(˘⌣˘ C)" +%token T_TOKEN_D ";D" T_TOKEN_E "(´ε` )♡" T_TOKEN_E_HEART "(´ε` )♡" +%token T_TOKEN_H "н" T_TOKEN_L_ARM "∠( ᐛ 」∠)_" T_TOKEN_L_FACE "∠( ᐛ 」∠)_" T_TOKEN_L_BODY "∠( ᐛ 」∠)_" T_TOKEN_L_LEG "∠( ᐛ 」∠)_" +%token T_MODE_AF "af" /* T_MODE_BC "bc" T_MODE_DE "de" */ T_MODE_SP "sp" +%token T_MODE_HL_START "н∠( ᐛ 」∠)_" T_MODE_HL_DEC "hld/hl-" T_MODE_HL_INC "hli/hl+" +%token T_CC_NZ "nz" T_CC_Z "z" T_CC_NC "nc" T_CC_C "c" %type reg_r %type reg_ss @@ -2177,7 +2177,7 @@ op_a_n : reloc_8bit | T_MODE_A T_COMMA reloc_8bit { $$ = $3; } ; -T_MODE_A : T_TOKEN_A +T_MODE_A : T_LPAREN T_TOKEN_A T_RPAREN | T_OP_HIGH T_LPAREN T_MODE_AF T_RPAREN ; @@ -2185,7 +2185,7 @@ T_MODE_B : T_TOKEN_B | T_OP_HIGH T_LPAREN T_MODE_BC T_RPAREN ; -T_MODE_C : T_TOKEN_C +T_MODE_C : T_TOKEN_C T_CC_C T_RPAREN | T_OP_LOW T_LPAREN T_MODE_BC T_RPAREN ; @@ -2193,7 +2193,7 @@ T_MODE_D : T_TOKEN_D | T_OP_HIGH T_LPAREN T_MODE_DE T_RPAREN ; -T_MODE_E : T_TOKEN_E +T_MODE_E : T_TOKEN_E T_RPAREN T_TOKEN_E_HEART | T_OP_LOW T_LPAREN T_MODE_DE T_RPAREN ; @@ -2201,10 +2201,19 @@ T_MODE_H : T_TOKEN_H | T_OP_HIGH T_LPAREN T_MODE_HL T_RPAREN ; -T_MODE_L : T_TOKEN_L +T_MODE_L : T_TOKEN_L_ARM T_TOKEN_L_FACE T_TOKEN_L_BODY T_RPAREN T_TOKEN_L_LEG | T_OP_LOW T_LPAREN T_MODE_HL T_RPAREN ; +T_MODE_BC : T_TOKEN_B T_TOKEN_C T_CC_C T_RPAREN +; + +T_MODE_DE : T_TOKEN_D T_TOKEN_E T_RPAREN T_TOKEN_E_HEART +; + +T_MODE_HL : T_MODE_HL_START T_TOKEN_L_FACE T_TOKEN_L_BODY T_RPAREN T_TOKEN_L_LEG +; + ccode_expr : ccode | T_OP_LOGICNOT ccode_expr { $$ = $2 ^ 1; @@ -2214,7 +2223,7 @@ ccode_expr : ccode ccode : T_CC_NZ { $$ = CC_NZ; } | T_CC_Z { $$ = CC_Z; } | T_CC_NC { $$ = CC_NC; } - | T_TOKEN_C { $$ = CC_C; } + | T_CC_C { $$ = CC_C; } ; reg_r : T_MODE_B { $$ = REG_B; } @@ -2230,7 +2239,7 @@ reg_r : T_MODE_B { $$ = REG_B; } reg_tt : T_MODE_BC { $$ = REG_BC; } | T_MODE_DE { $$ = REG_DE; } | T_MODE_HL { $$ = REG_HL; } - | T_MODE_AF { $$ = REG_AF; } + | T_LPAREN T_TOKEN_A T_RPAREN T_TOKEN_F { $$ = REG_AF; } ; reg_ss : T_MODE_BC { $$ = REG_BC; }