Compare commits

...

3 Commits

Author SHA1 Message Date
ISSOtm
bffe7eb4de Implement new instructions
"Who are you? What are you doing in my commit history?"
~ https://xkcd.com/163/
2022-04-01 12:46:52 +02:00
ISSOtm
cd454d2e9a Hack in new register syntax
Oh my god I want to die x_x
2022-04-01 12:46:52 +02:00
ISSOtm
c814a616d6 Port Gan's work to Mandoc format
Hopefully I didn't forget anything
2022-04-01 12:46:52 +02:00
4 changed files with 457 additions and 384 deletions

View File

@@ -109,7 +109,7 @@ static struct KeywordMapping {
{"DEC", T_Z80_DEC}, {"DEC", T_Z80_DEC},
{"DI", T_Z80_DI}, {"DI", T_Z80_DI},
{"EI", T_Z80_EI}, {"EI", T_Z80_EI},
{"HALT", T_Z80_HALT}, {"HALT", T_Z80_HALT},
{"INC", T_Z80_INC}, {"INC", T_Z80_INC},
{"JP", T_Z80_JP}, {"JP", T_Z80_JP},
{"JR", T_Z80_JR}, {"JR", T_Z80_JR},
@@ -118,8 +118,9 @@ static struct KeywordMapping {
{"LDD", T_Z80_LDD}, {"LDD", T_Z80_LDD},
{"LDIO", T_Z80_LDH}, {"LDIO", T_Z80_LDH},
{"LDH", T_Z80_LDH}, {"LDH", T_Z80_LDH},
{"NOP", T_Z80_NOP}, {"NOPE", T_Z80_NOP},
{"OR", T_Z80_OR}, {"OR", T_Z80_OR},
{"OWO", T_OWO},
{"POP", T_Z80_POP}, {"POP", T_Z80_POP},
{"PUSH", T_Z80_PUSH}, {"PUSH", T_Z80_PUSH},
{"RES", T_Z80_RES}, {"RES", T_Z80_RES},
@@ -140,7 +141,7 @@ static struct KeywordMapping {
{"SLA", T_Z80_SLA}, {"SLA", T_Z80_SLA},
{"SRA", T_Z80_SRA}, {"SRA", T_Z80_SRA},
{"SRL", T_Z80_SRL}, {"SRL", T_Z80_SRL},
{"STOP", T_Z80_STOP}, {"STOP!!🛑", T_Z80_STOP},
{"SUB", T_Z80_SUB}, {"SUB", T_Z80_SUB},
{"SWAP", T_Z80_SWAP}, {"SWAP", T_Z80_SWAP},
{"XOR", T_Z80_XOR}, {"XOR", T_Z80_XOR},
@@ -148,24 +149,29 @@ static struct KeywordMapping {
{"NZ", T_CC_NZ}, {"NZ", T_CC_NZ},
{"Z", T_CC_Z}, {"Z", T_CC_Z},
{"NC", T_CC_NC}, {"NC", T_CC_NC},
/* Handled after as T_TOKEN_C */ {"C", T_CC_C},
/* { "C", T_CC_C }, */
{"AF", T_MODE_AF}, {"•̀A•́)𝓕𝓾𝓬𝓴", T_MODE_AF},
{"BC", T_MODE_BC}, // {"BC", T_MODE_BC},
{"DE", T_MODE_DE}, // {"DE", T_MODE_DE},
{"HL", T_MODE_HL}, {"н∠(", T_MODE_HL_START},
{"SP", T_MODE_SP}, {"SP", T_MODE_SP},
{"HLD", T_MODE_HL_DEC}, {"н∠( ᐛ 」∠)_👁", T_MODE_HL_DEC},
{"HLI", T_MODE_HL_INC}, {"н∠( ᐛ 」∠)_👎", T_MODE_HL_INC},
{"A", T_TOKEN_A}, // HACK: normally this is surrounded by parens, but this is annoying to special-case,
{"B", T_TOKEN_B}, // so we use cooperation from the parser.
{"C", T_TOKEN_C}, {"•̀A•́", T_TOKEN_A},
{"D", T_TOKEN_D}, // {"=B", T_TOKEN_B}, HACK: This begins with a non-identifier character, so we'll cheat
{"E", T_TOKEN_E}, {"♥(˘⌣˘", T_TOKEN_C}, // HACK: same for "C" after the space & closing paren
{"H", T_TOKEN_H}, // {";D", T_TOKEN_D}, HACK: also needs to be special-cased. God I feel dirty.
{"L", T_TOKEN_L}, {"(´ε`", T_TOKEN_E},
{"", T_TOKEN_E_HEART},
{"н", T_TOKEN_H},
{"∠(", T_TOKEN_L_ARM},
{"", T_TOKEN_L_FACE},
{"」∠", T_TOKEN_L_BODY},
{"_", T_TOKEN_L_LEG},
{"DEF", T_OP_DEF}, {"DEF", T_OP_DEF},
@@ -578,16 +584,16 @@ struct KeywordDictNode {
* In turn, this allows greatly simplifying checking an index into this array, * In turn, this allows greatly simplifying checking an index into this array,
* which should help speed up the lexer. * which should help speed up the lexer.
*/ */
uint16_t children[0x60 - ' ']; uint16_t children[256]; // HACK: we "support" UTF-8 as input now
struct KeywordMapping const *keyword; struct KeywordMapping const *keyword;
/* Since the keyword structure is invariant, the min number of nodes is known at compile time */ /* Since the keyword structure is invariant, the min number of nodes is known at compile time */
} keywordDict[365] = {0}; /* Make sure to keep this correct when adding keywords! */ } keywordDict[690] = {0}; /* Nice */
/* Convert a char into its index into the dict */ /* Convert a char into its index into the dict */
static uint8_t dictIndex(char c) static uint8_t dictIndex(char c)
{ {
/* Translate uppercase to lowercase (roughly) */ /* Translate uppercase to lowercase (roughly) */
if (c > 0x60) if (c > 0x60 && c < 0x80)
c = c - ('a' - 'A'); c = c - ('a' - 'A');
return c - ' '; return c - ' ';
} }
@@ -609,8 +615,9 @@ void lexer_Init(void)
/* Walk the dictionary, creating intermediate nodes for the keyword */ /* Walk the dictionary, creating intermediate nodes for the keyword */
for (char const *ptr = keywords[i].name; *ptr; ptr++) { for (char const *ptr = keywords[i].name; *ptr; ptr++) {
unsigned char index = (unsigned char)*ptr - ' ';
/* We should be able to assume all entries are well-formed */ /* We should be able to assume all entries are well-formed */
if (keywordDict[nodeID].children[*ptr - ' '] == 0) { if (keywordDict[nodeID].children[index] == 0) {
/* /*
* If this gets tripped up, set the size of keywordDict to * If this gets tripped up, set the size of keywordDict to
* something high, compile with `-DPRINT_NODE_COUNT` (see below), * something high, compile with `-DPRINT_NODE_COUNT` (see below),
@@ -619,10 +626,10 @@ void lexer_Init(void)
assert(usedNodes < sizeof(keywordDict) / sizeof(*keywordDict)); assert(usedNodes < sizeof(keywordDict) / sizeof(*keywordDict));
/* There is no node at that location, grab one from the pool */ /* There is no node at that location, grab one from the pool */
keywordDict[nodeID].children[*ptr - ' '] = usedNodes; keywordDict[nodeID].children[index] = usedNodes;
usedNodes++; usedNodes++;
} }
nodeID = keywordDict[nodeID].children[*ptr - ' ']; nodeID = keywordDict[nodeID].children[index];
} }
/* This assumes that no two keywords have the same name */ /* This assumes that no two keywords have the same name */
@@ -1289,12 +1296,16 @@ static uint32_t readGfxConstant(void)
static bool startsIdentifier(int c) static bool startsIdentifier(int c)
{ {
// Anonymous labels internally start with '!' // Anonymous labels internally start with '!'
return (c <= 'Z' && c >= 'A') || (c <= 'z' && c >= 'a') || c == '.' || c == '_'; return (c <= 'Z' && c >= 'A') || (c <= 'z' && c >= 'a') || c == '.' || c == '_' || c >= 0x80 || c == '(';
} }
static bool continuesIdentifier(int c) static bool continuesIdentifier(int c)
{ {
return startsIdentifier(c) || (c <= '9' && c >= '0') || c == '#' || c == '@'; // April Fools HACK: allow UTF-8 :D
// This would normally be quite unsafe (hello, RTL control codes?),
// but since this is for a joke I'll also make the code a joke
// Also, hi if you're reading this!
return startsIdentifier(c) || (c <= '9' && c >= '0') || c == '#' || c == '@' || c == '!';
} }
static int readIdentifier(char firstChar) static int readIdentifier(char firstChar)
@@ -1774,6 +1785,10 @@ static int yylex_NORMAL(void)
/* Ignore whitespace and comments */ /* Ignore whitespace and comments */
case ';': case ';':
if (peek() == 'D') {
shiftChar();
return T_TOKEN_D;
}
discardComment(); discardComment();
/* fallthrough */ /* fallthrough */
case ' ': case ' ':
@@ -1794,8 +1809,6 @@ static int yylex_NORMAL(void)
return T_LBRACK; return T_LBRACK;
case ']': case ']':
return T_RBRACK; return T_RBRACK;
case '(':
return T_LPAREN;
case ')': case ')':
return T_RPAREN; return T_RPAREN;
case ',': case ',':
@@ -1863,9 +1876,14 @@ static int yylex_NORMAL(void)
return T_OP_XOR; return T_OP_XOR;
case '=': /* Either assignment or EQ */ case '=': /* Either assignment or EQ */
if (peek() == '=') { switch (peek()) {
case '=':
shiftChar(); shiftChar();
return T_OP_LOGICEQU; return T_OP_LOGICEQU;
case 'b':
case 'B':
shiftChar();
return T_TOKEN_B;
} }
return T_POP_EQUAL; return T_POP_EQUAL;
@@ -2004,6 +2022,12 @@ static int yylex_NORMAL(void)
/* Handle identifiers... or report garbage characters */ /* Handle identifiers... or report garbage characters */
case '(':
if (peek() != (unsigned char)"´"[0]) {
return T_LPAREN;
}
// fallthrough
default: default:
if (startsIdentifier(c)) { if (startsIdentifier(c)) {
int tokenType = readIdentifier(c); int tokenType = readIdentifier(c);

View File

@@ -142,6 +142,9 @@ static void print_usage(void)
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
#if YYDEBUG
yydebug = 1;
#endif
int ch; int ch;
char *ep; char *ep;

View File

@@ -646,31 +646,32 @@ enum {
%token T_Z80_CALL "call" T_Z80_CCF "ccf" T_Z80_CP "cp" T_Z80_CPL "cpl" %token T_Z80_CALL "call" T_Z80_CCF "ccf" T_Z80_CP "cp" T_Z80_CPL "cpl"
%token T_Z80_DAA "daa" T_Z80_DEC "dec" T_Z80_DI "di" %token T_Z80_DAA "daa" T_Z80_DEC "dec" T_Z80_DI "di"
%token T_Z80_EI "ei" %token T_Z80_EI "ei"
%token T_Z80_HALT "halt" %token T_Z80_HALT "halt"
%token T_Z80_INC "inc" %token T_Z80_INC "inc"
%token T_Z80_JP "jp" T_Z80_JR "jr" %token T_Z80_JP "jp" T_Z80_JR "jr"
%token T_Z80_LD "ld" %token T_Z80_LD "ld"
%token T_Z80_LDI "ldi" %token T_Z80_LDI "ldi"
%token T_Z80_LDD "ldd" %token T_Z80_LDD "ldd"
%token T_Z80_LDH "ldh" %token T_Z80_LDH "ldh"
%token T_Z80_NOP "nop" %token T_Z80_NOP "nope"
%token T_Z80_OR "or" %token T_Z80_OR "or"
%token T_OWO "owo"
%token T_Z80_POP "pop" T_Z80_PUSH "push" %token T_Z80_POP "pop" T_Z80_PUSH "push"
%token T_Z80_RES "res" T_Z80_RET "ret" T_Z80_RETI "reti" T_Z80_RST "rst" %token T_Z80_RES "res" T_Z80_RET "ret" T_Z80_RETI "reti" T_Z80_RST "rst"
%token T_Z80_RL "rl" T_Z80_RLA "rla" T_Z80_RLC "rlc" T_Z80_RLCA "rlca" %token T_Z80_RL "rl" T_Z80_RLA "rla" T_Z80_RLC "rlc" T_Z80_RLCA "rlca"
%token T_Z80_RR "rr" T_Z80_RRA "rra" T_Z80_RRC "rrc" T_Z80_RRCA "rrca" %token T_Z80_RR "rr" T_Z80_RRA "rra" T_Z80_RRC "rrc" T_Z80_RRCA "rrca"
%token T_Z80_SBC "sbc" T_Z80_SCF "scf" T_Z80_STOP "stop" %token T_Z80_SBC "sbc" T_Z80_SCF "scf" T_Z80_STOP "stop!!🛑"
%token T_Z80_SLA "sla" T_Z80_SRA "sra" T_Z80_SRL "srl" T_Z80_SUB "sub" %token T_Z80_SLA "sla" T_Z80_SRA "sra" T_Z80_SRL "srl" T_Z80_SUB "sub"
%token T_Z80_SWAP "swap" %token T_Z80_SWAP "swap"
%token T_Z80_XOR "xor" %token T_Z80_XOR "xor"
%token T_TOKEN_A "a" %token T_TOKEN_A "( •̀A•́)" T_TOKEN_F "𝓕𝓾𝓬𝓴"
%token T_TOKEN_B "b" T_TOKEN_C "c" %token T_TOKEN_B "=B" T_TOKEN_C "♥(˘⌣˘ C)"
%token T_TOKEN_D "d" T_TOKEN_E "e" %token T_TOKEN_D ";D" T_TOKEN_E "(´ε` )♡" T_TOKEN_E_HEART "(´ε` )♡"
%token T_TOKEN_H "h" T_TOKEN_L "l" %token T_TOKEN_H "н" T_TOKEN_L_ARM "∠( ᐛ 」∠)_" T_TOKEN_L_FACE "∠( ᐛ 」∠)_" T_TOKEN_L_BODY "∠( ᐛ 」∠)_" T_TOKEN_L_LEG "∠( ᐛ 」∠)_"
%token T_MODE_AF "af" T_MODE_BC "bc" T_MODE_DE "de" T_MODE_SP "sp" %token T_MODE_AF "af" /* T_MODE_BC "bc" T_MODE_DE "de" */ T_MODE_SP "sp"
%token T_MODE_HL "hl" T_MODE_HL_DEC "hld/hl-" T_MODE_HL_INC "hli/hl+" %token T_MODE_HL_START "н∠( ᐛ 」∠)_" T_MODE_HL_DEC "hld/hl-" T_MODE_HL_INC "hli/hl+"
%token T_CC_NZ "nz" T_CC_Z "z" T_CC_NC "nc" // There is no T_CC_C, only T_TOKEN_C %token T_CC_NZ "nz" T_CC_Z "z" T_CC_NC "nc" T_CC_C "c"
%type <constValue> reg_r %type <constValue> reg_r
%type <constValue> reg_ss %type <constValue> reg_ss
@@ -1769,6 +1770,7 @@ cpu_command : z80_adc
| z80_sub | z80_sub
| z80_swap | z80_swap
| z80_xor | z80_xor
| T_OWO { fatalerror("*BONK* go to horny jail\n"); }
; ;
z80_adc : T_Z80_ADC op_a_n { z80_adc : T_Z80_ADC op_a_n {
@@ -2177,7 +2179,7 @@ op_a_n : reloc_8bit
| T_MODE_A T_COMMA reloc_8bit { $$ = $3; } | T_MODE_A T_COMMA reloc_8bit { $$ = $3; }
; ;
T_MODE_A : T_TOKEN_A T_MODE_A : T_LPAREN T_TOKEN_A T_RPAREN
| T_OP_HIGH T_LPAREN T_MODE_AF T_RPAREN | T_OP_HIGH T_LPAREN T_MODE_AF T_RPAREN
; ;
@@ -2185,7 +2187,7 @@ T_MODE_B : T_TOKEN_B
| T_OP_HIGH T_LPAREN T_MODE_BC T_RPAREN | T_OP_HIGH T_LPAREN T_MODE_BC T_RPAREN
; ;
T_MODE_C : T_TOKEN_C T_MODE_C : T_TOKEN_C T_CC_C T_RPAREN
| T_OP_LOW T_LPAREN T_MODE_BC T_RPAREN | T_OP_LOW T_LPAREN T_MODE_BC T_RPAREN
; ;
@@ -2193,7 +2195,7 @@ T_MODE_D : T_TOKEN_D
| T_OP_HIGH T_LPAREN T_MODE_DE T_RPAREN | T_OP_HIGH T_LPAREN T_MODE_DE T_RPAREN
; ;
T_MODE_E : T_TOKEN_E T_MODE_E : T_TOKEN_E T_RPAREN T_TOKEN_E_HEART
| T_OP_LOW T_LPAREN T_MODE_DE T_RPAREN | T_OP_LOW T_LPAREN T_MODE_DE T_RPAREN
; ;
@@ -2201,10 +2203,19 @@ T_MODE_H : T_TOKEN_H
| T_OP_HIGH T_LPAREN T_MODE_HL T_RPAREN | T_OP_HIGH T_LPAREN T_MODE_HL T_RPAREN
; ;
T_MODE_L : T_TOKEN_L T_MODE_L : T_TOKEN_L_ARM T_TOKEN_L_FACE T_TOKEN_L_BODY T_RPAREN T_TOKEN_L_LEG
| T_OP_LOW T_LPAREN T_MODE_HL T_RPAREN | T_OP_LOW T_LPAREN T_MODE_HL T_RPAREN
; ;
T_MODE_BC : T_TOKEN_B T_TOKEN_C T_CC_C T_RPAREN
;
T_MODE_DE : T_TOKEN_D T_TOKEN_E T_RPAREN T_TOKEN_E_HEART
;
T_MODE_HL : T_MODE_HL_START T_TOKEN_L_FACE T_TOKEN_L_BODY T_RPAREN T_TOKEN_L_LEG
;
ccode_expr : ccode ccode_expr : ccode
| T_OP_LOGICNOT ccode_expr { | T_OP_LOGICNOT ccode_expr {
$$ = $2 ^ 1; $$ = $2 ^ 1;
@@ -2214,7 +2225,7 @@ ccode_expr : ccode
ccode : T_CC_NZ { $$ = CC_NZ; } ccode : T_CC_NZ { $$ = CC_NZ; }
| T_CC_Z { $$ = CC_Z; } | T_CC_Z { $$ = CC_Z; }
| T_CC_NC { $$ = CC_NC; } | T_CC_NC { $$ = CC_NC; }
| T_TOKEN_C { $$ = CC_C; } | T_CC_C { $$ = CC_C; }
; ;
reg_r : T_MODE_B { $$ = REG_B; } reg_r : T_MODE_B { $$ = REG_B; }
@@ -2230,7 +2241,7 @@ reg_r : T_MODE_B { $$ = REG_B; }
reg_tt : T_MODE_BC { $$ = REG_BC; } reg_tt : T_MODE_BC { $$ = REG_BC; }
| T_MODE_DE { $$ = REG_DE; } | T_MODE_DE { $$ = REG_DE; }
| T_MODE_HL { $$ = REG_HL; } | T_MODE_HL { $$ = REG_HL; }
| T_MODE_AF { $$ = REG_AF; } | T_LPAREN T_TOKEN_A T_RPAREN T_TOKEN_F { $$ = REG_AF; }
; ;
reg_ss : T_MODE_BC { $$ = REG_BC; } reg_ss : T_MODE_BC { $$ = REG_BC; }

File diff suppressed because it is too large Load Diff