Compare commits

...

3 Commits

Author SHA1 Message Date
ISSOtm
bffe7eb4de Implement new instructions
"Who are you? What are you doing in my commit history?"
~ https://xkcd.com/163/
2022-04-01 12:46:52 +02:00
ISSOtm
cd454d2e9a Hack in new register syntax
Oh my god I want to die x_x
2022-04-01 12:46:52 +02:00
ISSOtm
c814a616d6 Port Gan's work to Mandoc format
Hopefully I didn't forget anything
2022-04-01 12:46:52 +02:00
4 changed files with 457 additions and 384 deletions

View File

@@ -109,7 +109,7 @@ static struct KeywordMapping {
{"DEC", T_Z80_DEC},
{"DI", T_Z80_DI},
{"EI", T_Z80_EI},
{"HALT", T_Z80_HALT},
{"HALT", T_Z80_HALT},
{"INC", T_Z80_INC},
{"JP", T_Z80_JP},
{"JR", T_Z80_JR},
@@ -118,8 +118,9 @@ static struct KeywordMapping {
{"LDD", T_Z80_LDD},
{"LDIO", T_Z80_LDH},
{"LDH", T_Z80_LDH},
{"NOP", T_Z80_NOP},
{"NOPE", T_Z80_NOP},
{"OR", T_Z80_OR},
{"OWO", T_OWO},
{"POP", T_Z80_POP},
{"PUSH", T_Z80_PUSH},
{"RES", T_Z80_RES},
@@ -140,7 +141,7 @@ static struct KeywordMapping {
{"SLA", T_Z80_SLA},
{"SRA", T_Z80_SRA},
{"SRL", T_Z80_SRL},
{"STOP", T_Z80_STOP},
{"STOP!!🛑", T_Z80_STOP},
{"SUB", T_Z80_SUB},
{"SWAP", T_Z80_SWAP},
{"XOR", T_Z80_XOR},
@@ -148,24 +149,29 @@ static struct KeywordMapping {
{"NZ", T_CC_NZ},
{"Z", T_CC_Z},
{"NC", T_CC_NC},
/* Handled after as T_TOKEN_C */
/* { "C", T_CC_C }, */
{"C", T_CC_C},
{"AF", T_MODE_AF},
{"BC", T_MODE_BC},
{"DE", T_MODE_DE},
{"HL", T_MODE_HL},
{"•̀A•́)𝓕𝓾𝓬𝓴", T_MODE_AF},
// {"BC", T_MODE_BC},
// {"DE", T_MODE_DE},
{"н∠(", T_MODE_HL_START},
{"SP", T_MODE_SP},
{"HLD", T_MODE_HL_DEC},
{"HLI", T_MODE_HL_INC},
{"н∠( ᐛ 」∠)_👁", T_MODE_HL_DEC},
{"н∠( ᐛ 」∠)_👎", T_MODE_HL_INC},
{"A", T_TOKEN_A},
{"B", T_TOKEN_B},
{"C", T_TOKEN_C},
{"D", T_TOKEN_D},
{"E", T_TOKEN_E},
{"H", T_TOKEN_H},
{"L", T_TOKEN_L},
// HACK: normally this is surrounded by parens, but this is annoying to special-case,
// so we use cooperation from the parser.
{"•̀A•́", T_TOKEN_A},
// {"=B", T_TOKEN_B}, HACK: This begins with a non-identifier character, so we'll cheat
{"♥(˘⌣˘", T_TOKEN_C}, // HACK: same for "C" after the space & closing paren
// {";D", T_TOKEN_D}, HACK: also needs to be special-cased. God I feel dirty.
{"(´ε`", T_TOKEN_E},
{"", T_TOKEN_E_HEART},
{"н", T_TOKEN_H},
{"∠(", T_TOKEN_L_ARM},
{"", T_TOKEN_L_FACE},
{"」∠", T_TOKEN_L_BODY},
{"_", T_TOKEN_L_LEG},
{"DEF", T_OP_DEF},
@@ -578,16 +584,16 @@ struct KeywordDictNode {
* In turn, this allows greatly simplifying checking an index into this array,
* which should help speed up the lexer.
*/
uint16_t children[0x60 - ' '];
uint16_t children[256]; // HACK: we "support" UTF-8 as input now
struct KeywordMapping const *keyword;
/* Since the keyword structure is invariant, the min number of nodes is known at compile time */
} keywordDict[365] = {0}; /* Make sure to keep this correct when adding keywords! */
} keywordDict[690] = {0}; /* Nice */
/*
 * Convert a char into its index into the dict.
 * The index is the character's offset from ' ', truncated to uint8_t by the return type.
 * NOTE(review): this text is a diff rendering without +/- markers; the two `if` lines
 * below are presumably the pre-change and post-change versions of the same condition.
 */
static uint8_t dictIndex(char c)
{
/*
 * Characters above 0x60 (and, per the second condition, below 0x80) are shifted down
 * by 'a' - 'A'. This maps lowercase ASCII letters onto their uppercase counterparts
 * (roughly: the few non-letter characters in that range are shifted as well).
 */
if (c > 0x60)
if (c > 0x60 && c < 0x80)
c = c - ('a' - 'A');
return c - ' ';
}
@@ -609,8 +615,9 @@ void lexer_Init(void)
/* Walk the dictionary, creating intermediate nodes for the keyword */
for (char const *ptr = keywords[i].name; *ptr; ptr++) {
unsigned char index = (unsigned char)*ptr - ' ';
/* We should be able to assume all entries are well-formed */
if (keywordDict[nodeID].children[*ptr - ' '] == 0) {
if (keywordDict[nodeID].children[index] == 0) {
/*
* If this gets tripped up, set the size of keywordDict to
* something high, compile with `-DPRINT_NODE_COUNT` (see below),
@@ -619,10 +626,10 @@ void lexer_Init(void)
assert(usedNodes < sizeof(keywordDict) / sizeof(*keywordDict));
/* There is no node at that location, grab one from the pool */
keywordDict[nodeID].children[*ptr - ' '] = usedNodes;
keywordDict[nodeID].children[index] = usedNodes;
usedNodes++;
}
nodeID = keywordDict[nodeID].children[*ptr - ' '];
nodeID = keywordDict[nodeID].children[index];
}
/* This assumes that no two keywords have the same name */
@@ -1289,12 +1296,16 @@ static uint32_t readGfxConstant(void)
/*
 * Tell whether `c` may be the first character of an identifier.
 * NOTE(review): this text is a diff rendering without +/- markers, so the two `return`
 * lines below are presumably the pre-change and post-change versions of one statement;
 * read literally as C, only the first one would ever execute.
 */
static bool startsIdentifier(int c)
{
// Anonymous labels internally start with '!'
// First form: accepts ASCII letters, '.' and '_'
return (c <= 'Z' && c >= 'A') || (c <= 'z' && c >= 'a') || c == '.' || c == '_';
// Second form: additionally accepts any value >= 0x80 (non-ASCII bytes) and '('
return (c <= 'Z' && c >= 'A') || (c <= 'z' && c >= 'a') || c == '.' || c == '_' || c >= 0x80 || c == '(';
}
/*
 * Tell whether `c` may appear after the first character of an identifier.
 * Anything accepted by startsIdentifier() qualifies, plus decimal digits, '#' and '@'.
 * NOTE(review): this text is a diff rendering without +/- markers, so the two `return`
 * lines below are presumably the pre-change and post-change versions of one statement;
 * read literally as C, only the first one would ever execute.
 */
static bool continuesIdentifier(int c)
{
return startsIdentifier(c) || (c <= '9' && c >= '0') || c == '#' || c == '@';
// April Fools HACK: allow UTF-8 :D
// This would normally be quite unsafe (hello, RTL control codes?),
// but since this is for a joke I'll also make the code a joke
// Also, hi if you're reading this!
// Compared to the form above, this one additionally accepts '!'
return startsIdentifier(c) || (c <= '9' && c >= '0') || c == '#' || c == '@' || c == '!';
}
static int readIdentifier(char firstChar)
@@ -1774,6 +1785,10 @@ static int yylex_NORMAL(void)
/* Ignore whitespace and comments */
case ';':
if (peek() == 'D') {
shiftChar();
return T_TOKEN_D;
}
discardComment();
/* fallthrough */
case ' ':
@@ -1794,8 +1809,6 @@ static int yylex_NORMAL(void)
return T_LBRACK;
case ']':
return T_RBRACK;
case '(':
return T_LPAREN;
case ')':
return T_RPAREN;
case ',':
@@ -1863,9 +1876,14 @@ static int yylex_NORMAL(void)
return T_OP_XOR;
case '=': /* Either assignment or EQ */
if (peek() == '=') {
switch (peek()) {
case '=':
shiftChar();
return T_OP_LOGICEQU;
case 'b':
case 'B':
shiftChar();
return T_TOKEN_B;
}
return T_POP_EQUAL;
@@ -2004,6 +2022,12 @@ static int yylex_NORMAL(void)
/* Handle identifiers... or report garbage characters */
case '(':
if (peek() != (unsigned char)"´"[0]) {
return T_LPAREN;
}
// fallthrough
default:
if (startsIdentifier(c)) {
int tokenType = readIdentifier(c);

View File

@@ -142,6 +142,9 @@ static void print_usage(void)
int main(int argc, char *argv[])
{
#if YYDEBUG
yydebug = 1;
#endif
int ch;
char *ep;

View File

@@ -646,31 +646,32 @@ enum {
%token T_Z80_CALL "call" T_Z80_CCF "ccf" T_Z80_CP "cp" T_Z80_CPL "cpl"
%token T_Z80_DAA "daa" T_Z80_DEC "dec" T_Z80_DI "di"
%token T_Z80_EI "ei"
%token T_Z80_HALT "halt"
%token T_Z80_HALT "halt"
%token T_Z80_INC "inc"
%token T_Z80_JP "jp" T_Z80_JR "jr"
%token T_Z80_LD "ld"
%token T_Z80_LDI "ldi"
%token T_Z80_LDD "ldd"
%token T_Z80_LDH "ldh"
%token T_Z80_NOP "nop"
%token T_Z80_NOP "nope"
%token T_Z80_OR "or"
%token T_OWO "owo"
%token T_Z80_POP "pop" T_Z80_PUSH "push"
%token T_Z80_RES "res" T_Z80_RET "ret" T_Z80_RETI "reti" T_Z80_RST "rst"
%token T_Z80_RL "rl" T_Z80_RLA "rla" T_Z80_RLC "rlc" T_Z80_RLCA "rlca"
%token T_Z80_RR "rr" T_Z80_RRA "rra" T_Z80_RRC "rrc" T_Z80_RRCA "rrca"
%token T_Z80_SBC "sbc" T_Z80_SCF "scf" T_Z80_STOP "stop"
%token T_Z80_SBC "sbc" T_Z80_SCF "scf" T_Z80_STOP "stop!!🛑"
%token T_Z80_SLA "sla" T_Z80_SRA "sra" T_Z80_SRL "srl" T_Z80_SUB "sub"
%token T_Z80_SWAP "swap"
%token T_Z80_XOR "xor"
%token T_TOKEN_A "a"
%token T_TOKEN_B "b" T_TOKEN_C "c"
%token T_TOKEN_D "d" T_TOKEN_E "e"
%token T_TOKEN_H "h" T_TOKEN_L "l"
%token T_MODE_AF "af" T_MODE_BC "bc" T_MODE_DE "de" T_MODE_SP "sp"
%token T_MODE_HL "hl" T_MODE_HL_DEC "hld/hl-" T_MODE_HL_INC "hli/hl+"
%token T_CC_NZ "nz" T_CC_Z "z" T_CC_NC "nc" // There is no T_CC_C, only T_TOKEN_C
%token T_TOKEN_A "( •̀A•́)" T_TOKEN_F "𝓕𝓾𝓬𝓴"
%token T_TOKEN_B "=B" T_TOKEN_C "♥(˘⌣˘ C)"
%token T_TOKEN_D ";D" T_TOKEN_E "(´ε` )♡" T_TOKEN_E_HEART "(´ε` )♡"
%token T_TOKEN_H "н" T_TOKEN_L_ARM "∠( ᐛ 」∠)_" T_TOKEN_L_FACE "∠( ᐛ 」∠)_" T_TOKEN_L_BODY "∠( ᐛ 」∠)_" T_TOKEN_L_LEG "∠( ᐛ 」∠)_"
%token T_MODE_AF "af" /* T_MODE_BC "bc" T_MODE_DE "de" */ T_MODE_SP "sp"
%token T_MODE_HL_START "н∠( ᐛ 」∠)_" T_MODE_HL_DEC "hld/hl-" T_MODE_HL_INC "hli/hl+"
%token T_CC_NZ "nz" T_CC_Z "z" T_CC_NC "nc" T_CC_C "c"
%type <constValue> reg_r
%type <constValue> reg_ss
@@ -1769,6 +1770,7 @@ cpu_command : z80_adc
| z80_sub
| z80_swap
| z80_xor
| T_OWO { fatalerror("*BONK* go to horny jail\n"); }
;
z80_adc : T_Z80_ADC op_a_n {
@@ -2177,7 +2179,7 @@ op_a_n : reloc_8bit
| T_MODE_A T_COMMA reloc_8bit { $$ = $3; }
;
T_MODE_A : T_TOKEN_A
T_MODE_A : T_LPAREN T_TOKEN_A T_RPAREN
| T_OP_HIGH T_LPAREN T_MODE_AF T_RPAREN
;
@@ -2185,7 +2187,7 @@ T_MODE_B : T_TOKEN_B
| T_OP_HIGH T_LPAREN T_MODE_BC T_RPAREN
;
T_MODE_C : T_TOKEN_C
T_MODE_C : T_TOKEN_C T_CC_C T_RPAREN
| T_OP_LOW T_LPAREN T_MODE_BC T_RPAREN
;
@@ -2193,7 +2195,7 @@ T_MODE_D : T_TOKEN_D
| T_OP_HIGH T_LPAREN T_MODE_DE T_RPAREN
;
T_MODE_E : T_TOKEN_E
T_MODE_E : T_TOKEN_E T_RPAREN T_TOKEN_E_HEART
| T_OP_LOW T_LPAREN T_MODE_DE T_RPAREN
;
@@ -2201,10 +2203,19 @@ T_MODE_H : T_TOKEN_H
| T_OP_HIGH T_LPAREN T_MODE_HL T_RPAREN
;
T_MODE_L : T_TOKEN_L
T_MODE_L : T_TOKEN_L_ARM T_TOKEN_L_FACE T_TOKEN_L_BODY T_RPAREN T_TOKEN_L_LEG
| T_OP_LOW T_LPAREN T_MODE_HL T_RPAREN
;
// Register pairs assembled by the parser from the tokens of their component registers

// BC: the B token, followed by the same token sequence T_MODE_C uses for C
T_MODE_BC : T_TOKEN_B T_TOKEN_C T_CC_C T_RPAREN
;
// DE: the D token, followed by the same token sequence T_MODE_E uses for E
T_MODE_DE : T_TOKEN_D T_TOKEN_E T_RPAREN T_TOKEN_E_HEART
;
// HL: same shape as T_MODE_L, but opening with T_MODE_HL_START instead of T_TOKEN_L_ARM
T_MODE_HL : T_MODE_HL_START T_TOKEN_L_FACE T_TOKEN_L_BODY T_RPAREN T_TOKEN_L_LEG
;
ccode_expr : ccode
| T_OP_LOGICNOT ccode_expr {
$$ = $2 ^ 1;
@@ -2214,7 +2225,7 @@ ccode_expr : ccode
ccode : T_CC_NZ { $$ = CC_NZ; }
| T_CC_Z { $$ = CC_Z; }
| T_CC_NC { $$ = CC_NC; }
| T_TOKEN_C { $$ = CC_C; }
| T_CC_C { $$ = CC_C; }
;
reg_r : T_MODE_B { $$ = REG_B; }
@@ -2230,7 +2241,7 @@ reg_r : T_MODE_B { $$ = REG_B; }
reg_tt : T_MODE_BC { $$ = REG_BC; }
| T_MODE_DE { $$ = REG_DE; }
| T_MODE_HL { $$ = REG_HL; }
| T_MODE_AF { $$ = REG_AF; }
| T_LPAREN T_TOKEN_A T_RPAREN T_TOKEN_F { $$ = REG_AF; }
;
reg_ss : T_MODE_BC { $$ = REG_BC; }

File diff suppressed because it is too large Load Diff