Implement new instructions

"Who are you? What are you doing in my commit history?" ~ https://xkcd.com/163/
Hack in new register syntax
2026-01-21 07:51:51 +00:00 · 2022-04-01 12:46:52 +02:00 · 2022-04-01 12:46:52 +02:00 · 2022-04-01 12:46:52 +02:00
4 changed files with 457 additions and 384 deletions
--- a/src/asm/lexer.c
+++ b/src/asm/lexer.c
@@ -109,7 +109,7 @@ static struct KeywordMapping {
 	{"DEC", T_Z80_DEC},
 	{"DI", T_Z80_DI},
 	{"EI", T_Z80_EI},
-	{"HALT", T_Z80_HALT},
+	{"HALT✋", T_Z80_HALT},
 	{"INC", T_Z80_INC},
 	{"JP", T_Z80_JP},
 	{"JR", T_Z80_JR},
@@ -118,8 +118,9 @@ static struct KeywordMapping {
 	{"LDD", T_Z80_LDD},
 	{"LDIO", T_Z80_LDH},
 	{"LDH", T_Z80_LDH},
-	{"NOP", T_Z80_NOP},
+	{"NOPE", T_Z80_NOP},
 	{"OR", T_Z80_OR},
+	{"OWO", T_OWO},
 	{"POP", T_Z80_POP},
 	{"PUSH", T_Z80_PUSH},
 	{"RES", T_Z80_RES},
@@ -140,7 +141,7 @@ static struct KeywordMapping {
 	{"SLA", T_Z80_SLA},
 	{"SRA", T_Z80_SRA},
 	{"SRL", T_Z80_SRL},
-	{"STOP", T_Z80_STOP},
+	{"STOP!!🛑", T_Z80_STOP},
 	{"SUB", T_Z80_SUB},
 	{"SWAP", T_Z80_SWAP},
 	{"XOR", T_Z80_XOR},
@@ -148,24 +149,29 @@ static struct KeywordMapping {
 	{"NZ", T_CC_NZ},
 	{"Z", T_CC_Z},
 	{"NC", T_CC_NC},
-	/* Handled after as T_TOKEN_C */
-	/* { "C", T_CC_C }, */
+	{"C", T_CC_C},

-	{"AF", T_MODE_AF},
-	{"BC", T_MODE_BC},
-	{"DE", T_MODE_DE},
-	{"HL", T_MODE_HL},
+	{"•̀A•́)𝓕𝓾𝓬𝓴", T_MODE_AF},
+	// {"BC", T_MODE_BC},
+	// {"DE", T_MODE_DE},
+	{"н∠(", T_MODE_HL_START},
 	{"SP", T_MODE_SP},
-	{"HLD", T_MODE_HL_DEC},
-	{"HLI", T_MODE_HL_INC},
+	{"н∠( ᐛ 」∠)＿👁", T_MODE_HL_DEC},
+	{"н∠( ᐛ 」∠)＿👎", T_MODE_HL_INC},

-	{"A", T_TOKEN_A},
-	{"B", T_TOKEN_B},
-	{"C", T_TOKEN_C},
-	{"D", T_TOKEN_D},
-	{"E", T_TOKEN_E},
-	{"H", T_TOKEN_H},
-	{"L", T_TOKEN_L},
+	// HACK: normally this is surrounded by parens, but this is annoying to special-case,
+	// so we use cooperation from the parser.
+	{"•̀A•́", T_TOKEN_A},
+	// {"=B", T_TOKEN_B}, HACK: This begins with a non-identifier character, so we'll cheat
+	{"♥(˘⌣˘", T_TOKEN_C}, // HACK: same for "C" after the space & closing paren
+	// {";D", T_TOKEN_D}, HACK: also needs to be special-cased. God I feel dirty.
+	{"(´ε｀", T_TOKEN_E},
+	{"♡", T_TOKEN_E_HEART},
+	{"н", T_TOKEN_H},
+	{"∠(", T_TOKEN_L_ARM},
+	{"ᐛ", T_TOKEN_L_FACE},
+	{"」∠", T_TOKEN_L_BODY},
+	{"＿", T_TOKEN_L_LEG},

 	{"DEF", T_OP_DEF},

@@ -578,16 +584,16 @@ struct KeywordDictNode {
 	 * In turn, this allows greatly simplifying checking an index into this array,
 	 * which should help speed up the lexer.
 	 */
-	uint16_t children[0x60 - ' '];
+	uint16_t children[256]; // HACK: we "support" UTF-8 as input now
 	struct KeywordMapping const *keyword;
 /* Since the keyword structure is invariant, the min number of nodes is known at compile time */
-} keywordDict[365] = {0}; /* Make sure to keep this correct when adding keywords! */
+} keywordDict[690] = {0}; /* Nice */

 /* Convert a char into its index into the dict */
 static uint8_t dictIndex(char c)
 {
 	/* Translate uppercase to lowercase (roughly) */
-	if (c > 0x60)
+	if (c > 0x60 && c < 0x80)
 		c = c - ('a' - 'A');
 	return c - ' ';
 }
@@ -609,8 +615,9 @@ void lexer_Init(void)

 		/* Walk the dictionary, creating intermediate nodes for the keyword */
 		for (char const *ptr = keywords[i].name; *ptr; ptr++) {
+			unsigned char index = (unsigned char)*ptr - ' ';
 			/* We should be able to assume all entries are well-formed */
-			if (keywordDict[nodeID].children[*ptr - ' '] == 0) {
+			if (keywordDict[nodeID].children[index] == 0) {
 				/*
 				 * If this gets tripped up, set the size of keywordDict to
 				 * something high, compile with `-DPRINT_NODE_COUNT` (see below),
@@ -619,10 +626,10 @@ void lexer_Init(void)
 				assert(usedNodes < sizeof(keywordDict) / sizeof(*keywordDict));

 				/* There is no node at that location, grab one from the pool */
-				keywordDict[nodeID].children[*ptr - ' '] = usedNodes;
+				keywordDict[nodeID].children[index] = usedNodes;
 				usedNodes++;
 			}
-			nodeID = keywordDict[nodeID].children[*ptr - ' '];
+			nodeID = keywordDict[nodeID].children[index];
 		}

 		/* This assumes that no two keywords have the same name */
@@ -1289,12 +1296,16 @@ static uint32_t readGfxConstant(void)
 static bool startsIdentifier(int c)
 {
 	// Anonymous labels internally start with '!'
-	return (c <= 'Z' && c >= 'A') || (c <= 'z' && c >= 'a') || c == '.' || c == '_';
+	return (c <= 'Z' && c >= 'A') || (c <= 'z' && c >= 'a') || c == '.' || c == '_' || c >= 0x80 || c == '(';
 }

 static bool continuesIdentifier(int c)
 {
-	return startsIdentifier(c) || (c <= '9' && c >= '0') || c == '#' || c == '@';
+	// April Fools HACK: allow UTF-8 :D
+	// This would normally be quite unsafe (hello, RTL control codes?),
+	// but since this is for a joke I'll also make the code a joke
+	// Also, hi if you're reading this!
+	return startsIdentifier(c) || (c <= '9' && c >= '0') || c == '#' || c == '@' || c == '!';
 }

 static int readIdentifier(char firstChar)
@@ -1774,6 +1785,10 @@ static int yylex_NORMAL(void)
 		/* Ignore whitespace and comments */

 		case ';':
+			if (peek() == 'D') {
+				shiftChar();
+				return T_TOKEN_D;
+			}
 			discardComment();
 			/* fallthrough */
 		case ' ':
@@ -1794,8 +1809,6 @@ static int yylex_NORMAL(void)
 			return T_LBRACK;
 		case ']':
 			return T_RBRACK;
-		case '(':
-			return T_LPAREN;
 		case ')':
 			return T_RPAREN;
 		case ',':
@@ -1863,9 +1876,14 @@ static int yylex_NORMAL(void)
 			return T_OP_XOR;

 		case '=': /* Either assignment or EQ */
-			if (peek() == '=') {
+			switch (peek()) {
+			case '=':
 				shiftChar();
 				return T_OP_LOGICEQU;
+			case 'b':
+			case 'B':
+				shiftChar();
+				return T_TOKEN_B;
 			}
 			return T_POP_EQUAL;

@@ -2004,6 +2022,12 @@ static int yylex_NORMAL(void)

 		/* Handle identifiers... or report garbage characters */

+		case '(':
+			if (peek() != (unsigned char)"´"[0]) {
+				return T_LPAREN;
+			}
+			// fallthrough
+
 		default:
 			if (startsIdentifier(c)) {
 				int tokenType = readIdentifier(c);
--- a/src/asm/main.c
+++ b/src/asm/main.c
@@ -142,6 +142,9 @@ static void print_usage(void)

 int main(int argc, char *argv[])
 {
+	#if YYDEBUG
+	yydebug = 1;
+	#endif
 	int ch;
 	char *ep;

--- a/src/asm/parser.y
+++ b/src/asm/parser.y
@@ -646,31 +646,32 @@ enum {
 %token	T_Z80_CALL "call" T_Z80_CCF "ccf" T_Z80_CP "cp" T_Z80_CPL "cpl"
 %token	T_Z80_DAA "daa" T_Z80_DEC "dec" T_Z80_DI "di"
 %token	T_Z80_EI "ei"
-%token	T_Z80_HALT "halt"
+%token	T_Z80_HALT "halt✋"
 %token	T_Z80_INC "inc"
 %token	T_Z80_JP "jp" T_Z80_JR "jr"
 %token	T_Z80_LD "ld"
 %token	T_Z80_LDI "ldi"
 %token	T_Z80_LDD "ldd"
 %token	T_Z80_LDH "ldh"
-%token	T_Z80_NOP "nop"
+%token	T_Z80_NOP "nope"
 %token	T_Z80_OR "or"
+%token	T_OWO "owo"
 %token	T_Z80_POP "pop" T_Z80_PUSH "push"
 %token	T_Z80_RES "res" T_Z80_RET "ret" T_Z80_RETI "reti" T_Z80_RST "rst"
 %token	T_Z80_RL "rl" T_Z80_RLA "rla" T_Z80_RLC "rlc" T_Z80_RLCA "rlca"
 %token	T_Z80_RR "rr" T_Z80_RRA "rra" T_Z80_RRC "rrc" T_Z80_RRCA "rrca"
-%token	T_Z80_SBC "sbc" T_Z80_SCF "scf" T_Z80_STOP "stop"
+%token	T_Z80_SBC "sbc" T_Z80_SCF "scf" T_Z80_STOP "stop!!🛑"
 %token	T_Z80_SLA "sla" T_Z80_SRA "sra" T_Z80_SRL "srl" T_Z80_SUB "sub"
 %token	T_Z80_SWAP "swap"
 %token	T_Z80_XOR "xor"

-%token	T_TOKEN_A "a"
-%token	T_TOKEN_B "b" T_TOKEN_C "c"
-%token	T_TOKEN_D "d" T_TOKEN_E "e"
-%token	T_TOKEN_H "h" T_TOKEN_L "l"
-%token	T_MODE_AF "af" T_MODE_BC "bc" T_MODE_DE "de" T_MODE_SP "sp"
-%token	T_MODE_HL "hl" T_MODE_HL_DEC "hld/hl-" T_MODE_HL_INC "hli/hl+"
-%token	T_CC_NZ "nz" T_CC_Z "z" T_CC_NC "nc" // There is no T_CC_C, only T_TOKEN_C
+%token	T_TOKEN_A "( •̀A•́)" T_TOKEN_F "𝓕𝓾𝓬𝓴"
+%token	T_TOKEN_B "=B" T_TOKEN_C "♥(˘⌣˘ C)"
+%token	T_TOKEN_D ";D" T_TOKEN_E "(´ε｀ )♡" T_TOKEN_E_HEART "(´ε｀ )♡"
+%token	T_TOKEN_H "н" T_TOKEN_L_ARM "∠( ᐛ 」∠)＿" T_TOKEN_L_FACE "∠( ᐛ 」∠)＿" T_TOKEN_L_BODY "∠( ᐛ 」∠)＿" T_TOKEN_L_LEG "∠( ᐛ 」∠)＿"
+%token	T_MODE_AF "af" /* T_MODE_BC "bc" T_MODE_DE "de" */ T_MODE_SP "sp"
+%token	T_MODE_HL_START "н∠( ᐛ 」∠)＿" T_MODE_HL_DEC "hld/hl-" T_MODE_HL_INC "hli/hl+"
+%token	T_CC_NZ "nz" T_CC_Z "z" T_CC_NC "nc" T_CC_C "c"

 %type	<constValue>	reg_r
 %type	<constValue>	reg_ss
@@ -1769,6 +1770,7 @@ cpu_command	: z80_adc
 		| z80_sub
 		| z80_swap
 		| z80_xor
+		| T_OWO { fatalerror("*BONK* go to horny jail\n"); }
 ;

 z80_adc		: T_Z80_ADC op_a_n {
@@ -2177,7 +2179,7 @@ op_a_n		: reloc_8bit
 		| T_MODE_A T_COMMA reloc_8bit { $$ = $3; }
 ;

-T_MODE_A	: T_TOKEN_A
+T_MODE_A	: T_LPAREN T_TOKEN_A T_RPAREN
 		| T_OP_HIGH T_LPAREN T_MODE_AF T_RPAREN
 ;

@@ -2185,7 +2187,7 @@ T_MODE_B	: T_TOKEN_B
 		| T_OP_HIGH T_LPAREN T_MODE_BC T_RPAREN
 ;

-T_MODE_C	: T_TOKEN_C
+T_MODE_C	: T_TOKEN_C T_CC_C T_RPAREN
 		| T_OP_LOW T_LPAREN T_MODE_BC T_RPAREN
 ;

@@ -2193,7 +2195,7 @@ T_MODE_D	: T_TOKEN_D
 		| T_OP_HIGH T_LPAREN T_MODE_DE T_RPAREN
 ;

-T_MODE_E	: T_TOKEN_E
+T_MODE_E	: T_TOKEN_E T_RPAREN T_TOKEN_E_HEART
 		| T_OP_LOW T_LPAREN T_MODE_DE T_RPAREN
 ;

@@ -2201,10 +2203,19 @@ T_MODE_H	: T_TOKEN_H
 		| T_OP_HIGH T_LPAREN T_MODE_HL T_RPAREN
 ;

-T_MODE_L	: T_TOKEN_L
+T_MODE_L	: T_TOKEN_L_ARM T_TOKEN_L_FACE T_TOKEN_L_BODY T_RPAREN T_TOKEN_L_LEG
 		| T_OP_LOW T_LPAREN T_MODE_HL T_RPAREN
 ;

+T_MODE_BC	: T_TOKEN_B T_TOKEN_C T_CC_C T_RPAREN
+;
+
+T_MODE_DE	: T_TOKEN_D T_TOKEN_E T_RPAREN T_TOKEN_E_HEART
+;
+
+T_MODE_HL	: T_MODE_HL_START T_TOKEN_L_FACE T_TOKEN_L_BODY T_RPAREN T_TOKEN_L_LEG
+;
+
 ccode_expr	: ccode
 		| T_OP_LOGICNOT ccode_expr {
 			$$ = $2 ^ 1;
@@ -2214,7 +2225,7 @@ ccode_expr	: ccode
 ccode		: T_CC_NZ { $$ = CC_NZ; }
 		| T_CC_Z { $$ = CC_Z; }
 		| T_CC_NC { $$ = CC_NC; }
-		| T_TOKEN_C { $$ = CC_C; }
+		| T_CC_C { $$ = CC_C; }
 ;

 reg_r		: T_MODE_B { $$ = REG_B; }
@@ -2230,7 +2241,7 @@ reg_r		: T_MODE_B { $$ = REG_B; }
 reg_tt		: T_MODE_BC { $$ = REG_BC; }
 		| T_MODE_DE { $$ = REG_DE; }
 		| T_MODE_HL { $$ = REG_HL; }
-		| T_MODE_AF { $$ = REG_AF; }
+		| T_LPAREN T_TOKEN_A T_RPAREN T_TOKEN_F { $$ = REG_AF; }
 ;

 reg_ss		: T_MODE_BC { $$ = REG_BC; }
--- a/src/gbz80.7
+++ b/src/gbz80.7
Author	SHA1	Message	Date
ISSOtm	bffe7eb4de	Implement new instructions "Who are you? What are you doing in my commit history?" ~ https://xkcd.com/163/	2022-04-01 12:46:52 +02:00
ISSOtm	cd454d2e9a	Hack in new register syntax Oh my god I want to die x_x	2022-04-01 12:46:52 +02:00
ISSOtm	c814a616d6	Port Gan's work to Mandoc format Hopefully I didn't forget anything	2022-04-01 12:46:52 +02:00