Require underscores to actually be digit separators (#1812)

Multiple consecutive underscores, a trailing underscore, or an underscore next to the decimal point are now errors
2026-01-21 07:51:51 +00:00 · 2025-08-30 10:44:20 -04:00
parent 85176ef10a
commit 531278961f
7 changed files with 196 additions and 45 deletions
--- a/src/asm/lexer.cpp
+++ b/src/asm/lexer.cpp
@@ -939,17 +939,27 @@ static uint32_t readFractionalPart(uint32_t integer) {
 		READFRACTIONALPART_PRECISION,
 		READFRACTIONALPART_PRECISION_DIGITS,
 	} state = READFRACTIONALPART_DIGITS;
 	bool nonDigit = true;
 	for (int c = peek();; c = nextChar()) {
 		if (state == READFRACTIONALPART_DIGITS) {
 			if (c == '_') {
 				if (nonDigit) {
 					error("Invalid integer constant, '_' after another '_'");
 				}
 				nonDigit = true;
 				continue;
-			} else if (c == 'q' || c == 'Q') {
+			}
 			if (c == 'q' || c == 'Q') {
 				state = READFRACTIONALPART_PRECISION;
 				nonDigit = false; // '_' is allowed before 'q'/'Q'
 				continue;
 			} else if (!isDigit(c)) {
 				break;
 			}
 			nonDigit = false;
 			if (divisor > (UINT32_MAX - (c - '0')) / 10) {
 				warning(WARNING_LARGE_CONSTANT, "Precision of fixed-point constant is too large");
 				// Discard any additional digits
@@ -965,6 +975,7 @@ static uint32_t readFractionalPart(uint32_t integer) {
 			} else if (!isDigit(c)) {
 				break;
 			}
 			precision = precision * 10 + (c - '0');
 		}
 	}
@@ -978,6 +989,9 @@ static uint32_t readFractionalPart(uint32_t integer) {
 		error("Fixed-point constant precision must be between 1 and 31");
 		precision = options.fixPrecision;
 	}
 	if (nonDigit) {
 		error("Invalid fixed-point constant, trailing '_'");
 	}
 	if (integer >= (1ULL << (32 - precision))) {
 		warning(WARNING_LARGE_CONSTANT, "Magnitude of fixed-point constant is too large");
@@ -1032,22 +1046,31 @@ void lexer_SetGfxDigits(char const digits[4]) {
 	}
 }
-static uint32_t readBinaryNumber() {
+static uint32_t readBinaryNumber(char const *prefix) {
 	uint32_t value = 0;
 	bool empty = true;
 	bool nonDigit = false;
 	for (int c = peek();; c = nextChar()) {
-		int bit;
+		if (c == '_') {
-
+			if (nonDigit) {
-		if (c == '_' && !empty) {
+				error("Invalid integer constant, '_' after another '_'");
 			}
 			nonDigit = true;
 			continue;
-		} else if (c == '0' || c == options.binDigits[0]) {
+		}
 		int bit;
 		if (c == '0' || c == options.binDigits[0]) {
 			bit = 0;
 		} else if (c == '1' || c == options.binDigits[1]) {
 			bit = 1;
 		} else {
 			break;
 		}
 		empty = false;
 		nonDigit = false;
 		if (value > (UINT32_MAX - bit) / 2) {
 			warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
 			// Discard any additional digits
@@ -1058,29 +1081,39 @@ static uint32_t readBinaryNumber() {
 			return 0;
 		}
 		value = value * 2 + bit;
 		empty = false;
 	}
 	if (empty) {
-		error("Invalid integer constant, no digits after '%%'");
+		error("Invalid integer constant, no digits after %s", prefix);
 	}
 	if (nonDigit) {
 		error("Invalid integer constant, trailing '_'");
 	}
 	return value;
 }
-static uint32_t readOctalNumber() {
+static uint32_t readOctalNumber(char const *prefix) {
 	uint32_t value = 0;
 	bool empty = true;
 	bool nonDigit = false;
 	for (int c = peek();; c = nextChar()) {
-		if (c == '_' && !empty) {
+		if (c == '_') {
 			if (nonDigit) {
 				error("Invalid integer constant, '_' after another '_'");
 			}
 			nonDigit = true;
 			continue;
-		} else if (isOctDigit(c)) {
+		}
 		if (isOctDigit(c)) {
 			c = c - '0';
 		} else {
 			break;
 		}
 		empty = false;
 		nonDigit = false;
 		if (value > (UINT32_MAX - c) / 8) {
 			warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
@@ -1089,12 +1122,13 @@ static uint32_t readOctalNumber() {
 			return 0;
 		}
 		value = value * 8 + c;
 		empty = false;
 	}
 	if (empty) {
-		error("Invalid integer constant, no digits after '&'");
+		error("Invalid integer constant, no digits after %s", prefix);
 	}
 	if (nonDigit) {
 		error("Invalid integer constant, trailing '_'");
 	}
 	return value;
@@ -1103,15 +1137,23 @@ static uint32_t readOctalNumber() {
 static uint32_t readDecimalNumber(int initial) {
 	assume(isDigit(initial));
 	uint32_t value = initial - '0';
 	bool nonDigit = false;
 	for (int c = peek();; c = nextChar()) {
 		if (c == '_') {
 			if (nonDigit) {
 				error("Invalid integer constant, '_' after another '_'");
 			}
 			nonDigit = true;
 			continue;
-		} else if (isDigit(c)) {
+		}
 		if (isDigit(c)) {
 			c = c - '0';
 		} else {
 			break;
 		}
 		nonDigit = false;
 		if (value > (UINT32_MAX - c) / 10) {
 			warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
@@ -1122,17 +1164,28 @@ static uint32_t readDecimalNumber(int initial) {
 		value = value * 10 + c;
 	}
 	if (nonDigit) {
 		error("Invalid integer constant, trailing '_'");
 	}
 	return value;
 }
-static uint32_t readHexNumber() {
+static uint32_t readHexNumber(char const *prefix) {
 	uint32_t value = 0;
 	bool empty = true;
 	bool nonDigit = false;
 	for (int c = peek();; c = nextChar()) {
-		if (c == '_' && !empty) {
+		if (c == '_') {
 			if (nonDigit) {
 				error("Invalid integer constant, '_' after another '_'");
 			}
 			nonDigit = true;
 			continue;
-		} else if (c >= 'a' && c <= 'f') {
+		}
 		if (c >= 'a' && c <= 'f') {
 			c = c - 'a' + 10;
 		} else if (c >= 'A' && c <= 'F') {
 			c = c - 'A' + 10;
@@ -1141,6 +1194,8 @@ static uint32_t readHexNumber() {
 		} else {
 			break;
 		}
 		empty = false;
 		nonDigit = false;
 		if (value > (UINT32_MAX - c) / 16) {
 			warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
@@ -1149,12 +1204,13 @@ static uint32_t readHexNumber() {
 			return 0;
 		}
 		value = value * 16 + c;
 		empty = false;
 	}
 	if (empty) {
-		error("Invalid integer constant, no digits after '$'");
+		error("Invalid integer constant, no digits after %s", prefix);
 	}
 	if (nonDigit) {
 		error("Invalid integer constant, trailing '_'");
 	}
 	return value;
@@ -1163,13 +1219,19 @@ static uint32_t readHexNumber() {
 static uint32_t readGfxConstant() {
 	uint32_t bitPlaneLower = 0, bitPlaneUpper = 0;
 	uint8_t width = 0;
 	bool nonDigit = false;
 	for (int c = peek();; c = nextChar()) {
-		uint32_t pixel;
+		if (c == '_') {
-
+			if (nonDigit) {
-		if (c == '_' && width > 0) {
+				error("Invalid integer constant, '_' after another '_'");
 			}
 			nonDigit = true;
 			continue;
-		} else if (c == '0' || c == options.gfxDigits[0]) {
+		}
 		uint32_t pixel;
 		if (c == '0' || c == options.gfxDigits[0]) {
 			pixel = 0;
 		} else if (c == '1' || c == options.gfxDigits[1]) {
 			pixel = 1;
@@ -1180,6 +1242,7 @@ static uint32_t readGfxConstant() {
 		} else {
 			break;
 		}
 		nonDigit = false;
 		if (width < 8) {
 			bitPlaneLower = bitPlaneLower << 1 | (pixel & 1);
@@ -1197,6 +1260,9 @@ static uint32_t readGfxConstant() {
 		    WARNING_LARGE_CONSTANT, "Graphics constant is too large; only first 8 pixels considered"
 		);
 	}
 	if (nonDigit) {
 		error("Invalid graphics constant, trailing '_'");
 	}
 	return bitPlaneUpper << 8 | bitPlaneLower;
 }
@@ -1729,15 +1795,15 @@ static Token yylex_NORMAL() {
 			case 'x':
 			case 'X':
 				shiftChar();
-				return Token(T_(NUMBER), readHexNumber());
+				return Token(T_(NUMBER), readHexNumber("\"0x\""));
 			case 'o':
 			case 'O':
 				shiftChar();
-				return Token(T_(NUMBER), readOctalNumber());
+				return Token(T_(NUMBER), readOctalNumber("\"0o\""));
 			case 'b':
 			case 'B':
 				shiftChar();
-				return Token(T_(NUMBER), readBinaryNumber());
+				return Token(T_(NUMBER), readBinaryNumber("\"0b\""));
 			}
 			[[fallthrough]];
@@ -1763,20 +1829,21 @@ static Token yylex_NORMAL() {
 		case '&': // Either &=, binary AND, logical AND, or an octal constant
 			c = peek();
-			if (isOctDigit(c)) {
+			if (isOctDigit(c) || c == '_') {
-				return Token(T_(NUMBER), readOctalNumber());
+				return Token(T_(NUMBER), readOctalNumber("'&'"));
 			}
 			return oneOrTwo('=', T_(POP_ANDEQ), '&', T_(OP_LOGICAND), T_(OP_AND));
 		case '%': // Either %=, MOD, or a binary constant
 			c = peek();
-			if (c == '0' || c == '1' || c == options.binDigits[0] || c == options.binDigits[1]) {
+			if (c == '0' || c == '1' || c == options.binDigits[0] || c == options.binDigits[1]
-				return Token(T_(NUMBER), readBinaryNumber());
+			    || c == '_') {
 				return Token(T_(NUMBER), readBinaryNumber("'%'"));
 			}
 			return oneOrTwo('=', T_(POP_MODEQ), T_(OP_MOD));
 		case '$': // Hex constant
-			return Token(T_(NUMBER), readHexNumber());
+			return Token(T_(NUMBER), readHexNumber("'$'"));
 		case '`': // Gfx constant
 			return Token(T_(NUMBER), readGfxConstant());
--- a/test/asm/bracketed-macro-args.asm
+++ b/test/asm/bracketed-macro-args.asm
@@ -9,11 +9,11 @@ ENDM
 	printargs A, B, C, D
 MACRO mac
-	println \<2__> + \<1_2> + \<\1>
+	println \<2> + \<1_2> + \<\1>
 	def x = 2
 	println \<{d:x}> + \<1_{d:x}> + \<\<\<13>>>
 	def y equs "NARG"
-	println \<x> + \<1_{d:x}_> + \<\<\<_{y}>>>
+	println \<x> + \<1_{d:x}> + \<\<\<_{y}>>>
 ENDM
 	mac 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 1
--- a/test/asm/invalid-numbers.err
+++ b/test/asm/invalid-numbers.err
@@ -2,11 +2,11 @@ error: Invalid integer constant, no digits after '$'
    at invalid-numbers.asm::try(2) <- invalid-numbers.asm(11)
 error: Invalid graphics constant, no digits after '`'
    at invalid-numbers.asm::try(2) <- invalid-numbers.asm(12)
-error: Invalid integer constant, no digits after '%'
+error: Invalid integer constant, no digits after "0b"
    at invalid-numbers.asm::try(2) <- invalid-numbers.asm(13)
-error: Invalid integer constant, no digits after '&'
+error: Invalid integer constant, no digits after "0o"
    at invalid-numbers.asm::try(2) <- invalid-numbers.asm(14)
-error: Invalid integer constant, no digits after '$'
+error: Invalid integer constant, no digits after "0x"
    at invalid-numbers.asm::try(2) <- invalid-numbers.asm(15)
 warning: Integer constant is too large [-Wlarge-constant]
    at invalid-numbers.asm::try(2) <- invalid-numbers.asm(18)
--- a/test/asm/invalid-underscore.asm
+++ b/test/asm/invalid-underscore.asm
@@ -0,0 +1,31 @@
 ; good
 println 123_456
 println %_1010_1010
 println 0b_1010_1010
 println &_555_555
 println 0o_777_777
 println $_dead_beef
 println 0x_cafe_babe
 println `_0101_2323
 println 12_34.56_78
 println 12_34.56_q8
 ; bad (multiple '_')
 println 123__456
 println %1010__1010
 println &123__456
 println $abc__def
 println `0101__2323
 println 3.14__15
 println 2.718__Q16
 ; bad (trailing '_')
 println 12345_
 println 0b101010_
 println 0o123456_
 println 0xabcdef_
 println `01230123_
 ; bad ('_' next to '.')
 println 1_.618
 println 2._718
--- a/test/asm/invalid-underscore.err
+++ b/test/asm/invalid-underscore.err
@@ -0,0 +1,29 @@
 error: Invalid integer constant, '_' after another '_'
    at invalid-underscore.asm(14)
 error: Invalid integer constant, '_' after another '_'
    at invalid-underscore.asm(15)
 error: Invalid integer constant, '_' after another '_'
    at invalid-underscore.asm(16)
 error: Invalid integer constant, '_' after another '_'
    at invalid-underscore.asm(17)
 error: Invalid integer constant, '_' after another '_'
    at invalid-underscore.asm(18)
 error: Invalid integer constant, '_' after another '_'
    at invalid-underscore.asm(19)
 error: Invalid integer constant, '_' after another '_'
    at invalid-underscore.asm(20)
 error: Invalid integer constant, trailing '_'
    at invalid-underscore.asm(23)
 error: Invalid integer constant, trailing '_'
    at invalid-underscore.asm(24)
 error: Invalid integer constant, trailing '_'
    at invalid-underscore.asm(25)
 error: Invalid integer constant, trailing '_'
    at invalid-underscore.asm(26)
 error: Invalid graphics constant, trailing '_'
    at invalid-underscore.asm(27)
 error: Invalid integer constant, trailing '_'
    at invalid-underscore.asm(30)
 error: Invalid integer constant, '_' after another '_'
    at invalid-underscore.asm(31)
 Assembly aborted with 14 errors!
--- a/test/asm/invalid-underscore.out
+++ b/test/asm/invalid-underscore.out
@@ -0,0 +1,24 @@
 $1E240
 $AA
 $AA
 $2DB6D
 $3FFFF
 $DEADBEEF
 $CAFEBABE
 $F55
 $4D2915B
 $4D28F
 $1E240
 $AA
 $A72E
 $ABCDEF
 $F55
 $32439
 $2B7CF
 $3039
 $2A
 $A72E
 $ABCDEF
 $3355
 $19E35
 $2B7CF
--- a/test/asm/underscore-in-numeric-literal.asm
+++ b/test/asm/underscore-in-numeric-literal.asm
@@ -14,15 +14,15 @@ _1234::
 ; with underscores
 	dw _1234 ; label
-	db 123, 1_23, 1__23 ; decimal
+	db 123, 1_23, 12_3 ; decimal
 	dw 12_345 ; decimal
 	dw $ab_cd ; hex
-	db &2_0_0_ ; octal
+	db &2_0_0 ; octal
 	db %1111_0000, %1_0 ; binary
-	dl 6_._283_185 ; fixed point
+	dl 6.283_185 ; fixed point
-	dw `0123_3210, `00_33_22_11_ ; gfx
+	dw `0123_3210, `_00_33_22_11 ; gfx
 ; underscores with custom digits
 	opt g.ABC, b.X
-	db %.X.X_..XX_
+	db %.X.X_..XX
-	dw `.A.B_.C.._
+	dw `.A.B_.C..