Require underscores to actually be digit separators (#1812)

Multiple consecutive underscores, a trailing underscore, or an underscore next to the decimal point are now errors.
This commit is contained in:
Rangi
2025-08-30 10:44:20 -04:00
committed by GitHub
parent 85176ef10a
commit 531278961f
7 changed files with 196 additions and 45 deletions

View File

@@ -939,17 +939,27 @@ static uint32_t readFractionalPart(uint32_t integer) {
READFRACTIONALPART_PRECISION, READFRACTIONALPART_PRECISION,
READFRACTIONALPART_PRECISION_DIGITS, READFRACTIONALPART_PRECISION_DIGITS,
} state = READFRACTIONALPART_DIGITS; } state = READFRACTIONALPART_DIGITS;
bool nonDigit = true;
for (int c = peek();; c = nextChar()) { for (int c = peek();; c = nextChar()) {
if (state == READFRACTIONALPART_DIGITS) { if (state == READFRACTIONALPART_DIGITS) {
if (c == '_') { if (c == '_') {
if (nonDigit) {
error("Invalid integer constant, '_' after another '_'");
}
nonDigit = true;
continue; continue;
} else if (c == 'q' || c == 'Q') { }
if (c == 'q' || c == 'Q') {
state = READFRACTIONALPART_PRECISION; state = READFRACTIONALPART_PRECISION;
nonDigit = false; // '_' is allowed before 'q'/'Q'
continue; continue;
} else if (!isDigit(c)) { } else if (!isDigit(c)) {
break; break;
} }
nonDigit = false;
if (divisor > (UINT32_MAX - (c - '0')) / 10) { if (divisor > (UINT32_MAX - (c - '0')) / 10) {
warning(WARNING_LARGE_CONSTANT, "Precision of fixed-point constant is too large"); warning(WARNING_LARGE_CONSTANT, "Precision of fixed-point constant is too large");
// Discard any additional digits // Discard any additional digits
@@ -965,6 +975,7 @@ static uint32_t readFractionalPart(uint32_t integer) {
} else if (!isDigit(c)) { } else if (!isDigit(c)) {
break; break;
} }
precision = precision * 10 + (c - '0'); precision = precision * 10 + (c - '0');
} }
} }
@@ -978,6 +989,9 @@ static uint32_t readFractionalPart(uint32_t integer) {
error("Fixed-point constant precision must be between 1 and 31"); error("Fixed-point constant precision must be between 1 and 31");
precision = options.fixPrecision; precision = options.fixPrecision;
} }
if (nonDigit) {
error("Invalid fixed-point constant, trailing '_'");
}
if (integer >= (1ULL << (32 - precision))) { if (integer >= (1ULL << (32 - precision))) {
warning(WARNING_LARGE_CONSTANT, "Magnitude of fixed-point constant is too large"); warning(WARNING_LARGE_CONSTANT, "Magnitude of fixed-point constant is too large");
@@ -1032,22 +1046,31 @@ void lexer_SetGfxDigits(char const digits[4]) {
} }
} }
static uint32_t readBinaryNumber() { static uint32_t readBinaryNumber(char const *prefix) {
uint32_t value = 0; uint32_t value = 0;
bool empty = true; bool empty = true;
bool nonDigit = false;
for (int c = peek();; c = nextChar()) { for (int c = peek();; c = nextChar()) {
int bit; if (c == '_') {
if (nonDigit) {
if (c == '_' && !empty) { error("Invalid integer constant, '_' after another '_'");
}
nonDigit = true;
continue; continue;
} else if (c == '0' || c == options.binDigits[0]) { }
int bit;
if (c == '0' || c == options.binDigits[0]) {
bit = 0; bit = 0;
} else if (c == '1' || c == options.binDigits[1]) { } else if (c == '1' || c == options.binDigits[1]) {
bit = 1; bit = 1;
} else { } else {
break; break;
} }
empty = false;
nonDigit = false;
if (value > (UINT32_MAX - bit) / 2) { if (value > (UINT32_MAX - bit) / 2) {
warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
// Discard any additional digits // Discard any additional digits
@@ -1058,29 +1081,39 @@ static uint32_t readBinaryNumber() {
return 0; return 0;
} }
value = value * 2 + bit; value = value * 2 + bit;
empty = false;
} }
if (empty) { if (empty) {
error("Invalid integer constant, no digits after '%%'"); error("Invalid integer constant, no digits after %s", prefix);
}
if (nonDigit) {
error("Invalid integer constant, trailing '_'");
} }
return value; return value;
} }
static uint32_t readOctalNumber() { static uint32_t readOctalNumber(char const *prefix) {
uint32_t value = 0; uint32_t value = 0;
bool empty = true; bool empty = true;
bool nonDigit = false;
for (int c = peek();; c = nextChar()) { for (int c = peek();; c = nextChar()) {
if (c == '_' && !empty) { if (c == '_') {
if (nonDigit) {
error("Invalid integer constant, '_' after another '_'");
}
nonDigit = true;
continue; continue;
} else if (isOctDigit(c)) { }
if (isOctDigit(c)) {
c = c - '0'; c = c - '0';
} else { } else {
break; break;
} }
empty = false;
nonDigit = false;
if (value > (UINT32_MAX - c) / 8) { if (value > (UINT32_MAX - c) / 8) {
warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
@@ -1089,12 +1122,13 @@ static uint32_t readOctalNumber() {
return 0; return 0;
} }
value = value * 8 + c; value = value * 8 + c;
empty = false;
} }
if (empty) { if (empty) {
error("Invalid integer constant, no digits after '&'"); error("Invalid integer constant, no digits after %s", prefix);
}
if (nonDigit) {
error("Invalid integer constant, trailing '_'");
} }
return value; return value;
@@ -1103,15 +1137,23 @@ static uint32_t readOctalNumber() {
static uint32_t readDecimalNumber(int initial) { static uint32_t readDecimalNumber(int initial) {
assume(isDigit(initial)); assume(isDigit(initial));
uint32_t value = initial - '0'; uint32_t value = initial - '0';
bool nonDigit = false;
for (int c = peek();; c = nextChar()) { for (int c = peek();; c = nextChar()) {
if (c == '_') { if (c == '_') {
if (nonDigit) {
error("Invalid integer constant, '_' after another '_'");
}
nonDigit = true;
continue; continue;
} else if (isDigit(c)) { }
if (isDigit(c)) {
c = c - '0'; c = c - '0';
} else { } else {
break; break;
} }
nonDigit = false;
if (value > (UINT32_MAX - c) / 10) { if (value > (UINT32_MAX - c) / 10) {
warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
@@ -1122,17 +1164,28 @@ static uint32_t readDecimalNumber(int initial) {
value = value * 10 + c; value = value * 10 + c;
} }
if (nonDigit) {
error("Invalid integer constant, trailing '_'");
}
return value; return value;
} }
static uint32_t readHexNumber() { static uint32_t readHexNumber(char const *prefix) {
uint32_t value = 0; uint32_t value = 0;
bool empty = true; bool empty = true;
bool nonDigit = false;
for (int c = peek();; c = nextChar()) { for (int c = peek();; c = nextChar()) {
if (c == '_' && !empty) { if (c == '_') {
if (nonDigit) {
error("Invalid integer constant, '_' after another '_'");
}
nonDigit = true;
continue; continue;
} else if (c >= 'a' && c <= 'f') { }
if (c >= 'a' && c <= 'f') {
c = c - 'a' + 10; c = c - 'a' + 10;
} else if (c >= 'A' && c <= 'F') { } else if (c >= 'A' && c <= 'F') {
c = c - 'A' + 10; c = c - 'A' + 10;
@@ -1141,6 +1194,8 @@ static uint32_t readHexNumber() {
} else { } else {
break; break;
} }
empty = false;
nonDigit = false;
if (value > (UINT32_MAX - c) / 16) { if (value > (UINT32_MAX - c) / 16) {
warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
@@ -1149,12 +1204,13 @@ static uint32_t readHexNumber() {
return 0; return 0;
} }
value = value * 16 + c; value = value * 16 + c;
empty = false;
} }
if (empty) { if (empty) {
error("Invalid integer constant, no digits after '$'"); error("Invalid integer constant, no digits after %s", prefix);
}
if (nonDigit) {
error("Invalid integer constant, trailing '_'");
} }
return value; return value;
@@ -1163,13 +1219,19 @@ static uint32_t readHexNumber() {
static uint32_t readGfxConstant() { static uint32_t readGfxConstant() {
uint32_t bitPlaneLower = 0, bitPlaneUpper = 0; uint32_t bitPlaneLower = 0, bitPlaneUpper = 0;
uint8_t width = 0; uint8_t width = 0;
bool nonDigit = false;
for (int c = peek();; c = nextChar()) { for (int c = peek();; c = nextChar()) {
uint32_t pixel; if (c == '_') {
if (nonDigit) {
if (c == '_' && width > 0) { error("Invalid integer constant, '_' after another '_'");
}
nonDigit = true;
continue; continue;
} else if (c == '0' || c == options.gfxDigits[0]) { }
uint32_t pixel;
if (c == '0' || c == options.gfxDigits[0]) {
pixel = 0; pixel = 0;
} else if (c == '1' || c == options.gfxDigits[1]) { } else if (c == '1' || c == options.gfxDigits[1]) {
pixel = 1; pixel = 1;
@@ -1180,6 +1242,7 @@ static uint32_t readGfxConstant() {
} else { } else {
break; break;
} }
nonDigit = false;
if (width < 8) { if (width < 8) {
bitPlaneLower = bitPlaneLower << 1 | (pixel & 1); bitPlaneLower = bitPlaneLower << 1 | (pixel & 1);
@@ -1197,6 +1260,9 @@ static uint32_t readGfxConstant() {
WARNING_LARGE_CONSTANT, "Graphics constant is too large; only first 8 pixels considered" WARNING_LARGE_CONSTANT, "Graphics constant is too large; only first 8 pixels considered"
); );
} }
if (nonDigit) {
error("Invalid graphics constant, trailing '_'");
}
return bitPlaneUpper << 8 | bitPlaneLower; return bitPlaneUpper << 8 | bitPlaneLower;
} }
@@ -1729,15 +1795,15 @@ static Token yylex_NORMAL() {
case 'x': case 'x':
case 'X': case 'X':
shiftChar(); shiftChar();
return Token(T_(NUMBER), readHexNumber()); return Token(T_(NUMBER), readHexNumber("\"0x\""));
case 'o': case 'o':
case 'O': case 'O':
shiftChar(); shiftChar();
return Token(T_(NUMBER), readOctalNumber()); return Token(T_(NUMBER), readOctalNumber("\"0o\""));
case 'b': case 'b':
case 'B': case 'B':
shiftChar(); shiftChar();
return Token(T_(NUMBER), readBinaryNumber()); return Token(T_(NUMBER), readBinaryNumber("\"0b\""));
} }
[[fallthrough]]; [[fallthrough]];
@@ -1763,20 +1829,21 @@ static Token yylex_NORMAL() {
case '&': // Either &=, binary AND, logical AND, or an octal constant case '&': // Either &=, binary AND, logical AND, or an octal constant
c = peek(); c = peek();
if (isOctDigit(c)) { if (isOctDigit(c) || c == '_') {
return Token(T_(NUMBER), readOctalNumber()); return Token(T_(NUMBER), readOctalNumber("'&'"));
} }
return oneOrTwo('=', T_(POP_ANDEQ), '&', T_(OP_LOGICAND), T_(OP_AND)); return oneOrTwo('=', T_(POP_ANDEQ), '&', T_(OP_LOGICAND), T_(OP_AND));
case '%': // Either %=, MOD, or a binary constant case '%': // Either %=, MOD, or a binary constant
c = peek(); c = peek();
if (c == '0' || c == '1' || c == options.binDigits[0] || c == options.binDigits[1]) { if (c == '0' || c == '1' || c == options.binDigits[0] || c == options.binDigits[1]
return Token(T_(NUMBER), readBinaryNumber()); || c == '_') {
return Token(T_(NUMBER), readBinaryNumber("'%'"));
} }
return oneOrTwo('=', T_(POP_MODEQ), T_(OP_MOD)); return oneOrTwo('=', T_(POP_MODEQ), T_(OP_MOD));
case '$': // Hex constant case '$': // Hex constant
return Token(T_(NUMBER), readHexNumber()); return Token(T_(NUMBER), readHexNumber("'$'"));
case '`': // Gfx constant case '`': // Gfx constant
return Token(T_(NUMBER), readGfxConstant()); return Token(T_(NUMBER), readGfxConstant());

View File

@@ -9,11 +9,11 @@ ENDM
printargs A, B, C, D printargs A, B, C, D
MACRO mac MACRO mac
println \<2__> + \<1_2> + \<\1> println \<2> + \<1_2> + \<\1>
def x = 2 def x = 2
println \<{d:x}> + \<1_{d:x}> + \<\<\<13>>> println \<{d:x}> + \<1_{d:x}> + \<\<\<13>>>
def y equs "NARG" def y equs "NARG"
println \<x> + \<1_{d:x}_> + \<\<\<_{y}>>> println \<x> + \<1_{d:x}> + \<\<\<_{y}>>>
ENDM ENDM
mac 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 1 mac 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 1

View File

@@ -2,11 +2,11 @@ error: Invalid integer constant, no digits after '$'
at invalid-numbers.asm::try(2) <- invalid-numbers.asm(11) at invalid-numbers.asm::try(2) <- invalid-numbers.asm(11)
error: Invalid graphics constant, no digits after '`' error: Invalid graphics constant, no digits after '`'
at invalid-numbers.asm::try(2) <- invalid-numbers.asm(12) at invalid-numbers.asm::try(2) <- invalid-numbers.asm(12)
error: Invalid integer constant, no digits after '%' error: Invalid integer constant, no digits after "0b"
at invalid-numbers.asm::try(2) <- invalid-numbers.asm(13) at invalid-numbers.asm::try(2) <- invalid-numbers.asm(13)
error: Invalid integer constant, no digits after '&' error: Invalid integer constant, no digits after "0o"
at invalid-numbers.asm::try(2) <- invalid-numbers.asm(14) at invalid-numbers.asm::try(2) <- invalid-numbers.asm(14)
error: Invalid integer constant, no digits after '$' error: Invalid integer constant, no digits after "0x"
at invalid-numbers.asm::try(2) <- invalid-numbers.asm(15) at invalid-numbers.asm::try(2) <- invalid-numbers.asm(15)
warning: Integer constant is too large [-Wlarge-constant] warning: Integer constant is too large [-Wlarge-constant]
at invalid-numbers.asm::try(2) <- invalid-numbers.asm(18) at invalid-numbers.asm::try(2) <- invalid-numbers.asm(18)

View File

@@ -0,0 +1,31 @@
; good
println 123_456
println %_1010_1010
println 0b_1010_1010
println &_555_555
println 0o_777_777
println $_dead_beef
println 0x_cafe_babe
println `_0101_2323
println 12_34.56_78
println 12_34.56_q8
; bad (multiple '_')
println 123__456
println %1010__1010
println &123__456
println $abc__def
println `0101__2323
println 3.14__15
println 2.718__Q16
; bad (trailing '_')
println 12345_
println 0b101010_
println 0o123456_
println 0xabcdef_
println `01230123_
; bad ('_' next to '.')
println 1_.618
println 2._718

View File

@@ -0,0 +1,29 @@
error: Invalid integer constant, '_' after another '_'
at invalid-underscore.asm(14)
error: Invalid integer constant, '_' after another '_'
at invalid-underscore.asm(15)
error: Invalid integer constant, '_' after another '_'
at invalid-underscore.asm(16)
error: Invalid integer constant, '_' after another '_'
at invalid-underscore.asm(17)
error: Invalid integer constant, '_' after another '_'
at invalid-underscore.asm(18)
error: Invalid integer constant, '_' after another '_'
at invalid-underscore.asm(19)
error: Invalid integer constant, '_' after another '_'
at invalid-underscore.asm(20)
error: Invalid integer constant, trailing '_'
at invalid-underscore.asm(23)
error: Invalid integer constant, trailing '_'
at invalid-underscore.asm(24)
error: Invalid integer constant, trailing '_'
at invalid-underscore.asm(25)
error: Invalid integer constant, trailing '_'
at invalid-underscore.asm(26)
error: Invalid graphics constant, trailing '_'
at invalid-underscore.asm(27)
error: Invalid integer constant, trailing '_'
at invalid-underscore.asm(30)
error: Invalid integer constant, '_' after another '_'
at invalid-underscore.asm(31)
Assembly aborted with 14 errors!

View File

@@ -0,0 +1,24 @@
$1E240
$AA
$AA
$2DB6D
$3FFFF
$DEADBEEF
$CAFEBABE
$F55
$4D2915B
$4D28F
$1E240
$AA
$A72E
$ABCDEF
$F55
$32439
$2B7CF
$3039
$2A
$A72E
$ABCDEF
$3355
$19E35
$2B7CF

View File

@@ -14,15 +14,15 @@ _1234::
; with underscores ; with underscores
dw _1234 ; label dw _1234 ; label
db 123, 1_23, 1__23 ; decimal db 123, 1_23, 12_3 ; decimal
dw 12_345 ; decimal dw 12_345 ; decimal
dw $ab_cd ; hex dw $ab_cd ; hex
db &2_0_0_ ; octal db &2_0_0 ; octal
db %1111_0000, %1_0 ; binary db %1111_0000, %1_0 ; binary
dl 6_._283_185 ; fixed point dl 6.283_185 ; fixed point
dw `0123_3210, `00_33_22_11_ ; gfx dw `0123_3210, `_00_33_22_11 ; gfx
; underscores with custom digits ; underscores with custom digits
opt g.ABC, b.X opt g.ABC, b.X
db %.X.X_..XX_ db %.X.X_..XX
dw `.A.B_.C.._ dw `.A.B_.C..