Require underscores to actually be digit separators (#1812)

Multiple consecutive underscores, a trailing underscore, or an underscore next to the decimal point are now errors
This commit is contained in:
Rangi
2025-08-30 10:44:20 -04:00
committed by GitHub
parent 85176ef10a
commit 531278961f
7 changed files with 196 additions and 45 deletions

View File

@@ -939,17 +939,27 @@ static uint32_t readFractionalPart(uint32_t integer) {
READFRACTIONALPART_PRECISION,
READFRACTIONALPART_PRECISION_DIGITS,
} state = READFRACTIONALPART_DIGITS;
bool nonDigit = true;
for (int c = peek();; c = nextChar()) {
if (state == READFRACTIONALPART_DIGITS) {
if (c == '_') {
if (nonDigit) {
error("Invalid integer constant, '_' after another '_'");
}
nonDigit = true;
continue;
} else if (c == 'q' || c == 'Q') {
}
if (c == 'q' || c == 'Q') {
state = READFRACTIONALPART_PRECISION;
nonDigit = false; // '_' is allowed before 'q'/'Q'
continue;
} else if (!isDigit(c)) {
break;
}
nonDigit = false;
if (divisor > (UINT32_MAX - (c - '0')) / 10) {
warning(WARNING_LARGE_CONSTANT, "Precision of fixed-point constant is too large");
// Discard any additional digits
@@ -965,6 +975,7 @@ static uint32_t readFractionalPart(uint32_t integer) {
} else if (!isDigit(c)) {
break;
}
precision = precision * 10 + (c - '0');
}
}
@@ -978,6 +989,9 @@ static uint32_t readFractionalPart(uint32_t integer) {
error("Fixed-point constant precision must be between 1 and 31");
precision = options.fixPrecision;
}
if (nonDigit) {
error("Invalid fixed-point constant, trailing '_'");
}
if (integer >= (1ULL << (32 - precision))) {
warning(WARNING_LARGE_CONSTANT, "Magnitude of fixed-point constant is too large");
@@ -1032,22 +1046,31 @@ void lexer_SetGfxDigits(char const digits[4]) {
}
}
static uint32_t readBinaryNumber() {
static uint32_t readBinaryNumber(char const *prefix) {
uint32_t value = 0;
bool empty = true;
bool nonDigit = false;
for (int c = peek();; c = nextChar()) {
int bit;
if (c == '_' && !empty) {
if (c == '_') {
if (nonDigit) {
error("Invalid integer constant, '_' after another '_'");
}
nonDigit = true;
continue;
} else if (c == '0' || c == options.binDigits[0]) {
}
int bit;
if (c == '0' || c == options.binDigits[0]) {
bit = 0;
} else if (c == '1' || c == options.binDigits[1]) {
bit = 1;
} else {
break;
}
empty = false;
nonDigit = false;
if (value > (UINT32_MAX - bit) / 2) {
warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
// Discard any additional digits
@@ -1058,29 +1081,39 @@ static uint32_t readBinaryNumber() {
return 0;
}
value = value * 2 + bit;
empty = false;
}
if (empty) {
error("Invalid integer constant, no digits after '%%'");
error("Invalid integer constant, no digits after %s", prefix);
}
if (nonDigit) {
error("Invalid integer constant, trailing '_'");
}
return value;
}
static uint32_t readOctalNumber() {
static uint32_t readOctalNumber(char const *prefix) {
uint32_t value = 0;
bool empty = true;
bool nonDigit = false;
for (int c = peek();; c = nextChar()) {
if (c == '_' && !empty) {
if (c == '_') {
if (nonDigit) {
error("Invalid integer constant, '_' after another '_'");
}
nonDigit = true;
continue;
} else if (isOctDigit(c)) {
}
if (isOctDigit(c)) {
c = c - '0';
} else {
break;
}
empty = false;
nonDigit = false;
if (value > (UINT32_MAX - c) / 8) {
warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
@@ -1089,12 +1122,13 @@ static uint32_t readOctalNumber() {
return 0;
}
value = value * 8 + c;
empty = false;
}
if (empty) {
error("Invalid integer constant, no digits after '&'");
error("Invalid integer constant, no digits after %s", prefix);
}
if (nonDigit) {
error("Invalid integer constant, trailing '_'");
}
return value;
@@ -1103,15 +1137,23 @@ static uint32_t readOctalNumber() {
static uint32_t readDecimalNumber(int initial) {
assume(isDigit(initial));
uint32_t value = initial - '0';
bool nonDigit = false;
for (int c = peek();; c = nextChar()) {
if (c == '_') {
if (nonDigit) {
error("Invalid integer constant, '_' after another '_'");
}
nonDigit = true;
continue;
} else if (isDigit(c)) {
}
if (isDigit(c)) {
c = c - '0';
} else {
break;
}
nonDigit = false;
if (value > (UINT32_MAX - c) / 10) {
warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
@@ -1122,17 +1164,28 @@ static uint32_t readDecimalNumber(int initial) {
value = value * 10 + c;
}
if (nonDigit) {
error("Invalid integer constant, trailing '_'");
}
return value;
}
static uint32_t readHexNumber() {
static uint32_t readHexNumber(char const *prefix) {
uint32_t value = 0;
bool empty = true;
bool nonDigit = false;
for (int c = peek();; c = nextChar()) {
if (c == '_' && !empty) {
if (c == '_') {
if (nonDigit) {
error("Invalid integer constant, '_' after another '_'");
}
nonDigit = true;
continue;
} else if (c >= 'a' && c <= 'f') {
}
if (c >= 'a' && c <= 'f') {
c = c - 'a' + 10;
} else if (c >= 'A' && c <= 'F') {
c = c - 'A' + 10;
@@ -1141,6 +1194,8 @@ static uint32_t readHexNumber() {
} else {
break;
}
empty = false;
nonDigit = false;
if (value > (UINT32_MAX - c) / 16) {
warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
@@ -1149,12 +1204,13 @@ static uint32_t readHexNumber() {
return 0;
}
value = value * 16 + c;
empty = false;
}
if (empty) {
error("Invalid integer constant, no digits after '$'");
error("Invalid integer constant, no digits after %s", prefix);
}
if (nonDigit) {
error("Invalid integer constant, trailing '_'");
}
return value;
@@ -1163,13 +1219,19 @@ static uint32_t readHexNumber() {
static uint32_t readGfxConstant() {
uint32_t bitPlaneLower = 0, bitPlaneUpper = 0;
uint8_t width = 0;
bool nonDigit = false;
for (int c = peek();; c = nextChar()) {
uint32_t pixel;
if (c == '_' && width > 0) {
if (c == '_') {
if (nonDigit) {
error("Invalid integer constant, '_' after another '_'");
}
nonDigit = true;
continue;
} else if (c == '0' || c == options.gfxDigits[0]) {
}
uint32_t pixel;
if (c == '0' || c == options.gfxDigits[0]) {
pixel = 0;
} else if (c == '1' || c == options.gfxDigits[1]) {
pixel = 1;
@@ -1180,6 +1242,7 @@ static uint32_t readGfxConstant() {
} else {
break;
}
nonDigit = false;
if (width < 8) {
bitPlaneLower = bitPlaneLower << 1 | (pixel & 1);
@@ -1197,6 +1260,9 @@ static uint32_t readGfxConstant() {
WARNING_LARGE_CONSTANT, "Graphics constant is too large; only first 8 pixels considered"
);
}
if (nonDigit) {
error("Invalid graphics constant, trailing '_'");
}
return bitPlaneUpper << 8 | bitPlaneLower;
}
@@ -1729,15 +1795,15 @@ static Token yylex_NORMAL() {
case 'x':
case 'X':
shiftChar();
return Token(T_(NUMBER), readHexNumber());
return Token(T_(NUMBER), readHexNumber("\"0x\""));
case 'o':
case 'O':
shiftChar();
return Token(T_(NUMBER), readOctalNumber());
return Token(T_(NUMBER), readOctalNumber("\"0o\""));
case 'b':
case 'B':
shiftChar();
return Token(T_(NUMBER), readBinaryNumber());
return Token(T_(NUMBER), readBinaryNumber("\"0b\""));
}
[[fallthrough]];
@@ -1763,20 +1829,21 @@ static Token yylex_NORMAL() {
case '&': // Either &=, binary AND, logical AND, or an octal constant
c = peek();
if (isOctDigit(c)) {
return Token(T_(NUMBER), readOctalNumber());
if (isOctDigit(c) || c == '_') {
return Token(T_(NUMBER), readOctalNumber("'&'"));
}
return oneOrTwo('=', T_(POP_ANDEQ), '&', T_(OP_LOGICAND), T_(OP_AND));
case '%': // Either %=, MOD, or a binary constant
c = peek();
if (c == '0' || c == '1' || c == options.binDigits[0] || c == options.binDigits[1]) {
return Token(T_(NUMBER), readBinaryNumber());
if (c == '0' || c == '1' || c == options.binDigits[0] || c == options.binDigits[1]
|| c == '_') {
return Token(T_(NUMBER), readBinaryNumber("'%'"));
}
return oneOrTwo('=', T_(POP_MODEQ), T_(OP_MOD));
case '$': // Hex constant
return Token(T_(NUMBER), readHexNumber());
return Token(T_(NUMBER), readHexNumber("'$'"));
case '`': // Gfx constant
return Token(T_(NUMBER), readGfxConstant());

View File

@@ -9,11 +9,11 @@ ENDM
printargs A, B, C, D
MACRO mac
println \<2__> + \<1_2> + \<\1>
println \<2> + \<1_2> + \<\1>
def x = 2
println \<{d:x}> + \<1_{d:x}> + \<\<\<13>>>
def y equs "NARG"
println \<x> + \<1_{d:x}_> + \<\<\<_{y}>>>
println \<x> + \<1_{d:x}> + \<\<\<_{y}>>>
ENDM
mac 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 1

View File

@@ -2,11 +2,11 @@ error: Invalid integer constant, no digits after '$'
at invalid-numbers.asm::try(2) <- invalid-numbers.asm(11)
error: Invalid graphics constant, no digits after '`'
at invalid-numbers.asm::try(2) <- invalid-numbers.asm(12)
error: Invalid integer constant, no digits after '%'
error: Invalid integer constant, no digits after "0b"
at invalid-numbers.asm::try(2) <- invalid-numbers.asm(13)
error: Invalid integer constant, no digits after '&'
error: Invalid integer constant, no digits after "0o"
at invalid-numbers.asm::try(2) <- invalid-numbers.asm(14)
error: Invalid integer constant, no digits after '$'
error: Invalid integer constant, no digits after "0x"
at invalid-numbers.asm::try(2) <- invalid-numbers.asm(15)
warning: Integer constant is too large [-Wlarge-constant]
at invalid-numbers.asm::try(2) <- invalid-numbers.asm(18)

View File

@@ -0,0 +1,31 @@
; good
println 123_456
println %_1010_1010
println 0b_1010_1010
println &_555_555
println 0o_777_777
println $_dead_beef
println 0x_cafe_babe
println `_0101_2323
println 12_34.56_78
println 12_34.56_q8
; bad (multiple '_')
println 123__456
println %1010__1010
println &123__456
println $abc__def
println `0101__2323
println 3.14__15
println 2.718__Q16
; bad (trailing '_')
println 12345_
println 0b101010_
println 0o123456_
println 0xabcdef_
println `01230123_
; bad ('_' next to '.')
println 1_.618
println 2._718

View File

@@ -0,0 +1,29 @@
error: Invalid integer constant, '_' after another '_'
at invalid-underscore.asm(14)
error: Invalid integer constant, '_' after another '_'
at invalid-underscore.asm(15)
error: Invalid integer constant, '_' after another '_'
at invalid-underscore.asm(16)
error: Invalid integer constant, '_' after another '_'
at invalid-underscore.asm(17)
error: Invalid integer constant, '_' after another '_'
at invalid-underscore.asm(18)
error: Invalid integer constant, '_' after another '_'
at invalid-underscore.asm(19)
error: Invalid integer constant, '_' after another '_'
at invalid-underscore.asm(20)
error: Invalid integer constant, trailing '_'
at invalid-underscore.asm(23)
error: Invalid integer constant, trailing '_'
at invalid-underscore.asm(24)
error: Invalid integer constant, trailing '_'
at invalid-underscore.asm(25)
error: Invalid integer constant, trailing '_'
at invalid-underscore.asm(26)
error: Invalid graphics constant, trailing '_'
at invalid-underscore.asm(27)
error: Invalid integer constant, trailing '_'
at invalid-underscore.asm(30)
error: Invalid integer constant, '_' after another '_'
at invalid-underscore.asm(31)
Assembly aborted with 14 errors!

View File

@@ -0,0 +1,24 @@
$1E240
$AA
$AA
$2DB6D
$3FFFF
$DEADBEEF
$CAFEBABE
$F55
$4D2915B
$4D28F
$1E240
$AA
$A72E
$ABCDEF
$F55
$32439
$2B7CF
$3039
$2A
$A72E
$ABCDEF
$3355
$19E35
$2B7CF

View File

@@ -14,15 +14,15 @@ _1234::
; with underscores
dw _1234 ; label
db 123, 1_23, 1__23 ; decimal
db 123, 1_23, 12_3 ; decimal
dw 12_345 ; decimal
dw $ab_cd ; hex
db &2_0_0_ ; octal
db &2_0_0 ; octal
db %1111_0000, %1_0 ; binary
dl 6_._283_185 ; fixed point
dw `0123_3210, `00_33_22_11_ ; gfx
dl 6.283_185 ; fixed point
dw `0123_3210, `_00_33_22_11 ; gfx
; underscores with custom digits
opt g.ABC, b.X
db %.X.X_..XX_
dw `.A.B_.C.._
db %.X.X_..XX
dw `.A.B_.C..