diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index ca5a1dd3..9a161f50 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -939,17 +939,27 @@ static uint32_t readFractionalPart(uint32_t integer) { READFRACTIONALPART_PRECISION, READFRACTIONALPART_PRECISION_DIGITS, } state = READFRACTIONALPART_DIGITS; + bool nonDigit = true; for (int c = peek();; c = nextChar()) { if (state == READFRACTIONALPART_DIGITS) { if (c == '_') { + if (nonDigit) { + error("Invalid integer constant, '_' after another '_'"); + } + nonDigit = true; continue; - } else if (c == 'q' || c == 'Q') { + } + + if (c == 'q' || c == 'Q') { state = READFRACTIONALPART_PRECISION; + nonDigit = false; // '_' is allowed before 'q'/'Q' continue; } else if (!isDigit(c)) { break; } + nonDigit = false; + if (divisor > (UINT32_MAX - (c - '0')) / 10) { warning(WARNING_LARGE_CONSTANT, "Precision of fixed-point constant is too large"); // Discard any additional digits @@ -965,6 +975,7 @@ static uint32_t readFractionalPart(uint32_t integer) { } else if (!isDigit(c)) { break; } + precision = precision * 10 + (c - '0'); } } @@ -978,6 +989,9 @@ static uint32_t readFractionalPart(uint32_t integer) { error("Fixed-point constant precision must be between 1 and 31"); precision = options.fixPrecision; } + if (nonDigit) { + error("Invalid fixed-point constant, trailing '_'"); + } if (integer >= (1ULL << (32 - precision))) { warning(WARNING_LARGE_CONSTANT, "Magnitude of fixed-point constant is too large"); @@ -1032,22 +1046,31 @@ void lexer_SetGfxDigits(char const digits[4]) { } } -static uint32_t readBinaryNumber() { +static uint32_t readBinaryNumber(char const *prefix) { uint32_t value = 0; bool empty = true; + bool nonDigit = false; for (int c = peek();; c = nextChar()) { - int bit; - - if (c == '_' && !empty) { + if (c == '_') { + if (nonDigit) { + error("Invalid integer constant, '_' after another '_'"); + } + nonDigit = true; continue; - } else if (c == '0' || c == options.binDigits[0]) { + } + + int bit; + if (c == '0' || c == options.binDigits[0]) { bit = 0; } else if (c == '1' || c == options.binDigits[1]) { bit = 1; } else { break; } + empty = false; + nonDigit = false; + if (value > (UINT32_MAX - bit) / 2) { warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); // Discard any additional digits @@ -1058,29 +1081,39 @@ static uint32_t readBinaryNumber() { return 0; } value = value * 2 + bit; - - empty = false; } if (empty) { - error("Invalid integer constant, no digits after '%%'"); + error("Invalid integer constant, no digits after %s", prefix); + } + if (nonDigit) { + error("Invalid integer constant, trailing '_'"); } return value; } -static uint32_t readOctalNumber() { +static uint32_t readOctalNumber(char const *prefix) { uint32_t value = 0; bool empty = true; + bool nonDigit = false; for (int c = peek();; c = nextChar()) { - if (c == '_' && !empty) { + if (c == '_') { + if (nonDigit) { + error("Invalid integer constant, '_' after another '_'"); + } + nonDigit = true; continue; - } else if (isOctDigit(c)) { + } + + if (isOctDigit(c)) { c = c - '0'; } else { break; } + empty = false; + nonDigit = false; if (value > (UINT32_MAX - c) / 8) { warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); @@ -1089,12 +1122,13 @@ static uint32_t readOctalNumber() { return 0; } value = value * 8 + c; - - empty = false; } if (empty) { - error("Invalid integer constant, no digits after '&'"); + error("Invalid integer constant, no digits after %s", prefix); + } + if (nonDigit) { + error("Invalid integer constant, trailing '_'"); } return value; @@ -1103,15 +1137,23 @@ static uint32_t readOctalNumber() { static uint32_t readDecimalNumber(int initial) { assume(isDigit(initial)); uint32_t value = initial - '0'; + bool nonDigit = false; for (int c = peek();; c = nextChar()) { if (c == '_') { + if (nonDigit) { + error("Invalid integer constant, '_' after another '_'"); + } + nonDigit = true; continue; - } else if (isDigit(c)) { + } + + if (isDigit(c)) { c = c - '0'; } else { break; } + nonDigit = false; if (value > (UINT32_MAX - c) / 10) { warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); @@ -1122,17 +1164,28 @@ static uint32_t readDecimalNumber(int initial) { value = value * 10 + c; } + if (nonDigit) { + error("Invalid integer constant, trailing '_'"); + } + return value; } -static uint32_t readHexNumber() { +static uint32_t readHexNumber(char const *prefix) { uint32_t value = 0; bool empty = true; + bool nonDigit = false; for (int c = peek();; c = nextChar()) { - if (c == '_' && !empty) { + if (c == '_') { + if (nonDigit) { + error("Invalid integer constant, '_' after another '_'"); + } + nonDigit = true; continue; - } else if (c >= 'a' && c <= 'f') { + } + + if (c >= 'a' && c <= 'f') { c = c - 'a' + 10; } else if (c >= 'A' && c <= 'F') { c = c - 'A' + 10; @@ -1141,6 +1194,8 @@ static uint32_t readHexNumber() { } else { break; } + empty = false; + nonDigit = false; if (value > (UINT32_MAX - c) / 16) { warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); @@ -1149,12 +1204,13 @@ static uint32_t readHexNumber() { return 0; } value = value * 16 + c; - - empty = false; } if (empty) { - error("Invalid integer constant, no digits after '$'"); + error("Invalid integer constant, no digits after %s", prefix); + } + if (nonDigit) { + error("Invalid integer constant, trailing '_'"); } return value; @@ -1163,13 +1219,19 @@ static uint32_t readHexNumber() { static uint32_t readGfxConstant() { uint32_t bitPlaneLower = 0, bitPlaneUpper = 0; uint8_t width = 0; + bool nonDigit = false; for (int c = peek();; c = nextChar()) { - uint32_t pixel; - - if (c == '_' && width > 0) { + if (c == '_') { + if (nonDigit) { + error("Invalid integer constant, '_' after another '_'"); + } + nonDigit = true; continue; - } else if (c == '0' || c == options.gfxDigits[0]) { + } + + uint32_t pixel; + if (c == '0' || c == options.gfxDigits[0]) { pixel = 0; } else if (c == '1' || c == options.gfxDigits[1]) { pixel = 1; @@ -1180,6 +1242,7 @@ static uint32_t readGfxConstant() { } else { break; } + nonDigit = false; if (width < 8) { bitPlaneLower = bitPlaneLower << 1 | (pixel & 1); @@ -1197,6 +1260,9 @@ static uint32_t readGfxConstant() { WARNING_LARGE_CONSTANT, "Graphics constant is too large; only first 8 pixels considered" ); } + if (nonDigit) { + error("Invalid graphics constant, trailing '_'"); + } return bitPlaneUpper << 8 | bitPlaneLower; } @@ -1729,15 +1795,15 @@ static Token yylex_NORMAL() { case 'x': case 'X': shiftChar(); - return Token(T_(NUMBER), readHexNumber()); + return Token(T_(NUMBER), readHexNumber("\"0x\"")); case 'o': case 'O': shiftChar(); - return Token(T_(NUMBER), readOctalNumber()); + return Token(T_(NUMBER), readOctalNumber("\"0o\"")); case 'b': case 'B': shiftChar(); - return Token(T_(NUMBER), readBinaryNumber()); + return Token(T_(NUMBER), readBinaryNumber("\"0b\"")); } [[fallthrough]]; @@ -1763,20 +1829,21 @@ static Token yylex_NORMAL() { case '&': // Either &=, binary AND, logical AND, or an octal constant c = peek(); - if (isOctDigit(c)) { - return Token(T_(NUMBER), readOctalNumber()); + if (isOctDigit(c) || c == '_') { + return Token(T_(NUMBER), readOctalNumber("'&'")); } return oneOrTwo('=', T_(POP_ANDEQ), '&', T_(OP_LOGICAND), T_(OP_AND)); case '%': // Either %=, MOD, or a binary constant c = peek(); - if (c == '0' || c == '1' || c == options.binDigits[0] || c == options.binDigits[1]) { - return Token(T_(NUMBER), readBinaryNumber()); + if (c == '0' || c == '1' || c == options.binDigits[0] || c == options.binDigits[1] + || c == '_') { + return Token(T_(NUMBER), readBinaryNumber("'%'")); } return oneOrTwo('=', T_(POP_MODEQ), T_(OP_MOD)); case '$': // Hex constant - return Token(T_(NUMBER), readHexNumber()); + return Token(T_(NUMBER), readHexNumber("'$'")); case '`': // Gfx constant return Token(T_(NUMBER), readGfxConstant()); diff --git a/test/asm/bracketed-macro-args.asm b/test/asm/bracketed-macro-args.asm index 95ccc6dc..3d44dfc3 100644 --- a/test/asm/bracketed-macro-args.asm +++ b/test/asm/bracketed-macro-args.asm @@ -9,11 +9,11 @@ ENDM printargs A, B, C, D MACRO mac - println \<2__> + \<1_2> + \<\1> + println \<2> + \<1_2> + \<\1> def x = 2 println \<{d:x}> + \<1_{d:x}> + \<\<\<13>>> def y equs "NARG" - println \ + \<1_{d:x}_> + \<\<\<_{y}>>> + println \ + \<1_{d:x}> + \<\<\<_{y}>>> ENDM mac 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 1 diff --git a/test/asm/invalid-numbers.err b/test/asm/invalid-numbers.err index 69d3520a..1fb75cc6 100644 --- a/test/asm/invalid-numbers.err +++ b/test/asm/invalid-numbers.err @@ -2,11 +2,11 @@ error: Invalid integer constant, no digits after '$' at invalid-numbers.asm::try(2) <- invalid-numbers.asm(11) error: Invalid graphics constant, no digits after '`' at invalid-numbers.asm::try(2) <- invalid-numbers.asm(12) -error: Invalid integer constant, no digits after '%' +error: Invalid integer constant, no digits after "0b" at invalid-numbers.asm::try(2) <- invalid-numbers.asm(13) -error: Invalid integer constant, no digits after '&' +error: Invalid integer constant, no digits after "0o" at invalid-numbers.asm::try(2) <- invalid-numbers.asm(14) -error: Invalid integer constant, no digits after '$' +error: Invalid integer constant, no digits after "0x" at invalid-numbers.asm::try(2) <- invalid-numbers.asm(15) warning: Integer constant is too large [-Wlarge-constant] at invalid-numbers.asm::try(2) <- invalid-numbers.asm(18) diff --git a/test/asm/invalid-underscore.asm b/test/asm/invalid-underscore.asm new file mode 100644 index 00000000..b06a830b --- /dev/null +++ b/test/asm/invalid-underscore.asm @@ -0,0 +1,31 @@ +; good +println 123_456 +println %_1010_1010 +println 0b_1010_1010 +println &_555_555 +println 0o_777_777 +println $_dead_beef +println 0x_cafe_babe +println `_0101_2323 +println 12_34.56_78 +println 12_34.56_q8 + +; bad (multiple '_') +println 123__456 +println %1010__1010 +println &123__456 +println $abc__def +println `0101__2323 +println 3.14__15 +println 2.718__Q16 + +; bad (trailing '_') +println 12345_ +println 0b101010_ +println 0o123456_ +println 0xabcdef_ +println `01230123_ + +; bad ('_' next to '.') +println 1_.618 +println 2._718 diff --git a/test/asm/invalid-underscore.err b/test/asm/invalid-underscore.err new file mode 100644 index 00000000..561e13d7 --- /dev/null +++ b/test/asm/invalid-underscore.err @@ -0,0 +1,29 @@ +error: Invalid integer constant, '_' after another '_' + at invalid-underscore.asm(14) +error: Invalid integer constant, '_' after another '_' + at invalid-underscore.asm(15) +error: Invalid integer constant, '_' after another '_' + at invalid-underscore.asm(16) +error: Invalid integer constant, '_' after another '_' + at invalid-underscore.asm(17) +error: Invalid integer constant, '_' after another '_' + at invalid-underscore.asm(18) +error: Invalid integer constant, '_' after another '_' + at invalid-underscore.asm(19) +error: Invalid integer constant, '_' after another '_' + at invalid-underscore.asm(20) +error: Invalid integer constant, trailing '_' + at invalid-underscore.asm(23) +error: Invalid integer constant, trailing '_' + at invalid-underscore.asm(24) +error: Invalid integer constant, trailing '_' + at invalid-underscore.asm(25) +error: Invalid integer constant, trailing '_' + at invalid-underscore.asm(26) +error: Invalid graphics constant, trailing '_' + at invalid-underscore.asm(27) +error: Invalid integer constant, trailing '_' + at invalid-underscore.asm(30) +error: Invalid integer constant, '_' after another '_' + at invalid-underscore.asm(31) +Assembly aborted with 14 errors! diff --git a/test/asm/invalid-underscore.out b/test/asm/invalid-underscore.out new file mode 100644 index 00000000..e7942632 --- /dev/null +++ b/test/asm/invalid-underscore.out @@ -0,0 +1,24 @@ +$1E240 +$AA +$AA +$2DB6D +$3FFFF +$DEADBEEF +$CAFEBABE +$F55 +$4D2915B +$4D28F +$1E240 +$AA +$A72E +$ABCDEF +$F55 +$32439 +$2B7CF +$3039 +$2A +$A72E +$ABCDEF +$3355 +$19E35 +$2B7CF diff --git a/test/asm/underscore-in-numeric-literal.asm b/test/asm/underscore-in-numeric-literal.asm index a27c4b13..a88439eb 100644 --- a/test/asm/underscore-in-numeric-literal.asm +++ b/test/asm/underscore-in-numeric-literal.asm @@ -14,15 +14,15 @@ _1234:: ; with underscores dw _1234 ; label - db 123, 1_23, 1__23 ; decimal + db 123, 1_23, 12_3 ; decimal dw 12_345 ; decimal dw $ab_cd ; hex - db &2_0_0_ ; octal + db &2_0_0 ; octal db %1111_0000, %1_0 ; binary - dl 6_._283_185 ; fixed point - dw `0123_3210, `00_33_22_11_ ; gfx + dl 6.283_185 ; fixed point + dw `0123_3210, `_00_33_22_11 ; gfx ; underscores with custom digits opt g.ABC, b.X - db %.X.X_..XX_ - dw `.A.B_.C.._ + db %.X.X_..XX + dw `.A.B_.C..