From 11f6278d95798196fa0099fb4c119d3205e2dbb8 Mon Sep 17 00:00:00 2001 From: Rangi <35663410+Rangi42@users.noreply.github.com> Date: Mon, 6 Apr 2026 21:45:34 -0400 Subject: [PATCH] Refactor lexing of fixed-point numbers (#1915) This incidentally fixes a bug with too-long fixed-point literals that have precision suffixes. --- src/asm/lexer.cpp | 181 ++++++++++++++++---------------- test/asm/fixed-point-syntax.asm | 8 +- test/asm/fixed-point-syntax.err | 18 +++- test/asm/fixed-point-syntax.out | 4 + test/asm/invalid-numbers.err | 2 +- 5 files changed, 116 insertions(+), 97 deletions(-) diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index 145adbc4..2f569647 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -935,87 +935,92 @@ static std::string readAnonLabelRef(char c) { return sym_MakeAnonLabelName(n, c == '-'); } -static void checkDigitSeparator(bool nonDigit) { - if (nonDigit) { - error("Invalid integer constant, '_' after another '_'"); +static void checkDigitSeparator(bool prevWasSeparator, char const *name) { + if (prevWasSeparator) { + error("Invalid %s constant, '_' after another '_'", name); } } -static void checkIntegerConstantSuffix(bool empty, bool nonDigit, char const *prefix) { +static void + checkDigitsEnding(bool empty, char const *prefix, bool prevWasSeparator, char const *name) { if (empty) { - error("Invalid integer constant, no digits after %s", prefix); + error("Invalid %s constant, no digits after %s", name, prefix); } - if (nonDigit) { - error("Invalid integer constant, trailing '_'"); + if (prevWasSeparator) { + error("Invalid %s constant, trailing '_'", name); } } -static uint32_t readFractionalPart(uint32_t integer) { - uint32_t value = 0, divisor = 1; - uint8_t precision = 0; - enum { - READFRACTIONALPART_DIGITS, - READFRACTIONALPART_PRECISION, - READFRACTIONALPART_PRECISION_DIGITS, - } state = READFRACTIONALPART_DIGITS; - bool anyDigit = false; - bool nonDigit = false; +static bool readFractionDigits(uint32_t ÷nd, uint32_t &divisor) { + bool prevWasSeparator = false; - for (int c = peek();; c = nextChar()) { - if (state == READFRACTIONALPART_DIGITS) { - if (c == '_') { - if (nonDigit) { - error("Invalid fixed-point constant, '_' after another '_'"); - } else if (!anyDigit) { - error("Invalid fixed-point constant, '_' after '.'"); - } - nonDigit = true; - continue; - } + int c = peek(); + if (c == '_') { + error("Invalid fixed-point constant, '_' after '.'"); + } - if (c == 'q' || c == 'Q') { - state = READFRACTIONALPART_PRECISION; - nonDigit = false; // '_' is allowed before 'q'/'Q' - continue; - } else if (!isDigit(c)) { - break; - } + for (;; c = nextChar()) { + if (c == '_') { + checkDigitSeparator(prevWasSeparator, "fixed-point"); + prevWasSeparator = true; + } else if (isDigit(c)) { + prevWasSeparator = false; c -= '0'; - anyDigit = true; - nonDigit = false; - if (divisor > (UINT32_MAX - c) / 10) { warning(WARNING_LARGE_CONSTANT, "Precision of fixed-point constant is too large"); // Discard any additional digits - skipChars([](int d) { return isDigit(d) || d == '_'; }); - break; + for (int d = peek(); isDigit(d) || d == '_'; c = d, d = nextChar()) {} + return c == '_'; } - value = value * 10 + c; + dividend = dividend * 10 + c; divisor *= 10; } else { - if (c == '.' && state == READFRACTIONALPART_PRECISION) { - state = READFRACTIONALPART_PRECISION_DIGITS; - continue; - } else if (!isDigit(c)) { - break; - } - - precision = precision * 10 + (c - '0'); + break; } } - if (precision == 0) { - if (state >= READFRACTIONALPART_PRECISION) { - error("Invalid fixed-point constant, no significant digits after 'q'"); - } - precision = options.fixPrecision; - } else if (precision > 31) { + return prevWasSeparator; +} + +static uint8_t readPrecisionSuffix() { + if (peek() == '.') { + shiftChar(); + } + + uint8_t precision = 0; + bool empty = true; + + // '_' is not allowed after 'q'/'Q' + for (int c = peek(); isDigit(c); c = nextChar()) { + empty = false; + precision = precision * 10 + (c - '0'); + } + + if (empty) { + error("Invalid fixed-point constant, no digits after 'q'"); + return options.fixPrecision; + } + + return precision; +} + +static uint32_t finishReadingFixedPoint(uint32_t integer) { + uint32_t dividend = 0, divisor = 1; + uint8_t precision = options.fixPrecision; + + bool prevWasSeparator = readFractionDigits(dividend, divisor); + if (int c = peek(); c == 'q' || c == 'Q') { + // '_' is allowed before 'q'/'Q', so do not call `checkDigitsEnding` + shiftChar(); + precision = readPrecisionSuffix(); + } else { + checkDigitsEnding(false, nullptr, prevWasSeparator, "fixed-point"); + } + + if (precision < 1 || precision > 31) { error("Fixed-point constant precision must be between 1 and 31"); precision = options.fixPrecision; } - if (nonDigit) { - error("Invalid fixed-point constant, trailing '_'"); - } if (integer >= (1ULL << (32 - precision))) { warning(WARNING_LARGE_CONSTANT, "Magnitude of fixed-point constant is too large"); @@ -1024,7 +1029,7 @@ static uint32_t readFractionalPart(uint32_t integer) { // Cast to unsigned avoids undefined overflow behavior uint32_t fractional = - static_cast(round(static_cast(value) / divisor * pow(2.0, precision))); + static_cast(round(static_cast(dividend) / divisor * (1ULL << precision))); return (integer << precision) | fractional; } @@ -1077,12 +1082,12 @@ void lexer_SetGfxDigits(char const digits[4]) { static uint32_t readBinaryNumber(char const *prefix) { uint32_t value = 0; bool empty = true; - bool nonDigit = false; + bool prevWasSeparator = false; for (int c = peek();; c = nextChar()) { if (c == '_') { - checkDigitSeparator(nonDigit); - nonDigit = true; + checkDigitSeparator(prevWasSeparator, "integer"); + prevWasSeparator = true; continue; } @@ -1095,7 +1100,7 @@ static uint32_t readBinaryNumber(char const *prefix) { break; } empty = false; - nonDigit = false; + prevWasSeparator = false; if (value > (UINT32_MAX - bit) / 2) { warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); @@ -1106,19 +1111,19 @@ static uint32_t readBinaryNumber(char const *prefix) { value = value * 2 + bit; } - checkIntegerConstantSuffix(empty, nonDigit, prefix); + checkDigitsEnding(empty, prefix, prevWasSeparator, "integer"); return value; } static uint32_t readOctalNumber(char const *prefix) { uint32_t value = 0; bool empty = true; - bool nonDigit = false; + bool prevWasSeparator = false; for (int c = peek();; c = nextChar()) { if (c == '_') { - checkDigitSeparator(nonDigit); - nonDigit = true; + checkDigitSeparator(prevWasSeparator, "integer"); + prevWasSeparator = true; continue; } @@ -1127,7 +1132,7 @@ static uint32_t readOctalNumber(char const *prefix) { } c -= '0'; empty = false; - nonDigit = false; + prevWasSeparator = false; if (value > (UINT32_MAX - c) / 8) { warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); @@ -1138,19 +1143,19 @@ static uint32_t readOctalNumber(char const *prefix) { value = value * 8 + c; } - checkIntegerConstantSuffix(empty, nonDigit, prefix); + checkDigitsEnding(empty, prefix, prevWasSeparator, "integer"); return value; } static uint32_t readDecimalNumber(int initial) { assume(isDigit(initial)); uint32_t value = initial - '0'; - bool nonDigit = false; + bool prevWasSeparator = false; for (int c = peek();; c = nextChar()) { if (c == '_') { - checkDigitSeparator(nonDigit); - nonDigit = true; + checkDigitSeparator(prevWasSeparator, "integer"); + prevWasSeparator = true; continue; } @@ -1158,7 +1163,7 @@ static uint32_t readDecimalNumber(int initial) { break; } c -= '0'; - nonDigit = false; + prevWasSeparator = false; if (value > (UINT32_MAX - c) / 10) { warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); @@ -1169,19 +1174,19 @@ static uint32_t readDecimalNumber(int initial) { value = value * 10 + c; } - checkIntegerConstantSuffix(false, nonDigit, nullptr); + checkDigitsEnding(false, nullptr, prevWasSeparator, "integer"); return value; } static uint32_t readHexNumber(char const *prefix) { uint32_t value = 0; bool empty = true; - bool nonDigit = false; + bool prevWasSeparator = false; for (int c = peek();; c = nextChar()) { if (c == '_') { - checkDigitSeparator(nonDigit); - nonDigit = true; + checkDigitSeparator(prevWasSeparator, "integer"); + prevWasSeparator = true; continue; } @@ -1190,7 +1195,7 @@ static uint32_t readHexNumber(char const *prefix) { } c = parseHexDigit(c); empty = false; - nonDigit = false; + prevWasSeparator = false; if (value > (UINT32_MAX - c) / 16) { warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); @@ -1201,19 +1206,19 @@ static uint32_t readHexNumber(char const *prefix) { value = value * 16 + c; } - checkIntegerConstantSuffix(empty, nonDigit, prefix); + checkDigitsEnding(empty, prefix, prevWasSeparator, "integer"); return value; } static uint32_t readGfxConstant() { uint32_t bitPlaneLower = 0, bitPlaneUpper = 0; uint8_t width = 0; - bool nonDigit = false; + bool prevWasSeparator = false; for (int c = peek();; c = nextChar()) { if (c == '_') { - checkDigitSeparator(nonDigit); - nonDigit = true; + checkDigitSeparator(prevWasSeparator, "integer"); + prevWasSeparator = true; continue; } @@ -1229,7 +1234,7 @@ static uint32_t readGfxConstant() { } else { break; } - nonDigit = false; + prevWasSeparator = false; if (width < 8) { bitPlaneLower = bitPlaneLower << 1 | (pixel & 1); @@ -1240,16 +1245,12 @@ static uint32_t readGfxConstant() { } } - if (width == 0) { - error("Invalid graphics constant, no digits after '`'"); - } else if (width == 9) { + checkDigitsEnding(width == 0, "'`'", prevWasSeparator, "graphics"); + if (width == 9) { warning( WARNING_LARGE_CONSTANT, "Graphics constant is too large; only first 8 pixels considered" ); } - if (nonDigit) { - error("Invalid graphics constant, trailing '_'"); - } return bitPlaneUpper << 8 | bitPlaneLower; } @@ -1833,7 +1834,7 @@ static Token yylex_NORMAL() { if (peek() == '.') { shiftChar(); - n = readFractionalPart(n); + n = finishReadingFixedPoint(n); } return Token(T_(NUMBER), n); } diff --git a/test/asm/fixed-point-syntax.asm b/test/asm/fixed-point-syntax.asm index e0eeb172..6a0d7e4a 100644 --- a/test/asm/fixed-point-syntax.asm +++ b/test/asm/fixed-point-syntax.asm @@ -8,8 +8,12 @@ println 1.q2 ; bad println 12.34q0 -println 12.34q_15 -println 12.34q1_5 +println 12.34q_15 ; lexes as `12.34q` (invalid) then symbol `_15` +println 12.34q1_5 ; lexes as `12.34q1` (valid) then symbol `_5` println 1_.2 println 1._2 +println 1.__2 println 1.2q +println 1.999_999_999_999_999 +println 1.999_999_999_999_999q16 +println 1.999_999_999_999_999q.16 diff --git a/test/asm/fixed-point-syntax.err b/test/asm/fixed-point-syntax.err index 388d7f50..4c0e0f0b 100644 --- a/test/asm/fixed-point-syntax.err +++ b/test/asm/fixed-point-syntax.err @@ -1,6 +1,6 @@ -error: Invalid fixed-point constant, no significant digits after 'q' +error: Fixed-point constant precision must be between 1 and 31 at fixed-point-syntax.asm(10) -error: Invalid fixed-point constant, no significant digits after 'q' +error: Invalid fixed-point constant, no digits after 'q' at fixed-point-syntax.asm(11) error: syntax error, unexpected symbol at fixed-point-syntax.asm(11) @@ -10,6 +10,16 @@ error: Invalid integer constant, trailing '_' at fixed-point-syntax.asm(13) error: Invalid fixed-point constant, '_' after '.' at fixed-point-syntax.asm(14) -error: Invalid fixed-point constant, no significant digits after 'q' +error: Invalid fixed-point constant, '_' after '.' at fixed-point-syntax.asm(15) -Assembly aborted with 7 errors +error: Invalid fixed-point constant, '_' after another '_' + at fixed-point-syntax.asm(15) +error: Invalid fixed-point constant, no digits after 'q' + at fixed-point-syntax.asm(16) +warning: Precision of fixed-point constant is too large [-Wlarge-constant] + at fixed-point-syntax.asm(17) +warning: Precision of fixed-point constant is too large [-Wlarge-constant] + at fixed-point-syntax.asm(18) +warning: Precision of fixed-point constant is too large [-Wlarge-constant] + at fixed-point-syntax.asm(19) +Assembly aborted with 9 errors diff --git a/test/asm/fixed-point-syntax.out b/test/asm/fixed-point-syntax.out index cde3a113..cac04af4 100644 --- a/test/asm/fixed-point-syntax.out +++ b/test/asm/fixed-point-syntax.out @@ -8,3 +8,7 @@ $C570A $13333 $13333 $13333 +$13333 +$10000 +$10000 +$10000 diff --git a/test/asm/invalid-numbers.err b/test/asm/invalid-numbers.err index 853cf0ea..2ee19770 100644 --- a/test/asm/invalid-numbers.err +++ b/test/asm/invalid-numbers.err @@ -20,7 +20,7 @@ warning: Graphics constant is too large; only first 8 pixels considered [-Wlarge at invalid-numbers.asm::try(2) <- invalid-numbers.asm(22) warning: Magnitude of fixed-point constant is too large [-Wlarge-constant] at invalid-numbers.asm::try(2) <- invalid-numbers.asm(23) -error: Invalid fixed-point constant, no significant digits after 'q' +error: Invalid fixed-point constant, no digits after 'q' at invalid-numbers.asm::try(2) <- invalid-numbers.asm(26) error: Fixed-point constant precision must be between 1 and 31 at invalid-numbers.asm::try(2) <- invalid-numbers.asm(29)