Use templates to reduce the redundant number-lexing functions (#1963)

This commit is contained in:
Rangi
2026-05-21 17:13:09 -04:00
committed by GitHub
parent 1871b0a0b8
commit 48fcd9a0ca
8 changed files with 121 additions and 216 deletions
+29 -5
View File
@@ -29,10 +29,6 @@ bool isPrintable(int c);
bool isUpper(int c);
bool isLower(int c);
bool isLetter(int c);
bool isDigit(int c);
bool isBinDigit(int c);
bool isOctDigit(int c);
bool isHexDigit(int c);
bool isAlphanumeric(int c);
// Locale-independent character transform functions
@@ -42,7 +38,35 @@ char toUpper(char c);
bool startsIdentifier(int c);
bool continuesIdentifier(int c);
uint8_t parseHexDigit(int c);
// Locale-independent test for whether `c` is a digit in the given `Base`.
// Bases up to 10 accept a prefix of '0'-'9'; larger bases additionally accept
// the first `Base - 10` letters, in either case ('A'/'a' stands for ten).
template<uint32_t Base>
bool isDigit(int c) {
	static_assert(Base <= 36, "Base must be 36 or less to allow digits 0-9A-Z");
	if constexpr (Base > 10) {
		// Number of letters that count as digits on top of '0'-'9'
		constexpr int letterCount = static_cast<int>(Base) - 10;
		if (isDigit<10>(c)) {
			return true;
		}
		bool const isUpperDigit = 'A' <= c && c < 'A' + letterCount;
		bool const isLowerDigit = 'a' <= c && c < 'a' + letterCount;
		return isUpperDigit || isLowerDigit;
	} else {
		// One past the largest decimal digit character valid in this base
		constexpr int endDigit = '0' + static_cast<int>(Base);
		return '0' <= c && c < endDigit;
	}
}
// Converts a digit character to its numeric value in the given `Base`.
// The caller must pass a character for which `isDigit<Base>(c)` holds;
// this precondition is stated through `assume`.
template<uint32_t Base>
uint8_t parseDigit(int c) {
	static_assert(Base <= 36, "Base must be 36 or less to allow digits 0-9A-Z");
	assume(isDigit<Base>(c));
	if constexpr (Base > 10) {
		// The ranges are tested from the highest character codes downwards:
		// lowercase letters first, then uppercase letters, then decimal digits
		if (c >= 'a') {
			return c - 'a' + 10;
		}
		if (c >= 'A') {
			return c - 'A' + 10;
		}
		return parseDigit<10>(c);
	} else {
		return c - '0';
	}
}
std::optional<uint64_t> parseNumber(char const *&str, NumberBase base = BASE_AUTO);
std::optional<uint64_t> parseWholeNumber(char const *str, NumberBase base = BASE_AUTO);
+1 -1
View File
@@ -47,7 +47,7 @@ size_t FormatSpec::parseSpec(char const *spec) {
padZero = true;
}
// <width>
if (isDigit(spec[i])) {
if (isDigit<10>(spec[i])) {
width = parseSpecNumber();
}
// <frac>
+53 -120
View File
@@ -526,7 +526,8 @@ static int peek();
static void shiftChar();
static int bumpChar();
static int nextChar();
static uint32_t readDecimalNumber(int initial);
template<uint32_t Base>
static uint32_t readNumber(int initial, char const *prefix);
static uint32_t readBracketedMacroArgNum() {
bool enableExpansions = lexerState->enableExpansions;
@@ -543,8 +544,8 @@ static uint32_t readBracketedMacroArgNum() {
c = nextChar();
}
if (isDigit(c)) {
uint32_t n = readDecimalNumber(bumpChar());
if (isDigit<10>(c)) {
uint32_t n = readNumber<10>(bumpChar(), nullptr);
if (n > INT32_MAX) {
error("Number in bracketed macro argument is too large");
return 0;
@@ -968,7 +969,7 @@ static std::tuple<uint32_t, uint32_t, bool> readFractionDigits() {
if (c == '_') {
checkDigitSeparator(prevWasSeparator, "fixed-point");
prevWasSeparator = true;
} else if (isDigit(c)) {
} else if (isDigit<10>(c)) {
prevWasSeparator = false;
int digit = c - '0';
if (dividend > (UINT32_MAX - digit) / 10 || divisor > UINT32_MAX / 10) {
@@ -976,7 +977,7 @@ static std::tuple<uint32_t, uint32_t, bool> readFractionDigits() {
WARNING_LARGE_CONSTANT, "Fixed-point constant has too many fractional digits"
);
// Discard any additional digits
for (int d = peek(); isDigit(d) || d == '_'; c = d, d = nextChar()) {}
for (int d = peek(); isDigit<10>(d) || d == '_'; c = d, d = nextChar()) {}
return {dividend, divisor, c == '_'};
}
dividend = dividend * 10 + digit;
@@ -998,12 +999,12 @@ static uint8_t readPrecisionSuffix() {
bool empty = true;
// '_' is not allowed after 'q'/'Q'
for (int c = peek(); isDigit(c); c = nextChar()) {
for (int c = peek(); isDigit<10>(c); c = nextChar()) {
empty = false;
int digit = c - '0';
if (precision > (UINT8_MAX - digit) / 10) {
// Discard any additional digits
skipChars(isDigit);
skipChars(isDigit<10>);
// Return an invalid precision to cause a subsequent error, which is checked afterwards
// to cover the default `options.fixPrecision` as well, just in case
return UINT8_MAX;
@@ -1051,8 +1052,13 @@ static bool isValidDigit(char c) {
return isAlphanumeric(c) || c == '.' || c == '#' || c == '@';
}
static bool isCustomBinDigit(int c) {
return isBinDigit(c) || c == options.binDigits[0] || c == options.binDigits[1];
// Whether `c` can appear in a binary constant: either a plain '0'/'1',
// or one of the two characters stored in `options.binDigits`.
static bool isAsmBinDigit(int c) {
	if (isDigit<2>(c)) {
		return true;
	}
	return c == options.binDigits[0] || c == options.binDigits[1];
}
// Converts a binary-constant digit character to its bit value (0 or 1).
// The caller must pass a character accepted by `isAsmBinDigit`;
// this precondition is stated through `assume`.
static uint8_t parseAsmBinDigit(int c) {
	assume(isAsmBinDigit(c));
	// '1' and the second entry of `options.binDigits` mean a set bit;
	// every other accepted character means a clear bit
	bool const isOne = c == '1' || c == options.binDigits[1];
	return isOne ? 1 : 0;
}
static bool checkDigitErrors(char const *digits, size_t n, char const *type) {
@@ -1092,45 +1098,35 @@ void lexer_SetGfxDigits(char const digits[4]) {
}
}
static uint32_t readBinaryNumber(char const *prefix) {
uint32_t number = 0;
bool empty = true;
bool prevWasSeparator = false;
for (int c = peek();; c = nextChar()) {
if (c == '_') {
checkDigitSeparator(prevWasSeparator, "integer");
prevWasSeparator = true;
continue;
}
int bit;
if (c == '0' || c == options.binDigits[0]) {
bit = 0;
} else if (c == '1' || c == options.binDigits[1]) {
bit = 1;
template<uint32_t Base>
static uint32_t readNumber(int initial, char const *prefix) {
auto isSomeDigit = [](int c) {
if constexpr (Base == 2) {
return isAsmBinDigit(c);
} else {
break;
return isDigit<Base>(c);
}
empty = false;
prevWasSeparator = false;
};
auto parseSomeDigit = [](int c) {
if constexpr (Base == 2) {
return parseAsmBinDigit(c);
} else {
return parseDigit<Base>(c);
}
};
if (number > (UINT32_MAX - bit) / 2) {
warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
// Discard any additional digits
skipChars([](int d) { return isCustomBinDigit(d) || d == '_'; });
return 0;
}
number = number * 2 + bit;
uint32_t number;
bool empty;
if constexpr (Base == 10) {
assume(prefix == nullptr);
number = parseSomeDigit(initial);
empty = false;
} else {
assume(initial == 0 && prefix != nullptr);
number = 0;
empty = true;
}
checkDigitsEnding(empty, prefix, prevWasSeparator, "integer");
return number;
}
static uint32_t readOctalNumber(char const *prefix) {
uint32_t number = 0;
bool empty = true;
bool prevWasSeparator = false;
for (int c = peek();; c = nextChar()) {
@@ -1140,83 +1136,20 @@ static uint32_t readOctalNumber(char const *prefix) {
continue;
}
if (!isOctDigit(c)) {
if (!isSomeDigit(c)) {
break;
}
int digit = c - '0';
int digit = parseSomeDigit(c);
empty = false;
prevWasSeparator = false;
if (number > (UINT32_MAX - digit) / 8) {
if (number > (UINT32_MAX - digit) / Base) {
warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
// Discard any additional digits
skipChars([](int d) { return isOctDigit(d) || d == '_'; });
skipChars([&isSomeDigit](int d) { return isSomeDigit(d) || d == '_'; });
return 0;
}
number = number * 8 + digit;
}
checkDigitsEnding(empty, prefix, prevWasSeparator, "integer");
return number;
}
static uint32_t readDecimalNumber(int initial) {
assume(isDigit(initial));
uint32_t number = initial - '0';
bool prevWasSeparator = false;
for (int c = peek();; c = nextChar()) {
if (c == '_') {
checkDigitSeparator(prevWasSeparator, "integer");
prevWasSeparator = true;
continue;
}
if (!isDigit(c)) {
break;
}
int digit = c - '0';
prevWasSeparator = false;
if (number > (UINT32_MAX - digit) / 10) {
warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
// Discard any additional digits
skipChars([](int d) { return isDigit(d) || d == '_'; });
return 0;
}
number = number * 10 + digit;
}
checkDigitsEnding(false, nullptr, prevWasSeparator, "integer");
return number;
}
static uint32_t readHexNumber(char const *prefix) {
uint32_t number = 0;
bool empty = true;
bool prevWasSeparator = false;
for (int c = peek();; c = nextChar()) {
if (c == '_') {
checkDigitSeparator(prevWasSeparator, "integer");
prevWasSeparator = true;
continue;
}
if (!isHexDigit(c)) {
break;
}
int digit = parseHexDigit(c);
empty = false;
prevWasSeparator = false;
if (number > (UINT32_MAX - digit) / 16) {
warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
// Discard any additional digits
skipChars([](int d) { return isHexDigit(d) || d == '_'; });
return 0;
}
number = number * 16 + digit;
number = number * Base + digit;
}
checkDigitsEnding(empty, prefix, prevWasSeparator, "integer");
@@ -1830,15 +1763,15 @@ static Token yylex_NORMAL() {
case 'x':
case 'X':
shiftChar();
return Token(T_(NUMBER), readHexNumber("\"0x\""));
return Token(T_(NUMBER), readNumber<16>(0, "\"0x\""));
case 'o':
case 'O':
shiftChar();
return Token(T_(NUMBER), readOctalNumber("\"0o\""));
return Token(T_(NUMBER), readNumber<8>(0, "\"0o\""));
case 'b':
case 'B':
shiftChar();
return Token(T_(NUMBER), readBinaryNumber("\"0b\""));
return Token(T_(NUMBER), readNumber<2>(0, "\"0b\""));
}
[[fallthrough]];
@@ -1853,7 +1786,7 @@ static Token yylex_NORMAL() {
case '7':
case '8':
case '9': {
uint32_t n = readDecimalNumber(c);
uint32_t n = readNumber<10>(c, nullptr);
if (peek() == '.') {
shiftChar();
@@ -1864,20 +1797,20 @@ static Token yylex_NORMAL() {
case '&': // Either &=, binary AND, logical AND, or an octal constant
c = peek();
if (isOctDigit(c) || c == '_') {
return Token(T_(NUMBER), readOctalNumber("'&'"));
if (isDigit<8>(c) || c == '_') {
return Token(T_(NUMBER), readNumber<8>(0, "'&'"));
}
return oneOrTwo('=', T_(POP_ANDEQ), '&', T_(OP_LOGICAND), T_(OP_AND));
case '%': // Either %=, MOD, or a binary constant
c = peek();
if (isCustomBinDigit(c) || c == '_') {
return Token(T_(NUMBER), readBinaryNumber("'%'"));
if (isAsmBinDigit(c) || c == '_') {
return Token(T_(NUMBER), readNumber<2>(0, "'%'"));
}
return oneOrTwo('=', T_(POP_MODEQ), T_(OP_MOD));
case '$': // Hex constant
return Token(T_(NUMBER), readHexNumber("'$'"));
return Token(T_(NUMBER), readNumber<16>(0, "'$'"));
case '`': // Gfx constant
return Token(T_(NUMBER), readGfxConstant());
+1 -1
View File
@@ -11,7 +11,7 @@
#include "helpers.hpp"
#include "style.hpp"
#include "util.hpp" // isDigit
#include "util.hpp" // parseNumber
void warnx(char const *fmt, ...) {
va_list ap;
+1 -1
View File
@@ -128,7 +128,7 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor)
}
// Parse numeric MBC and return it as-is (unless it's too large)
if (char c = *ptr; isDigit(c) || c == '$' || c == '&' || c == '%') {
if (char c = *ptr; isDigit<10>(c) || c == '$' || c == '&' || c == '%') {
if (std::optional<uint64_t> mbc = parseWholeNumber(ptr); !mbc) {
fatalUnknownMBC(name);
} else if (*mbc > 0xFF) {
+2 -2
View File
@@ -21,7 +21,7 @@
#include "diagnostics.hpp"
#include "helpers.hpp"
#include "platform.hpp"
#include "util.hpp" // UpperMap, isDigit
#include "util.hpp" // UpperMap, parseDigit
#include "gfx/main.hpp"
#include "gfx/png.hpp"
@@ -37,7 +37,7 @@ static void skipBlankSpace(std::string_view const &str, size_t &pos) {
}
static uint8_t toHex(char c1, char c2) {
return parseHexDigit(c1) * 16 + parseHexDigit(c2);
return parseDigit<16>(c1) * 16 + parseDigit<16>(c2);
}
static uint8_t singleToHex(char c) {
+23 -48
View File
@@ -94,66 +94,41 @@ static std::string readKeyword(int initial) {
return keyword;
}
static yy::parser::symbol_type parseDecNumber(int initial) {
template<uint32_t Base>
static yy::parser::symbol_type readNumber(int initial, char const *prefix, char const *name) {
LexerStackEntry &context = lexerStack.back();
uint32_t number = initial - '0';
for (int c = context.file.sgetc(); isDigit(c) || c == '_'; c = context.file.snextc()) {
uint32_t number;
if constexpr (Base == 10) {
assume(prefix == nullptr && name == nullptr);
number = parseDigit<Base>(initial);
} else {
assume(initial == 0 && prefix != nullptr && name != nullptr);
int c = context.file.sgetc();
if (!isDigit<Base>(c)) {
scriptError("No %s digits found after %s", name, prefix);
return yy::parser::make_number(0);
}
number = parseDigit<Base>(c);
context.file.sbumpc();
}
for (int c = context.file.sgetc(); isDigit<Base>(c) || c == '_'; c = context.file.snextc()) {
if (c != '_') {
number = number * 10 + (c - '0');
number = number * Base + parseDigit<Base>(c);
}
}
return yy::parser::make_number(number);
}
static yy::parser::symbol_type parseBinNumber(char const *prefix) {
LexerStackEntry &context = lexerStack.back();
int c = context.file.sgetc();
if (!isBinDigit(c)) {
scriptError("No binary digits found after %s", prefix);
return yy::parser::make_number(0);
}
uint32_t number = c - '0';
for (c = context.file.snextc(); isBinDigit(c) || c == '_'; c = context.file.snextc()) {
if (c != '_') {
number = number * 2 + (c - '0');
}
}
return yy::parser::make_number(number);
return readNumber<2>(0, prefix, "binary");
}
static yy::parser::symbol_type parseOctNumber(char const *prefix) {
LexerStackEntry &context = lexerStack.back();
int c = context.file.sgetc();
if (!isOctDigit(c)) {
scriptError("No octal digits found after %s", prefix);
return yy::parser::make_number(0);
}
uint32_t number = c - '0';
for (c = context.file.snextc(); isOctDigit(c) || c == '_'; c = context.file.snextc()) {
if (c != '_') {
number = number * 8 + (c - '0');
}
}
return yy::parser::make_number(number);
return readNumber<8>(0, prefix, "octal");
}
static yy::parser::symbol_type parseHexNumber(char const *prefix) {
LexerStackEntry &context = lexerStack.back();
int c = context.file.sgetc();
if (!isHexDigit(c)) {
scriptError("No hexadecimal digits found after %s", prefix);
return yy::parser::make_number(0);
}
uint32_t number = parseHexDigit(c);
for (c = context.file.snextc(); isHexDigit(c) || c == '_'; c = context.file.snextc()) {
if (c != '_') {
number = number * 16 + parseHexDigit(c);
}
}
return yy::parser::make_number(number);
return readNumber<16>(0, prefix, "hexadecimal");
}
static yy::parser::symbol_type parseAnyNumber(int initial) {
@@ -174,7 +149,7 @@ static yy::parser::symbol_type parseAnyNumber(int initial) {
return parseBinNumber("\"0b\"");
}
}
return parseDecNumber(initial);
return readNumber<10>(initial, nullptr, nullptr);
}
static yy::parser::symbol_type parseString() {
@@ -245,7 +220,7 @@ yy::parser::symbol_type yylex() {
return parseBinNumber("'%'");
} else if (c == '&') {
return parseOctNumber("'&'");
} else if (isDigit(c)) {
} else if (isDigit<10>(c)) {
return parseAnyNumber(c);
} else if (isLetter(c)) {
std::string keyword = readKeyword(c);
+11 -38
View File
@@ -38,24 +38,8 @@ bool isLetter(int c) {
return isUpper(c) || isLower(c);
}
bool isDigit(int c) {
return c >= '0' && c <= '9';
}
bool isBinDigit(int c) {
return c == '0' || c == '1';
}
bool isOctDigit(int c) {
return c >= '0' && c <= '7';
}
bool isHexDigit(int c) {
return isDigit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
}
bool isAlphanumeric(int c) {
return isLetter(c) || isDigit(c);
return isLetter(c) || isDigit<10>(c);
}
char toLower(char c) {
@@ -73,18 +57,7 @@ bool startsIdentifier(int c) {
}
bool continuesIdentifier(int c) {
return startsIdentifier(c) || isDigit(c) || c == '#' || c == '$' || c == '@';
}
uint8_t parseHexDigit(int c) {
if (c >= 'A' && c <= 'F') {
return c - 'A' + 10;
} else if (c >= 'a' && c <= 'f') {
return c - 'a' + 10;
} else {
assume(isDigit(c));
return c - '0';
}
return startsIdentifier(c) || isDigit<10>(c) || c == '#' || c == '$' || c == '@';
}
// Parses a number from a string, moving the pointer to skip the parsed characters.
@@ -134,23 +107,23 @@ std::optional<uint64_t> parseNumber(char const *&str, NumberBase base) {
}
// Get the digit-condition function corresponding to the base
bool (*canParseDigit)(int c) = base == BASE_2 ? isBinDigit
: base == BASE_8 ? isOctDigit
: base == BASE_10 ? isDigit
: base == BASE_16 ? isHexDigit
: nullptr; // LCOV_EXCL_LINE
assume(canParseDigit != nullptr);
bool (*isSomeDigit)(int c) = base == BASE_2 ? isDigit<2>
: base == BASE_8 ? isDigit<8>
: base == BASE_10 ? isDigit<10>
: base == BASE_16 ? isDigit<16>
: nullptr; // LCOV_EXCL_LINE
assume(isSomeDigit != nullptr);
char const * const startDigits = str;
// Parse the number one digit at a time
// Does *not* support '_' digit separators
uint64_t result = 0;
for (; canParseDigit(str[0]); ++str) {
uint8_t digit = parseHexDigit(str[0]);
for (; isSomeDigit(str[0]); ++str) {
uint8_t digit = parseDigit<16>(str[0]);
if (result > (UINT64_MAX - digit) / base) {
// Skip remaining digits and set errno = ERANGE on overflow
while (canParseDigit(str[0])) {
while (isSomeDigit(str[0])) {
++str;
}
result = UINT64_MAX;