From 386fb5f3987b24d11d46c1c0d9f8dc8f432e4444 Mon Sep 17 00:00:00 2001 From: Rangi42 Date: Tue, 19 Aug 2025 19:12:58 -0400 Subject: [PATCH] Add more character utility functions --- Makefile | 9 +++------ include/util.hpp | 5 +++++ src/CMakeLists.txt | 5 +---- src/asm/lexer.cpp | 30 +++++++++++++----------------- src/diagnostics.cpp | 3 ++- src/fix/mbc.cpp | 4 ++-- src/gfx/pal_spec.cpp | 4 ++-- src/link/lexer.cpp | 44 ++++++++++++++------------------------------ src/util.cpp | 24 ++++++++++++++++++++++-- 9 files changed, 64 insertions(+), 64 deletions(-) diff --git a/Makefile b/Makefile index 373cf02a..f84101ca 100644 --- a/Makefile +++ b/Makefile @@ -53,7 +53,8 @@ common_obj := \ src/extern/getopt.o \ src/diagnostics.o \ src/style.o \ - src/usage.o + src/usage.o \ + src/util.o rgbasm_obj := \ ${common_obj} \ @@ -76,7 +77,6 @@ rgbasm_obj := \ src/backtrace.o \ src/linkdefs.o \ src/opmath.o \ - src/util.o \ src/verbosity.o src/asm/lexer.o src/asm/main.o: src/asm/parser.hpp @@ -100,7 +100,6 @@ rgblink_obj := \ src/backtrace.o \ src/linkdefs.o \ src/opmath.o \ - src/util.o \ src/verbosity.o src/link/lexer.o src/link/main.o: src/link/script.hpp @@ -109,8 +108,7 @@ rgbfix_obj := \ ${common_obj} \ src/fix/main.o \ src/fix/mbc.o \ - src/fix/warning.o \ - src/util.o + src/fix/warning.o rgbgfx_obj := \ ${common_obj} \ @@ -124,7 +122,6 @@ rgbgfx_obj := \ src/gfx/reverse.o \ src/gfx/rgba.o \ src/gfx/warning.o \ - src/util.o \ src/verbosity.o rgbasm: ${rgbasm_obj} diff --git a/include/util.hpp b/include/util.hpp index 929a2f77..de10995c 100644 --- a/include/util.hpp +++ b/include/util.hpp @@ -16,6 +16,11 @@ bool isNewline(int c); bool isBlankSpace(int c); bool isWhitespace(int c); bool isPrintable(int c); +bool isLetter(int c); +bool isDigit(int c); +bool isOctDigit(int c); +bool isHexDigit(int c); +bool isAlphanumeric(int c); bool startsIdentifier(int c); bool continuesIdentifier(int c); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cfc9b692..be7143ba 100644 --- 
a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -7,6 +7,7 @@ set(common_src "diagnostics.cpp" "style.cpp" "usage.cpp" + "util.cpp" "_version.cpp" ) @@ -54,7 +55,6 @@ set(rgbasm_src "backtrace.cpp" "linkdefs.cpp" "opmath.cpp" - "util.cpp" "verbosity.cpp" ) @@ -76,7 +76,6 @@ set(rgblink_src "backtrace.cpp" "linkdefs.cpp" "opmath.cpp" - "util.cpp" "verbosity.cpp" ) @@ -84,7 +83,6 @@ set(rgbfix_src "fix/main.cpp" "fix/mbc.cpp" "fix/warning.cpp" - "util.cpp" ) set(rgbgfx_src @@ -98,7 +96,6 @@ set(rgbgfx_src "gfx/reverse.cpp" "gfx/rgba.cpp" "gfx/warning.cpp" - "util.cpp" "verbosity.cpp" ) diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index bb8dcd9d..aa59f45b 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -533,7 +533,7 @@ static uint32_t readBracketedMacroArgNum() { c = nextChar(); } - if (c >= '0' && c <= '9') { + if (isDigit(c)) { uint32_t n = readDecimalNumber(bumpChar()); if (n > INT32_MAX) { error("Number in bracketed macro argument is too large"); @@ -940,13 +940,13 @@ static uint32_t readFractionalPart(uint32_t integer) { } else if (c == 'q' || c == 'Q') { state = READFRACTIONALPART_PRECISION; continue; - } else if (c < '0' || c > '9') { + } else if (!isDigit(c)) { break; } if (divisor > (UINT32_MAX - (c - '0')) / 10) { warning(WARNING_LARGE_CONSTANT, "Precision of fixed-point constant is too large"); // Discard any additional digits - skipChars([](int d) { return (d >= '0' && d <= '9') || d == '_'; }); + skipChars([](int d) { return isDigit(d) || d == '_'; }); break; } value = value * 10 + (c - '0'); @@ -955,7 +955,7 @@ static uint32_t readFractionalPart(uint32_t integer) { if (c == '.' 
&& state == READFRACTIONALPART_PRECISION) { state = READFRACTIONALPART_PRECISION_DIGITS; continue; - } else if (c < '0' || c > '9') { + } else if (!isDigit(c)) { break; } precision = precision * 10 + (c - '0'); @@ -985,8 +985,7 @@ static uint32_t readFractionalPart(uint32_t integer) { } static bool isValidDigit(char c) { - return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '.' - || c == '#' || c == '@'; + return isAlphanumeric(c) || c == '.' || c == '#' || c == '@'; } static bool checkDigitErrors(char const *digits, size_t n, char const *type) { @@ -1070,7 +1069,7 @@ static uint32_t readOctalNumber() { for (int c = peek();; c = nextChar()) { if (c == '_' && !empty) { continue; - } else if (c >= '0' && c <= '7') { + } else if (isOctDigit(c)) { c = c - '0'; } else { break; @@ -1079,7 +1078,7 @@ static uint32_t readOctalNumber() { if (value > (UINT32_MAX - c) / 8) { warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); // Discard any additional digits - skipChars([](int d) { return (d >= '0' && d <= '7') || d == '_'; }); + skipChars([](int d) { return isOctDigit(d) || d == '_'; }); return 0; } value = value * 8 + c; @@ -1095,13 +1094,13 @@ static uint32_t readOctalNumber() { } static uint32_t readDecimalNumber(int initial) { - assume(initial >= '0' && initial <= '9'); + assume(isDigit(initial)); uint32_t value = initial - '0'; for (int c = peek();; c = nextChar()) { if (c == '_') { continue; - } else if (c >= '0' && c <= '9') { + } else if (isDigit(c)) { c = c - '0'; } else { break; @@ -1110,7 +1109,7 @@ static uint32_t readDecimalNumber(int initial) { if (value > (UINT32_MAX - c) / 10) { warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); // Discard any additional digits - skipChars([](int d) { return (d >= '0' && d <= '9') || d == '_'; }); + skipChars([](int d) { return isDigit(d) || d == '_'; }); return 0; } value = value * 10 + c; @@ -1130,7 +1129,7 @@ static uint32_t readHexNumber() { c = c - 'a' + 
10; } else if (c >= 'A' && c <= 'F') { c = c - 'A' + 10; - } else if (c >= '0' && c <= '9') { + } else if (isDigit(c)) { c = c - '0'; } else { break; @@ -1139,10 +1138,7 @@ static uint32_t readHexNumber() { if (value > (UINT32_MAX - c) / 16) { warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); // Discard any additional digits - skipChars([](int d) { - return (d >= '0' && d <= '9') || (d >= 'a' && d <= 'f') || (d >= 'A' && d <= 'f') - || d == '_'; - }); + skipChars([](int d) { return isHexDigit(d) || d == '_'; }); return 0; } value = value * 16 + c; @@ -1825,7 +1821,7 @@ static Token yylex_NORMAL() { } else if (c == '&') { shiftChar(); return Token(T_(OP_LOGICAND)); - } else if (c >= '0' && c <= '7') { + } else if (isOctDigit(c)) { return Token(T_(NUMBER), readOctalNumber()); } return Token(T_(OP_AND)); diff --git a/src/diagnostics.cpp b/src/diagnostics.cpp index bbdfcefe..d1ead143 100644 --- a/src/diagnostics.cpp +++ b/src/diagnostics.cpp @@ -3,6 +3,7 @@ #include "diagnostics.hpp" #include "style.hpp" +#include "util.hpp" // isDigit void warnx(char const *fmt, ...) 
{ va_list ap; @@ -60,7 +61,7 @@ std::pair<WarningState, std::optional<uint32_t>> getInitialWarningState(std::str uint32_t param = 0; bool overflowed = false; - for (; *ptr >= '0' && *ptr <= '9'; ++ptr) { + for (; isDigit(*ptr); ++ptr) { if (overflowed) { continue; } diff --git a/src/fix/mbc.cpp b/src/fix/mbc.cpp index f1f6ad91..7419476b 100644 --- a/src/fix/mbc.cpp +++ b/src/fix/mbc.cpp @@ -8,7 +8,7 @@ #include "helpers.hpp" // unreachable_ #include "platform.hpp" // strcasecmp -#include "util.hpp" // isBlankSpace +#include "util.hpp" // isBlankSpace, isDigit #include "fix/warning.hpp" @@ -143,7 +143,7 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor) exit(0); } - if ((name[0] >= '0' && name[0] <= '9') || name[0] == '$') { + if (isDigit(name[0]) || name[0] == '$') { int base = 0; if (name[0] == '$') { diff --git a/src/gfx/pal_spec.cpp b/src/gfx/pal_spec.cpp index 98b6963c..3389c9c2 100644 --- a/src/gfx/pal_spec.cpp +++ b/src/gfx/pal_spec.cpp @@ -20,7 +20,7 @@ #include "diagnostics.hpp" #include "helpers.hpp" #include "platform.hpp" -#include "util.hpp" // UpperMap +#include "util.hpp" // UpperMap, isDigit #include "gfx/main.hpp" #include "gfx/png.hpp" @@ -43,7 +43,7 @@ static constexpr uint8_t nibble(char c) { assume(c <= 'F'); return c - 'A' + 10; } else { - assume(c >= '0' && c <= '9'); + assume(isDigit(c)); return c - '0'; } } diff --git a/src/link/lexer.cpp b/src/link/lexer.cpp index 22bd2a58..cd7a137d 100644 --- a/src/link/lexer.cpp +++ b/src/link/lexer.cpp @@ -82,28 +82,20 @@ static yy::parser::symbol_type yywrap() { return yy::parser::make_YYEOF(); } -static bool isIdentChar(int c) { - return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'); -} - -static std::string readIdent(int c) { +static std::string readKeyword(int c) { LexerStackEntry &context = lexerStack.back(); - std::string ident; - ident.push_back(c); - for (c = context.file.sgetc(); isIdentChar(c); c = context.file.snextc()) { - ident.push_back(c); + std::string keyword; + 
keyword.push_back(c); + for (c = context.file.sgetc(); isAlphanumeric(c); c = context.file.snextc()) { + keyword.push_back(c); } - return ident; -} - -static bool isDecDigit(int c) { - return c >= '0' && c <= '9'; + return keyword; } static yy::parser::symbol_type parseDecNumber(int c) { LexerStackEntry &context = lexerStack.back(); uint32_t number = c - '0'; - for (c = context.file.sgetc(); isDecDigit(c) || c == '_'; c = context.file.sgetc()) { + for (c = context.file.sgetc(); isDigit(c) || c == '_'; c = context.file.sgetc()) { if (c != '_') { number = number * 10 + (c - '0'); } @@ -135,10 +127,6 @@ static yy::parser::symbol_type parseBinNumber(char const *prefix) { return yy::parser::make_number(number); } -static bool isOctDigit(int c) { - return c >= '0' && c <= '7'; -} - static yy::parser::symbol_type parseOctNumber(char const *prefix) { LexerStackEntry &context = lexerStack.back(); int c = context.file.sgetc(); @@ -158,12 +146,8 @@ static yy::parser::symbol_type parseOctNumber(char const *prefix) { return yy::parser::make_number(number); } -static bool isHexDigit(int c) { - return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); -} - static uint8_t parseHexDigit(int c) { - if (c >= '0' && c <= '9') { + if (isDigit(c)) { return c - '0'; } else if (c >= 'A' && c <= 'F') { return c - 'A' + 10; @@ -290,10 +274,10 @@ yy::parser::symbol_type yylex() { return parseBinNumber("'%'"); } else if (c == '&') { return parseOctNumber("'&'"); - } else if (isDecDigit(c)) { + } else if (isDigit(c)) { return parseNumber(c); - } else if (isIdentChar(c)) { // Note that we match these *after* digit characters! 
- std::string ident = readIdent(c); + } else if (isLetter(c)) { + std::string keyword = readKeyword(c); static UpperMap const sectTypes{ {"WRAM0", SECTTYPE_WRAM0}, @@ -305,7 +289,7 @@ yy::parser::symbol_type yylex() { {"SRAM", SECTTYPE_SRAM }, {"OAM", SECTTYPE_OAM }, }; - if (auto search = sectTypes.find(ident); search != sectTypes.end()) { + if (auto search = sectTypes.find(keyword); search != sectTypes.end()) { return yy::parser::make_sect_type(search->second); } @@ -317,11 +301,11 @@ yy::parser::symbol_type yylex() { {"DS", yy::parser::make_DS }, {"OPTIONAL", yy::parser::make_OPTIONAL}, }; - if (auto search = keywords.find(ident); search != keywords.end()) { + if (auto search = keywords.find(keyword); search != keywords.end()) { return search->second(); } - scriptError("Unknown keyword `%s`", ident.c_str()); + scriptError("Unknown keyword `%s`", keyword.c_str()); return yylex(); } else { scriptError("Unexpected character %s", printChar(c)); diff --git a/src/util.cpp b/src/util.cpp index f6049529..90410fbe 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -21,14 +21,34 @@ bool isPrintable(int c) { return c >= ' ' && c <= '~'; } +bool isLetter(int c) { + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); +} + +bool isDigit(int c) { + return c >= '0' && c <= '9'; +} + +bool isOctDigit(int c) { + return c >= '0' && c <= '7'; +} + +bool isHexDigit(int c) { + return isDigit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); +} + +bool isAlphanumeric(int c) { + return isLetter(c) || isDigit(c); +} + bool startsIdentifier(int c) { // This returns false for anonymous labels, which internally start with a '!', // and for section fragment literal labels, which internally start with a '$'. - return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '.' || c == '_'; + return isLetter(c) || c == '.' 
|| c == '_'; } bool continuesIdentifier(int c) { - return startsIdentifier(c) || (c >= '0' && c <= '9') || c == '#' || c == '$' || c == '@'; + return startsIdentifier(c) || isDigit(c) || c == '#' || c == '$' || c == '@'; } char const *printChar(int c) {