Add more character utility functions

This commit is contained in:
Rangi42
2025-08-19 19:12:58 -04:00
parent 94e9ef5213
commit 386fb5f398
9 changed files with 64 additions and 64 deletions

View File

@@ -53,7 +53,8 @@ common_obj := \
src/extern/getopt.o \ src/extern/getopt.o \
src/diagnostics.o \ src/diagnostics.o \
src/style.o \ src/style.o \
src/usage.o src/usage.o \
src/util.o
rgbasm_obj := \ rgbasm_obj := \
${common_obj} \ ${common_obj} \
@@ -76,7 +77,6 @@ rgbasm_obj := \
src/backtrace.o \ src/backtrace.o \
src/linkdefs.o \ src/linkdefs.o \
src/opmath.o \ src/opmath.o \
src/util.o \
src/verbosity.o src/verbosity.o
src/asm/lexer.o src/asm/main.o: src/asm/parser.hpp src/asm/lexer.o src/asm/main.o: src/asm/parser.hpp
@@ -100,7 +100,6 @@ rgblink_obj := \
src/backtrace.o \ src/backtrace.o \
src/linkdefs.o \ src/linkdefs.o \
src/opmath.o \ src/opmath.o \
src/util.o \
src/verbosity.o src/verbosity.o
src/link/lexer.o src/link/main.o: src/link/script.hpp src/link/lexer.o src/link/main.o: src/link/script.hpp
@@ -109,8 +108,7 @@ rgbfix_obj := \
${common_obj} \ ${common_obj} \
src/fix/main.o \ src/fix/main.o \
src/fix/mbc.o \ src/fix/mbc.o \
src/fix/warning.o \ src/fix/warning.o
src/util.o
rgbgfx_obj := \ rgbgfx_obj := \
${common_obj} \ ${common_obj} \
@@ -124,7 +122,6 @@ rgbgfx_obj := \
src/gfx/reverse.o \ src/gfx/reverse.o \
src/gfx/rgba.o \ src/gfx/rgba.o \
src/gfx/warning.o \ src/gfx/warning.o \
src/util.o \
src/verbosity.o src/verbosity.o
rgbasm: ${rgbasm_obj} rgbasm: ${rgbasm_obj}

View File

@@ -16,6 +16,11 @@ bool isNewline(int c);
bool isBlankSpace(int c); bool isBlankSpace(int c);
bool isWhitespace(int c); bool isWhitespace(int c);
bool isPrintable(int c); bool isPrintable(int c);
bool isLetter(int c);
bool isDigit(int c);
bool isOctDigit(int c);
bool isHexDigit(int c);
bool isAlphanumeric(int c);
bool startsIdentifier(int c); bool startsIdentifier(int c);
bool continuesIdentifier(int c); bool continuesIdentifier(int c);

View File

@@ -7,6 +7,7 @@ set(common_src
"diagnostics.cpp" "diagnostics.cpp"
"style.cpp" "style.cpp"
"usage.cpp" "usage.cpp"
"util.cpp"
"_version.cpp" "_version.cpp"
) )
@@ -54,7 +55,6 @@ set(rgbasm_src
"backtrace.cpp" "backtrace.cpp"
"linkdefs.cpp" "linkdefs.cpp"
"opmath.cpp" "opmath.cpp"
"util.cpp"
"verbosity.cpp" "verbosity.cpp"
) )
@@ -76,7 +76,6 @@ set(rgblink_src
"backtrace.cpp" "backtrace.cpp"
"linkdefs.cpp" "linkdefs.cpp"
"opmath.cpp" "opmath.cpp"
"util.cpp"
"verbosity.cpp" "verbosity.cpp"
) )
@@ -84,7 +83,6 @@ set(rgbfix_src
"fix/main.cpp" "fix/main.cpp"
"fix/mbc.cpp" "fix/mbc.cpp"
"fix/warning.cpp" "fix/warning.cpp"
"util.cpp"
) )
set(rgbgfx_src set(rgbgfx_src
@@ -98,7 +96,6 @@ set(rgbgfx_src
"gfx/reverse.cpp" "gfx/reverse.cpp"
"gfx/rgba.cpp" "gfx/rgba.cpp"
"gfx/warning.cpp" "gfx/warning.cpp"
"util.cpp"
"verbosity.cpp" "verbosity.cpp"
) )

View File

@@ -533,7 +533,7 @@ static uint32_t readBracketedMacroArgNum() {
c = nextChar(); c = nextChar();
} }
if (c >= '0' && c <= '9') { if (isDigit(c)) {
uint32_t n = readDecimalNumber(bumpChar()); uint32_t n = readDecimalNumber(bumpChar());
if (n > INT32_MAX) { if (n > INT32_MAX) {
error("Number in bracketed macro argument is too large"); error("Number in bracketed macro argument is too large");
@@ -940,13 +940,13 @@ static uint32_t readFractionalPart(uint32_t integer) {
} else if (c == 'q' || c == 'Q') { } else if (c == 'q' || c == 'Q') {
state = READFRACTIONALPART_PRECISION; state = READFRACTIONALPART_PRECISION;
continue; continue;
} else if (c < '0' || c > '9') { } else if (!isDigit(c)) {
break; break;
} }
if (divisor > (UINT32_MAX - (c - '0')) / 10) { if (divisor > (UINT32_MAX - (c - '0')) / 10) {
warning(WARNING_LARGE_CONSTANT, "Precision of fixed-point constant is too large"); warning(WARNING_LARGE_CONSTANT, "Precision of fixed-point constant is too large");
// Discard any additional digits // Discard any additional digits
skipChars([](int d) { return (d >= '0' && d <= '9') || d == '_'; }); skipChars([](int d) { return isDigit(d) || d == '_'; });
break; break;
} }
value = value * 10 + (c - '0'); value = value * 10 + (c - '0');
@@ -955,7 +955,7 @@ static uint32_t readFractionalPart(uint32_t integer) {
if (c == '.' && state == READFRACTIONALPART_PRECISION) { if (c == '.' && state == READFRACTIONALPART_PRECISION) {
state = READFRACTIONALPART_PRECISION_DIGITS; state = READFRACTIONALPART_PRECISION_DIGITS;
continue; continue;
} else if (c < '0' || c > '9') { } else if (!isDigit(c)) {
break; break;
} }
precision = precision * 10 + (c - '0'); precision = precision * 10 + (c - '0');
@@ -985,8 +985,7 @@ static uint32_t readFractionalPart(uint32_t integer) {
} }
static bool isValidDigit(char c) { static bool isValidDigit(char c) {
return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '.' return isAlphanumeric(c) || c == '.' || c == '#' || c == '@';
|| c == '#' || c == '@';
} }
static bool checkDigitErrors(char const *digits, size_t n, char const *type) { static bool checkDigitErrors(char const *digits, size_t n, char const *type) {
@@ -1070,7 +1069,7 @@ static uint32_t readOctalNumber() {
for (int c = peek();; c = nextChar()) { for (int c = peek();; c = nextChar()) {
if (c == '_' && !empty) { if (c == '_' && !empty) {
continue; continue;
} else if (c >= '0' && c <= '7') { } else if (isOctDigit(c)) {
c = c - '0'; c = c - '0';
} else { } else {
break; break;
@@ -1079,7 +1078,7 @@ static uint32_t readOctalNumber() {
if (value > (UINT32_MAX - c) / 8) { if (value > (UINT32_MAX - c) / 8) {
warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
// Discard any additional digits // Discard any additional digits
skipChars([](int d) { return (d >= '0' && d <= '7') || d == '_'; }); skipChars([](int d) { return isOctDigit(d) || d == '_'; });
return 0; return 0;
} }
value = value * 8 + c; value = value * 8 + c;
@@ -1095,13 +1094,13 @@ static uint32_t readOctalNumber() {
} }
static uint32_t readDecimalNumber(int initial) { static uint32_t readDecimalNumber(int initial) {
assume(initial >= '0' && initial <= '9'); assume(isDigit(initial));
uint32_t value = initial - '0'; uint32_t value = initial - '0';
for (int c = peek();; c = nextChar()) { for (int c = peek();; c = nextChar()) {
if (c == '_') { if (c == '_') {
continue; continue;
} else if (c >= '0' && c <= '9') { } else if (isDigit(c)) {
c = c - '0'; c = c - '0';
} else { } else {
break; break;
@@ -1110,7 +1109,7 @@ static uint32_t readDecimalNumber(int initial) {
if (value > (UINT32_MAX - c) / 10) { if (value > (UINT32_MAX - c) / 10) {
warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
// Discard any additional digits // Discard any additional digits
skipChars([](int d) { return (d >= '0' && d <= '9') || d == '_'; }); skipChars([](int d) { return isDigit(d) || d == '_'; });
return 0; return 0;
} }
value = value * 10 + c; value = value * 10 + c;
@@ -1130,7 +1129,7 @@ static uint32_t readHexNumber() {
c = c - 'a' + 10; c = c - 'a' + 10;
} else if (c >= 'A' && c <= 'F') { } else if (c >= 'A' && c <= 'F') {
c = c - 'A' + 10; c = c - 'A' + 10;
} else if (c >= '0' && c <= '9') { } else if (isDigit(c)) {
c = c - '0'; c = c - '0';
} else { } else {
break; break;
@@ -1139,10 +1138,7 @@ static uint32_t readHexNumber() {
if (value > (UINT32_MAX - c) / 16) { if (value > (UINT32_MAX - c) / 16) {
warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
// Discard any additional digits // Discard any additional digits
skipChars([](int d) { skipChars([](int d) { return isHexDigit(d) || d == '_'; });
return (d >= '0' && d <= '9') || (d >= 'a' && d <= 'f') || (d >= 'A' && d <= 'f')
|| d == '_';
});
return 0; return 0;
} }
value = value * 16 + c; value = value * 16 + c;
@@ -1825,7 +1821,7 @@ static Token yylex_NORMAL() {
} else if (c == '&') { } else if (c == '&') {
shiftChar(); shiftChar();
return Token(T_(OP_LOGICAND)); return Token(T_(OP_LOGICAND));
} else if (c >= '0' && c <= '7') { } else if (isOctDigit(c)) {
return Token(T_(NUMBER), readOctalNumber()); return Token(T_(NUMBER), readOctalNumber());
} }
return Token(T_(OP_AND)); return Token(T_(OP_AND));

View File

@@ -3,6 +3,7 @@
#include "diagnostics.hpp" #include "diagnostics.hpp"
#include "style.hpp" #include "style.hpp"
#include "util.hpp" // isDigit
void warnx(char const *fmt, ...) { void warnx(char const *fmt, ...) {
va_list ap; va_list ap;
@@ -60,7 +61,7 @@ std::pair<WarningState, std::optional<uint32_t>> getInitialWarningState(std::str
uint32_t param = 0; uint32_t param = 0;
bool overflowed = false; bool overflowed = false;
for (; *ptr >= '0' && *ptr <= '9'; ++ptr) { for (; isDigit(*ptr); ++ptr) {
if (overflowed) { if (overflowed) {
continue; continue;
} }

View File

@@ -8,7 +8,7 @@
#include "helpers.hpp" // unreachable_ #include "helpers.hpp" // unreachable_
#include "platform.hpp" // strcasecmp #include "platform.hpp" // strcasecmp
#include "util.hpp" // isBlankSpace #include "util.hpp" // isBlankSpace, isDigit
#include "fix/warning.hpp" #include "fix/warning.hpp"
@@ -143,7 +143,7 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor)
exit(0); exit(0);
} }
if ((name[0] >= '0' && name[0] <= '9') || name[0] == '$') { if (isDigit(name[0]) || name[0] == '$') {
int base = 0; int base = 0;
if (name[0] == '$') { if (name[0] == '$') {

View File

@@ -20,7 +20,7 @@
#include "diagnostics.hpp" #include "diagnostics.hpp"
#include "helpers.hpp" #include "helpers.hpp"
#include "platform.hpp" #include "platform.hpp"
#include "util.hpp" // UpperMap #include "util.hpp" // UpperMap, isDigit
#include "gfx/main.hpp" #include "gfx/main.hpp"
#include "gfx/png.hpp" #include "gfx/png.hpp"
@@ -43,7 +43,7 @@ static constexpr uint8_t nibble(char c) {
assume(c <= 'F'); assume(c <= 'F');
return c - 'A' + 10; return c - 'A' + 10;
} else { } else {
assume(c >= '0' && c <= '9'); assume(isDigit(c));
return c - '0'; return c - '0';
} }
} }

View File

@@ -82,28 +82,20 @@ static yy::parser::symbol_type yywrap() {
return yy::parser::make_YYEOF(); return yy::parser::make_YYEOF();
} }
static bool isIdentChar(int c) { static std::string readKeyword(int c) {
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9');
}
static std::string readIdent(int c) {
LexerStackEntry &context = lexerStack.back(); LexerStackEntry &context = lexerStack.back();
std::string ident; std::string keyword;
ident.push_back(c); keyword.push_back(c);
for (c = context.file.sgetc(); isIdentChar(c); c = context.file.snextc()) { for (c = context.file.sgetc(); isAlphanumeric(c); c = context.file.snextc()) {
ident.push_back(c); keyword.push_back(c);
} }
return ident; return keyword;
}
// Reports whether `c` is one of the ASCII decimal digits '0' through '9'.
static bool isDecDigit(int c) {
	if (c < '0') {
		return false;
	}
	return c <= '9';
}
static yy::parser::symbol_type parseDecNumber(int c) { static yy::parser::symbol_type parseDecNumber(int c) {
LexerStackEntry &context = lexerStack.back(); LexerStackEntry &context = lexerStack.back();
uint32_t number = c - '0'; uint32_t number = c - '0';
for (c = context.file.sgetc(); isDecDigit(c) || c == '_'; c = context.file.sgetc()) { for (c = context.file.sgetc(); isDigit(c) || c == '_'; c = context.file.sgetc()) {
if (c != '_') { if (c != '_') {
number = number * 10 + (c - '0'); number = number * 10 + (c - '0');
} }
@@ -135,10 +127,6 @@ static yy::parser::symbol_type parseBinNumber(char const *prefix) {
return yy::parser::make_number(number); return yy::parser::make_number(number);
} }
// Reports whether `c` is one of the ASCII octal digits '0' through '7'.
static bool isOctDigit(int c) {
	bool const belowRange = c < '0';
	bool const aboveRange = c > '7';
	return !belowRange && !aboveRange;
}
static yy::parser::symbol_type parseOctNumber(char const *prefix) { static yy::parser::symbol_type parseOctNumber(char const *prefix) {
LexerStackEntry &context = lexerStack.back(); LexerStackEntry &context = lexerStack.back();
int c = context.file.sgetc(); int c = context.file.sgetc();
@@ -158,12 +146,8 @@ static yy::parser::symbol_type parseOctNumber(char const *prefix) {
return yy::parser::make_number(number); return yy::parser::make_number(number);
} }
// Reports whether `c` is an ASCII hexadecimal digit: '0'-'9', 'A'-'F', or 'a'-'f'.
static bool isHexDigit(int c) {
	if (c >= '0' && c <= '9') {
		return true;
	}
	if (c >= 'A' && c <= 'F') {
		return true;
	}
	return c >= 'a' && c <= 'f';
}
static uint8_t parseHexDigit(int c) { static uint8_t parseHexDigit(int c) {
if (c >= '0' && c <= '9') { if (isDigit(c)) {
return c - '0'; return c - '0';
} else if (c >= 'A' && c <= 'F') { } else if (c >= 'A' && c <= 'F') {
return c - 'A' + 10; return c - 'A' + 10;
@@ -290,10 +274,10 @@ yy::parser::symbol_type yylex() {
return parseBinNumber("'%'"); return parseBinNumber("'%'");
} else if (c == '&') { } else if (c == '&') {
return parseOctNumber("'&'"); return parseOctNumber("'&'");
} else if (isDecDigit(c)) { } else if (isDigit(c)) {
return parseNumber(c); return parseNumber(c);
} else if (isIdentChar(c)) { // Note that we match these *after* digit characters! } else if (isLetter(c)) {
std::string ident = readIdent(c); std::string keyword = readKeyword(c);
static UpperMap<SectionType> const sectTypes{ static UpperMap<SectionType> const sectTypes{
{"WRAM0", SECTTYPE_WRAM0}, {"WRAM0", SECTTYPE_WRAM0},
@@ -305,7 +289,7 @@ yy::parser::symbol_type yylex() {
{"SRAM", SECTTYPE_SRAM }, {"SRAM", SECTTYPE_SRAM },
{"OAM", SECTTYPE_OAM }, {"OAM", SECTTYPE_OAM },
}; };
if (auto search = sectTypes.find(ident); search != sectTypes.end()) { if (auto search = sectTypes.find(keyword); search != sectTypes.end()) {
return yy::parser::make_sect_type(search->second); return yy::parser::make_sect_type(search->second);
} }
@@ -317,11 +301,11 @@ yy::parser::symbol_type yylex() {
{"DS", yy::parser::make_DS }, {"DS", yy::parser::make_DS },
{"OPTIONAL", yy::parser::make_OPTIONAL}, {"OPTIONAL", yy::parser::make_OPTIONAL},
}; };
if (auto search = keywords.find(ident); search != keywords.end()) { if (auto search = keywords.find(keyword); search != keywords.end()) {
return search->second(); return search->second();
} }
scriptError("Unknown keyword `%s`", ident.c_str()); scriptError("Unknown keyword `%s`", keyword.c_str());
return yylex(); return yylex();
} else { } else {
scriptError("Unexpected character %s", printChar(c)); scriptError("Unexpected character %s", printChar(c));

View File

@@ -21,14 +21,34 @@ bool isPrintable(int c) {
return c >= ' ' && c <= '~'; return c >= ' ' && c <= '~';
} }
// Reports whether `c` is an ASCII letter, either uppercase ('A'-'Z') or lowercase ('a'-'z').
// Only plain range comparisons are used, so any `int` value (including negative ones) is accepted.
bool isLetter(int c) {
	if (c >= 'A' && c <= 'Z') {
		return true;
	}
	return c >= 'a' && c <= 'z';
}
// Reports whether `c` is one of the ASCII decimal digits '0' through '9'.
bool isDigit(int c) {
	// Equivalent to `'0' <= c && c <= '9'`, written as the negated out-of-range check
	return !(c < '0' || c > '9');
}
// Reports whether `c` is one of the ASCII octal digits '0' through '7'.
bool isOctDigit(int c) {
	if (c > '7') {
		return false;
	}
	return c >= '0';
}
// Reports whether `c` is an ASCII hexadecimal digit: a decimal digit, 'A'-'F', or 'a'-'f'.
bool isHexDigit(int c) {
	if (isDigit(c)) {
		return true;
	}
	bool const isUpperHexLetter = c >= 'A' && c <= 'F';
	bool const isLowerHexLetter = c >= 'a' && c <= 'f';
	return isUpperHexLetter || isLowerHexLetter;
}
// Reports whether `c` is accepted by either `isLetter` or `isDigit`.
bool isAlphanumeric(int c) {
	if (isLetter(c)) {
		return true;
	}
	return isDigit(c);
}
bool startsIdentifier(int c) { bool startsIdentifier(int c) {
// This returns false for anonymous labels, which internally start with a '!', // This returns false for anonymous labels, which internally start with a '!',
// and for section fragment literal labels, which internally start with a '$'. // and for section fragment literal labels, which internally start with a '$'.
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '.' || c == '_'; return isLetter(c) || c == '.' || c == '_';
} }
bool continuesIdentifier(int c) { bool continuesIdentifier(int c) {
return startsIdentifier(c) || (c >= '0' && c <= '9') || c == '#' || c == '$' || c == '@'; return startsIdentifier(c) || isDigit(c) || c == '#' || c == '$' || c == '@';
} }
char const *printChar(int c) { char const *printChar(int c) {