Add more character utility functions

This commit is contained in:
Rangi42
2025-08-19 19:12:58 -04:00
parent 94e9ef5213
commit 386fb5f398
9 changed files with 64 additions and 64 deletions

View File

@@ -53,7 +53,8 @@ common_obj := \
src/extern/getopt.o \
src/diagnostics.o \
src/style.o \
src/usage.o
src/usage.o \
src/util.o
rgbasm_obj := \
${common_obj} \
@@ -76,7 +77,6 @@ rgbasm_obj := \
src/backtrace.o \
src/linkdefs.o \
src/opmath.o \
src/util.o \
src/verbosity.o
src/asm/lexer.o src/asm/main.o: src/asm/parser.hpp
@@ -100,7 +100,6 @@ rgblink_obj := \
src/backtrace.o \
src/linkdefs.o \
src/opmath.o \
src/util.o \
src/verbosity.o
src/link/lexer.o src/link/main.o: src/link/script.hpp
@@ -109,8 +108,7 @@ rgbfix_obj := \
${common_obj} \
src/fix/main.o \
src/fix/mbc.o \
src/fix/warning.o \
src/util.o
src/fix/warning.o
rgbgfx_obj := \
${common_obj} \
@@ -124,7 +122,6 @@ rgbgfx_obj := \
src/gfx/reverse.o \
src/gfx/rgba.o \
src/gfx/warning.o \
src/util.o \
src/verbosity.o
rgbasm: ${rgbasm_obj}

View File

@@ -16,6 +16,11 @@ bool isNewline(int c);
bool isBlankSpace(int c);
bool isWhitespace(int c);
// True for characters in the range ' ' through '~', inclusive
bool isPrintable(int c);
// True for 'A'-'Z' and 'a'-'z'
bool isLetter(int c);
// True for '0'-'9'
bool isDigit(int c);
// True for '0'-'7'
bool isOctDigit(int c);
// True for '0'-'9', 'A'-'F', and 'a'-'f'
bool isHexDigit(int c);
// True for any character accepted by `isLetter` or `isDigit`
bool isAlphanumeric(int c);
// True for a letter, '.', or '_'
bool startsIdentifier(int c);
// True for anything accepted by `startsIdentifier`, plus digits, '#', '$', and '@'
bool continuesIdentifier(int c);

View File

@@ -7,6 +7,7 @@ set(common_src
"diagnostics.cpp"
"style.cpp"
"usage.cpp"
"util.cpp"
"_version.cpp"
)
@@ -54,7 +55,6 @@ set(rgbasm_src
"backtrace.cpp"
"linkdefs.cpp"
"opmath.cpp"
"util.cpp"
"verbosity.cpp"
)
@@ -76,7 +76,6 @@ set(rgblink_src
"backtrace.cpp"
"linkdefs.cpp"
"opmath.cpp"
"util.cpp"
"verbosity.cpp"
)
@@ -84,7 +83,6 @@ set(rgbfix_src
"fix/main.cpp"
"fix/mbc.cpp"
"fix/warning.cpp"
"util.cpp"
)
set(rgbgfx_src
@@ -98,7 +96,6 @@ set(rgbgfx_src
"gfx/reverse.cpp"
"gfx/rgba.cpp"
"gfx/warning.cpp"
"util.cpp"
"verbosity.cpp"
)

View File

@@ -533,7 +533,7 @@ static uint32_t readBracketedMacroArgNum() {
c = nextChar();
}
if (c >= '0' && c <= '9') {
if (isDigit(c)) {
uint32_t n = readDecimalNumber(bumpChar());
if (n > INT32_MAX) {
error("Number in bracketed macro argument is too large");
@@ -940,13 +940,13 @@ static uint32_t readFractionalPart(uint32_t integer) {
} else if (c == 'q' || c == 'Q') {
state = READFRACTIONALPART_PRECISION;
continue;
} else if (c < '0' || c > '9') {
} else if (!isDigit(c)) {
break;
}
if (divisor > (UINT32_MAX - (c - '0')) / 10) {
warning(WARNING_LARGE_CONSTANT, "Precision of fixed-point constant is too large");
// Discard any additional digits
skipChars([](int d) { return (d >= '0' && d <= '9') || d == '_'; });
skipChars([](int d) { return isDigit(d) || d == '_'; });
break;
}
value = value * 10 + (c - '0');
@@ -955,7 +955,7 @@ static uint32_t readFractionalPart(uint32_t integer) {
if (c == '.' && state == READFRACTIONALPART_PRECISION) {
state = READFRACTIONALPART_PRECISION_DIGITS;
continue;
} else if (c < '0' || c > '9') {
} else if (!isDigit(c)) {
break;
}
precision = precision * 10 + (c - '0');
@@ -985,8 +985,7 @@ static uint32_t readFractionalPart(uint32_t integer) {
}
// Returns whether `c` is alphanumeric or one of '.', '#', or '@'.
// The range checks are delegated to `isAlphanumeric` so that every character
// class is defined in exactly one place.
static bool isValidDigit(char c) {
	return isAlphanumeric(c) || c == '.' || c == '#' || c == '@';
}
static bool checkDigitErrors(char const *digits, size_t n, char const *type) {
@@ -1070,7 +1069,7 @@ static uint32_t readOctalNumber() {
for (int c = peek();; c = nextChar()) {
if (c == '_' && !empty) {
continue;
} else if (c >= '0' && c <= '7') {
} else if (isOctDigit(c)) {
c = c - '0';
} else {
break;
@@ -1079,7 +1078,7 @@ static uint32_t readOctalNumber() {
if (value > (UINT32_MAX - c) / 8) {
warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
// Discard any additional digits
skipChars([](int d) { return (d >= '0' && d <= '7') || d == '_'; });
skipChars([](int d) { return isOctDigit(d) || d == '_'; });
return 0;
}
value = value * 8 + c;
@@ -1095,13 +1094,13 @@ static uint32_t readOctalNumber() {
}
static uint32_t readDecimalNumber(int initial) {
assume(initial >= '0' && initial <= '9');
assume(isDigit(initial));
uint32_t value = initial - '0';
for (int c = peek();; c = nextChar()) {
if (c == '_') {
continue;
} else if (c >= '0' && c <= '9') {
} else if (isDigit(c)) {
c = c - '0';
} else {
break;
@@ -1110,7 +1109,7 @@ static uint32_t readDecimalNumber(int initial) {
if (value > (UINT32_MAX - c) / 10) {
warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
// Discard any additional digits
skipChars([](int d) { return (d >= '0' && d <= '9') || d == '_'; });
skipChars([](int d) { return isDigit(d) || d == '_'; });
return 0;
}
value = value * 10 + c;
@@ -1130,7 +1129,7 @@ static uint32_t readHexNumber() {
c = c - 'a' + 10;
} else if (c >= 'A' && c <= 'F') {
c = c - 'A' + 10;
} else if (c >= '0' && c <= '9') {
} else if (isDigit(c)) {
c = c - '0';
} else {
break;
@@ -1139,10 +1138,7 @@ static uint32_t readHexNumber() {
if (value > (UINT32_MAX - c) / 16) {
warning(WARNING_LARGE_CONSTANT, "Integer constant is too large");
// Discard any additional digits
skipChars([](int d) {
return (d >= '0' && d <= '9') || (d >= 'a' && d <= 'f') || (d >= 'A' && d <= 'f')
|| d == '_';
});
skipChars([](int d) { return isHexDigit(d) || d == '_'; });
return 0;
}
value = value * 16 + c;
@@ -1825,7 +1821,7 @@ static Token yylex_NORMAL() {
} else if (c == '&') {
shiftChar();
return Token(T_(OP_LOGICAND));
} else if (c >= '0' && c <= '7') {
} else if (isOctDigit(c)) {
return Token(T_(NUMBER), readOctalNumber());
}
return Token(T_(OP_AND));

View File

@@ -3,6 +3,7 @@
#include "diagnostics.hpp"
#include "style.hpp"
#include "util.hpp" // isDigit
void warnx(char const *fmt, ...) {
va_list ap;
@@ -60,7 +61,7 @@ std::pair<WarningState, std::optional<uint32_t>> getInitialWarningState(std::str
uint32_t param = 0;
bool overflowed = false;
for (; *ptr >= '0' && *ptr <= '9'; ++ptr) {
for (; isDigit(*ptr); ++ptr) {
if (overflowed) {
continue;
}

View File

@@ -8,7 +8,7 @@
#include "helpers.hpp" // unreachable_
#include "platform.hpp" // strcasecmp
#include "util.hpp" // isBlankSpace
#include "util.hpp" // isBlankSpace, isDigit
#include "fix/warning.hpp"
@@ -143,7 +143,7 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor)
exit(0);
}
if ((name[0] >= '0' && name[0] <= '9') || name[0] == '$') {
if (isDigit(name[0]) || name[0] == '$') {
int base = 0;
if (name[0] == '$') {

View File

@@ -20,7 +20,7 @@
#include "diagnostics.hpp"
#include "helpers.hpp"
#include "platform.hpp"
#include "util.hpp" // UpperMap
#include "util.hpp" // UpperMap, isDigit
#include "gfx/main.hpp"
#include "gfx/png.hpp"
@@ -43,7 +43,7 @@ static constexpr uint8_t nibble(char c) {
assume(c <= 'F');
return c - 'A' + 10;
} else {
assume(c >= '0' && c <= '9');
assume(isDigit(c));
return c - '0';
}
}

View File

@@ -82,28 +82,20 @@ static yy::parser::symbol_type yywrap() {
return yy::parser::make_YYEOF();
}
// Returns whether `c` is a decimal digit or an upper- or lowercase letter.
static bool isIdentChar(int c) {
	if (c >= '0' && c <= '9') {
		return true;
	}
	if (c >= 'A' && c <= 'Z') {
		return true;
	}
	return c >= 'a' && c <= 'z';
}
static std::string readIdent(int c) {
static std::string readKeyword(int c) {
LexerStackEntry &context = lexerStack.back();
std::string ident;
ident.push_back(c);
for (c = context.file.sgetc(); isIdentChar(c); c = context.file.snextc()) {
ident.push_back(c);
std::string keyword;
keyword.push_back(c);
for (c = context.file.sgetc(); isAlphanumeric(c); c = context.file.snextc()) {
keyword.push_back(c);
}
return ident;
}
static bool isDecDigit(int c) {
return c >= '0' && c <= '9';
return keyword;
}
static yy::parser::symbol_type parseDecNumber(int c) {
LexerStackEntry &context = lexerStack.back();
uint32_t number = c - '0';
for (c = context.file.sgetc(); isDecDigit(c) || c == '_'; c = context.file.sgetc()) {
for (c = context.file.sgetc(); isDigit(c) || c == '_'; c = context.file.sgetc()) {
if (c != '_') {
number = number * 10 + (c - '0');
}
@@ -135,10 +127,6 @@ static yy::parser::symbol_type parseBinNumber(char const *prefix) {
return yy::parser::make_number(number);
}
// Returns whether `c` is one of the octal digits '0' through '7'.
static bool isOctDigit(int c) {
	return !(c < '0' || c > '7');
}
static yy::parser::symbol_type parseOctNumber(char const *prefix) {
LexerStackEntry &context = lexerStack.back();
int c = context.file.sgetc();
@@ -158,12 +146,8 @@ static yy::parser::symbol_type parseOctNumber(char const *prefix) {
return yy::parser::make_number(number);
}
// Returns whether `c` is a hexadecimal digit: '0'-'9', 'A'-'F', or 'a'-'f'.
static bool isHexDigit(int c) {
	bool const isDecimal = c >= '0' && c <= '9';
	bool const isUpperHex = c >= 'A' && c <= 'F';
	bool const isLowerHex = c >= 'a' && c <= 'f';
	return isDecimal || isUpperHex || isLowerHex;
}
static uint8_t parseHexDigit(int c) {
if (c >= '0' && c <= '9') {
if (isDigit(c)) {
return c - '0';
} else if (c >= 'A' && c <= 'F') {
return c - 'A' + 10;
@@ -290,10 +274,10 @@ yy::parser::symbol_type yylex() {
return parseBinNumber("'%'");
} else if (c == '&') {
return parseOctNumber("'&'");
} else if (isDecDigit(c)) {
} else if (isDigit(c)) {
return parseNumber(c);
} else if (isIdentChar(c)) { // Note that we match these *after* digit characters!
std::string ident = readIdent(c);
} else if (isLetter(c)) {
std::string keyword = readKeyword(c);
static UpperMap<SectionType> const sectTypes{
{"WRAM0", SECTTYPE_WRAM0},
@@ -305,7 +289,7 @@ yy::parser::symbol_type yylex() {
{"SRAM", SECTTYPE_SRAM },
{"OAM", SECTTYPE_OAM },
};
if (auto search = sectTypes.find(ident); search != sectTypes.end()) {
if (auto search = sectTypes.find(keyword); search != sectTypes.end()) {
return yy::parser::make_sect_type(search->second);
}
@@ -317,11 +301,11 @@ yy::parser::symbol_type yylex() {
{"DS", yy::parser::make_DS },
{"OPTIONAL", yy::parser::make_OPTIONAL},
};
if (auto search = keywords.find(ident); search != keywords.end()) {
if (auto search = keywords.find(keyword); search != keywords.end()) {
return search->second();
}
scriptError("Unknown keyword `%s`", ident.c_str());
scriptError("Unknown keyword `%s`", keyword.c_str());
return yylex();
} else {
scriptError("Unexpected character %s", printChar(c));

View File

@@ -21,14 +21,34 @@ bool isPrintable(int c) {
return c >= ' ' && c <= '~';
}
// Returns whether `c` is a letter in 'A'-'Z' or 'a'-'z'.
bool isLetter(int c) {
	bool const isUpper = c >= 'A' && c <= 'Z';
	bool const isLower = c >= 'a' && c <= 'z';
	return isUpper || isLower;
}
// Returns whether `c` is one of the decimal digits '0' through '9'.
bool isDigit(int c) {
	return !(c < '0' || c > '9');
}
// Returns whether `c` is one of the octal digits '0' through '7'.
bool isOctDigit(int c) {
	return '0' <= c && c <= '7';
}
// Returns whether `c` is a hexadecimal digit: a decimal digit, 'A'-'F', or 'a'-'f'.
bool isHexDigit(int c) {
	if (isDigit(c)) {
		return true;
	}
	bool const isUpperHex = c >= 'A' && c <= 'F';
	bool const isLowerHex = c >= 'a' && c <= 'f';
	return isUpperHex || isLowerHex;
}
// Returns whether `c` is accepted by either `isLetter` or `isDigit`.
bool isAlphanumeric(int c) {
	if (isLetter(c)) {
		return true;
	}
	return isDigit(c);
}
// Returns whether `c` can be the first character of an identifier:
// a letter, '.', or '_'.
bool startsIdentifier(int c) {
	// This returns false for anonymous labels, which internally start with a '!',
	// and for section fragment literal labels, which internally start with a '$'.
	return isLetter(c) || c == '.' || c == '_';
}
// Returns whether `c` can appear after the first character of an identifier:
// anything accepted by `startsIdentifier`, a decimal digit, '#', '$', or '@'.
bool continuesIdentifier(int c) {
	return startsIdentifier(c) || isDigit(c) || c == '#' || c == '$' || c == '@';
}
char const *printChar(int c) {