diff --git a/include/itertools.hpp b/include/itertools.hpp index f7d2b430..b1d0d32a 100644 --- a/include/itertools.hpp +++ b/include/itertools.hpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/include/util.hpp b/include/util.hpp index f87cb343..07c6ddc2 100644 --- a/include/util.hpp +++ b/include/util.hpp @@ -4,8 +4,9 @@ #define RGBDS_UTIL_HPP #include -#include +#include // toupper #include +#include #include #include #include @@ -13,12 +14,21 @@ #include "helpers.hpp" +enum NumberBase { + BASE_AUTO = 0, + BASE_2 = 2, + BASE_8 = 8, + BASE_10 = 10, + BASE_16 = 16, +}; + bool isNewline(int c); bool isBlankSpace(int c); bool isWhitespace(int c); bool isPrintable(int c); bool isLetter(int c); bool isDigit(int c); +bool isBinDigit(int c); bool isOctDigit(int c); bool isHexDigit(int c); bool isAlphanumeric(int c); @@ -27,6 +37,8 @@ bool startsIdentifier(int c); bool continuesIdentifier(int c); uint8_t parseHexDigit(int c); +std::optional parseNumber(char const *&str, NumberBase base = BASE_AUTO); +std::optional parseWholeNumber(char const *str, NumberBase base = BASE_AUTO); char const *printChar(int c); diff --git a/include/verbosity.hpp b/include/verbosity.hpp index 45e0fd33..f7658880 100644 --- a/include/verbosity.hpp +++ b/include/verbosity.hpp @@ -3,6 +3,8 @@ #ifndef RGBDS_VERBOSITY_HPP #define RGBDS_VERBOSITY_HPP +#include + #include "style.hpp" // This macro does not evaluate its arguments unless the condition is true. diff --git a/src/asm/format.cpp b/src/asm/format.cpp index a1b3ec5d..cc9cb541 100644 --- a/src/asm/format.cpp +++ b/src/asm/format.cpp @@ -11,24 +11,21 @@ #include #include -#include "util.hpp" // isDigit +#include "util.hpp" // parseNumber #include "asm/main.hpp" // options #include "asm/warning.hpp" -static size_t parseNumber(char const *spec, size_t &value) { - size_t i = 0; - - value = 0; - for (; isDigit(spec[i]); ++i) { - value = value * 10 + (spec[i] - '0'); - } - - return i; -} - size_t FormatSpec::parseSpec(char const *spec) { size_t i = 0; + + auto parseSpecNumber = [&spec, &i]() { + char const *end = &spec[i]; + size_t number = parseNumber(end, BASE_10).value_or(0); + i += end - &spec[i]; + return number; + }; + // if (char c = spec[i]; c == ' ' || c == '+') { ++i; @@ -51,19 +48,19 @@ size_t FormatSpec::parseSpec(char const *spec) { } // if (isDigit(spec[i])) { - i += parseNumber(&spec[i], width); + width = parseSpecNumber(); } // if (spec[i] == '.') { ++i; hasFrac = true; - i += parseNumber(&spec[i], fracWidth); + fracWidth = parseSpecNumber(); } // if (spec[i] == 'q') { ++i; hasPrec = true; - i += parseNumber(&spec[i], precision); + precision = parseSpecNumber(); } // switch (char c = spec[i]; c) { diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index a4dff1de..583a21fa 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -1009,8 +1009,8 @@ static bool isValidDigit(char c) { return isAlphanumeric(c) || c == '.' || c == '#' || c == '@'; } -static bool isBinDigit(int c) { - return c == '0' || c == '1' || c == options.binDigits[0] || c == options.binDigits[1]; +static bool isCustomBinDigit(int c) { + return isBinDigit(c) || c == options.binDigits[0] || c == options.binDigits[1]; } static bool checkDigitErrors(char const *digits, size_t n, char const *type) { @@ -1078,7 +1078,7 @@ static uint32_t readBinaryNumber(char const *prefix) { if (value > (UINT32_MAX - bit) / 2) { warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); // Discard any additional digits - skipChars([](int d) { return isBinDigit(d) || d == '_'; }); + skipChars([](int d) { return isCustomBinDigit(d) || d == '_'; }); return 0; } value = value * 2 + bit; @@ -1836,7 +1836,7 @@ static Token yylex_NORMAL() { case '%': // Either %=, MOD, or a binary constant c = peek(); - if (isBinDigit(c) || c == '_') { + if (isCustomBinDigit(c) || c == '_') { return Token(T_(NUMBER), readBinaryNumber("'%'")); } return oneOrTwo('=', T_(POP_MODEQ), T_(OP_MOD)); diff --git a/src/asm/main.cpp b/src/asm/main.cpp index b477309c..303e8386 100644 --- a/src/asm/main.cpp +++ b/src/asm/main.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -299,6 +300,8 @@ int main(int argc, char *argv[]) { // https://reproducible-builds.org/docs/source-date-epoch/ time_t now = time(nullptr); if (char const *sourceDateEpoch = getenv("SOURCE_DATE_EPOCH"); sourceDateEpoch) { + // Use `strtoul`, not `parseWholeNumber`, because SOURCE_DATE_EPOCH does + // not conventionally support our custom base prefixes now = static_cast(strtoul(sourceDateEpoch, nullptr, 0)); } sym_Init(now); @@ -378,51 +381,41 @@ int main(int argc, char *argv[]) { fstk_AddPreIncludeFile(musl_optarg); break; - case 'p': { - char *endptr; - unsigned long padByte = strtoul(musl_optarg, &endptr, 0); - - if (musl_optarg[0] == '\0' || *endptr != '\0') { + case 'p': + if (std::optional padByte = parseWholeNumber(musl_optarg); !padByte) { fatal("Invalid argument for option '-p'"); - } - - if (padByte > 0xFF) { + } else if (*padByte > 0xFF) { fatal("Argument for option '-p' must be between 0 and 0xFF"); + } else { + opt_P(*padByte); } - - opt_P(padByte); break; - } case 'Q': { char const *precisionArg = musl_optarg; if (precisionArg[0] == '.') { ++precisionArg; } - char *endptr; - unsigned long precision = strtoul(precisionArg, &endptr, 0); - if (precisionArg[0] == '\0' || *endptr != '\0') { + if (std::optional precision = parseWholeNumber(precisionArg); !precision) { fatal("Invalid argument for option '-Q'"); - } - - if (precision < 1 || precision > 31) { + } else if (*precision < 1 || *precision > 31) { fatal("Argument for option '-Q' must be between 1 and 31"); + } else { + opt_Q(*precision); } - - opt_Q(precision); break; } - case 'r': { - char *endptr; - options.maxRecursionDepth = strtoul(musl_optarg, &endptr, 0); - - if (musl_optarg[0] == '\0' || *endptr != '\0') { + case 'r': + if (std::optional maxDepth = parseWholeNumber(musl_optarg); !maxDepth) { fatal("Invalid argument for option '-r'"); + } else if (errno == ERANGE) { + fatal("Argument for option '-r' is out of range"); + } else { + options.maxRecursionDepth = *maxDepth; } break; - } case 's': { // Split ":" so `musl_optarg` is "" and `name` is "" @@ -459,21 +452,15 @@ int main(int argc, char *argv[]) { warnings.state.warningsEnabled = false; break; - case 'X': { - char *endptr; - uint64_t maxErrors = strtoul(musl_optarg, &endptr, 0); - - if (musl_optarg[0] == '\0' || *endptr != '\0') { + case 'X': + if (std::optional maxErrors = parseWholeNumber(musl_optarg); !maxErrors) { fatal("Invalid argument for option '-X'"); - } - - if (maxErrors > UINT64_MAX) { + } else if (*maxErrors > UINT64_MAX) { fatal("Argument for option '-X' must be between 0 and %" PRIu64, UINT64_MAX); + } else { + options.maxErrors = *maxErrors; } - - options.maxErrors = maxErrors; break; - } case 0: // Long-only options switch (longOpt) { diff --git a/src/asm/opt.cpp b/src/asm/opt.cpp index e4249581..08978d41 100644 --- a/src/asm/opt.cpp +++ b/src/asm/opt.cpp @@ -2,6 +2,7 @@ #include #include // std::size +#include #include #include #include @@ -9,8 +10,7 @@ #include #include "diagnostics.hpp" -#include "helpers.hpp" // assume -#include "util.hpp" // isBlankSpace +#include "util.hpp" #include "asm/fstack.hpp" #include "asm/lexer.hpp" @@ -81,50 +81,38 @@ void opt_Parse(char const *s) { } break; - case 'p': { - char *endptr; - unsigned long padByte = strtoul(s, &endptr, 0); - - if (s[0] == '\0' || *endptr != '\0') { + case 'p': + if (std::optional padByte = parseWholeNumber(s); !padByte) { error("Invalid argument for option 'p'"); - } else if (padByte > 0xFF) { + } else if (*padByte > 0xFF) { error("Argument for option 'p' must be between 0 and 0xFF"); } else { - opt_P(padByte); + opt_P(*padByte); } break; - } - case 'Q': { + case 'Q': if (s[0] == '.') { ++s; // Skip leading '.' } - char *endptr; - unsigned long precision = strtoul(s, &endptr, 0); - - if (s[0] == '\0' || *endptr != '\0') { + if (std::optional precision = parseWholeNumber(s); !precision) { error("Invalid argument for option 'Q'"); - } else if (precision < 1 || precision > 31) { + } else if (*precision < 1 || *precision > 31) { error("Argument for option 'Q' must be between 1 and 31"); } else { - opt_Q(precision); + opt_Q(*precision); } break; - } - case 'r': { - char *endptr; - unsigned long maxRecursionDepth = strtoul(s, &endptr, 0); - - if (s[0] == '\0' || *endptr != '\0') { + case 'r': + if (std::optional maxRecursionDepth = parseWholeNumber(s); !maxRecursionDepth) { error("Invalid argument for option 'r'"); } else if (errno == ERANGE) { error("Argument for option 'r' is out of range"); } else { - opt_R(maxRecursionDepth); + opt_R(*maxRecursionDepth); } break; - } case 'W': if (strlen(s) > 0) { diff --git a/src/backtrace.cpp b/src/backtrace.cpp index a6061fe8..c113ca7c 100644 --- a/src/backtrace.cpp +++ b/src/backtrace.cpp @@ -2,9 +2,11 @@ #include "backtrace.hpp" -#include // strtoul +#include +#include #include "platform.hpp" // strcasecmp +#include "util.hpp" // parseWholeNumber Tracing tracing; @@ -22,8 +24,10 @@ bool trace_ParseTraceDepth(char const *arg) { tracing.loud = false; return true; } else { - char *endptr; - tracing.depth = strtoul(arg, &endptr, 0); - return arg[0] != '\0' && *endptr == '\0'; + std::optional depth = parseWholeNumber(arg); + if (depth) { + tracing.depth = *depth; + } + return depth.has_value(); } } diff --git a/src/diagnostics.cpp b/src/diagnostics.cpp index d60a86a6..af3c81dd 100644 --- a/src/diagnostics.cpp +++ b/src/diagnostics.cpp @@ -63,25 +63,9 @@ std::pair> getInitialWarningState(std::str return {state, std::nullopt}; } - // Is the rest of the string a decimal number? - // We want to avoid `strtoul`'s whitespace and sign handling, so we parse manually + // If the rest of the string is a decimal number, it's the parameter value char const *ptr = flag.c_str() + equals + 1; - uint32_t param = 0; - bool overflowed = false; - - for (; isDigit(*ptr); ++ptr) { - if (overflowed) { - continue; - } - - uint32_t c = *ptr - '0'; - if (param > (UINT32_MAX - c) / 10) { - overflowed = true; - param = UINT32_MAX; - continue; - } - param = param * 10 + c; - } + uint64_t param = parseNumber(ptr, BASE_10).value_or(0); // If we reached the end of the string, truncate it at the '=' if (*ptr == '\0') { @@ -92,5 +76,5 @@ std::pair> getInitialWarningState(std::str } } - return {state, param}; + return {state, param > UINT32_MAX ? UINT32_MAX : param}; } diff --git a/src/fix/main.cpp b/src/fix/main.cpp index 526cf59a..c8e74899 100644 --- a/src/fix/main.cpp +++ b/src/fix/main.cpp @@ -3,7 +3,9 @@ #include "fix/main.hpp" #include +#include #include +#include #include #include #include @@ -16,6 +18,7 @@ #include "platform.hpp" #include "style.hpp" #include "usage.hpp" +#include "util.hpp" #include "version.hpp" #include "fix/fix.hpp" @@ -89,25 +92,17 @@ static Usage usage = { // clang-format on static void parseByte(uint16_t &output, char name) { - if (musl_optarg[0] == 0) { + if (musl_optarg[0] == '\0') { fatal("Argument to option '-%c' may not be empty", name); } - char *endptr; - unsigned long value; - if (musl_optarg[0] == '$') { - value = strtoul(&musl_optarg[1], &endptr, 16); - } else { - value = strtoul(musl_optarg, &endptr, 0); - } - - if (*endptr) { + if (std::optional value = parseWholeNumber(musl_optarg); !value) { fatal("Expected number as argument to option '-%c', got \"%s\"", name, musl_optarg); } else if (value > 0xFF) { - fatal("Argument to option '-%c' is larger than 255: %lu", name, value); + fatal("Argument to option '-%c' is larger than 255: %" PRIu64, name, *value); + } else { + output = *value; } - - output = value; } static uint8_t const nintendoLogo[] = { diff --git a/src/fix/mbc.cpp b/src/fix/mbc.cpp index 3a211d6c..bf3bbeac 100644 --- a/src/fix/mbc.cpp +++ b/src/fix/mbc.cpp @@ -2,9 +2,11 @@ #include "fix/mbc.hpp" +#include #include #include #include +#include #include #include @@ -97,15 +99,11 @@ bool mbc_HasRAM(MbcType type) { } static void skipBlankSpace(char const *&ptr) { - while (isBlankSpace(*ptr)) { - ++ptr; - } + ptr += strspn(ptr, " \t"); } static void skipMBCSpace(char const *&ptr) { - while (isBlankSpace(*ptr) || *ptr == '_') { - ++ptr; - } + ptr += strspn(ptr, " \t_"); } static char normalizeMBCChar(char c) { @@ -128,53 +126,42 @@ static bool readMBCSlice(char const *&name, char const *expected) { } [[noreturn]] -static void fatalUnknownMBC(char const *fullName) { - fatal("Unknown MBC \"%s\"\n%s", fullName, acceptedMBCNames); +static void fatalUnknownMBC(char const *name) { + fatal("Unknown MBC \"%s\"\n%s", name, acceptedMBCNames); } [[noreturn]] -static void fatalWrongMBCFeatures(char const *fullName) { - fatal("Features incompatible with MBC (\"%s\")\n%s", fullName, acceptedMBCNames); +static void fatalWrongMBCFeatures(char const *name) { + fatal("Features incompatible with MBC (\"%s\")\n%s", name, acceptedMBCNames); } MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor) { - char const *fullName = name; + char const *ptr = name; + skipBlankSpace(ptr); // Trim off leading blank space - if (!strcasecmp(name, "help") || !strcasecmp(name, "list")) { + if (!strcasecmp(ptr, "help") || !strcasecmp(ptr, "list")) { puts(acceptedMBCNames); // Outputs to stdout and appends a newline exit(0); } - if (isDigit(name[0]) || name[0] == '$') { - int base = 0; - - if (name[0] == '$') { - ++name; - base = 16; + // Parse numeric MBC and return it as-is (unless it's too large) + if (char c = *ptr; isDigit(c) || c == '$' || c == '&' || c == '%') { + if (std::optional mbc = parseWholeNumber(ptr); !mbc) { + fatalUnknownMBC(name); + } else if (*mbc > 0xFF) { + fatal("Specified MBC ID out of range 0-255: \"%s\"", name); + } else { + return static_cast(*mbc); } - // Parse number, and return it as-is (unless it's too large) - char *endptr; - unsigned long mbc = strtoul(name, &endptr, base); - - if (*endptr) { - fatalUnknownMBC(fullName); - } - if (mbc > 0xFF) { - fatal("Specified MBC ID out of range 0-255: \"%s\"", fullName); - } - return static_cast(mbc); } // Begin by reading the MBC type: uint16_t mbc; - char const *ptr = name; - - skipBlankSpace(ptr); // Trim off leading blank space #define tryReadSlice(expected) \ do { \ if (!readMBCSlice(ptr, expected)) { \ - fatalUnknownMBC(fullName); \ + fatalUnknownMBC(name); \ } \ } while (0) @@ -201,7 +188,7 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor) case 'c': break; default: - fatalUnknownMBC(fullName); + fatalUnknownMBC(name); } switch (*ptr++) { case '1': @@ -223,7 +210,7 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor) mbc = MBC7_SENSOR_RUMBLE_RAM_BATTERY; break; default: - fatalUnknownMBC(fullName); + fatalUnknownMBC(name); } break; case 'M': @@ -232,7 +219,7 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor) mbc = MMM01; break; default: - fatalUnknownMBC(fullName); + fatalUnknownMBC(name); } break; @@ -260,33 +247,27 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor) // Parse version skipMBCSpace(ptr); // Major - char *endptr; - unsigned long val = strtoul(ptr, &endptr, 10); - - if (endptr == ptr) { + if (std::optional major = parseNumber(ptr, BASE_10); !major) { fatal("Failed to parse TPP1 major revision number"); - } - ptr = endptr; - if (val != 1) { + } else if (*major != 1) { fatal("RGBFIX only supports TPP1 version 1.0"); + } else { + tpp1Major = *major; } - tpp1Major = val; tryReadSlice("."); // Minor - val = strtoul(ptr, &endptr, 10); - if (endptr == ptr) { + if (std::optional minor = parseNumber(ptr, BASE_10); !minor) { fatal("Failed to parse TPP1 minor revision number"); - } - ptr = endptr; - if (val > 0xFF) { + } else if (*minor > 0xFF) { fatal("TPP1 minor revision number must be 8-bit"); + } else { + tpp1Minor = *minor; } - tpp1Minor = val; mbc = TPP1; break; } default: - fatalUnknownMBC(fullName); + fatalUnknownMBC(name); } break; @@ -301,12 +282,12 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor) mbc = HUC3; break; default: - fatalUnknownMBC(fullName); + fatalUnknownMBC(name); } break; default: - fatalUnknownMBC(fullName); + fatalUnknownMBC(name); } // Read "additional features" @@ -330,7 +311,7 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor) // We expect a '+' at this point skipMBCSpace(ptr); if (*ptr++ != '+') { - fatalUnknownMBC(fullName); + fatalUnknownMBC(name); } skipMBCSpace(ptr); @@ -361,7 +342,7 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor) features |= RAM; break; default: - fatalUnknownMBC(fullName); + fatalUnknownMBC(name); } break; @@ -378,7 +359,7 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor) break; default: - fatalUnknownMBC(fullName); + fatalUnknownMBC(name); } } #undef tryReadSlice @@ -402,7 +383,7 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor) } else if (features == (RAM | BATTERY)) { mbc += 2; } else if (features) { - fatalWrongMBCFeatures(fullName); + fatalWrongMBCFeatures(name); } break; @@ -410,7 +391,7 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor) if (features == BATTERY) { mbc = MBC2_BATTERY; } else if (features) { - fatalWrongMBCFeatures(fullName); + fatalWrongMBCFeatures(name); } break; @@ -434,7 +415,7 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor) } else if (features == (RAM | BATTERY)) { mbc += 2; } else if (features) { - fatalWrongMBCFeatures(fullName); + fatalWrongMBCFeatures(name); } break; @@ -452,7 +433,7 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor) } else if (features == (RAM | BATTERY)) { mbc += 2; } else if (features) { - fatalWrongMBCFeatures(fullName); + fatalWrongMBCFeatures(name); } break; @@ -462,19 +443,19 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor) case HUC3: // No extra features accepted if (features) { - fatalWrongMBCFeatures(fullName); + fatalWrongMBCFeatures(name); } break; case MBC7_SENSOR_RUMBLE_RAM_BATTERY: if (features != (SENSOR | RUMBLE | RAM | BATTERY)) { - fatalWrongMBCFeatures(fullName); + fatalWrongMBCFeatures(name); } break; case HUC1_RAM_BATTERY: if (features != (RAM | BATTERY)) { // HuC1 expects RAM+BATTERY - fatalWrongMBCFeatures(fullName); + fatalWrongMBCFeatures(name); } break; @@ -495,7 +476,7 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor) mbc |= 0x01; } if (features & SENSOR) { - fatalWrongMBCFeatures(fullName); + fatalWrongMBCFeatures(name); } // Multiple rumble speeds imply rumble if (mbc & 0x01) { @@ -508,7 +489,7 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor) // If there is still something left, error out if (*ptr) { - fatalUnknownMBC(fullName); + fatalUnknownMBC(name); } return static_cast(mbc); diff --git a/src/gfx/main.cpp b/src/gfx/main.cpp index 201e4c47..69c736bc 100644 --- a/src/gfx/main.cpp +++ b/src/gfx/main.cpp @@ -3,10 +3,10 @@ #include "gfx/main.hpp" #include -#include #include #include #include +#include #include #include #include @@ -122,76 +122,19 @@ static Usage usage = { // Parses a number at the beginning of a string, moving the pointer to skip the parsed characters. // Returns the provided errVal on error. -static uint16_t parseNumber(char *&string, char const *errPrefix, uint16_t errVal = UINT16_MAX) { - uint8_t base = 10; - if (*string == '\0') { +static uint16_t readNumber(char const *&str, char const *errPrefix, uint16_t errVal = UINT16_MAX) { + if (std::optional number = parseNumber(str); !number) { error("%s: expected number, but found nothing", errPrefix); return errVal; - } else if (*string == '$') { - base = 16; - ++string; - } else if (*string == '%') { - base = 2; - ++string; - } else if (*string == '0' && string[1] != '\0') { - // Check if we have a "0x" or "0b" here - if (string[1] == 'x' || string[1] == 'X') { - base = 16; - string += 2; - } else if (string[1] == 'b' || string[1] == 'B') { - base = 2; - string += 2; - } - } - - // Turns a digit into its numeric value in the current base, if it has one. - // Maximum is inclusive. The string_view is modified to "consume" all digits. - // Returns 255 on parse failure (including wrong char for base), in which case - // the string_view may be pointing on garbage. - auto charIndex = [&base](unsigned char c) -> uint8_t { - unsigned char index = c - '0'; // Use wrapping semantics - if (base == 2 && index >= 2) { - return 255; - } else if (index < 10) { - return index; - } else if (base != 16) { - return 255; // Letters are only valid in hex - } - index = tolower(c) - 'a'; // OK because we pass an `unsigned char` - if (index < 6) { - return index + 10; - } - return 255; - }; - - if (charIndex(*string) == 255) { - error( - "%s: expected digit%s, but found nothing", errPrefix, base != 10 ? " after base" : "" - ); + } else if (*number > UINT16_MAX) { + error("%s: the number is too large!", errPrefix); return errVal; + } else { + return *number; } - uint16_t number = 0; - do { - // Read a character, and check if it's valid in the given base - uint8_t index = charIndex(*string); - if (index == 255) { - break; // Found an invalid character, end - } - ++string; - - number *= base; - number += index; - // The lax check covers the addition on top of the multiplication - if (number >= UINT16_MAX / base) { - error("%s: the number is too large!", errPrefix); - return errVal; - } - } while (*string != '\0'); // No more characters? - - return number; } -static void skipBlankSpace(char *&arg) { +static void skipBlankSpace(char const *&arg) { arg += strspn(arg, " \t"); } @@ -263,7 +206,7 @@ static std::vector readAtFile(std::string const &path, std::vector // to an "at-file" path if one is encountered. static char *parseArgv(int argc, char *argv[]) { for (int ch; (ch = musl_getopt_long_only(argc, argv, optstring, longopts, nullptr)) != -1;) { - char *arg = musl_optarg; // Make a copy for scanning + char const *arg = musl_optarg; // Make a copy for scanning switch (ch) { case 'A': @@ -283,7 +226,7 @@ static char *parseArgv(int argc, char *argv[]) { break; case 'b': { - uint16_t number = parseNumber(arg, "Bank 0 base tile ID", 0); + uint16_t number = readNumber(arg, "Bank 0 base tile ID", 0); if (number >= 256) { error("Bank 0 base tile ID must be below 256"); } else { @@ -303,7 +246,7 @@ static char *parseArgv(int argc, char *argv[]) { } ++arg; // Skip comma skipBlankSpace(arg); - number = parseNumber(arg, "Bank 1 base tile ID", 0); + number = readNumber(arg, "Bank 1 base tile ID", 0); if (number >= 256) { error("Bank 1 base tile ID must be below 256"); } else { @@ -346,7 +289,7 @@ static char *parseArgv(int argc, char *argv[]) { break; case 'd': - options.bitDepth = parseNumber(arg, "Bit depth", 2); + options.bitDepth = readNumber(arg, "Bit depth", 2); if (*arg != '\0') { error("Bit depth ('-b') argument must be a valid number, not \"%s\"", musl_optarg); } else if (options.bitDepth != 1 && options.bitDepth != 2) { @@ -368,7 +311,7 @@ static char *parseArgv(int argc, char *argv[]) { break; case 'L': - options.inputSlice.left = parseNumber(arg, "Input slice left coordinate"); + options.inputSlice.left = readNumber(arg, "Input slice left coordinate"); if (options.inputSlice.left > INT16_MAX) { error("Input slice left coordinate is out of range!"); break; @@ -380,7 +323,7 @@ static char *parseArgv(int argc, char *argv[]) { } ++arg; skipBlankSpace(arg); - options.inputSlice.top = parseNumber(arg, "Input slice upper coordinate"); + options.inputSlice.top = readNumber(arg, "Input slice upper coordinate"); skipBlankSpace(arg); if (*arg != ':') { error("Missing colon after upper coordinate in \"%s\"", musl_optarg); @@ -388,7 +331,7 @@ static char *parseArgv(int argc, char *argv[]) { } ++arg; skipBlankSpace(arg); - options.inputSlice.width = parseNumber(arg, "Input slice width"); + options.inputSlice.width = readNumber(arg, "Input slice width"); skipBlankSpace(arg); if (options.inputSlice.width == 0) { error("Input slice width may not be 0!"); @@ -399,7 +342,7 @@ static char *parseArgv(int argc, char *argv[]) { } ++arg; skipBlankSpace(arg); - options.inputSlice.height = parseNumber(arg, "Input slice height"); + options.inputSlice.height = readNumber(arg, "Input slice height"); if (options.inputSlice.height == 0) { error("Input slice height may not be 0!"); } @@ -409,7 +352,7 @@ static char *parseArgv(int argc, char *argv[]) { break; case 'l': { - uint16_t number = parseNumber(arg, "Base palette ID", 0); + uint16_t number = readNumber(arg, "Base palette ID", 0); if (*arg != '\0') { error("Base palette ID must be a valid number, not \"%s\"", musl_optarg); } else if (number >= 256) { @@ -430,7 +373,7 @@ static char *parseArgv(int argc, char *argv[]) { break; case 'N': - options.maxNbTiles[0] = parseNumber(arg, "Number of tiles in bank 0", 256); + options.maxNbTiles[0] = readNumber(arg, "Number of tiles in bank 0", 256); if (options.maxNbTiles[0] > 256) { error("Bank 0 cannot contain more than 256 tiles"); } @@ -448,7 +391,7 @@ static char *parseArgv(int argc, char *argv[]) { } ++arg; // Skip comma skipBlankSpace(arg); - options.maxNbTiles[1] = parseNumber(arg, "Number of tiles in bank 1", 256); + options.maxNbTiles[1] = readNumber(arg, "Number of tiles in bank 1", 256); if (options.maxNbTiles[1] > 256) { error("Bank 1 cannot contain more than 256 tiles"); } @@ -462,7 +405,7 @@ static char *parseArgv(int argc, char *argv[]) { break; case 'n': { - uint16_t number = parseNumber(arg, "Number of palettes", 256); + uint16_t number = readNumber(arg, "Number of palettes", 256); if (*arg != '\0') { error("Number of palettes ('-n') must be a valid number, not \"%s\"", musl_optarg); } @@ -513,7 +456,7 @@ static char *parseArgv(int argc, char *argv[]) { case 'r': localOptions.reverse = true; - options.reversedWidth = parseNumber(arg, "Reversed image stride"); + options.reversedWidth = readNumber(arg, "Reversed image stride"); if (*arg != '\0') { error( "Reversed image stride ('-r') must be a valid number, not \"%s\"", musl_optarg @@ -522,7 +465,7 @@ static char *parseArgv(int argc, char *argv[]) { break; case 's': - options.nbColorsPerPal = parseNumber(arg, "Number of colors per palette", 4); + options.nbColorsPerPal = readNumber(arg, "Number of colors per palette", 4); if (*arg != '\0') { error("Palette size ('-s') must be a valid number, not \"%s\"", musl_optarg); } @@ -564,7 +507,7 @@ static char *parseArgv(int argc, char *argv[]) { break; case 'x': - options.trim = parseNumber(arg, "Number of tiles to trim", 0); + options.trim = readNumber(arg, "Number of tiles to trim", 0); if (*arg != '\0') { error("Tile trim ('-x') argument must be a valid number, not \"%s\"", musl_optarg); } diff --git a/src/gfx/pal_packing.cpp b/src/gfx/pal_packing.cpp index 0f5dbe74..b9749400 100644 --- a/src/gfx/pal_packing.cpp +++ b/src/gfx/pal_packing.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/src/link/lexer.cpp b/src/link/lexer.cpp index 5159ab7e..9f6fcf28 100644 --- a/src/link/lexer.cpp +++ b/src/link/lexer.cpp @@ -106,10 +106,6 @@ static yy::parser::symbol_type parseDecNumber(int c) { return yy::parser::make_number(number); } -static bool isBinDigit(int c) { - return c == '0' || c == '1'; -} - static yy::parser::symbol_type parseBinNumber(char const *prefix) { LexerStackEntry &context = lexerStack.back(); int c = context.file.sgetc(); @@ -167,7 +163,7 @@ static yy::parser::symbol_type parseHexNumber(char const *prefix) { return yy::parser::make_number(number); } -static yy::parser::symbol_type parseNumber(int c) { +static yy::parser::symbol_type parseAnyNumber(int c) { LexerStackEntry &context = lexerStack.back(); if (c == '0') { switch (context.file.sgetc()) { @@ -265,7 +261,7 @@ yy::parser::symbol_type yylex() { } else if (c == '&') { return parseOctNumber("'&'"); } else if (isDigit(c)) { - return parseNumber(c); + return parseAnyNumber(c); } else if (isLetter(c)) { std::string keyword = readKeyword(c); diff --git a/src/link/main.cpp b/src/link/main.cpp index c7cb0b75..a95ec15f 100644 --- a/src/link/main.cpp +++ b/src/link/main.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -265,21 +266,18 @@ static void parseScrambleSpec(char *spec) { uint16_t limit = search->second.second; if (regionSize) { - char *endptr; - unsigned long value = strtoul(regionSize, &endptr, 0); - - if (*endptr != '\0') { + char const *ptr = regionSize + skipBlankSpace(regionSize); + if (std::optional value = parseWholeNumber(ptr); !value) { fatal("Invalid region size limit \"%s\" for option '-S'", regionSize); - } - if (value > limit) { + } else if (*value > limit) { fatal( "%s region size for option '-S' must be between 0 and %" PRIu16, search->first.c_str(), limit ); + } else { + limit = *value; } - - limit = value; } else if (search->second.first != &options.scrambleWRAMX) { // Only WRAMX limit can be implied, since ROMX and SRAM size may vary. fatal("Missing %s region size limit for option '-S'", search->first.c_str()); @@ -353,21 +351,16 @@ int main(int argc, char *argv[]) { options.outputFileName = musl_optarg; break; - case 'p': { - char *endptr; - unsigned long value = strtoul(musl_optarg, &endptr, 0); - - if (musl_optarg[0] == '\0' || *endptr != '\0') { + case 'p': + if (std::optional value = parseWholeNumber(musl_optarg); !value) { fatal("Invalid argument for option '-p'"); - } - if (value > 0xFF) { + } else if (*value > 0xFF) { fatal("Argument for option '-p' must be between 0 and 0xFF"); + } else { + options.padValue = *value; + options.hasPadValue = true; } - - options.padValue = value; - options.hasPadValue = true; break; - } case 'S': parseScrambleSpec(musl_optarg); diff --git a/src/link/sdas_obj.cpp b/src/link/sdas_obj.cpp index 8d4ab11d..78e0835c 100644 --- a/src/link/sdas_obj.cpp +++ b/src/link/sdas_obj.cpp @@ -2,9 +2,9 @@ #include "link/sdas_obj.hpp" -#include #include #include +#include #include #include #include @@ -15,18 +15,13 @@ #include "helpers.hpp" // assume, literal_strlen #include "linkdefs.hpp" #include "platform.hpp" +#include "util.hpp" // parseWholeNumber #include "link/fstack.hpp" #include "link/section.hpp" #include "link/symbol.hpp" #include "link/warning.hpp" -enum NumberType { - HEX = 16, // X - DEC = 10, // D - OCT = 8, // Q -}; - struct Location { FileStackNode const *src; uint32_t lineNo; @@ -84,36 +79,26 @@ static int nextLine(std::vector &lineBuf, Location &where, FILE *file) { } } -static uint32_t readNumber(char const *str, char const *&endptr, NumberType base) { - for (uint32_t res = 0;;) { - static char const *digits = "0123456789ABCDEF"; - char const *ptr = strchr(digits, toupper(*str)); +static uint64_t readNumber(Location const &where, char const *str, NumberBase base) { + std::optional res = parseWholeNumber(str, base); - if (!ptr || ptr - digits >= base) { - endptr = str; - return res; - } - ++str; - res = res * base + (ptr - digits); - } -} - -static uint32_t parseNumber(Location const &where, char const *str, NumberType base) { - if (str[0] == '\0') { - fatalAt(where, "Expected number, got empty string"); - } - - char const *endptr; - uint32_t res = readNumber(str, endptr, base); - - if (*endptr != '\0') { + if (!res) { fatalAt(where, "Expected number, got \"%s\"", str); } - return res; + return *res; } -static uint8_t parseByte(Location const &where, char const *str, NumberType base) { - uint32_t num = parseNumber(where, str, base); +static uint32_t readInt(Location const &where, char const *str, NumberBase base) { + uint64_t num = readNumber(where, str, base); + + if (num > UINT32_MAX) { + fatalAt(where, "\"%s\" is not an int", str); + } + return num; +} + +static uint8_t readByte(Location const &where, char const *str, NumberBase base) { + uint64_t num = readNumber(where, str, base); if (num > UINT8_MAX) { fatalAt(where, "\"%s\" is not a byte", str); @@ -184,18 +169,18 @@ void sdobj_ReadFile(FileStackNode const &src, FILE *file, std::vector &f int lineType = nextLine(line, where, file); // The first letter (thus, the line type) identifies the integer type - NumberType numberType; + NumberBase numberBase; switch (lineType) { case EOF: fatalAt(where, "SDCC object only contains comments and empty lines"); case 'X': - numberType = HEX; + numberBase = BASE_16; break; case 'D': - numberType = DEC; + numberBase = BASE_10; break; case 'Q': - numberType = OCT; + numberBase = BASE_8; break; default: fatalAt( @@ -239,12 +224,12 @@ void sdobj_ReadFile(FileStackNode const &src, FILE *file, std::vector &f // Expected format: "A areas S global symbols" getToken(line.data(), "Empty 'H' line"); - uint32_t expectedNbAreas = parseNumber(where, token, numberType); + uint32_t expectedNbAreas = readInt(where, token, numberBase); expectToken("areas", 'H'); getToken(nullptr, "'H' line is too short"); - uint32_t expectedNbSymbols = parseNumber(where, token, numberType); + uint32_t expectedNbSymbols = readInt(where, token, numberBase); fileSymbols.reserve(expectedNbSymbols); expectToken("global", 'H'); @@ -296,7 +281,7 @@ void sdobj_ReadFile(FileStackNode const &src, FILE *file, std::vector &f getToken(nullptr, "'A' line is too short"); - uint32_t tmp = parseNumber(where, token, numberType); + uint32_t tmp = readInt(where, token, numberBase); if (tmp > UINT16_MAX) { fatalAt( @@ -310,7 +295,7 @@ void sdobj_ReadFile(FileStackNode const &src, FILE *file, std::vector &f expectToken("flags", 'A'); getToken(nullptr, "'A' line is too short"); - tmp = parseNumber(where, token, numberType); + tmp = readInt(where, token, numberBase); if (tmp & (1 << AREA_PAGING)) { fatalAt(where, "Paging is not supported"); } @@ -329,7 +314,7 @@ void sdobj_ReadFile(FileStackNode const &src, FILE *file, std::vector &f expectToken("addr", 'A'); getToken(nullptr, "'A' line is too short"); - tmp = parseNumber(where, token, numberType); + tmp = readInt(where, token, numberBase); curSection->org = tmp; // Truncation keeps the address portion only curSection->bank = tmp >> 16; @@ -386,7 +371,7 @@ void sdobj_ReadFile(FileStackNode const &src, FILE *file, std::vector &f getToken(nullptr, "'S' line is too short"); - if (int32_t value = parseNumber(where, &token[3], numberType); !fileSections.empty()) { + if (int32_t value = readInt(where, &token[3], numberBase); !fileSections.empty()) { // Symbols in sections are labels; their value is an offset Section *section = fileSections.back().section.get(); if (section->isAddressFixed) { @@ -465,7 +450,7 @@ void sdobj_ReadFile(FileStackNode const &src, FILE *file, std::vector &f data.clear(); for (token = strtok(line.data(), delim); token; token = strtok(nullptr, delim)) { - data.push_back(parseByte(where, token, numberType)); + data.push_back(readByte(where, token, numberBase)); } if (data.size() < addrSize) { @@ -487,9 +472,9 @@ void sdobj_ReadFile(FileStackNode const &src, FILE *file, std::vector &f uint16_t areaIdx; getToken(nullptr, "'R' line is too short"); - areaIdx = parseByte(where, token, numberType); + areaIdx = readByte(where, token, numberBase); getToken(nullptr, "'R' line is too short"); - areaIdx |= static_cast(parseByte(where, token, numberType)) << 8; + areaIdx |= static_cast(readByte(where, token, numberBase)) << 8; if (areaIdx >= fileSections.size()) { fatalAt( where, @@ -549,16 +534,16 @@ void sdobj_ReadFile(FileStackNode const &src, FILE *file, std::vector &f // appropriate RPN expression (depending on flags), plus an addition for the // bytes being patched over. while ((token = strtok(nullptr, delim)) != nullptr) { - uint16_t flags = parseByte(where, token, numberType); + uint16_t flags = readByte(where, token, numberBase); if ((flags & 0xF0) == 0xF0) { getToken(nullptr, "Incomplete relocation"); flags = (flags & 0x0F) - | static_cast(parseByte(where, token, numberType)) << 4; + | static_cast(readByte(where, token, numberBase)) << 4; } getToken(nullptr, "Incomplete relocation"); - uint8_t offset = parseByte(where, token, numberType); + uint8_t offset = readByte(where, token, numberBase); if (offset < addrSize) { fatalAt( @@ -578,10 +563,10 @@ void sdobj_ReadFile(FileStackNode const &src, FILE *file, std::vector &f } getToken(nullptr, "Incomplete relocation"); - uint16_t idx = parseByte(where, token, numberType); + uint16_t idx = readByte(where, token, numberBase); getToken(nullptr, "Incomplete relocation"); - idx |= static_cast(parseByte(where, token, numberType)); + idx |= static_cast(readByte(where, token, numberBase)); // Loudly fail on unknown flags if (flags & (1 << RELOC_ZPAGE | 1 << RELOC_NPAGE)) { diff --git a/src/util.cpp b/src/util.cpp index c24d3249..a3bac118 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -2,8 +2,11 @@ #include "util.hpp" +#include +#include #include #include +#include // strspn #include "helpers.hpp" // assume @@ -31,6 +34,10 @@ bool isDigit(int c) { return c >= '0' && c <= '9'; } +bool isBinDigit(int c) { + return c == '0' || c == '1'; +} + bool isOctDigit(int c) { return c >= '0' && c <= '7'; } @@ -64,6 +71,97 @@ uint8_t parseHexDigit(int c) { } } +// Parses a number from a string, moving the pointer to skip the parsed characters. +std::optional parseNumber(char const *&str, NumberBase base) { + // Identify the base if not specified + // Does *not* support '+' or '-' sign prefix (unlike `strtoul` and `std::from_chars`) + if (base == BASE_AUTO) { + // Skips leading blank space (like `strtoul`) + str += strspn(str, " \t"); + + // Supports traditional ("0b", "0o", "0x") and RGBASM ('%', '&', '$') base prefixes + switch (str[0]) { + case '%': + base = BASE_2; + ++str; + break; + case '&': + base = BASE_8; + ++str; + break; + case '$': + base = BASE_16; + ++str; + break; + case '0': + switch (str[1]) { + case 'B': + case 'b': + base = BASE_2; + str += 2; + break; + case 'O': + case 'o': + base = BASE_8; + str += 2; + break; + case 'X': + case 'x': + base = BASE_16; + str += 2; + break; + default: + base = BASE_10; + break; + } + break; + default: + base = BASE_10; + break; + } + } + assume(base != BASE_AUTO); + + // Get the digit-condition function corresponding to the base + bool (*canParseDigit)(int c) = base == BASE_2 ? isBinDigit + : base == BASE_8 ? isOctDigit + : base == BASE_10 ? isDigit + : base == BASE_16 ? isHexDigit + : nullptr; // LCOV_EXCL_LINE + assume(canParseDigit != nullptr); + + char const * const startDigits = str; + + // Parse the number one digit at a time + // Does *not* support '_' digit separators + uint64_t result = 0; + for (; canParseDigit(str[0]); ++str) { + uint8_t digit = parseHexDigit(str[0]); + if (result > (UINT64_MAX - digit) / base) { + // Skip remaining digits and set errno = ERANGE on overflow + while (canParseDigit(str[0])) { + ++str; + } + result = UINT64_MAX; + errno = ERANGE; + break; + } + result = result * base + digit; + } + + // Return the parsed number if there were any digit characters + if (str - startDigits == 0) { + return std::nullopt; + } + return result; +} + +// Parses a number from an entire string, returning nothing if there are more unparsed characters. +std::optional parseWholeNumber(char const *str, NumberBase base) { + std::optional result = parseNumber(str, base); + return str[0] == '\0' ? result : std::nullopt; +} + char const *printChar(int c) { // "'A'" + '\0': 4 bytes // "'\\n'" + '\0': 5 bytes diff --git a/test/gfx/warn_group_output.png b/test/gfx/warn_group_output.png new file mode 100644 index 00000000..3233c135 Binary files /dev/null and b/test/gfx/warn_group_output.png differ