Simplify format specs to not use a per-character state machine

This commit is contained in:
Rangi42
2025-08-30 12:23:01 -04:00
parent 531278961f
commit 0ccdbf509a
6 changed files with 90 additions and 151 deletions

View File

@@ -7,19 +7,7 @@
#include <stdint.h> #include <stdint.h>
#include <string> #include <string>
enum FormatState {
FORMAT_SIGN, // expects '+' or ' ' (optional)
FORMAT_EXACT, // expects '#' (optional)
FORMAT_ALIGN, // expects '-' (optional)
FORMAT_WIDTH, // expects '0'-'9', max 255 (optional) (leading '0' indicates pad)
FORMAT_FRAC, // got '.', expects '0'-'9', max 255 (optional)
FORMAT_PREC, // got 'q', expects '0'-'9', range 1-31 (optional)
FORMAT_DONE, // got [duXxbofs] (required)
FORMAT_INVALID, // got unexpected character
};
class FormatSpec { class FormatSpec {
FormatState state;
int sign; int sign;
bool exact; bool exact;
bool alignLeft; bool alignLeft;
@@ -30,15 +18,13 @@ class FormatSpec {
bool hasPrec; bool hasPrec;
size_t precision; size_t precision;
int type; int type;
bool valid; bool parsed;
public: public:
bool isEmpty() const { return !state; } bool isValid() const { return !!type; }
bool isValid() const { return valid || state == FORMAT_DONE; } bool isParsed() const { return parsed; }
bool isFinished() const { return state >= FORMAT_DONE; }
void useCharacter(int c); size_t parseSpec(char const *spec);
void finishCharacters();
void appendString(std::string &str, std::string const &value) const; void appendString(std::string &str, std::string const &value) const;
void appendNumber(std::string &str, uint32_t value) const; void appendNumber(std::string &str, uint32_t value) const;

View File

@@ -551,40 +551,30 @@ std::string act_StringFormat(
std::string str; std::string str;
size_t argIndex = 0; size_t argIndex = 0;
for (size_t i = 0; spec[i] != '\0'; ++i) { for (size_t i = 0; spec[i] != '\0';) {
int c = spec[i]; if (int c = spec[i]; c != '%') {
if (c != '%') {
str += c; str += c;
++i;
continue; continue;
} }
c = spec[++i]; if (int c = spec[++i]; c == '%') {
if (c == '%') {
str += c; str += c;
++i;
continue; continue;
} } else if (c == '\0') {
FormatSpec fmt{};
while (c != '\0') {
fmt.useCharacter(c);
if (fmt.isFinished()) {
break;
}
c = spec[++i];
}
if (fmt.isEmpty()) {
error("STRFMT: Illegal '%%' at end of format string"); error("STRFMT: Illegal '%%' at end of format string");
str += '%'; str += '%';
break; break;
} }
FormatSpec fmt{};
size_t n = fmt.parseSpec(spec.c_str() + i);
i += n;
if (!fmt.isValid()) { if (!fmt.isValid()) {
error("STRFMT: Invalid format spec for argument %zu", argIndex + 1); error("STRFMT: Invalid format spec for argument %zu", argIndex + 1);
str += '%'; str += spec.substr(i - n - 1, n + 1); // include the '%'
} else if (argIndex >= args.size()) { } else if (argIndex >= args.size()) {
// Will warn after formatting is done. // Will warn after formatting is done.
str += '%'; str += '%';

View File

@@ -11,91 +11,70 @@
#include <string.h> #include <string.h>
#include <string> #include <string>
#include "util.hpp" // isDigit
#include "asm/main.hpp" // options #include "asm/main.hpp" // options
#include "asm/warning.hpp" #include "asm/warning.hpp"
void FormatSpec::useCharacter(int c) { static size_t parseNumber(char const *spec, size_t &value) {
if (state == FORMAT_INVALID) { size_t i = 0;
return;
value = 0;
for (; isDigit(spec[i]); ++i) {
value = value * 10 + (spec[i] - '0');
} }
switch (c) { return i;
// sign }
case ' ':
case '+': size_t FormatSpec::parseSpec(char const *spec) {
if (state > FORMAT_SIGN) { size_t i = 0;
break;
} // <sign>
state = FORMAT_EXACT; if (char c = spec[i]; c == ' ' || c == '+') {
++i;
sign = c; sign = c;
return; }
// exact // <exact>
case '#': if (spec[i] == '#') {
if (state > FORMAT_EXACT) { ++i;
break;
}
state = FORMAT_ALIGN;
exact = true; exact = true;
return; }
// align // <align>
case '-': if (spec[i] == '-') {
if (state > FORMAT_ALIGN) { ++i;
break;
}
state = FORMAT_WIDTH;
alignLeft = true; alignLeft = true;
return; }
// pad, width, and prec values // <pad>
case '0': if (spec[i] == '0') {
if (state < FORMAT_WIDTH) { ++i;
padZero = true; padZero = true;
} }
[[fallthrough]];
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
if (state < FORMAT_WIDTH) {
state = FORMAT_WIDTH;
width = c - '0';
} else if (state == FORMAT_WIDTH) {
width = width * 10 + (c - '0');
} else if (state == FORMAT_FRAC) {
fracWidth = fracWidth * 10 + (c - '0');
} else if (state == FORMAT_PREC) {
precision = precision * 10 + (c - '0');
} else {
break;
}
return;
// frac // <width>
case '.': if (isDigit(spec[i])) {
if (state >= FORMAT_FRAC) { i += parseNumber(&spec[i], width);
break; }
}
state = FORMAT_FRAC; // <frac>
if (spec[i] == '.') {
++i;
hasFrac = true; hasFrac = true;
return; i += parseNumber(&spec[i], fracWidth);
}
// prec // <prec>
case 'q': if (spec[i] == 'q') {
if (state >= FORMAT_PREC) { ++i;
break;
}
state = FORMAT_PREC;
hasPrec = true; hasPrec = true;
return; i += parseNumber(&spec[i], precision);
}
// type // <type>
switch (char c = spec[i]; c) {
case 'd': case 'd':
case 'u': case 'u':
case 'X': case 'X':
@@ -104,26 +83,13 @@ void FormatSpec::useCharacter(int c) {
case 'o': case 'o':
case 'f': case 'f':
case 's': case 's':
if (state >= FORMAT_DONE) { ++i;
break;
}
state = FORMAT_DONE;
valid = true;
type = c; type = c;
return;
default:
break; break;
} }
state = FORMAT_INVALID; parsed = true;
valid = false; return i;
}
void FormatSpec::finishCharacters() {
if (!isValid()) {
state = FORMAT_INVALID;
}
} }
static std::string escapeString(std::string const &str) { static std::string escapeString(std::string const &str) {
@@ -158,7 +124,7 @@ static std::string escapeString(std::string const &str) {
void FormatSpec::appendString(std::string &str, std::string const &value) const { void FormatSpec::appendString(std::string &str, std::string const &value) const {
int useType = type; int useType = type;
if (isEmpty()) { if (!useType) {
// No format was specified // No format was specified
useType = 's'; useType = 's';
} }
@@ -197,7 +163,7 @@ void FormatSpec::appendString(std::string &str, std::string const &value) const
void FormatSpec::appendNumber(std::string &str, uint32_t value) const { void FormatSpec::appendNumber(std::string &str, uint32_t value) const {
int useType = type; int useType = type;
bool useExact = exact; bool useExact = exact;
if (isEmpty()) { if (!useType) {
// No format was specified; default to uppercase $hex // No format was specified; default to uppercase $hex
useType = 'X'; useType = 'X';
useExact = true; useExact = true;

View File

@@ -1305,7 +1305,7 @@ static std::pair<Symbol const *, std::shared_ptr<std::string>> readInterpolation
fatal("Recursion limit (%zu) exceeded", options.maxRecursionDepth); fatal("Recursion limit (%zu) exceeded", options.maxRecursionDepth);
} }
std::string fmtBuf; std::string identifier;
FormatSpec fmt{}; FormatSpec fmt{};
for (;;) { for (;;) {
@@ -1322,40 +1322,37 @@ static std::pair<Symbol const *, std::shared_ptr<std::string>> readInterpolation
} else if (c == '}') { } else if (c == '}') {
shiftChar(); shiftChar();
break; break;
} else if (c == ':' && !fmt.isFinished()) { // Format spec, only once } else if (c == ':' && !fmt.isParsed()) { // Format spec, only once
shiftChar(); shiftChar();
for (char f : fmtBuf) { size_t n = fmt.parseSpec(identifier.c_str());
fmt.useCharacter(f); if (!fmt.isValid() || n != identifier.length()) {
error("Invalid format spec \"%s\"", identifier.c_str());
} }
fmt.finishCharacters(); identifier.clear(); // Now that format has been set, restart at beginning of string
if (!fmt.isValid()) {
error("Invalid format spec \"%s\"", fmtBuf.c_str());
}
fmtBuf.clear(); // Now that format has been set, restart at beginning of string
} else { } else {
shiftChar(); shiftChar();
fmtBuf += c; identifier += c;
} }
} }
if (fmtBuf.starts_with('#')) { if (identifier.starts_with('#')) {
// Skip a '#' raw symbol prefix, but after expanding any nested interpolations. // Skip a '#' raw symbol prefix, but after expanding any nested interpolations.
fmtBuf.erase(0, 1); identifier.erase(0, 1);
} else if (keywordDict.find(fmtBuf) != keywordDict.end()) { } else if (keywordDict.find(identifier) != keywordDict.end()) {
// Don't allow symbols that alias keywords without a '#' prefix. // Don't allow symbols that alias keywords without a '#' prefix.
error( error(
"Interpolated symbol `%s` is a reserved keyword; add a '#' prefix to use it as a raw " "Interpolated symbol `%s` is a reserved keyword; add a '#' prefix to use it as a raw "
"symbol", "symbol",
fmtBuf.c_str() identifier.c_str()
); );
return {nullptr, nullptr}; return {nullptr, nullptr};
} }
if (Symbol const *sym = sym_FindScopedValidSymbol(fmtBuf); !sym || !sym->isDefined()) { if (Symbol const *sym = sym_FindScopedValidSymbol(identifier); !sym || !sym->isDefined()) {
if (sym_IsPurgedScoped(fmtBuf)) { if (sym_IsPurgedScoped(identifier)) {
error("Interpolated symbol `%s` does not exist; it was purged", fmtBuf.c_str()); error("Interpolated symbol `%s` does not exist; it was purged", identifier.c_str());
} else { } else {
error("Interpolated symbol `%s` does not exist", fmtBuf.c_str()); error("Interpolated symbol `%s` does not exist", identifier.c_str());
} }
return {sym, nullptr}; return {sym, nullptr};
} else if (sym->type == SYM_EQUS) { } else if (sym->type == SYM_EQUS) {
@@ -1367,7 +1364,7 @@ static std::pair<Symbol const *, std::shared_ptr<std::string>> readInterpolation
fmt.appendNumber(*buf, sym->getConstantValue()); fmt.appendNumber(*buf, sym->getConstantValue());
return {sym, buf}; return {sym, buf};
} else { } else {
error("Interpolated symbol `%s` is not a numeric or string symbol", fmtBuf.c_str()); error("Interpolated symbol `%s` is not a numeric or string symbol", identifier.c_str());
return {sym, nullptr}; return {sym, nullptr};
} }
} }

View File

@@ -1,8 +1,8 @@
+42 %d +42 %++d
$2a %x $2a %##x
42 %4d 42 %--4d
42 %f 42 %..f
42.00000 %16f %16f 42.00000 %q.16f %qq16f
42 42 42 42
2a 2a 2a 2a
hello hello

View File

@@ -8,5 +8,5 @@ void are 0
3.141586304 ~ three 3.141586304 ~ three
1 -> 1.235 -> 1.23456 1 -> 1.235 -> 1.23456
1 eol % 1 eol %
invalid % spec invalid %w spec
one=1 two=% three=% one=1 two=% three=%