Simplify format specs to not use a per-character state machine

This commit is contained in:
Rangi42
2025-08-30 12:23:01 -04:00
parent 531278961f
commit 0ccdbf509a
6 changed files with 90 additions and 151 deletions

View File

@@ -7,19 +7,7 @@
#include <stdint.h>
#include <string>
// Stages of the per-character format spec parser, listed in the order in which
// the fields of a spec may appear. The numeric order is significant: states are
// compared with relational operators, and the zero value (FORMAT_SIGN) is also
// what a spec that has consumed no characters yet holds.
enum FormatState {
	FORMAT_SIGN = 0,    // optional: '+' or ' ' is expected
	FORMAT_EXACT = 1,   // optional: '#' is expected
	FORMAT_ALIGN = 2,   // optional: '-' is expected
	FORMAT_WIDTH = 3,   // optional: digits '0'-'9' are expected, max 255 (a leading '0' requests padding)
	FORMAT_FRAC = 4,    // optional: '.' was seen; digits '0'-'9' are expected, max 255
	FORMAT_PREC = 5,    // optional: 'q' was seen; digits '0'-'9' are expected, range 1-31
	FORMAT_DONE = 6,    // required: a type character from [duXxbofs] was seen
	FORMAT_INVALID = 7, // an unexpected character was seen
};
class FormatSpec {
FormatState state;
int sign;
bool exact;
bool alignLeft;
@@ -30,15 +18,13 @@ class FormatSpec {
bool hasPrec;
size_t precision;
int type;
bool valid;
bool parsed;
public:
bool isEmpty() const { return !state; }
bool isValid() const { return valid || state == FORMAT_DONE; }
bool isFinished() const { return state >= FORMAT_DONE; }
bool isValid() const { return !!type; }
bool isParsed() const { return parsed; }
void useCharacter(int c);
void finishCharacters();
size_t parseSpec(char const *spec);
void appendString(std::string &str, std::string const &value) const;
void appendNumber(std::string &str, uint32_t value) const;

View File

@@ -551,40 +551,30 @@ std::string act_StringFormat(
std::string str;
size_t argIndex = 0;
for (size_t i = 0; spec[i] != '\0'; ++i) {
int c = spec[i];
if (c != '%') {
for (size_t i = 0; spec[i] != '\0';) {
if (int c = spec[i]; c != '%') {
str += c;
++i;
continue;
}
c = spec[++i];
if (c == '%') {
if (int c = spec[++i]; c == '%') {
str += c;
++i;
continue;
}
FormatSpec fmt{};
while (c != '\0') {
fmt.useCharacter(c);
if (fmt.isFinished()) {
break;
}
c = spec[++i];
}
if (fmt.isEmpty()) {
} else if (c == '\0') {
error("STRFMT: Illegal '%%' at end of format string");
str += '%';
break;
}
FormatSpec fmt{};
size_t n = fmt.parseSpec(spec.c_str() + i);
i += n;
if (!fmt.isValid()) {
error("STRFMT: Invalid format spec for argument %zu", argIndex + 1);
str += '%';
str += spec.substr(i - n - 1, n + 1); // include the '%'
} else if (argIndex >= args.size()) {
// Will warn after formatting is done.
str += '%';

View File

@@ -11,91 +11,70 @@
#include <string.h>
#include <string>
#include "util.hpp" // isDigit
#include "asm/main.hpp" // options
#include "asm/warning.hpp"
void FormatSpec::useCharacter(int c) {
if (state == FORMAT_INVALID) {
return;
static size_t parseNumber(char const *spec, size_t &value) {
size_t i = 0;
value = 0;
for (; isDigit(spec[i]); ++i) {
value = value * 10 + (spec[i] - '0');
}
switch (c) {
// sign
case ' ':
case '+':
if (state > FORMAT_SIGN) {
break;
}
state = FORMAT_EXACT;
return i;
}
size_t FormatSpec::parseSpec(char const *spec) {
size_t i = 0;
// <sign>
if (char c = spec[i]; c == ' ' || c == '+') {
++i;
sign = c;
return;
}
// exact
case '#':
if (state > FORMAT_EXACT) {
break;
}
state = FORMAT_ALIGN;
// <exact>
if (spec[i] == '#') {
++i;
exact = true;
return;
}
// align
case '-':
if (state > FORMAT_ALIGN) {
break;
}
state = FORMAT_WIDTH;
// <align>
if (spec[i] == '-') {
++i;
alignLeft = true;
return;
}
// pad, width, and prec values
case '0':
if (state < FORMAT_WIDTH) {
padZero = true;
}
[[fallthrough]];
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
if (state < FORMAT_WIDTH) {
state = FORMAT_WIDTH;
width = c - '0';
} else if (state == FORMAT_WIDTH) {
width = width * 10 + (c - '0');
} else if (state == FORMAT_FRAC) {
fracWidth = fracWidth * 10 + (c - '0');
} else if (state == FORMAT_PREC) {
precision = precision * 10 + (c - '0');
} else {
break;
}
return;
// <pad>
if (spec[i] == '0') {
++i;
padZero = true;
}
// frac
case '.':
if (state >= FORMAT_FRAC) {
break;
}
state = FORMAT_FRAC;
// <width>
if (isDigit(spec[i])) {
i += parseNumber(&spec[i], width);
}
// <frac>
if (spec[i] == '.') {
++i;
hasFrac = true;
return;
i += parseNumber(&spec[i], fracWidth);
}
// prec
case 'q':
if (state >= FORMAT_PREC) {
break;
}
state = FORMAT_PREC;
// <prec>
if (spec[i] == 'q') {
++i;
hasPrec = true;
return;
i += parseNumber(&spec[i], precision);
}
// type
// <type>
switch (char c = spec[i]; c) {
case 'd':
case 'u':
case 'X':
@@ -104,26 +83,13 @@ void FormatSpec::useCharacter(int c) {
case 'o':
case 'f':
case 's':
if (state >= FORMAT_DONE) {
break;
}
state = FORMAT_DONE;
valid = true;
++i;
type = c;
return;
default:
break;
}
state = FORMAT_INVALID;
valid = false;
}
void FormatSpec::finishCharacters() {
if (!isValid()) {
state = FORMAT_INVALID;
}
parsed = true;
return i;
}
static std::string escapeString(std::string const &str) {
@@ -158,7 +124,7 @@ static std::string escapeString(std::string const &str) {
void FormatSpec::appendString(std::string &str, std::string const &value) const {
int useType = type;
if (isEmpty()) {
if (!useType) {
// No format was specified
useType = 's';
}
@@ -197,7 +163,7 @@ void FormatSpec::appendString(std::string &str, std::string const &value) const
void FormatSpec::appendNumber(std::string &str, uint32_t value) const {
int useType = type;
bool useExact = exact;
if (isEmpty()) {
if (!useType) {
// No format was specified; default to uppercase $hex
useType = 'X';
useExact = true;

View File

@@ -1305,7 +1305,7 @@ static std::pair<Symbol const *, std::shared_ptr<std::string>> readInterpolation
fatal("Recursion limit (%zu) exceeded", options.maxRecursionDepth);
}
std::string fmtBuf;
std::string identifier;
FormatSpec fmt{};
for (;;) {
@@ -1322,40 +1322,37 @@ static std::pair<Symbol const *, std::shared_ptr<std::string>> readInterpolation
} else if (c == '}') {
shiftChar();
break;
} else if (c == ':' && !fmt.isFinished()) { // Format spec, only once
} else if (c == ':' && !fmt.isParsed()) { // Format spec, only once
shiftChar();
for (char f : fmtBuf) {
fmt.useCharacter(f);
size_t n = fmt.parseSpec(identifier.c_str());
if (!fmt.isValid() || n != identifier.length()) {
error("Invalid format spec \"%s\"", identifier.c_str());
}
fmt.finishCharacters();
if (!fmt.isValid()) {
error("Invalid format spec \"%s\"", fmtBuf.c_str());
}
fmtBuf.clear(); // Now that format has been set, restart at beginning of string
identifier.clear(); // Now that format has been set, restart at beginning of string
} else {
shiftChar();
fmtBuf += c;
identifier += c;
}
}
if (fmtBuf.starts_with('#')) {
if (identifier.starts_with('#')) {
// Skip a '#' raw symbol prefix, but after expanding any nested interpolations.
fmtBuf.erase(0, 1);
} else if (keywordDict.find(fmtBuf) != keywordDict.end()) {
identifier.erase(0, 1);
} else if (keywordDict.find(identifier) != keywordDict.end()) {
// Don't allow symbols that alias keywords without a '#' prefix.
error(
"Interpolated symbol `%s` is a reserved keyword; add a '#' prefix to use it as a raw "
"symbol",
fmtBuf.c_str()
identifier.c_str()
);
return {nullptr, nullptr};
}
if (Symbol const *sym = sym_FindScopedValidSymbol(fmtBuf); !sym || !sym->isDefined()) {
if (sym_IsPurgedScoped(fmtBuf)) {
error("Interpolated symbol `%s` does not exist; it was purged", fmtBuf.c_str());
if (Symbol const *sym = sym_FindScopedValidSymbol(identifier); !sym || !sym->isDefined()) {
if (sym_IsPurgedScoped(identifier)) {
error("Interpolated symbol `%s` does not exist; it was purged", identifier.c_str());
} else {
error("Interpolated symbol `%s` does not exist", fmtBuf.c_str());
error("Interpolated symbol `%s` does not exist", identifier.c_str());
}
return {sym, nullptr};
} else if (sym->type == SYM_EQUS) {
@@ -1367,7 +1364,7 @@ static std::pair<Symbol const *, std::shared_ptr<std::string>> readInterpolation
fmt.appendNumber(*buf, sym->getConstantValue());
return {sym, buf};
} else {
error("Interpolated symbol `%s` is not a numeric or string symbol", fmtBuf.c_str());
error("Interpolated symbol `%s` is not a numeric or string symbol", identifier.c_str());
return {sym, nullptr};
}
}

View File

@@ -1,8 +1,8 @@
+42 %d
$2a %x
42 %4d
42 %f
42.00000 %16f %16f
+42 %++d
$2a %##x
42 %--4d
42 %..f
42.00000 %q.16f %qq16f
42 42
2a 2a
hello

View File

@@ -8,5 +8,5 @@ void are 0
3.141586304 ~ three
1 -> 1.235 -> 1.23456
1 eol %
invalid % spec
invalid %w spec
one=1 two=% three=%