From 6b8d33529ea24f97a2785e054e7bd5f031e6ba2f Mon Sep 17 00:00:00 2001
From: Sylvie <35663410+Rangi42@users.noreply.github.com>
Date: Sun, 1 Sep 2024 12:54:26 -0400
Subject: [PATCH] Improve string/interpolation formatting (#1491)

- The '#' component for type 's' now escapes the string characters
- The '#' component for type 'f' now prints a precision suffix
- The new 'q' component specifies a precision value
---
 include/asm/format.hpp             |  7 ++-
 man/rgbasm.5                       | 25 ++++++--
 src/asm/format.cpp                 | 93 ++++++++++++++++++++++++------
 test/asm/fixed-point-precision.asm |  5 +-
 test/asm/fixed-point-precision.err |  6 +-
 test/asm/fixed-point-precision.out |  3 +-
 test/asm/fixed-point-specific.asm  |  5 +-
 test/asm/invalid-format.asm        |  3 +-
 test/asm/invalid-format.err        | 20 ++++---
 test/asm/invalid-format.out        |  1 +
 test/asm/string-formatting.asm     |  3 +-
 test/asm/string-formatting.err     |  3 -
 test/asm/string-formatting.out     |  2 +-
 13 files changed, 128 insertions(+), 48 deletions(-)
 delete mode 100644 test/asm/string-formatting.err
diff --git a/include/asm/format.hpp b/include/asm/format.hpp
index 537160d1..2431b923 100644
--- a/include/asm/format.hpp
+++ b/include/asm/format.hpp
@@ -9,10 +9,11 @@
 
 enum FormatState {
 	FORMAT_SIGN,    // expects '+' or ' ' (optional)
-	FORMAT_PREFIX,  // expects '#' (optional)
+	FORMAT_EXACT,   // expects '#' (optional)
 	FORMAT_ALIGN,   // expects '-' (optional)
 	FORMAT_WIDTH,   // expects '0'-'9', max 255 (optional) (leading '0' indicates pad)
 	FORMAT_FRAC,    // got '.', expects '0'-'9', max 255 (optional)
+	FORMAT_PREC,    // got 'q', expects '0'-'9', range 1-31 (optional)
 	FORMAT_DONE,    // got [duXxbofs] (required)
 	FORMAT_INVALID, // got unexpected character
 };
@@ -20,12 +21,14 @@ enum FormatState {
 class FormatSpec {
 	FormatState state;
 	int sign;
-	bool prefix;
+	bool exact;
 	bool alignLeft;
 	bool padZero;
 	size_t width;
 	bool hasFrac;
 	size_t fracWidth;
+	bool hasPrec;
+	size_t precision;
 	int type;
 	bool valid;
 
diff --git a/man/rgbasm.5 b/man/rgbasm.5
index ea808363..3699f14e 100644
--- a/man/rgbasm.5
+++ b/man/rgbasm.5
@@ -164,19 +164,24 @@ It's possible to change the way symbols are printed by specifying a print format
 The
 .Ql fmt
 specifier consists of these parts:
-.Ql <sign><prefix><align><pad><width><frac><type> .
+.Ql <sign><exact><align><pad><width><frac><prec><type> .
 These parts are:
-.Bl -column "<prefix>"
+.Bl -column "<exact>"
 .It Sy Part Ta Sy Meaning
 .It Ql <sign> Ta May be
 .Ql +
 or
 .Ql \  .
 If specified, prints this character in front of non-negative numbers.
-.It Ql <prefix> Ta May be
+.It Ql <exact> Ta May be
 .Ql # .
-If specified, prints a base prefix for non-decimal integer types
-.Pq So $ Sc , So & Sc , or So % Sc .
+If specified, prints the value in an "exact" format: with a base prefix for non-decimal integer types
+.Pq So $ Sc , So & Sc , or So % Sc ;
+with a
+.Ql q
+precision suffix for fixed-point numbers; or with
+.Ql \e
+escape characters for strings.
 .It Ql <align> Ta May be
 .Ql - .
 If specified, aligns left instead of right.
@@ -196,6 +201,16 @@ followed by one or more
 .Ql 9 .
 If specified, prints this many fractional digits of a fixed-point number.
 Defaults to 5 digits, maximum 255 digits.
+.It Ql <prec> Ta May be
+.Ql q
+followed by one or more
+.Ql 0
+\[en]
+.Ql 9 .
+If specified, prints a fixed-point number at this precision, from 1 to 31.
+Defaults to the current
+.Fl Q
+option.
 .It Ql <type> Ta Specifies the type of value.
 .El
 .Pp
diff --git a/src/asm/format.cpp b/src/asm/format.cpp
index 333b6d99..0bce7eb9 100644
--- a/src/asm/format.cpp
+++ b/src/asm/format.cpp
@@ -22,16 +22,16 @@ void FormatSpec::useCharacter(int c) {
 	case '+':
 		if (state > FORMAT_SIGN)
 			goto invalid;
-		state = FORMAT_PREFIX;
+		state = FORMAT_EXACT;
 		sign = c;
 		break;
 
-	// prefix
+	// exact
 	case '#':
-		if (state > FORMAT_PREFIX)
+		if (state > FORMAT_EXACT)
 			goto invalid;
 		state = FORMAT_ALIGN;
-		prefix = true;
+		exact = true;
 		break;
 
 	// align
@@ -42,7 +42,7 @@ void FormatSpec::useCharacter(int c) {
 		alignLeft = true;
 		break;
 
-	// pad and width
+	// pad, width, frac, and prec values
 	case '0':
 		if (state < FORMAT_WIDTH)
 			padZero = true;
@@ -63,11 +63,14 @@ void FormatSpec::useCharacter(int c) {
 			width = width * 10 + (c - '0');
 		} else if (state == FORMAT_FRAC) {
 			fracWidth = fracWidth * 10 + (c - '0');
+		} else if (state == FORMAT_PREC) {
+			precision = precision * 10 + (c - '0');
 		} else {
 			goto invalid;
 		}
 		break;
 
+	// frac
 	case '.':
 		if (state > FORMAT_WIDTH)
 			goto invalid;
@@ -75,6 +78,14 @@ void FormatSpec::useCharacter(int c) {
 		hasFrac = true;
 		break;
 
+	// prec
+	case 'q':
+		if (state > FORMAT_PREC)
+			goto invalid;
+		state = FORMAT_PREC;
+		hasPrec = true;
+		break;
+
 	// type
 	case 'd':
 	case 'u':
@@ -103,6 +114,36 @@ void FormatSpec::finishCharacters() {
 		state = FORMAT_INVALID;
 }
 
+static std::string escapeString(std::string const &str) {
+	std::string escaped; // Copy of `str` with special characters replaced by escape sequences
+	for (char c : str) {
+		// Decide per character whether to copy it as-is or emit an escape sequence for it
+		switch (c) {
+		case '\\':
+		case '"':
+		case '{':
+			escaped += '\\'; // These three are kept, but preceded by a backslash
+			[[fallthrough]]; // The character itself is appended by the `default` case
+		default: // Every character not listed in this switch is copied unchanged
+			escaped += c;
+			break;
+		case '\n': // Newline becomes the two characters backslash + 'n'
+			escaped += "\\n";
+			break;
+		case '\r': // Carriage return becomes backslash + 'r'
+			escaped += "\\r";
+			break;
+		case '\t': // Tab becomes backslash + 't'
+			escaped += "\\t";
+			break;
+		case '\0': // NUL byte becomes backslash + '0'
+			escaped += "\\0";
+			break;
+		}
+	}
+	return escaped;
+}
+
 void FormatSpec::appendString(std::string &str, std::string const &value) const {
 	int useType = type;
 	if (isEmpty()) {
@@ -112,42 +153,45 @@ void FormatSpec::appendString(std::string &str, std::string const &value) const
 
 	if (sign)
 		error("Formatting string with sign flag '%c'\n", sign);
-	if (prefix)
-		error("Formatting string with prefix flag '#'\n");
 	if (padZero)
 		error("Formatting string with padding flag '0'\n");
 	if (hasFrac)
 		error("Formatting string with fractional width\n");
+	if (hasPrec)
+		error("Formatting string with fractional precision\n");
 	if (useType != 's')
 		error("Formatting string as type '%c'\n", useType);
 
-	size_t valueLen = value.length();
+	std::string useValue = exact ? escapeString(value) : value;
+	size_t valueLen = useValue.length();
 	size_t totalLen = width > valueLen ? width : valueLen;
 	size_t padLen = totalLen - valueLen;
 
 	str.reserve(str.length() + totalLen);
 	if (alignLeft) {
-		str.append(value);
+		str.append(useValue);
 		str.append(padLen, ' ');
 	} else {
 		str.append(padLen, ' ');
-		str.append(value);
+		str.append(useValue);
 	}
 }
 
 void FormatSpec::appendNumber(std::string &str, uint32_t value) const {
 	int useType = type;
-	bool usePrefix = prefix;
+	bool useExact = exact;
 	if (isEmpty()) {
 		// No format was specified; default to uppercase $hex
 		useType = 'X';
-		usePrefix = true;
+		useExact = true;
 	}
 
-	if (useType != 'X' && useType != 'x' && useType != 'b' && useType != 'o' && usePrefix)
-		error("Formatting type '%c' with prefix flag '#'\n", useType);
+	if (useType != 'X' && useType != 'x' && useType != 'b' && useType != 'o' && useType != 'f' && useExact)
+		error("Formatting type '%c' with exact flag '#'\n", useType);
 	if (useType != 'f' && hasFrac)
 		error("Formatting type '%c' with fractional width\n", useType);
+	if (useType != 'f' && hasPrec)
+		error("Formatting type '%c' with fractional precision\n", useType);
 	if (useType == 's')
 		error("Formatting number as type 's'\n");
 
@@ -161,7 +205,7 @@ void FormatSpec::appendNumber(std::string &str, uint32_t value) const {
 		}
 	}
 
-	char prefixChar = !usePrefix       ? 0
+	char prefixChar = !useExact        ? 0
 	                  : useType == 'X' ? '$'
 	                  : useType == 'x' ? '$'
 	                  : useType == 'b' ? '%'
@@ -188,14 +232,27 @@ void FormatSpec::appendNumber(std::string &str, uint32_t value) const {
 
 		// Default fractional width (C++'s is 6 for "%f"; here 5 is enough for Q16.16)
 		size_t useFracWidth = hasFrac ? fracWidth : 5;
-
 		if (useFracWidth > 255) {
 			error("Fractional width %zu too long, limiting to 255\n", useFracWidth);
 			useFracWidth = 255;
 		}
 
-		double fval = fabs(value / fix_PrecisionFactor());
-		snprintf(valueBuf, sizeof(valueBuf), "%.*f", (int)useFracWidth, fval);
+		size_t defaultPrec = fix_Precision();
+		size_t usePrec = hasPrec ? precision : defaultPrec;
+		if (usePrec < 1 || usePrec > 31) {
+			error(
+			    "Fixed-point constant precision %zu invalid, defaulting to %zu\n",
+			    usePrec,
+			    defaultPrec
+			);
+			usePrec = defaultPrec;
+		}
+
+		double fval = fabs(value / pow(2.0, usePrec));
+		if (useExact)
+			snprintf(valueBuf, sizeof(valueBuf), "%.*fq%zu", (int)useFracWidth, fval, usePrec);
+		else
+			snprintf(valueBuf, sizeof(valueBuf), "%.*f", (int)useFracWidth, fval);
 	} else if (useType == 'd') {
 		// Decimal numbers may be formatted with a '-' sign by `snprintf`, so `abs` prevents that,
 		// with a special case for `INT32_MIN` since `labs(INT32_MIN)` is UB. The sign will be
diff --git a/test/asm/fixed-point-precision.asm b/test/asm/fixed-point-precision.asm
index f7b80923..4e694933 100644
--- a/test/asm/fixed-point-precision.asm
+++ b/test/asm/fixed-point-precision.asm
@@ -16,7 +16,8 @@ def fr = MUL(20.0, 0.32)
 def q8 = 1.25q8
 def q16 = 1.25Q16
 def q24 = 1.25q.24
-	println "Q8 ${x:q8} Q16 ${x:q16} Q24 ${x:q24}"
+	println "{#f:q8} {#f:q16} {#f:q24}"
+	println "Q8 {#x:q8} Q16 {#x:q16} Q24 {#x:q24}"
 
 def qerr = 1.25q32
-	println qerr
+	println "{q0f:qerr}"
diff --git a/test/asm/fixed-point-precision.err b/test/asm/fixed-point-precision.err
index e0234c6d..d18d7c94 100644
--- a/test/asm/fixed-point-precision.err
+++ b/test/asm/fixed-point-precision.err
@@ -1,3 +1,5 @@
-error: fixed-point-precision.asm(21):
+error: fixed-point-precision.asm(22):
     Fixed-point constant precision must be between 1 and 31
-error: Assembly aborted (1 error)!
+error: fixed-point-precision.asm(23):
+    Fixed-point constant precision 0 invalid, defaulting to 16
+error: Assembly aborted (2 errors)!
diff --git a/test/asm/fixed-point-precision.out b/test/asm/fixed-point-precision.out
index c17b2fd6..2881ec0d 100644
--- a/test/asm/fixed-point-precision.out
+++ b/test/asm/fixed-point-precision.out
@@ -4,5 +4,6 @@
 `16.12`: 16.119995 -> $00101eb8
 `6.283185`: 6.283188 -> $0006487f
 32% of 20 = 6.40015 (~6.40) (~~6)
+0.00488q16 1.25000q16 320.00000q16
 Q8 $140 Q16 $14000 Q24 $1400000
-$14000
+1.25000
diff --git a/test/asm/fixed-point-specific.asm b/test/asm/fixed-point-specific.asm
index a0ef810f..88b24bf9 100644
--- a/test/asm/fixed-point-specific.asm
+++ b/test/asm/fixed-point-specific.asm
@@ -7,10 +7,7 @@ MACRO compare
 		def v1 = \3(\4q\1, \5q\1, \1)
 		def v2 = \3(\4q\2, \5q\2, \2)
 	endc
-	opt Q\1
-	print "{.4f:v1} == "
-	opt Q\2
-	println "{.4f:v2}"
+	println "{.4q\1f:v1} == {.4q\2f:v2}"
 ENDM
 
 	compare  8, 16, mul, 6.0, 7.0
diff --git a/test/asm/invalid-format.asm b/test/asm/invalid-format.asm
index 6c5e2b5e..6477ea4e 100644
--- a/test/asm/invalid-format.asm
+++ b/test/asm/invalid-format.asm
@@ -2,15 +2,16 @@ println STRFMT("%+d %++d", 42, 42)
 println STRFMT("%#x %##x", 42, 42)
 println STRFMT("%-4d %--4d", 42, 42)
 println STRFMT("%.f %..f", 42.0, 42.0)
+println STRFMT("%qf %q.16f", 42.0, 42.0)
 
 DEF N = 42
 println "{5d:N} {5d5:N}"
 println "{x:N} {xx:N}"
 
 println STRFMT("%+s", "hello")
-println STRFMT("%#s", "hello")
 println STRFMT("%0s", "hello")
 println STRFMT("%.5s", "hello")
+println STRFMT("%q16s", "hello")
 
 println STRFMT("%#d", 42)
 println STRFMT("%.5d", 42)
diff --git a/test/asm/invalid-format.err b/test/asm/invalid-format.err
index 4433ca8d..5e7c3fcc 100644
--- a/test/asm/invalid-format.err
+++ b/test/asm/invalid-format.err
@@ -6,20 +6,24 @@ error: invalid-format.asm(3):
     STRFMT: Invalid format spec for argument 2
 error: invalid-format.asm(4):
     STRFMT: Invalid format spec for argument 2
-error: invalid-format.asm(7):
-    Invalid format spec '5d5'
+error: invalid-format.asm(5):
+    Fixed-point constant precision 0 invalid, defaulting to 16
+error: invalid-format.asm(5):
+    STRFMT: Invalid format spec for argument 2
 error: invalid-format.asm(8):
+    Invalid format spec '5d5'
+error: invalid-format.asm(9):
     Invalid format spec 'xx'
-error: invalid-format.asm(10):
-    Formatting string with sign flag '+'
 error: invalid-format.asm(11):
-    Formatting string with prefix flag '#'
+    Formatting string with sign flag '+'
 error: invalid-format.asm(12):
     Formatting string with padding flag '0'
 error: invalid-format.asm(13):
     Formatting string with fractional width
-error: invalid-format.asm(15):
-    Formatting type 'd' with prefix flag '#'
+error: invalid-format.asm(14):
+    Formatting string with fractional precision
 error: invalid-format.asm(16):
+    Formatting type 'd' with exact flag '#'
+error: invalid-format.asm(17):
     Formatting type 'd' with fractional width
-error: Assembly aborted (12 errors)!
+error: Assembly aborted (14 errors)!
diff --git a/test/asm/invalid-format.out b/test/asm/invalid-format.out
index 129cbbbb..db1830ee 100644
--- a/test/asm/invalid-format.out
+++ b/test/asm/invalid-format.out
@@ -2,6 +2,7 @@
 $2a %x
 42   %4d
 42 %f
+42.00000 %16f
    42    42
 2a 2a
 hello
diff --git a/test/asm/string-formatting.asm b/test/asm/string-formatting.asm
index 12bdd1aa..fc892b49 100644
--- a/test/asm/string-formatting.asm
+++ b/test/asm/string-formatting.asm
@@ -3,11 +3,12 @@ def m equ -42
 def f equ -123.0456
 def pi equ 3.14159
 def s equs "hello"
+def t equs "\"\\t\" is '\t'"
 
 	println "<{ -6d:n}> <{+06u:n}> <{5x:n}> <{#16b:n}>"
 	println "<{u:m}> <{+3d:m}> <{#016o:m}>"
 	println "<{f:pi}> <{06.f:f}> <{.10f:f}>"
-	println "<{#-10s:s}> <{10s:s}>"
+	println "\"{#-20s:t}\", \"{#20s:t}\", \"{20s:t}\""
 
 macro foo
 	println "\1 <{\1}>"
diff --git a/test/asm/string-formatting.err b/test/asm/string-formatting.err
deleted file mode 100644
index 033fd367..00000000
--- a/test/asm/string-formatting.err
+++ /dev/null
@@ -1,3 +0,0 @@
-error: string-formatting.asm(10):
-    Formatting string with prefix flag '#'
-error: Assembly aborted (1 error)!
diff --git a/test/asm/string-formatting.out b/test/asm/string-formatting.out
index 4d7d175b..d6f26ae1 100644
--- a/test/asm/string-formatting.out
+++ b/test/asm/string-formatting.out
@@ -1,5 +1,5 @@
 < 300  > <+00300> <  12c> <      %100101100>
 <4294967254> <-42> <&000037777777726>
 <3.14159> <-00123> <-123.0455932617>
-<hello     > <     hello>
+"\"\\t\" is '\t'     ", "     \"\\t\" is '\t'", "         "\t" is '	'"
 -6d:n <300   >