From c0ce1da4c34f01493bbc5c4fd0735eb0cc2c4bfe Mon Sep 17 00:00:00 2001 From: Rangi <35663410+Rangi42@users.noreply.github.com> Date: Tue, 29 Dec 2020 16:53:15 -0500 Subject: [PATCH] Implement `STRFMT` and more printf-like format specifiers for string interpolation (#646) Fixes #570 Fixes #178 Use errors for inapplicable format spec flags instead of -Wstring-format --- Makefile | 1 + include/asm/format.h | 63 +++++++ include/asm/warning.h | 2 +- src/CMakeLists.txt | 1 + src/asm/format.c | 296 +++++++++++++++++++++++++++++++++ src/asm/lexer.c | 108 ++++-------- src/asm/main.c | 1 + src/asm/parser.y | 146 ++++++++++++++++ src/asm/rgbasm.5 | 71 +++++++- test/asm/bracketed-symbols.err | 2 +- test/asm/strfmt.asm | 24 +++ test/asm/strfmt.err | 11 ++ test/asm/strfmt.out | 10 ++ test/asm/string-formatting.asm | 15 ++ test/asm/string-formatting.err | 3 + test/asm/string-formatting.out | 5 + 16 files changed, 675 insertions(+), 84 deletions(-) create mode 100644 include/asm/format.h create mode 100644 src/asm/format.c create mode 100644 test/asm/strfmt.asm create mode 100644 test/asm/strfmt.err create mode 100644 test/asm/strfmt.out create mode 100644 test/asm/string-formatting.asm create mode 100644 test/asm/string-formatting.err create mode 100644 test/asm/string-formatting.out diff --git a/Makefile b/Makefile index f405c874..283118d3 100644 --- a/Makefile +++ b/Makefile @@ -54,6 +54,7 @@ all: rgbasm rgblink rgbfix rgbgfx rgbasm_obj := \ src/asm/charmap.o \ + src/asm/format.o \ src/asm/fstack.o \ src/asm/lexer.o \ src/asm/macro.o \ diff --git a/include/asm/format.h b/include/asm/format.h new file mode 100644 index 00000000..15b2fb48 --- /dev/null +++ b/include/asm/format.h @@ -0,0 +1,63 @@ +/* + * This file is part of RGBDS. + * + * Copyright (c) 2020, RGBDS contributors. 
+ * + * SPDX-License-Identifier: MIT + */ + +#ifndef RGBDS_FORMAT_SPEC_H +#define RGBDS_FORMAT_SPEC_H + +#include +#include + +enum FormatState { + FORMAT_SIGN, // expects '+' or ' ' (optional) + FORMAT_PREFIX, // expects '#' (optional) + FORMAT_ALIGN, // expects '-' (optional) + FORMAT_WIDTH, // expects '0'-'9', max 255 (optional) (leading '0' indicates pad) + FORMAT_FRAC, // got '.', expects '0'-'9', max 255 (optional) + FORMAT_DONE, // got [duXxbofs] (required) + FORMAT_INVALID, // got unexpected character +}; + +struct FormatSpec { + enum FormatState state; + int sign; + bool prefix; + bool alignLeft; + bool padZero; + uint8_t width; + bool hasFrac; + uint8_t fracWidth; + int type; + bool valid; +}; + +struct StrFmtArg { + union { + uint32_t number; + char *string; + }; + bool isNumeric; +}; + +#define INITIAL_STRFMT_ARG_SIZE 4 +struct StrFmtArgList { + char *format; + size_t nbArgs; + size_t capacity; + struct StrFmtArg *args; +}; + +struct FormatSpec fmt_NewSpec(void); +bool fmt_IsEmpty(struct FormatSpec const *fmt); +bool fmt_IsValid(struct FormatSpec const *fmt); +bool fmt_IsFinished(struct FormatSpec const *fmt); +void fmt_UseCharacter(struct FormatSpec *fmt, int c); +void fmt_FinishCharacters(struct FormatSpec *fmt); +void fmt_PrintString(char *buf, size_t bufLen, struct FormatSpec const *fmt, char const *value); +void fmt_PrintNumber(char *buf, size_t bufLen, struct FormatSpec const *fmt, uint32_t value); + +#endif /* RGBDS_FORMAT_SPEC_H */ diff --git a/include/asm/warning.h b/include/asm/warning.h index c84394c7..bae72e23 100644 --- a/include/asm/warning.h +++ b/include/asm/warning.h @@ -22,7 +22,7 @@ enum WarningID { WARNING_EMPTY_ENTRY, /* Empty entry in `db`, `dw` or `dl` */ WARNING_LARGE_CONSTANT, /* Constants too large */ WARNING_LONG_STR, /* String too long for internal buffers */ - WARNING_NESTED_COMMENT, /* Comment-start delimeter in a block comment */ + WARNING_NESTED_COMMENT, /* Comment-start delimiter in a block comment */ WARNING_OBSOLETE, 
/* Obsolete things */ WARNING_SHIFT, /* Shifting undefined behavior */ WARNING_SHIFT_AMOUNT, /* Strange shift amount */ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bdc9bfa0..95566bf4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -35,6 +35,7 @@ BISON_TARGET(PARSER "asm/parser.y" set(rgbasm_src "${BISON_PARSER_OUTPUT_SOURCE}" "asm/charmap.c" + "asm/format.c" "asm/fstack.c" "asm/lexer.c" "asm/macro.c" diff --git a/src/asm/format.c b/src/asm/format.c new file mode 100644 index 00000000..058ca39a --- /dev/null +++ b/src/asm/format.c @@ -0,0 +1,296 @@ +/* + * This file is part of RGBDS. + * + * Copyright (c) 2020, RGBDS contributors. + * + * SPDX-License-Identifier: MIT + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "asm/format.h" +#include "asm/warning.h" + +struct FormatSpec fmt_NewSpec(void) +{ + struct FormatSpec fmt = {0}; + + return fmt; +} + +bool fmt_IsEmpty(struct FormatSpec const *fmt) +{ + return !fmt->state; +} + +bool fmt_IsValid(struct FormatSpec const *fmt) +{ + return fmt->valid || fmt->state == FORMAT_DONE; +} + +bool fmt_IsFinished(struct FormatSpec const *fmt) +{ + return fmt->state >= FORMAT_DONE; +} + +void fmt_UseCharacter(struct FormatSpec *fmt, int c) +{ + if (fmt->state == FORMAT_INVALID) + return; + + switch (c) { + /* sign */ + case ' ': + case '+': + if (fmt->state > FORMAT_SIGN) + goto invalid; + fmt->state = FORMAT_PREFIX; + fmt->sign = c; + break; + + /* prefix */ + case '#': + if (fmt->state > FORMAT_PREFIX) + goto invalid; + fmt->state = FORMAT_ALIGN; + fmt->prefix = true; + break; + + /* align */ + case '-': + if (fmt->state > FORMAT_ALIGN) + goto invalid; + fmt->state = FORMAT_WIDTH; + fmt->alignLeft = true; + break; + + /* pad and width */ + case '0': + if (fmt->state < FORMAT_WIDTH) + fmt->padZero = true; + /* fallthrough */ + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (fmt->state < FORMAT_WIDTH) { 
+ fmt->state = FORMAT_WIDTH; + fmt->width = c - '0'; + } else if (fmt->state == FORMAT_WIDTH) { + fmt->width = fmt->width * 10 + (c - '0'); + } else if (fmt->state == FORMAT_FRAC) { + fmt->fracWidth = fmt->fracWidth * 10 + (c - '0'); + } else { + goto invalid; + } + break; + + case '.': + if (fmt->state > FORMAT_WIDTH) + goto invalid; + fmt->state = FORMAT_FRAC; + fmt->hasFrac = true; + break; + + /* type */ + case 'd': + case 'u': + case 'X': + case 'x': + case 'b': + case 'o': + case 'f': + case 's': + if (fmt->state >= FORMAT_DONE) + goto invalid; + fmt->state = FORMAT_DONE; + fmt->valid = true; + fmt->type = c; + break; + + default: +invalid: + fmt->state = FORMAT_INVALID; + fmt->valid = false; + } +} + +void fmt_FinishCharacters(struct FormatSpec *fmt) +{ + if (!fmt_IsValid(fmt)) + fmt->state = FORMAT_INVALID; +} + +void fmt_PrintString(char *buf, size_t bufLen, struct FormatSpec const *fmt, char const *value) +{ + if (fmt->sign) + error("Formatting string with sign flag '%c'\n", fmt->sign); + if (fmt->prefix) + error("Formatting string with prefix flag '#'\n"); + if (fmt->padZero) + error("Formatting string with padding flag '0'\n"); + if (fmt->hasFrac) + error("Formatting string with fractional width\n"); + if (fmt->type != 's') + error("Formatting string as type '%c'\n", fmt->type); + + size_t len = strlen(value); + size_t totalLen = fmt->width > len ? fmt->width : len; + + if (totalLen + 1 > bufLen) /* bufLen includes terminator */ + error("Formatted string value too long\n"); + + size_t padLen = fmt->width > len ? fmt->width - len : 0; + + if (fmt->alignLeft) { + strncpy(buf, value, len < bufLen ? 
len : bufLen); + for (size_t i = 0; i < totalLen && len + i < bufLen; i++) + buf[len + i] = ' '; + } else { + for (size_t i = 0; i < padLen && i < bufLen; i++) + buf[i] = ' '; + if (bufLen > padLen) + strncpy(buf + padLen, value, bufLen - padLen - 1); + } + + buf[totalLen] = '\0'; +} + +void fmt_PrintNumber(char *buf, size_t bufLen, struct FormatSpec const *fmt, uint32_t value) +{ + if (fmt->type != 'X' && fmt->type != 'x' && fmt->type != 'b' && fmt->type != 'o' + && fmt->prefix) + error("Formatting type '%c' with prefix flag '#'\n", fmt->type); + if (fmt->type != 'f' && fmt->hasFrac) + error("Formatting type '%c' with fractional width\n", fmt->type); + if (fmt->type == 's') + error("Formatting number as type 's'\n"); + + char sign = fmt->sign; /* 0 or ' ' or '+' */ + + if (fmt->type == 'd' || fmt->type == 'f') { + int32_t v = value; + + if (v < 0) { + sign = '-'; + if (v != INT32_MIN) + value = -v; + } + } + + char prefix = !fmt->prefix ? 0 + : fmt->type == 'X' ? '$' + : fmt->type == 'x' ? '$' + : fmt->type == 'b' ? '%' + : fmt->type == 'o' ? 
'&' + : 0; + + char valueBuf[262]; /* Max 5 digits + decimal + 255 fraction digits + terminator */ + + if (fmt->type == 'b') { + /* Special case for binary */ + char *ptr = valueBuf; + + do { + *ptr++ = (value & 1) + '0'; + value >>= 1; + } while (value); + + *ptr = '\0'; + + /* Reverse the digits */ + size_t valueLen = ptr - valueBuf; + + for (size_t i = 0, j = valueLen - 1; i < j; i++, j--) { + char c = valueBuf[i]; + + valueBuf[i] = valueBuf[j]; + valueBuf[j] = c; + } + } else if (fmt->type == 'f') { + /* Special case for fixed-point */ + if (fmt->fracWidth) { + char spec[16]; /* Max "%" + 5-char PRIu32 + ".%0255.f" + terminator */ + + snprintf(spec, sizeof(spec), "%%" PRIu32 ".%%0%d.f", fmt->fracWidth); + snprintf(valueBuf, sizeof(valueBuf), spec, value >> 16, + (value % 65536) / 65536.0 * pow(10, fmt->fracWidth) + 0.5); + } else { + snprintf(valueBuf, sizeof(valueBuf), "%" PRIu32, value >> 16); + } + } else { + char const *spec = fmt->type == 'd' ? "%" PRId32 + : fmt->type == 'u' ? "%" PRIu32 + : fmt->type == 'X' ? "%" PRIX32 + : fmt->type == 'x' ? "%" PRIx32 + : fmt->type == 'o' ? "%" PRIo32 + : "%" PRId32; + + snprintf(valueBuf, sizeof(valueBuf), spec, value); + } + + size_t len = strlen(valueBuf); + size_t numLen = len; + + if (sign) + numLen++; + if (prefix) + numLen++; + + size_t totalLen = fmt->width > numLen ? fmt->width : numLen; + + if (totalLen + 1 > bufLen) /* bufLen includes terminator */ + error("Formatted numeric value too long\n"); + + size_t padLen = fmt->width > numLen ? 
fmt->width - numLen : 0; + + if (fmt->alignLeft) { + size_t pos = 0; + + if (sign && pos < bufLen) + buf[pos++] = sign; + if (prefix && pos < bufLen) + buf[pos++] = prefix; + + strcpy(buf + pos, valueBuf); + pos += len; + + for (size_t i = 0; i < totalLen && pos + i < bufLen; i++) + buf[pos + i] = ' '; + } else { + size_t pos = 0; + + if (fmt->padZero) { + /* sign, then prefix, then zero padding */ + if (sign && pos < bufLen) + buf[pos++] = sign; + if (prefix && pos < bufLen) + buf[pos++] = prefix; + for (size_t i = 0; i < padLen && pos < bufLen; i++) + buf[pos++] = '0'; + } else { + /* space padding, then sign, then prefix */ + for (size_t i = 0; i < padLen && pos < bufLen; i++) + buf[pos++] = ' '; + if (sign && pos < bufLen) + buf[pos++] = sign; + if (prefix && pos < bufLen) + buf[pos++] = prefix; + } + if (bufLen > pos) + strcpy(buf + pos, valueBuf); + } + + buf[totalLen] = '\0'; +} diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 7eaf4aba..50bec439 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -28,6 +28,7 @@ #include "asm/asm.h" #include "asm/lexer.h" +#include "asm/format.h" #include "asm/fstack.h" #include "asm/macro.h" #include "asm/main.h" @@ -201,6 +202,7 @@ static struct KeywordMapping { {"STRCAT", T_OP_STRCAT}, {"STRUPR", T_OP_STRUPR}, {"STRLWR", T_OP_STRLWR}, + {"STRFMT", T_OP_STRFMT}, {"INCLUDE", T_POP_INCLUDE}, {"PRINTT", T_POP_PRINTT}, @@ -480,7 +482,7 @@ struct KeywordDictNode { uint16_t children[0x60 - ' ']; struct KeywordMapping const *keyword; /* Since the keyword structure is invariant, the min number of nodes is known at compile time */ -} keywordDict[347] = {0}; /* Make sure to keep this correct when adding keywords! */ +} keywordDict[350] = {0}; /* Make sure to keep this correct when adding keywords! 
*/ /* Convert a char into its index into the dict */ static inline uint8_t dictIndex(char c) @@ -1273,57 +1275,11 @@ static int readIdentifier(char firstChar) /* Functions to read strings */ -enum PrintType { - TYPE_NONE, - TYPE_DECIMAL, /* d */ - TYPE_UPPERHEX, /* X */ - TYPE_LOWERHEX, /* x */ - TYPE_BINARY, /* b */ -}; - -static void intToString(char *dest, size_t bufSize, struct Symbol const *sym, enum PrintType type) -{ - uint32_t value = sym_GetConstantSymValue(sym); - int fullLength; - - /* Special cheat for binary */ - if (type == TYPE_BINARY) { - char binary[33]; /* 32 bits + 1 terminator */ - char *write_ptr = binary + 32; - - fullLength = 0; - binary[32] = 0; - do { - *(--write_ptr) = (value & 1) + '0'; - value >>= 1; - fullLength++; - } while (value); - strncpy(dest, write_ptr, bufSize - 1); - } else { - static char const * const formats[] = { - [TYPE_NONE] = "$%" PRIX32, - [TYPE_DECIMAL] = "%" PRId32, - [TYPE_UPPERHEX] = "%" PRIX32, - [TYPE_LOWERHEX] = "%" PRIx32 - }; - - fullLength = snprintf(dest, bufSize, formats[type], value); - if (fullLength < 0) { - error("snprintf encoding error: %s\n", strerror(errno)); - dest[0] = '\0'; - } - } - - if ((size_t)fullLength >= bufSize) - warning(WARNING_LONG_STR, "Interpolated symbol %s too long to fit buffer\n", - sym->name); -} - static char const *readInterpolation(void) { char symName[MAXSYMLEN + 1]; size_t i = 0; - enum PrintType type = TYPE_NONE; + struct FormatSpec fmt = fmt_NewSpec(); for (;;) { int c = peek(0); @@ -1342,33 +1298,24 @@ static char const *readInterpolation(void) } else if (c == '}') { shiftChars(1); break; - } else if (c == ':' && type == TYPE_NONE) { /* Print type, only once */ - if (i != 1) { - error("Print types are exactly 1 character long\n"); - } else { - switch (symName[0]) { - case 'b': - type = TYPE_BINARY; - break; - case 'd': - type = TYPE_DECIMAL; - break; - case 'X': - type = TYPE_UPPERHEX; - break; - case 'x': - type = TYPE_LOWERHEX; - break; - default: - error("Invalid print 
type '%s'\n", print(symName[0])); - } - } - i = 0; /* Now that type has been set, restart at beginning of string */ + } else if (c == ':' && !fmt_IsFinished(&fmt)) { /* Format spec, only once */ shiftChars(1); + for (size_t j = 0; j < i; j++) + fmt_UseCharacter(&fmt, symName[j]); + fmt_FinishCharacters(&fmt); + symName[i] = '\0'; + if (!fmt_IsValid(&fmt)) { + error("Invalid format spec '%s'\n", symName); + } else if (!strcmp(symName, "f")) { + /* Format 'f' defaults to '.5f' like PRINTF */ + fmt.hasFrac = true; + fmt.fracWidth = 5; + } + i = 0; /* Now that format has been set, restart at beginning of string */ } else { + shiftChars(1); if (i < sizeof(symName)) /* Allow writing an extra char to flag overflow */ symName[i++] = c; - shiftChars(1); } } @@ -1378,18 +1325,25 @@ static char const *readInterpolation(void) } symName[i] = '\0'; + static char buf[MAXSTRLEN + 1]; + struct Symbol const *sym = sym_FindScopedSymbol(symName); if (!sym) { error("Interpolated symbol \"%s\" does not exist\n", symName); } else if (sym->type == SYM_EQUS) { - if (type != TYPE_NONE) - error("Print types are only allowed for numbers\n"); - return sym_GetStringValue(sym); + if (fmt_IsEmpty(&fmt)) + /* No format was specified */ + fmt.type = 's'; + fmt_PrintString(buf, sizeof(buf), &fmt, sym_GetStringValue(sym)); + return buf; } else if (sym_IsNumeric(sym)) { - static char buf[33]; /* Worst case of 32 digits + terminator */ - - intToString(buf, sizeof(buf), sym, type); + if (fmt_IsEmpty(&fmt)) { + /* No format was specified; default to uppercase $hex */ + fmt.type = 'X'; + fmt.prefix = true; + } + fmt_PrintNumber(buf, sizeof(buf), &fmt, sym_GetConstantSymValue(sym)); return buf; } else { error("Only numerical and string symbols can be interpolated\n"); diff --git a/src/asm/main.c b/src/asm/main.c index 16a251f6..f0529ad3 100644 --- a/src/asm/main.c +++ b/src/asm/main.c @@ -19,6 +19,7 @@ #include #include "asm/charmap.h" +#include "asm/format.h" #include "asm/fstack.h" #include "asm/lexer.h" 
#include "asm/main.h" diff --git a/src/asm/parser.y b/src/asm/parser.y index face1782..ab39178a 100644 --- a/src/asm/parser.y +++ b/src/asm/parser.y @@ -18,6 +18,7 @@ #include "asm/asm.h" #include "asm/charmap.h" +#include "asm/format.h" #include "asm/fstack.h" #include "asm/lexer.h" #include "asm/macro.h" @@ -163,6 +164,106 @@ static void strsubUTF8(char *dest, const char *src, uint32_t pos, uint32_t len) dest[destIndex] = 0; } +static void initStrFmtArgList(struct StrFmtArgList *args) { + args->nbArgs = 0; + args->capacity = INITIAL_STRFMT_ARG_SIZE; + args->args = malloc(args->capacity * sizeof(*args->args)); + if (!args->args) + fatalerror("Failed to allocate memory for STRFMT arg list: %s\n", + strerror(errno)); +} + +static size_t nextStrFmtArgListIndex(struct StrFmtArgList *args) { + if (args->nbArgs == args->capacity) { + args->capacity = (args->capacity + 1) * 2; + args->args = realloc(args->args, args->capacity * sizeof(*args->args)); + if (!args->args) + fatalerror("realloc error while resizing STRFMT arg list: %s\n", + strerror(errno)); + } + return args->nbArgs++; +} + +static void freeStrFmtArgList(struct StrFmtArgList *args) { + free(args->format); + for (size_t i = 0; i < args->nbArgs; i++) + if (!args->args[i].isNumeric) + free(args->args[i].string); + free(args->args); +} + +static void strfmt(char *dest, size_t destLen, char const *fmt, size_t nbArgs, struct StrFmtArg *args) { + size_t a = 0; + size_t i; + + for (i = 0; i < destLen;) { + int c = *fmt++; + + if (c == '\0') { + break; + } else if (c != '%') { + dest[i++] = c; + continue; + } + + c = *fmt++; + + if (c == '%') { + dest[i++] = c; + continue; + } + + struct FormatSpec spec = fmt_NewSpec(); + + while (c != '\0') { + fmt_UseCharacter(&spec, c); + if (fmt_IsFinished(&spec)) + break; + c = *fmt++; + } + + if (fmt_IsEmpty(&spec)) { + error("STRFMT: Illegal '%%' at end of format string\n"); + dest[i++] = '%'; + break; + } else if (!fmt_IsValid(&spec)) { + error("STRFMT: Invalid format spec 
for argument %zu\n", a + 1); + dest[i++] = '%'; + a++; + continue; + } else if (a == nbArgs) { + error("STRFMT: Not enough arguments for format spec\n", a + 1); + dest[i++] = '%'; + a++; + continue; + } else if (a > nbArgs) { + // already warned for a == nbArgs + dest[i++] = '%'; + a++; + continue; + } + + struct StrFmtArg *arg = &args[a++]; + static char buf[MAXSTRLEN + 1]; + + if (arg->isNumeric) + fmt_PrintNumber(buf, sizeof(buf), &spec, arg->number); + else + fmt_PrintString(buf, sizeof(buf), &spec, arg->string); + + i += snprintf(&dest[i], destLen - i, "%s", buf); + } + + if (i > destLen - 1) { + warning(WARNING_LONG_STR, "STRFMT: String too long, got truncated\n"); + i = destLen - 1; + } + dest[i] = '\0'; + + if (a < nbArgs) + error("STRFMT: %zu unformatted argument(s)\n", nbArgs - a); +} + static inline void failAssert(enum AssertionType type) { switch (type) { @@ -210,6 +311,7 @@ static inline void failAssertMsg(enum AssertionType type, char const *msg) int32_t stop; int32_t step; } foreachArgs; + struct StrFmtArgList strfmtArgs; } %type relocexpr @@ -226,6 +328,8 @@ static inline void failAssertMsg(enum AssertionType type, char const *msg) %type string %type strcat_args +%type strfmt_args +%type strfmt_va_args %type sectorg %type sectattrs @@ -275,6 +379,7 @@ static inline void failAssertMsg(enum AssertionType type, char const *msg) %left T_OP_STRCAT %left T_OP_STRUPR %left T_OP_STRLWR +%left T_OP_STRFMT %left NEG /* negation -- unary minus */ @@ -1136,6 +1241,10 @@ string : T_STRING | T_OP_STRLWR T_LPAREN string T_RPAREN { lowerstring($$, $3); } + | T_OP_STRFMT T_LPAREN strfmt_args T_RPAREN { + strfmt($$, MAXSTRLEN + 1, $3.format, $3.nbArgs, $3.args); + freeStrFmtArgList(&$3); + } ; strcat_args : string @@ -1146,6 +1255,43 @@ strcat_args : string } ; +strfmt_args : string strfmt_va_args { + $$.format = strdup($1); + $$.capacity = $2.capacity; + $$.nbArgs = $2.nbArgs; + $$.args = $2.args; + } +; + +strfmt_va_args : /* empty */ { + initStrFmtArgList(&$$); + 
} + | strfmt_va_args T_COMMA relocexpr_no_str { + int32_t value; + + if (!rpn_isKnown(&$3)) { + error("Expected constant expression: %s\n", + $3.reason); + value = 0; + } else { + value = $3.nVal; + } + + size_t i = nextStrFmtArgListIndex(&$1); + + $1.args[i].number = value; + $1.args[i].isNumeric = true; + $$ = $1; + } + | strfmt_va_args T_COMMA string { + size_t i = nextStrFmtArgListIndex(&$1); + + $1.args[i].string = strdup($3); + $1.args[i].isNumeric = false; + $$ = $1; + } +; + section : T_POP_SECTION sectmod string T_COMMA sectiontype sectorg sectattrs { out_NewSection($3, $5, $6, &$7, $2); } diff --git a/src/asm/rgbasm.5 b/src/asm/rgbasm.5 index 36817b8c..ca1b947d 100644 --- a/src/asm/rgbasm.5 +++ b/src/asm/rgbasm.5 @@ -268,18 +268,72 @@ PRINTT "The answer to {TOPIC} is {ANSWER}\[rs]n" .Pp Symbol interpolations can be nested, too! .Pp -It's possible to change the way numeric symbols are converted by specifying a print type like so: -.Ql {d:symbol} . +It's possible to change the way symbols are converted by specifying a print format like so: +.Ql {fmt:symbol} . +The +.Ql fmt +specifier consists of parts +.Ql <sign><prefix><align><pad><width><frac><type> . +These parts are: +.Bl -column "<prefix>" +.It Sy Part Ta Sy Meaning +.It Ql <sign> Ta May be +.Ql + +or +.Ql \ . +If specified, prints this character in front of non-negative numbers. +.It Ql <prefix> Ta May be +.Ql # . +If specified, prints the appropriate prefix for numbers, +.Ql $ , +.Ql & , +or +.Ql % . +.It Ql <align> Ta May be +.Ql - . +If specified, aligns left instead of right. +.It Ql <pad> Ta May be +.Ql 0 . +If specified, pads right-aligned numbers with zeros instead of spaces. +.It Ql <width> Ta May be one or more +.Ql 0 +\[en] +.Ql 9 . +If specified, pads the value to this width, right-aligned with spaces by default. +.It Ql <frac> Ta May be +.Ql \&. +followed by one or more +.Ql 0 +\[en] +.Ql 9 . +If specified, prints this many digits of a fixed-point fraction. +.It Ql <type> Ta Specifies the type of value. +.El +.Pp +All the format specifier parts are optional except the +.Ql <type> . 
Valid print types are: .Bl -column -offset indent "Print type" "Lowercase hexadecimal" "Example" .It Sy Print type Ta Sy Format Ta Sy Example -.It Ql d Ta Decimal Ta 42 +.It Ql d Ta Signed decimal Ta -42 +.It Ql u Ta Unsigned decimal Ta 42 .It Ql x Ta Lowercase hexadecimal Ta 2a .It Ql X Ta Uppercase hexadecimal Ta 2A .It Ql b Ta Binary Ta 101010 +.It Ql o Ta Octal Ta 52 +.It Ql f Ta Fixed-point Ta 1234.56789 +.It Ql s Ta String Ta \&"example\&" .El .Pp -Note that print types should only be used with numeric values, not strings. +Examples: +.Bd -literal -offset indent +; Prints "%0010 + $3 == 5" +PRINTT STRFMT("%#05b + %#x == %d\n", 2, 3, 2+3) +; Prints "32% of 20 = 6.40" +PRINTT STRFMT("%d%% of %d = %.2f\n", 32, 20, MUL(20.0, 0.32)) +; Prints "Hello world!" +PRINTT STRFMT("Hello %s!\n", STRLWR("WORLD")) +.Ed .Pp HINT: The .Ic {symbol} @@ -304,7 +358,7 @@ The following functions operate on string expressions. Most of them return a string, however some of these functions actually return an integer and can be used as part of an integer expression! .Bl -column "STRSUB(str, pos, len)" .It Sy Name Ta Sy Operation -.It Fn STRLEN string Ta Returns the number of characters in Ar string . +.It Fn STRLEN str Ta Returns the number of characters in Ar str . .It Fn STRCAT str1 str2 Ta Appends Ar str2 No to Ar str1 . .It Fn STRCMP str1 str2 Ta Returns -1 if Ar str1 No is alphabetically lower than Ar str2 No , zero if they match, 1 if Ar str1 No is greater than Ar str2 . .It Fn STRIN str1 str2 Ta Returns the first position of Ar str2 No in Ar str1 No or zero if it's not present Pq first character is position 1 . @@ -312,6 +366,13 @@ Most of them return a string, however some of these functions actually return an .It Fn STRSUB str pos len Ta Returns a substring from Ar str No starting at Ar pos Po first character is position 1 Pc and Ar len No characters long. .It Fn STRUPR str Ta Converts all characters in Ar str No to capitals and returns the new string. 
.It Fn STRLWR str Ta Converts all characters in Ar str No to lower case and returns the new string. +.It Fn STRFMT fmt args... Ta Returns the string Ar fmt No with each +.Ql %spec +pattern replaced by interpolating the format +.Ar spec +with its corresponding argument in +.Ar args +.Pq So %% Sc is replaced by the So % Sc character . .El .Ss Character maps When writing text that is meant to be displayed in the Game Boy, the characters used in the source code may have a different encoding than the default of ASCII. diff --git a/test/asm/bracketed-symbols.err b/test/asm/bracketed-symbols.err index 12541947..06dbc153 100644 --- a/test/asm/bracketed-symbols.err +++ b/test/asm/bracketed-symbols.err @@ -1,5 +1,5 @@ ERROR: bracketed-symbols.asm(16): - Print types are only allowed for numbers + Formatting string as type 'X' ERROR: bracketed-symbols.asm(20): "Label" does not have a constant value ERROR: bracketed-symbols.asm(21): diff --git a/test/asm/strfmt.asm b/test/asm/strfmt.asm new file mode 100644 index 00000000..45f70a5a --- /dev/null +++ b/test/asm/strfmt.asm @@ -0,0 +1,24 @@ +VAL EQUS STRFMT("Hello %s! 
I am %d years old today!", "world", $f) +PRINTT "{VAL}\n" + +N = -42 +PRINTT STRFMT("signed %010d == unsigned %010u\n", N, N) + +N = 112 +FMT EQUS "X" +PRINTT STRFMT("\tdb %#03{s:FMT} %% 26\t; %#03{FMT}\n", N, N % 26) + +TEMPLATE EQUS "\"%s are %s\\n\"" +PRINTT STRFMT(TEMPLATE, "roses", "red") +PRINTT STRFMT(TEMPLATE, "violets", "blue") +PRINTT STRFMT(TEMPLATE, "void", 0, "extra") + +PRINTT STRCAT(STRFMT(STRFMT("%%%s.%d%s", "", 9, "f"), _PI), \ + STRFMT(" ~ %s\n", STRFMT("%s%x", "thr", 238))) + +PRINTT STRFMT("%d eol %", 1) +PRINTT "\n" + +PRINTT STRFMT("invalid %w spec\n", 42) + +PRINTT STRFMT("one=%d two=%d three=%d\n", 1) diff --git a/test/asm/strfmt.err b/test/asm/strfmt.err new file mode 100644 index 00000000..e70a84dc --- /dev/null +++ b/test/asm/strfmt.err @@ -0,0 +1,11 @@ +ERROR: strfmt.asm(14): + Formatting number as type 's' +ERROR: strfmt.asm(14): + STRFMT: 1 unformatted argument(s) +ERROR: strfmt.asm(19): + STRFMT: Illegal '%' at end of format string +ERROR: strfmt.asm(22): + STRFMT: Invalid format spec for argument 1 +ERROR: strfmt.asm(24): + STRFMT: Not enough arguments for format spec +error: Assembly aborted (5 errors)! diff --git a/test/asm/strfmt.out b/test/asm/strfmt.out new file mode 100644 index 00000000..2e4bd6b5 --- /dev/null +++ b/test/asm/strfmt.out @@ -0,0 +1,10 @@ +Hello world! I am 15 years old today! 
+signed -000000042 == unsigned 4294967254 + db $70 % 26 ; $08 +roses are red +violets are blue +void are 0 +3.141586304 ~ three +1 eol % +invalid % spec +one=1 two=% three=% diff --git a/test/asm/string-formatting.asm b/test/asm/string-formatting.asm new file mode 100644 index 00000000..2929e847 --- /dev/null +++ b/test/asm/string-formatting.asm @@ -0,0 +1,15 @@ +n equ 300 +m equ -42 +f equ -123.0456 +s equs "hello" + + printt "<{ -6d:n}> <{+06u:n}> <{5x:n}> <{#16b:n}>\n" + printt "<{u:m}> <{+3d:m}> <{#016o:m}>\n" + printt "<{f:_PI}> <{06f:f}> <{.10f:f}>\n" + printt "<{#-10s:s}> <{10s:s}>\n" + +foo: macro + printt "<{\1}>\n" +endm + + foo -6d:n ; space is trimmed diff --git a/test/asm/string-formatting.err b/test/asm/string-formatting.err new file mode 100644 index 00000000..341a4971 --- /dev/null +++ b/test/asm/string-formatting.err @@ -0,0 +1,3 @@ +ERROR: string-formatting.asm(9): + Formatting string with prefix flag '#' +error: Assembly aborted (1 errors)! diff --git a/test/asm/string-formatting.out b/test/asm/string-formatting.out new file mode 100644 index 00000000..7072d1c5 --- /dev/null +++ b/test/asm/string-formatting.out @@ -0,0 +1,5 @@ +< 300 > <+00300> < 12c> < %100101100> +<4294967254> <-42> <&000037777777726> +<3.14159> <-00123> <-123.0455932618> + < hello> +<300 >