Pass std::string references to parser semantic functions instead of .c_str() pointers

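This also refactors some semantic functions to be more efficient: STRSUB, CHARSUB, STRRPL, and STRFMT helpers now return a std::string instead of writing into a caller-supplied fixed-size buffer, and the hand-written upperstring/lowerstring/strrstr helpers are replaced by std::transform and std::string::find/rfind.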
This commit is contained in:
Rangi42
2024-03-17 13:50:34 -04:00
committed by Sylvie
parent 8f77518406
commit 04899a21cd

View File

@@ -45,12 +45,14 @@
}; };
} }
%code { %code {
#include <algorithm>
#include <ctype.h> #include <ctype.h>
#include <errno.h> #include <errno.h>
#include <new> #include <new>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <string_view>
#include "asm/charmap.hpp" #include "asm/charmap.hpp"
#include "asm/fixpoint.hpp" #include "asm/fixpoint.hpp"
@@ -70,26 +72,21 @@
yy::parser::symbol_type yylex(); // Provided by lexer.cpp yy::parser::symbol_type yylex(); // Provided by lexer.cpp
static void upperstring(char *dest, char const *src);
static void lowerstring(char *dest, char const *src);
static uint32_t str2int2(std::vector<uint8_t> const &s); static uint32_t str2int2(std::vector<uint8_t> const &s);
static char const *strrstr(char const *s1, char const *s2);
static void errorInvalidUTF8Byte(uint8_t byte, char const *functionName); static void errorInvalidUTF8Byte(uint8_t byte, char const *functionName);
static size_t strlenUTF8(char const *s); static size_t strlenUTF8(std::string const &str);
static void strsubUTF8(char *dest, size_t destLen, char const *src, uint32_t pos, uint32_t len); static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len);
static size_t charlenUTF8(char const *str); static size_t charlenUTF8(std::string const &str);
static void charsubUTF8(char *dest, char const *src, uint32_t pos); static std::string charsubUTF8(std::string const &str, uint32_t pos);
static uint32_t adjustNegativePos(int32_t pos, size_t len, char const *functionName); static uint32_t adjustNegativePos(int32_t pos, size_t len, char const *functionName);
static void strrpl( static std::string strrpl(
char *dest, size_t destLen, char const *src, char const *old, char const *rep std::string_view str, std::string const &old, std::string const &rep
); );
static void strfmt( static std::string strfmt(
char *dest, std::string const &spec,
size_t destLen, std::vector<std::variant<uint32_t, std::string>> const &args
char const *spec,
std::vector<std::variant<uint32_t, std::string>> &args
); );
static void compoundAssignment(char const *symName, RPNCommand op, int32_t constValue); static void compoundAssignment(std::string const &symName, RPNCommand op, int32_t constValue);
static void failAssert(AssertionType type); static void failAssert(AssertionType type);
static void failAssertMsg(AssertionType type, char const *msg); static void failAssertMsg(AssertionType type, char const *msg);
@@ -653,7 +650,7 @@ assignment:
$1.c_str(), $1.c_str(),
compoundEqOperator compoundEqOperator
); );
compoundAssignment($1.c_str(), $2, $3); compoundAssignment($1, $2, $3);
} }
; ;
@@ -1043,10 +1040,10 @@ def_set:
sym_AddVar($1.c_str(), $3); sym_AddVar($1.c_str(), $3);
} }
| def_id compoundeq const { | def_id compoundeq const {
compoundAssignment($1.c_str(), $2, $3); compoundAssignment($1, $2, $3);
} }
| redef_id compoundeq const { | redef_id compoundeq const {
compoundAssignment($1.c_str(), $2, $3); compoundAssignment($1, $2, $3);
} }
; ;
@@ -1095,11 +1092,11 @@ purge:
purge_args: purge_args:
scoped_id { scoped_id {
$$.push_back($1.c_str()); $$.push_back($1);
} }
| purge_args COMMA scoped_id { | purge_args COMMA scoped_id {
$$ = std::move($1); $$ = std::move($1);
$$.push_back($3.c_str()); $$.push_back($3);
} }
; ;
@@ -1479,20 +1476,20 @@ relocexpr_no_str:
rpn_Number($$, $3.compare($5)); rpn_Number($$, $3.compare($5));
} }
| OP_STRIN LPAREN string COMMA string RPAREN { | OP_STRIN LPAREN string COMMA string RPAREN {
char const *p = strstr($3.c_str(), $5.c_str()); auto pos = $3.find($5);
rpn_Number($$, p ? p - $3.c_str() + 1 : 0); rpn_Number($$, pos != std::string::npos ? pos + 1 : 0);
} }
| OP_STRRIN LPAREN string COMMA string RPAREN { | OP_STRRIN LPAREN string COMMA string RPAREN {
char const *p = strrstr($3.c_str(), $5.c_str()); auto pos = $3.rfind($5);
rpn_Number($$, p ? p - $3.c_str() + 1 : 0); rpn_Number($$, pos != std::string::npos ? pos + 1 : 0);
} }
| OP_STRLEN LPAREN string RPAREN { | OP_STRLEN LPAREN string RPAREN {
rpn_Number($$, strlenUTF8($3.c_str())); rpn_Number($$, strlenUTF8($3));
} }
| OP_CHARLEN LPAREN string RPAREN { | OP_CHARLEN LPAREN string RPAREN {
rpn_Number($$, charlenUTF8($3.c_str())); rpn_Number($$, charlenUTF8($3));
} }
| OP_INCHARMAP LPAREN string RPAREN { | OP_INCHARMAP LPAREN string RPAREN {
rpn_Number($$, charmap_HasChar($3.c_str())); rpn_Number($$, charmap_HasChar($3.c_str()));
@@ -1546,28 +1543,22 @@ string:
$$ = $1.string; $$ = $1.string;
} }
| OP_STRSUB LPAREN string COMMA const COMMA uconst RPAREN { | OP_STRSUB LPAREN string COMMA const COMMA uconst RPAREN {
size_t len = strlenUTF8($3.c_str()); size_t len = strlenUTF8($3);
uint32_t pos = adjustNegativePos($5, len, "STRSUB"); uint32_t pos = adjustNegativePos($5, len, "STRSUB");
String tmp; $$ = strsubUTF8($3, pos, $7);
strsubUTF8(tmp.string, sizeof(tmp.string), $3.c_str(), pos, $7);
$$ = tmp.string;
} }
| OP_STRSUB LPAREN string COMMA const RPAREN { | OP_STRSUB LPAREN string COMMA const RPAREN {
size_t len = strlenUTF8($3.c_str()); size_t len = strlenUTF8($3);
uint32_t pos = adjustNegativePos($5, len, "STRSUB"); uint32_t pos = adjustNegativePos($5, len, "STRSUB");
String tmp; $$ = strsubUTF8($3, pos, pos > len ? 0 : len + 1 - pos);
strsubUTF8(tmp.string, sizeof(tmp.string), $3.c_str(), pos, pos > len ? 0 : len + 1 - pos);
$$ = tmp.string;
} }
| OP_CHARSUB LPAREN string COMMA const RPAREN { | OP_CHARSUB LPAREN string COMMA const RPAREN {
size_t len = charlenUTF8($3.c_str()); size_t len = charlenUTF8($3);
uint32_t pos = adjustNegativePos($5, len, "CHARSUB"); uint32_t pos = adjustNegativePos($5, len, "CHARSUB");
String tmp; $$ = charsubUTF8($3, pos);
charsubUTF8(tmp.string, $3.c_str(), pos);
$$ = tmp.string;
} }
| OP_STRCAT LPAREN RPAREN { | OP_STRCAT LPAREN RPAREN {
$$.clear(); $$.clear();
@@ -1576,25 +1567,18 @@ string:
$$ = std::move($3); $$ = std::move($3);
} }
| OP_STRUPR LPAREN string RPAREN { | OP_STRUPR LPAREN string RPAREN {
String tmp; $$ = std::move($3);
upperstring(tmp.string, $3.c_str()); std::transform(RANGE($$), $$.begin(), [](char c) { return toupper(c); });
$$ = tmp.string;
} }
| OP_STRLWR LPAREN string RPAREN { | OP_STRLWR LPAREN string RPAREN {
String tmp; $$ = std::move($3);
lowerstring(tmp.string, $3.c_str()); std::transform(RANGE($$), $$.begin(), [](char c) { return tolower(c); });
$$ = tmp.string;
} }
| OP_STRRPL LPAREN string COMMA string COMMA string RPAREN { | OP_STRRPL LPAREN string COMMA string COMMA string RPAREN {
String tmp; $$ = strrpl($3, $5, $7);
strrpl(tmp.string, sizeof(tmp.string), $3.c_str(), $5.c_str(), $7.c_str());
$$ = tmp.string;
} }
| OP_STRFMT LPAREN strfmt_args RPAREN { | OP_STRFMT LPAREN strfmt_args RPAREN {
StrFmtArgList args = std::move($3); $$ = strfmt($3.format, $3.args);
String tmp;
strfmt(tmp.string, sizeof(tmp.string), args.format.c_str(), args.args);
$$ = tmp.string;
} }
| POP_SECTION LPAREN scoped_anon_id RPAREN { | POP_SECTION LPAREN scoped_anon_id RPAREN {
Symbol *sym = sym_FindScopedValidSymbol($3.c_str()); Symbol *sym = sym_FindScopedValidSymbol($3.c_str());
@@ -2471,18 +2455,6 @@ void yy::parser::error(std::string const &str) {
::error("%s\n", str.c_str()); ::error("%s\n", str.c_str());
} }
static void upperstring(char *dest, char const *src) {
while (*src)
*dest++ = toupper(*src++);
*dest = '\0';
}
static void lowerstring(char *dest, char const *src) {
while (*src)
*dest++ = tolower(*src++);
*dest = '\0';
}
static uint32_t str2int2(std::vector<uint8_t> const &s) { static uint32_t str2int2(std::vector<uint8_t> const &s) {
uint32_t length = s.size(); uint32_t length = s.size();
@@ -2508,30 +2480,17 @@ static uint32_t str2int2(std::vector<uint8_t> const &s) {
return r; return r;
} }
static char const *strrstr(char const *s1, char const *s2) {
size_t len1 = strlen(s1);
size_t len2 = strlen(s2);
if (len2 > len1)
return nullptr;
for (char const *p = s1 + len1 - len2; p >= s1; p--)
if (!strncmp(p, s2, len2))
return p;
return nullptr;
}
static void errorInvalidUTF8Byte(uint8_t byte, char const *functionName) { static void errorInvalidUTF8Byte(uint8_t byte, char const *functionName) {
error("%s: Invalid UTF-8 byte 0x%02hhX\n", functionName, byte); error("%s: Invalid UTF-8 byte 0x%02hhX\n", functionName, byte);
} }
static size_t strlenUTF8(char const *s) { static size_t strlenUTF8(std::string const &str) {
char const *ptr = str.c_str();
size_t len = 0; size_t len = 0;
uint32_t state = 0; uint32_t state = 0;
for (uint32_t codep = 0; *s; s++) { for (uint32_t codep = 0; *ptr; ptr++) {
uint8_t byte = *s; uint8_t byte = *ptr;
switch (decode(&state, &codep, byte)) { switch (decode(&state, &codep, byte)) {
case 1: case 1:
@@ -2551,49 +2510,52 @@ static size_t strlenUTF8(char const *s) {
return len; return len;
} }
static void strsubUTF8(char *dest, size_t destLen, char const *src, uint32_t pos, uint32_t len) { static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len) {
size_t srcIndex = 0; char const *ptr = str.c_str();
size_t destIndex = 0; size_t index = 0;
uint32_t state = 0; uint32_t state = 0;
uint32_t codep = 0; uint32_t codep = 0;
uint32_t curLen = 0; uint32_t curPos = 1; // RGBASM strings are 1-indexed!
uint32_t curPos = 1;
// Advance to starting position in source string. // Advance to starting position in source string.
while (src[srcIndex] && curPos < pos) { while (ptr[index] && curPos < pos) {
switch (decode(&state, &codep, src[srcIndex])) { switch (decode(&state, &codep, ptr[index])) {
case 1: case 1:
errorInvalidUTF8Byte(src[srcIndex], "STRSUB"); errorInvalidUTF8Byte(ptr[index], "STRSUB");
state = 0; state = 0;
// fallthrough // fallthrough
case 0: case 0:
curPos++; curPos++;
break; break;
} }
srcIndex++; index++;
} }
// A position 1 past the end of the string is allowed, but will trigger the // A position 1 past the end of the string is allowed, but will trigger the
// "Length too big" warning below if the length is nonzero. // "Length too big" warning below if the length is nonzero.
if (!src[srcIndex] && pos > curPos) if (!ptr[index] && pos > curPos)
warning( warning(
WARNING_BUILTIN_ARG, "STRSUB: Position %" PRIu32 " is past the end of the string\n", pos WARNING_BUILTIN_ARG, "STRSUB: Position %" PRIu32 " is past the end of the string\n", pos
); );
// Copy from source to destination. size_t startIndex = index;
while (src[srcIndex] && destIndex < destLen - 1 && curLen < len) { uint32_t curLen = 0;
switch (decode(&state, &codep, src[srcIndex])) {
// Compute the result length in bytes.
while (ptr[index] && index - startIndex < MAXSTRLEN && curLen < len) {
switch (decode(&state, &codep, ptr[index])) {
case 1: case 1:
errorInvalidUTF8Byte(src[srcIndex], "STRSUB"); errorInvalidUTF8Byte(ptr[index], "STRSUB");
state = 0; state = 0;
// fallthrough // fallthrough
case 0: case 0:
curLen++; curLen++;
break; break;
} }
dest[destIndex++] = src[srcIndex++]; index++;
} }
// Check if `index - startIndex == MAXSTRLEN` before `curLen == len`.
if (curLen < len) if (curLen < len)
warning(WARNING_BUILTIN_ARG, "STRSUB: Length too big: %" PRIu32 "\n", len); warning(WARNING_BUILTIN_ARG, "STRSUB: Length too big: %" PRIu32 "\n", len);
@@ -2601,38 +2563,37 @@ static void strsubUTF8(char *dest, size_t destLen, char const *src, uint32_t pos
if (state != 0) if (state != 0)
error("STRSUB: Incomplete UTF-8 character\n"); error("STRSUB: Incomplete UTF-8 character\n");
dest[destIndex] = '\0'; return std::string(ptr + startIndex, ptr + index);
} }
static size_t charlenUTF8(char const *str) { static size_t charlenUTF8(std::string const &str) {
char const *ptr = str.c_str();
size_t len; size_t len;
for (len = 0; charmap_ConvertNext(str, nullptr); len++) for (len = 0; charmap_ConvertNext(ptr, nullptr); len++)
; ;
return len; return len;
} }
static void charsubUTF8(char *dest, char const *src, uint32_t pos) { static std::string charsubUTF8(std::string const &str, uint32_t pos) {
char const *ptr = str.c_str();
size_t charLen = 1; size_t charLen = 1;
// Advance to starting position in source string. // Advance to starting position in source string.
for (uint32_t curPos = 1; charLen && curPos < pos; curPos++) for (uint32_t curPos = 1; charLen && curPos < pos; curPos++)
charLen = charmap_ConvertNext(src, nullptr); charLen = charmap_ConvertNext(ptr, nullptr);
char const *start = src; char const *start = ptr;
if (!charmap_ConvertNext(src, nullptr)) if (!charmap_ConvertNext(ptr, nullptr))
warning( warning(
WARNING_BUILTIN_ARG, WARNING_BUILTIN_ARG,
"CHARSUB: Position %" PRIu32 " is past the end of the string\n", "CHARSUB: Position %" PRIu32 " is past the end of the string\n",
pos pos
); );
// Copy from source to destination. return std::string(start, ptr - start);
memcpy(dest, start, src - start);
dest[src - start] = '\0';
} }
static uint32_t adjustNegativePos(int32_t pos, size_t len, char const *functionName) { static uint32_t adjustNegativePos(int32_t pos, size_t len, char const *functionName) {
@@ -2647,73 +2608,55 @@ static uint32_t adjustNegativePos(int32_t pos, size_t len, char const *functionN
return (uint32_t)pos; return (uint32_t)pos;
} }
static void strrpl(char *dest, size_t destLen, char const *src, char const *old, char const *rep) { static std::string strrpl(std::string_view str, std::string const &old, std::string const &rep) {
size_t oldLen = strlen(old); if (old.empty()) {
size_t repLen = strlen(rep);
size_t i = 0;
if (!oldLen) {
warning(WARNING_EMPTY_STRRPL, "STRRPL: Cannot replace an empty string\n"); warning(WARNING_EMPTY_STRRPL, "STRRPL: Cannot replace an empty string\n");
strcpy(dest, src); return std::string(str);
return;
} }
for (char const *next = strstr(src, old); next && *next; next = strstr(src, old)) { std::string rpl;
// Copy anything before the substring to replace
unsigned int lenBefore = next - src;
memcpy(dest + i, src, lenBefore < destLen - i ? lenBefore : destLen - i); while (!str.empty()) {
i += next - src; auto pos = str.find(old);
if (i >= destLen) if (pos == str.npos) {
rpl.append(str);
break; break;
}
// Copy the replacement substring rpl.append(str, 0, pos);
memcpy(dest + i, rep, repLen < destLen - i ? repLen : destLen - i); rpl.append(rep);
i += repLen; str.remove_prefix(pos + old.size());
if (i >= destLen)
break;
src = next + oldLen;
} }
if (i < destLen) { if (rpl.length() > MAXSTRLEN) {
size_t srcLen = strlen(src);
// Copy anything after the last replaced substring
memcpy(dest + i, src, srcLen < destLen - i ? srcLen : destLen - i);
i += srcLen;
}
if (i >= destLen) {
warning(WARNING_LONG_STR, "STRRPL: String too long, got truncated\n"); warning(WARNING_LONG_STR, "STRRPL: String too long, got truncated\n");
i = destLen - 1; rpl.resize(MAXSTRLEN);
} }
dest[i] = '\0';
return rpl;
} }
static void strfmt( static std::string strfmt(
char *dest, std::string const &spec,
size_t destLen, std::vector<std::variant<uint32_t, std::string>> const &args
char const *spec,
std::vector<std::variant<uint32_t, std::string>> &args
) { ) {
size_t a = 0; std::string str;
size_t i = 0; size_t argIndex = 0;
char const *ptr = spec.c_str();
while (i < destLen) { while (str.length() <= MAXSTRLEN) {
int c = *spec++; int c = *ptr++;
if (c == '\0') { if (c == '\0') {
break; break;
} else if (c != '%') { } else if (c != '%') {
dest[i++] = c; str += c;
continue; continue;
} }
c = *spec++; c = *ptr++;
if (c == '%') { if (c == '%') {
dest[i++] = c; str += c;
continue; continue;
} }
@@ -2723,62 +2666,61 @@ static void strfmt(
fmt.useCharacter(c); fmt.useCharacter(c);
if (fmt.isFinished()) if (fmt.isFinished())
break; break;
c = *spec++; c = *ptr++;
} }
if (fmt.isEmpty()) { if (fmt.isEmpty()) {
error("STRFMT: Illegal '%%' at end of format string\n"); error("STRFMT: Illegal '%%' at end of format string\n");
dest[i++] = '%'; str += '%';
break; break;
} else if (!fmt.isValid()) {
error("STRFMT: Invalid format spec for argument %zu\n", a + 1);
dest[i++] = '%';
a++;
continue;
} else if (a >= args.size()) {
// Will warn after formatting is done.
dest[i++] = '%';
a++;
continue;
} }
std::variant<uint32_t, std::string> &arg = args[a++]; if (!fmt.isValid()) {
error("STRFMT: Invalid format spec for argument %zu\n", argIndex + 1);
str += '%';
} else if (argIndex >= args.size()) {
// Will warn after formatting is done.
str += '%';
} else {
static char buf[MAXSTRLEN + 1]; static char buf[MAXSTRLEN + 1];
std::visit( std::visit(
Visitor{ Visitor{
[&](uint32_t num) { fmt.printNumber(buf, sizeof(buf), num); }, [&](uint32_t n) { fmt.printNumber(buf, sizeof(buf), n); },
[&](std::string &str) { fmt.printString(buf, sizeof(buf), str.c_str()); }, [&](std::string const &s) { fmt.printString(buf, sizeof(buf), s.c_str()); },
}, },
arg args[argIndex]
); );
str.append(buf);
i += snprintf(&dest[i], destLen - i, "%s", buf);
} }
if (a < args.size()) argIndex++;
error("STRFMT: %zu unformatted argument(s)\n", args.size() - a); }
else if (a > args.size())
if (argIndex < args.size())
error("STRFMT: %zu unformatted argument(s)\n", args.size() - argIndex);
else if (argIndex > args.size())
error( error(
"STRFMT: Not enough arguments for format spec, got: %zu, need: %zu\n", args.size(), a "STRFMT: Not enough arguments for format spec, got: %zu, need: %zu\n",
args.size(),
argIndex
); );
if (i > destLen - 1) { if (str.length() > MAXSTRLEN) {
warning(WARNING_LONG_STR, "STRFMT: String too long, got truncated\n"); warning(WARNING_LONG_STR, "STRFMT: String too long, got truncated\n");
i = destLen - 1; str.resize(MAXSTRLEN);
} }
dest[i] = '\0'; return str;
} }
static void compoundAssignment(char const *symName, RPNCommand op, int32_t constValue) { static void compoundAssignment(std::string const &symName, RPNCommand op, int32_t constValue) {
Expression oldExpr, constExpr, newExpr; Expression oldExpr, constExpr, newExpr;
int32_t newValue; int32_t newValue;
rpn_Symbol(oldExpr, symName); rpn_Symbol(oldExpr, symName.c_str());
rpn_Number(constExpr, constValue); rpn_Number(constExpr, constValue);
rpn_BinaryOp(op, newExpr, std::move(oldExpr), constExpr); rpn_BinaryOp(op, newExpr, std::move(oldExpr), constExpr);
newValue = newExpr.getConstVal(); newValue = newExpr.getConstVal();
sym_AddVar(symName, newValue); sym_AddVar(symName.c_str(), newValue);
} }
static void failAssert(AssertionType type) { static void failAssert(AssertionType type) {