Use std::shared_ptr<std::string> for lexed/parsed strings

This commit is contained in:
ISSOtm
2024-03-17 15:32:46 -04:00
committed by Sylvie
parent 412073774c
commit 9f239f6dcc
9 changed files with 199 additions and 264 deletions

View File

@@ -4,6 +4,7 @@
#define RGBDS_ASM_LEXER_H #define RGBDS_ASM_LEXER_H
#include <deque> #include <deque>
#include <memory>
#include <optional> #include <optional>
#include <stdint.h> #include <stdint.h>
#include <string> #include <string>
@@ -31,13 +32,10 @@ enum LexerMode {
// One entry of the lexer's expansion stack: a string whose characters are
// being fed back into the lexer ahead of the underlying buffer.
// Member order matters: `beginExpansion` builds this with designated initializers.
struct Expansion {
	// Set when the expansion has a name; `beginExpansion` only runs the
	// recursion depth check for named expansions
	std::optional<std::string> name;
	// Text being expanded; shared so that it stays alive while being read
	std::shared_ptr<std::string> contents;
	// Cursor into the contents
	size_t offset;

	// Length of the contents
	size_t size() const {
		return contents->length();
	}
};
struct IfStackEntry { struct IfStackEntry {
@@ -140,10 +138,6 @@ struct CaptureBody {
size_t size; size_t size;
}; };
struct String {
char string[MAXSTRLEN + 1];
};
void lexer_CheckRecursionDepth(); void lexer_CheckRecursionDepth();
uint32_t lexer_GetLineNo(); uint32_t lexer_GetLineNo();
uint32_t lexer_GetColNo(); uint32_t lexer_GetColNo();

View File

@@ -3,26 +3,22 @@
#ifndef RGBDS_MACRO_H #ifndef RGBDS_MACRO_H
#define RGBDS_MACRO_H #define RGBDS_MACRO_H
#include <memory>
#include <stdint.h> #include <stdint.h>
#include <stdlib.h>
#include <string> #include <string>
#include <vector> #include <vector>
#include "helpers.hpp"
#include "asm/warning.hpp"
// The arguments passed to a macro invocation.
struct MacroArgs {
	// Number of leading arguments currently skipped; `macro_GetArg` and
	// `macro_GetAllArgs` offset their indexing by this amount
	unsigned int shift;
	// Argument strings, shared so that the lexer can expand them without copying
	std::vector<std::shared_ptr<std::string>> args;

	// Adds `arg` at the end of `args`
	void append(std::shared_ptr<std::string> arg);
};
MacroArgs *macro_GetCurrentArgs(); MacroArgs *macro_GetCurrentArgs();
void macro_UseNewArgs(MacroArgs *args); void macro_UseNewArgs(MacroArgs *args);
char const *macro_GetArg(uint32_t i); std::shared_ptr<std::string> macro_GetArg(uint32_t i);
char const *macro_GetAllArgs(); std::shared_ptr<std::string> macro_GetAllArgs();
void macro_ShiftCurrentArgs(int32_t count); void macro_ShiftCurrentArgs(int32_t count);
uint32_t macro_NbArgs(); uint32_t macro_NbArgs();

View File

@@ -39,7 +39,7 @@ struct Symbol {
int32_t, // If isNumeric() int32_t, // If isNumeric()
int32_t (*)(), // If isNumeric() and has a callback int32_t (*)(), // If isNumeric() and has a callback
std::string_view *, // For SYM_MACRO std::string_view *, // For SYM_MACRO
std::string * // For SYM_EQUS std::shared_ptr<std::string> // For SYM_EQUS
> >
data; data;
@@ -62,7 +62,7 @@ struct Symbol {
int32_t getValue() const; int32_t getValue() const;
int32_t getOutputValue() const; int32_t getOutputValue() const;
std::string_view *getMacro() const; std::string_view *getMacro() const;
std::string *getEqus() const; std::shared_ptr<std::string> getEqus() const;
uint32_t getConstantValue() const; uint32_t getConstantValue() const;
}; };
@@ -90,8 +90,8 @@ Symbol *sym_FindScopedValidSymbol(std::string const &symName);
Symbol const *sym_GetPC(); Symbol const *sym_GetPC();
Symbol *sym_AddMacro(std::string const &symName, int32_t defLineNo, char const *body, size_t size); Symbol *sym_AddMacro(std::string const &symName, int32_t defLineNo, char const *body, size_t size);
Symbol *sym_Ref(std::string const &symName); Symbol *sym_Ref(std::string const &symName);
Symbol *sym_AddString(std::string const &symName, char const *value); Symbol *sym_AddString(std::string const &symName, std::shared_ptr<std::string> value);
Symbol *sym_RedefString(std::string const &symName, char const *value); Symbol *sym_RedefString(std::string const &symName, std::shared_ptr<std::string> value);
void sym_Purge(std::string const &symName); void sym_Purge(std::string const &symName);
void sym_Init(time_t now); void sym_Init(time_t now);

View File

@@ -5,6 +5,7 @@
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/types.h> #include <sys/types.h>
#include <algorithm>
#include <assert.h> #include <assert.h>
#include <ctype.h> #include <ctype.h>
#include <errno.h> #include <errno.h>
@@ -95,13 +96,13 @@ static void mapFile(void *&mappingAddr, int fd, std::string const &path, size_t
struct Token { struct Token {
int type; int type;
std::variant<std::monostate, uint32_t, String, std::string> value; std::variant<std::monostate, uint32_t, std::string> value;
Token() : type(T_(NUMBER)), value(std::monostate{}) {} Token() : type(T_(NUMBER)), value(std::monostate{}) {}
Token(int type_) : type(type_), value(std::monostate{}) {} Token(int type_) : type(type_), value(std::monostate{}) {}
Token(int type_, uint32_t value_) : type(type_), value(value_) {} Token(int type_, uint32_t value_) : type(type_), value(value_) {}
Token(int type_, String &value_) : type(type_), value(value_) {} Token(int type_, std::string const &value_) : type(type_), value(value_) {}
Token(int type_, std::string &value_) : type(type_), value(value_) {} Token(int type_, std::string &&value_) : type(type_), value(value_) {}
}; };
struct CaseInsensitive { struct CaseInsensitive {
@@ -490,23 +491,15 @@ void lexer_ToggleStringExpansion(bool enable) {
// Functions for the actual lexer to obtain characters // Functions for the actual lexer to obtain characters
static void beginExpansion(char const *str, bool owned, char const *name) { static void beginExpansion(std::shared_ptr<std::string> str, std::optional<std::string> name) {
size_t size = strlen(str);
// Do not expand empty strings
if (!size)
return;
if (name) if (name)
lexer_CheckRecursionDepth(); lexer_CheckRecursionDepth();
lexerState->expansions.push_front({ // Do not expand empty strings
.name = name ? std::optional<std::string>(name) : std::nullopt, if (str->empty())
.contents = {.unowned = str}, return;
.size = size,
.offset = 0, lexerState->expansions.push_front({.name = name, .contents = str, .offset = 0});
.owned = owned,
});
} }
void lexer_CheckRecursionDepth() { void lexer_CheckRecursionDepth() {
@@ -514,11 +507,6 @@ void lexer_CheckRecursionDepth() {
fatalerror("Recursion limit (%zu) exceeded\n", maxRecursionDepth); fatalerror("Recursion limit (%zu) exceeded\n", maxRecursionDepth);
} }
static void freeExpansion(Expansion &expansion) {
if (expansion.owned)
delete[] expansion.contents.owned;
}
static bool isMacroChar(char c) { static bool isMacroChar(char c) {
return c == '@' || c == '#' || c == '<' || (c >= '0' && c <= '9'); return c == '@' || c == '#' || c == '<' || (c >= '0' && c <= '9');
} }
@@ -587,34 +575,42 @@ static uint32_t readBracketedMacroArgNum() {
return num; return num;
} }
static char const *readMacroArg(char name) { static std::shared_ptr<std::string> readMacroArg(char name) {
char const *str = nullptr;
if (name == '@') { if (name == '@') {
auto maybeStr = fstk_GetUniqueIDStr(); auto str = fstk_GetUniqueIDStr();
str = maybeStr ? maybeStr->c_str() : nullptr; if (!str) {
error("'\\@' cannot be used outside of a macro or REPT/FOR block\n");
}
return str;
} else if (name == '#') { } else if (name == '#') {
str = macro_GetAllArgs(); auto str = macro_GetAllArgs();
if (!str) {
error("'\\#' cannot be used outside of a macro");
}
return str;
} else if (name == '<') { } else if (name == '<') {
uint32_t num = readBracketedMacroArgNum(); uint32_t num = readBracketedMacroArgNum();
if (num == 0) {
if (num == 0) // The error was already reported by `readBracketedMacroArgNum`.
return nullptr; return nullptr;
str = macro_GetArg(num); }
if (!str)
auto str = macro_GetArg(num);
if (!str) {
error("Macro argument '\\<%" PRIu32 ">' not defined\n", num); error("Macro argument '\\<%" PRIu32 ">' not defined\n", num);
}
return str; return str;
} else if (name == '0') { } else if (name == '0') {
error("Invalid macro argument '\\0'\n"); error("Invalid macro argument '\\0'\n");
return nullptr; return nullptr;
} else { } else {
assert(name > '0' && name <= '9'); assert(name > '0' && name <= '9');
str = macro_GetArg(name - '0'); auto str = macro_GetArg(name - '0');
} if (!str) {
if (!str)
error("Macro argument '\\%c' not defined\n", name); error("Macro argument '\\%c' not defined\n", name);
}
return str; return str;
}
} }
static size_t readInternal(BufferedLexerState &cbuf, size_t bufIndex, size_t nbChars) { static size_t readInternal(BufferedLexerState &cbuf, size_t bufIndex, size_t nbChars) {
@@ -632,12 +628,12 @@ static size_t readInternal(BufferedLexerState &cbuf, size_t bufIndex, size_t nbC
// We only need one character of lookahead, for macro arguments // We only need one character of lookahead, for macro arguments
static int peekInternal(uint8_t distance) { static int peekInternal(uint8_t distance) {
for (Expansion &exp : lexerState->expansions) { for (Expansion &exp : lexerState->expansions) {
// An expansion that has reached its end will have `exp->offset` == `exp->size`, // An expansion that has reached its end will have `exp->offset` == `exp->size()`,
// and `peekInternal` will continue with its parent // and `peekInternal` will continue with its parent
assert(exp.offset <= exp.size); assert(exp.offset <= exp.size());
if (distance < exp.size - exp.offset) if (distance < exp.size() - exp.offset)
return exp.contents.unowned[exp.offset + distance]; return (*exp.contents)[exp.offset + distance];
distance -= exp.size - exp.offset; distance -= exp.size() - exp.offset;
} }
if (distance >= LEXER_BUF_SIZE) if (distance >= LEXER_BUF_SIZE)
@@ -697,7 +693,7 @@ static int peekInternal(uint8_t distance) {
// forward declarations for peek // forward declarations for peek
static void shiftChar(); static void shiftChar();
static char const *readInterpolation(size_t depth); static std::shared_ptr<std::string> readInterpolation(size_t depth);
static int peek() { static int peek() {
int c = peekInternal(0); int c = peekInternal(0);
@@ -714,30 +710,32 @@ static int peek() {
if (isMacroChar(c)) { if (isMacroChar(c)) {
shiftChar(); shiftChar();
shiftChar(); shiftChar();
char const *str = readMacroArg(c);
// If the macro arg is invalid or an empty string, it cannot be std::shared_ptr<std::string> str = readMacroArg(c);
// expanded, so skip it and keep peeking. // If the macro arg is invalid or an empty string, it cannot be expanded,
if (!str || !str[0]) // so skip it and keep peeking.
if (!str || str->empty()) {
return peek(); return peek();
}
beginExpansion(str, c == '#', nullptr); beginExpansion(str, std::nullopt);
// Assuming macro args can't be recursive (I'll be damned if a way // Assuming macro args can't be recursive (I'll be damned if a way
// is found...), then we mark the entire macro arg as scanned. // is found...), then we mark the entire macro arg as scanned.
lexerState->macroArgScanDistance += strlen(str); lexerState->macroArgScanDistance += str->length();
c = str[0]; c = str->front();
} else { } else {
c = '\\'; c = '\\';
} }
} else if (c == '{' && !lexerState->disableInterpolation) { } else if (c == '{' && !lexerState->disableInterpolation) {
// If character is an open brace, do symbol interpolation // If character is an open brace, do symbol interpolation
shiftChar(); shiftChar();
char const *str = readInterpolation(0);
if (str && str[0]) if (auto str = readInterpolation(0); str) {
beginExpansion(str, false, str); beginExpansion(str, *str);
}
return peek(); return peek();
} }
@@ -758,12 +756,11 @@ restart:
// Advance within the current expansion // Advance within the current expansion
Expansion &expansion = lexerState->expansions.front(); Expansion &expansion = lexerState->expansions.front();
assert(expansion.offset <= expansion.size); assert(expansion.offset <= expansion.size());
expansion.offset++; expansion.offset++;
if (expansion.offset > expansion.size) { if (expansion.offset > expansion.size()) {
// When advancing would go past an expansion's end, free it, // When advancing would go past an expansion's end,
// move up to its parent, and try again to advance // move up to its parent and try again to advance
freeExpansion(expansion);
lexerState->expansions.pop_front(); lexerState->expansions.pop_front();
goto restart; goto restart;
} }
@@ -1124,7 +1121,7 @@ static Token readIdentifier(char firstChar) {
// Functions to read strings // Functions to read strings
static char const *readInterpolation(size_t depth) { static std::shared_ptr<std::string> readInterpolation(size_t depth) {
if (depth > maxRecursionDepth) if (depth > maxRecursionDepth)
fatalerror("Recursion limit (%zu) exceeded\n", maxRecursionDepth); fatalerror("Recursion limit (%zu) exceeded\n", maxRecursionDepth);
@@ -1142,10 +1139,9 @@ static char const *readInterpolation(size_t depth) {
if (c == '{') { // Nested interpolation if (c == '{') { // Nested interpolation
shiftChar(); shiftChar();
auto str = readInterpolation(depth + 1);

// `readInterpolation` returns nullptr when the interpolation failed;
// it must not be dereferenced in that case
if (str) {
	beginExpansion(str, *str);
}
continue; // Restart, reading from the new buffer continue; // Restart, reading from the new buffer
} else if (c == EOF || c == '\r' || c == '\n' || c == '"') { } else if (c == EOF || c == '\r' || c == '\n' || c == '"') {
error("Missing }\n"); error("Missing }\n");
@@ -1175,67 +1171,50 @@ static char const *readInterpolation(size_t depth) {
if (!sym) { if (!sym) {
error("Interpolated symbol \"%s\" does not exist\n", fmtBuf.c_str()); error("Interpolated symbol \"%s\" does not exist\n", fmtBuf.c_str());
} else if (sym->type == SYM_EQUS) { } else if (sym->type == SYM_EQUS) {
static std::string buf; auto buf = std::make_shared<std::string>();
buf.clear(); fmt.appendString(*buf, *sym->getEqus());
fmt.appendString(buf, *sym->getEqus()); return buf;
return buf.c_str();
} else if (sym->isNumeric()) { } else if (sym->isNumeric()) {
static std::string buf; auto buf = std::make_shared<std::string>();
buf.clear(); fmt.appendNumber(*buf, sym->getConstantValue());
fmt.appendNumber(buf, sym->getConstantValue()); return buf;
return buf.c_str();
} else { } else {
error("Only numerical and string symbols can be interpolated\n"); error("Only numerical and string symbols can be interpolated\n");
} }
return nullptr; return nullptr;
} }
// Appends `str` to `yylval`, escaping it so that it can sit inside a string literal:
// backslashes, double quotes and open braces get a backslash in front of them,
// and newlines, carriage returns and tabs become `\n`, `\r` and `\t` respectively.
static void appendEscapedSubstring(std::string &yylval, std::string const &str) {
	for (char ch : str) {
		if (ch == '\n') {
			yylval += "\\n";
		} else if (ch == '\r') {
			yylval += "\\r";
		} else if (ch == '\t') {
			yylval += "\\t";
		} else {
			// These characters are kept as-is, but need a backslash in front of them
			if (ch == '\\' || ch == '"' || ch == '{') {
				yylval += '\\';
			}
			yylval += ch;
		}
	}
}
static void readString(String &yylval, bool raw) { static std::string readString(bool raw) {
lexerState->disableMacroArgs = true; lexerState->disableMacroArgs = true;
lexerState->disableInterpolation = true; lexerState->disableInterpolation = true;
size_t i = 0; std::string yylval;
bool multiline = false; bool multiline = false;
char const *str;
// We reach this function after reading a single quote, but we also support triple quotes // We reach this function after reading a single quote, but we also support triple quotes
if (peek() == '"') { if (peek() == '"') {
@@ -1278,7 +1257,7 @@ static void readString(String &yylval, bool raw) {
break; break;
shiftChar(); shiftChar();
if (peek() != '"') { if (peek() != '"') {
append_yylval_string('"'); yylval += '"';
break; break;
} }
shiftChar(); shiftChar();
@@ -1332,10 +1311,8 @@ static void readString(String &yylval, bool raw) {
case '9': case '9':
case '<': case '<':
shiftChar(); shiftChar();
str = readMacroArg(c); if (auto str = readMacroArg(c); str) {
if (str) { yylval.append(*str);
while (*str)
append_yylval_string(*str++);
} }
continue; // Do not copy an additional character continue; // Do not copy an additional character
@@ -1357,10 +1334,8 @@ static void readString(String &yylval, bool raw) {
// We'll be exiting the string scope, so re-enable expansions // We'll be exiting the string scope, so re-enable expansions
// (Not interpolations, since they're handled by the function itself...) // (Not interpolations, since they're handled by the function itself...)
lexerState->disableMacroArgs = false; lexerState->disableMacroArgs = false;
str = readInterpolation(0); if (auto interpolation = readInterpolation(0); interpolation) {
if (str) { yylval.append(*interpolation);
while (*str)
append_yylval_string(*str++);
} }
lexerState->disableMacroArgs = true; lexerState->disableMacroArgs = true;
continue; // Do not copy an additional character continue; // Do not copy an additional character
@@ -1368,35 +1343,35 @@ static void readString(String &yylval, bool raw) {
// Regular characters will just get copied // Regular characters will just get copied
} }
append_yylval_string(c); yylval += c;
} }
finish: finish:
if (i == sizeof(yylval.string)) { if (yylval.length() > MAXSTRLEN) {
i--;
warning(WARNING_LONG_STR, "String constant too long\n"); warning(WARNING_LONG_STR, "String constant too long\n");
yylval.resize(MAXSTRLEN);
} }
yylval.string[i] = '\0';
lexerState->disableMacroArgs = false; lexerState->disableMacroArgs = false;
lexerState->disableInterpolation = false; lexerState->disableInterpolation = false;
return yylval;
} }
static size_t appendStringLiteral(String &yylval, size_t i, bool raw) { static void appendStringLiteral(std::string &yylval, bool raw) {
lexerState->disableMacroArgs = true; lexerState->disableMacroArgs = true;
lexerState->disableInterpolation = true; lexerState->disableInterpolation = true;
bool multiline = false; bool multiline = false;
char const *str;
// We reach this function after reading a single quote, but we also support triple quotes // We reach this function after reading a single quote, but we also support triple quotes
append_yylval_string('"'); yylval += '"';
if (peek() == '"') { if (peek() == '"') {
append_yylval_string('"'); yylval += '"';
shiftChar(); shiftChar();
if (peek() == '"') { if (peek() == '"') {
// """ begins a multi-line string // """ begins a multi-line string
append_yylval_string('"'); yylval += '"';
shiftChar(); shiftChar();
multiline = true; multiline = true;
} else { } else {
@@ -1431,14 +1406,14 @@ static size_t appendStringLiteral(String &yylval, size_t i, bool raw) {
// Only """ ends a multi-line string // Only """ ends a multi-line string
if (peek() != '"') if (peek() != '"')
break; break;
append_yylval_string('"'); yylval += '"';
shiftChar(); shiftChar();
if (peek() != '"') if (peek() != '"')
break; break;
append_yylval_string('"'); yylval += '"';
shiftChar(); shiftChar();
} }
append_yylval_string('"'); yylval += '"';
goto finish; goto finish;
case '\\': // Character escape or macro arg case '\\': // Character escape or macro arg
@@ -1455,7 +1430,7 @@ static size_t appendStringLiteral(String &yylval, size_t i, bool raw) {
case 'r': case 'r':
case 't': case 't':
// Return that character unchanged // Return that character unchanged
append_yylval_string('\\'); yylval += '\\';
shiftChar(); shiftChar();
break; break;
@@ -1479,12 +1454,14 @@ static size_t appendStringLiteral(String &yylval, size_t i, bool raw) {
case '7': case '7':
case '8': case '8':
case '9': case '9':
case '<': case '<': {
shiftChar(); shiftChar();
str = readMacroArg(c); auto str = readMacroArg(c);
if (str && str[0]) if (str) {
i = appendEscapedSubstring(yylval, str, i); appendEscapedSubstring(yylval, *str);
}
continue; // Do not copy an additional character continue; // Do not copy an additional character
}
case EOF: // Can't really print that one case EOF: // Can't really print that one
error("Illegal character escape at end of input\n"); error("Illegal character escape at end of input\n");
@@ -1504,29 +1481,27 @@ static size_t appendStringLiteral(String &yylval, size_t i, bool raw) {
// We'll be exiting the string scope, so re-enable expansions // We'll be exiting the string scope, so re-enable expansions
// (Not interpolations, since they're handled by the function itself...) // (Not interpolations, since they're handled by the function itself...)
lexerState->disableMacroArgs = false; lexerState->disableMacroArgs = false;
str = readInterpolation(0); auto str = readInterpolation(0);
if (str && str[0]) if (str) {
i = appendEscapedSubstring(yylval, str, i); appendEscapedSubstring(yylval, *str);
}
lexerState->disableMacroArgs = true; lexerState->disableMacroArgs = true;
continue; // Do not copy an additional character continue; // Do not copy an additional character
// Regular characters will just get copied // Regular characters will just get copied
} }
append_yylval_string(c); yylval += c;
} }
finish: finish:
if (i == sizeof(yylval.string)) { if (yylval.length() > MAXSTRLEN) {
i--;
warning(WARNING_LONG_STR, "String constant too long\n"); warning(WARNING_LONG_STR, "String constant too long\n");
yylval.resize(MAXSTRLEN);
} }
yylval.string[i] = '\0';
lexerState->disableMacroArgs = false; lexerState->disableMacroArgs = false;
lexerState->disableInterpolation = false; lexerState->disableInterpolation = false;
return i;
} }
// Lexer core // Lexer core
@@ -1749,11 +1724,8 @@ static Token yylex_NORMAL() {
// Handle strings // Handle strings
case '"': { case '"':
String yylval; return Token(T_(STRING), readString(false));
readString(yylval, false);
return Token(T_(STRING), yylval);
}
// Handle newlines and EOF // Handle newlines and EOF
@@ -1779,9 +1751,7 @@ static Token yylex_NORMAL() {
case '#': case '#':
if (peek() == '"') { if (peek() == '"') {
shiftChar(); shiftChar();
String yylval; return Token(T_(STRING), readString(true));
readString(yylval, true);
return Token(T_(STRING), yylval);
} }
// fallthrough // fallthrough
@@ -1809,11 +1779,10 @@ static Token yylex_NORMAL() {
Symbol const *sym = sym_FindExactSymbol(std::get<std::string>(token.value)); Symbol const *sym = sym_FindExactSymbol(std::get<std::string>(token.value));
if (sym && sym->type == SYM_EQUS) { if (sym && sym->type == SYM_EQUS) {
char const *str = sym->getEqus()->c_str(); std::shared_ptr<std::string> str = sym->getEqus();
assert(str); assert(str);
if (str[0]) beginExpansion(str, sym->name);
beginExpansion(str, false, sym->name.c_str());
continue; // Restart, reading from the new buffer continue; // Restart, reading from the new buffer
} }
} }
@@ -1836,9 +1805,8 @@ static Token yylex_NORMAL() {
static Token yylex_RAW() { static Token yylex_RAW() {
// This is essentially a modified `appendStringLiteral` // This is essentially a modified `appendStringLiteral`
String yylval; std::string yylval;
size_t parenDepth = 0; size_t parenDepth = 0;
size_t i = 0;
int c; int c;
// Trim left whitespace (stops at a block comment) // Trim left whitespace (stops at a block comment)
@@ -1865,15 +1833,15 @@ static Token yylex_RAW() {
switch (c) { switch (c) {
case '"': // String literals inside macro args case '"': // String literals inside macro args
shiftChar(); shiftChar();
i = appendStringLiteral(yylval, i, false); appendStringLiteral(yylval, false);
break; break;
case '#': // Raw string literals inside macro args case '#': // Raw string literals inside macro args
append_yylval_string(c); yylval += c;
shiftChar(); shiftChar();
if (peek() == '"') { if (peek() == '"') {
shiftChar(); shiftChar();
i = appendStringLiteral(yylval, i, true); appendStringLiteral(yylval, true);
} }
break; break;
@@ -1893,7 +1861,7 @@ static Token yylex_RAW() {
discardBlockComment(); discardBlockComment();
continue; continue;
} }
append_yylval_string(c); // Append the slash yylval += c; // Append the slash
break; break;
case ',': // End of macro arg case ',': // End of macro arg
@@ -1958,21 +1926,20 @@ backslash:
default: // Regular characters will just get copied default: // Regular characters will just get copied
append: append:
append_yylval_string(c); yylval += c;
shiftChar(); shiftChar();
break; break;
} }
} }
finish: finish:
if (i == sizeof(yylval.string)) { if (yylval.length() > MAXSTRLEN) {
i--;
warning(WARNING_LONG_STR, "Macro argument too long\n"); warning(WARNING_LONG_STR, "Macro argument too long\n");
yylval.resize(MAXSTRLEN);
} }
// Trim right whitespace // Trim right whitespace
while (i && isWhitespace(yylval.string[i - 1])) auto rightPos = std::find_if_not(yylval.rbegin(), yylval.rend(), isWhitespace);
i--; yylval.resize(rightPos.base() - yylval.begin());
yylval.string[i] = '\0';
// Returning COMMAs to the parser would mean that two consecutive commas // Returning COMMAs to the parser would mean that two consecutive commas
// (i.e. an empty argument) need to return two different tokens (STRING // (i.e. an empty argument) need to return two different tokens (STRING
@@ -1989,7 +1956,7 @@ finish:
// an empty raw string before it). This will not be treated as a // an empty raw string before it). This will not be treated as a
// macro argument. To pass an empty last argument, use a second // macro argument. To pass an empty last argument, use a second
// trailing comma. // trailing comma.
if (i > 0) if (!yylval.empty())
return Token(T_(STRING), yylval); return Token(T_(STRING), yylval);
lexer_SetMode(LEXER_NORMAL); lexer_SetMode(LEXER_NORMAL);
@@ -2002,8 +1969,6 @@ finish:
return Token(T_(YYEOF)); return Token(T_(YYEOF));
} }
#undef append_yylval_string
// This function uses the fact that `if`, etc. constructs are only valid when // This function uses the fact that `if`, etc. constructs are only valid when
// there's nothing before them on their lines. This enables filtering // there's nothing before them on their lines. This enables filtering
// "meaningful" (= at line start) vs. "meaningless" (everything else) tokens. // "meaningful" (= at line start) vs. "meaningless" (everything else) tokens.
@@ -2213,8 +2178,6 @@ yy::parser::symbol_type yylex() {
if (auto *numValue = std::get_if<uint32_t>(&token.value); numValue) { if (auto *numValue = std::get_if<uint32_t>(&token.value); numValue) {
return yy::parser::symbol_type(token.type, *numValue); return yy::parser::symbol_type(token.type, *numValue);
} else if (auto *stringValue = std::get_if<String>(&token.value); stringValue) {
return yy::parser::symbol_type(token.type, *stringValue);
} else if (auto *strValue = std::get_if<std::string>(&token.value); strValue) { } else if (auto *strValue = std::get_if<std::string>(&token.value); strValue) {
return yy::parser::symbol_type(token.type, *strValue); return yy::parser::symbol_type(token.type, *strValue);
} else { } else {

View File

@@ -2,22 +2,24 @@
#include "asm/macro.hpp" #include "asm/macro.hpp"
#include <errno.h>
#include <inttypes.h>
#include <new>
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <string>
#include "helpers.hpp"
#include "asm/warning.hpp"
#define MAXMACROARGS 99999 #define MAXMACROARGS 99999
static MacroArgs *macroArgs = nullptr; static MacroArgs *macroArgs = nullptr;
void MacroArgs::append(std::string s) { void MacroArgs::append(std::shared_ptr<std::string> arg) {
if (s.empty()) if (arg->empty())
warning(WARNING_EMPTY_MACRO_ARG, "Empty macro argument\n"); warning(WARNING_EMPTY_MACRO_ARG, "Empty macro argument\n");
if (args.size() == MAXMACROARGS) if (args.size() == MAXMACROARGS)
error("A maximum of " EXPAND_AND_STR(MAXMACROARGS) " arguments is allowed\n"); error("A maximum of " EXPAND_AND_STR(MAXMACROARGS) " arguments is allowed\n");
args.push_back(s); args.push_back(arg);
} }
MacroArgs *macro_GetCurrentArgs() { MacroArgs *macro_GetCurrentArgs() {
@@ -28,47 +30,41 @@ void macro_UseNewArgs(MacroArgs *args) {
macroArgs = args; macroArgs = args;
} }
char const *macro_GetArg(uint32_t i) { std::shared_ptr<std::string> macro_GetArg(uint32_t i) {
if (!macroArgs) if (!macroArgs)
return nullptr; return nullptr;
uint32_t realIndex = i + macroArgs->shift - 1; uint32_t realIndex = i + macroArgs->shift - 1;
return realIndex >= macroArgs->args.size() ? nullptr : macroArgs->args[realIndex].c_str(); return realIndex >= macroArgs->args.size() ? nullptr : macroArgs->args[realIndex];
} }
char const *macro_GetAllArgs() { std::shared_ptr<std::string> macro_GetAllArgs() {
if (!macroArgs) if (!macroArgs)
return nullptr; return nullptr;
size_t nbArgs = macroArgs->args.size(); size_t nbArgs = macroArgs->args.size();
if (macroArgs->shift >= nbArgs) if (macroArgs->shift >= nbArgs)
return ""; return std::make_shared<std::string>("");
size_t len = 0; size_t len = 0;
for (uint32_t i = macroArgs->shift; i < nbArgs; i++) for (uint32_t i = macroArgs->shift; i < nbArgs; i++)
len += macroArgs->args[i].length() + 1; // 1 for comma len += macroArgs->args[i]->length() + 1; // 1 for comma
char *str = new (std::nothrow) char[len + 1]; // 1 for '\0' auto str = std::make_shared<std::string>();
char *ptr = str; str->reserve(len + 1); // 1 for comma
if (!str)
fatalerror("Failed to allocate memory for expanding '\\#': %s\n", strerror(errno));
for (uint32_t i = macroArgs->shift; i < nbArgs; i++) { for (uint32_t i = macroArgs->shift; i < nbArgs; i++) {
std::string const &arg = macroArgs->args[i]; auto const &arg = macroArgs->args[i];
size_t n = arg.length();
memcpy(ptr, arg.c_str(), n); str->append(*arg);
ptr += n;
// Commas go between args and after a last empty arg // Commas go between args and after a last empty arg
if (i < nbArgs - 1 || n == 0) if (i < nbArgs - 1 || arg->empty())
*ptr++ = ','; // no space after comma str->push_back(','); // no space after comma
} }
*ptr = '\0';
return str; return str;
} }

View File

@@ -2,13 +2,8 @@
#include "asm/main.hpp" #include "asm/main.hpp"
#include <ctype.h>
#include <errno.h>
#include <float.h>
#include <inttypes.h>
#include <limits.h> #include <limits.h>
#include <math.h> #include <memory>
#include <stdarg.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <time.h> #include <time.h>
@@ -19,13 +14,9 @@
#include "version.hpp" #include "version.hpp"
#include "asm/charmap.hpp" #include "asm/charmap.hpp"
#include "asm/fixpoint.hpp"
#include "asm/format.hpp"
#include "asm/fstack.hpp" #include "asm/fstack.hpp"
#include "asm/lexer.hpp"
#include "asm/opt.hpp" #include "asm/opt.hpp"
#include "asm/output.hpp" #include "asm/output.hpp"
#include "asm/rpn.hpp"
#include "asm/symbol.hpp" #include "asm/symbol.hpp"
#include "asm/warning.hpp" #include "asm/warning.hpp"
@@ -186,9 +177,9 @@ int main(int argc, char *argv[]) {
equals = strchr(musl_optarg, '='); equals = strchr(musl_optarg, '=');
if (equals) { if (equals) {
*equals = '\0'; *equals = '\0';
sym_AddString(musl_optarg, equals + 1); sym_AddString(musl_optarg, std::make_shared<std::string>(equals + 1));
} else { } else {
sym_AddString(musl_optarg, "1"); sym_AddString(musl_optarg, std::make_shared<std::string>("1"));
} }
break; break;

View File

@@ -6,16 +6,13 @@
%code requires { %code requires {
#include <stdint.h> #include <stdint.h>
#include <inttypes.h>
#include <string> #include <string>
#include <variant> #include <variant>
#include <vector> #include <vector>
#include "asm/format.hpp"
#include "asm/lexer.hpp"
#include "asm/macro.hpp" #include "asm/macro.hpp"
#include "asm/rpn.hpp" #include "asm/rpn.hpp"
#include "asm/symbol.hpp" #include "asm/section.hpp"
#include "linkdefs.hpp" #include "linkdefs.hpp"
@@ -48,6 +45,7 @@
#include <algorithm> #include <algorithm>
#include <ctype.h> #include <ctype.h>
#include <errno.h> #include <errno.h>
#include <inttypes.h>
#include <new> #include <new>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@@ -56,12 +54,14 @@
#include "asm/charmap.hpp" #include "asm/charmap.hpp"
#include "asm/fixpoint.hpp" #include "asm/fixpoint.hpp"
#include "asm/format.hpp"
#include "asm/fstack.hpp" #include "asm/fstack.hpp"
#include "asm/lexer.hpp"
#include "asm/main.hpp" #include "asm/main.hpp"
#include "asm/opt.hpp" #include "asm/opt.hpp"
#include "asm/output.hpp" #include "asm/output.hpp"
#include "asm/section.hpp" #include "asm/section.hpp"
#include "util.hpp" #include "asm/symbol.hpp"
#include "asm/warning.hpp" #include "asm/warning.hpp"
#include "extern/utf8decoder.hpp" #include "extern/utf8decoder.hpp"
@@ -129,7 +129,7 @@
%type <SectionSpec> sect_attrs %type <SectionSpec> sect_attrs
%token <int32_t> NUMBER "number" %token <int32_t> NUMBER "number"
%token <String> STRING "string" %token <std::string> STRING "string"
%token PERIOD "." %token PERIOD "."
%token COMMA "," %token COMMA ","
@@ -524,7 +524,7 @@ macro_args:
} }
| macro_args STRING { | macro_args STRING {
$$ = $1; $$ = $1;
$$->append($2.string); $$->append(std::make_shared<std::string>($2));
} }
; ;
@@ -663,7 +663,7 @@ equs:
$1.c_str(), $1.c_str(),
$1.c_str() $1.c_str()
); );
sym_AddString($1, $3.c_str()); sym_AddString($1, std::make_shared<std::string>($3));
} }
; ;
@@ -757,7 +757,7 @@ opt_list:
opt_list_entry: opt_list_entry:
STRING { STRING {
opt_Parse($1.string); opt_Parse($1.c_str());
} }
; ;
@@ -1077,13 +1077,13 @@ def_rl:
def_equs: def_equs:
def_id POP_EQUS string { def_id POP_EQUS string {
sym_AddString($1, $3.c_str()); sym_AddString($1, std::make_shared<std::string>($3));
} }
; ;
redef_equs: redef_equs:
redef_id POP_EQUS string { redef_id POP_EQUS string {
sym_RedefString($1, $3.c_str()); sym_RedefString($1, std::make_shared<std::string>($3));
} }
; ;
@@ -1553,7 +1553,7 @@ opt_q_arg:
string: string:
STRING { STRING {
$$ = $1.string; $$ = std::move($1);
} }
| OP_STRSUB LPAREN string COMMA const COMMA uconst RPAREN { | OP_STRSUB LPAREN string COMMA const COMMA uconst RPAREN {
size_t len = strlenUTF8($3); size_t len = strlenUTF8($3);
@@ -2655,19 +2655,16 @@ static std::string strfmt(
) { ) {
std::string str; std::string str;
size_t argIndex = 0; size_t argIndex = 0;
char const *ptr = spec.c_str();
while (str.length() <= MAXSTRLEN) { for (size_t i = 0; spec[i] != '\0' && str.length() <= MAXSTRLEN; ++i) {
int c = *ptr++; int c = spec[i];
if (c == '\0') { if (c != '%') {
break;
} else if (c != '%') {
str += c; str += c;
continue; continue;
} }
c = *ptr++; c = spec[++i];
if (c == '%') { if (c == '%') {
str += c; str += c;
@@ -2680,7 +2677,7 @@ static std::string strfmt(
fmt.useCharacter(c); fmt.useCharacter(c);
if (fmt.isFinished()) if (fmt.isFinished())
break; break;
c = *ptr++; c = spec[++i];
} }
if (fmt.isEmpty()) { if (fmt.isEmpty()) {

View File

@@ -79,9 +79,9 @@ std::string_view *Symbol::getMacro() const {
return std::get<std::string_view *>(data); return std::get<std::string_view *>(data);
} }
std::string *Symbol::getEqus() const { std::shared_ptr<std::string> Symbol::getEqus() const {
assert(std::holds_alternative<std::string *>(data)); assert(std::holds_alternative<std::shared_ptr<std::string>>(data));
return std::get<std::string *>(data); return std::get<std::shared_ptr<std::string>>(data);
} }
static void dumpFilename(Symbol const &sym) { static void dumpFilename(Symbol const &sym) {
@@ -121,14 +121,6 @@ static Symbol &createSymbol(std::string const &symName) {
return sym; return sym;
} }
static void assignStringSymbol(Symbol &sym, char const *value) {
std::string *equs = new (std::nothrow) std::string(value);
if (!equs)
fatalerror("No memory for string equate: %s\n", strerror(errno));
sym.type = SYM_EQUS;
sym.data = equs;
}
Symbol *sym_FindExactSymbol(std::string const &symName) { Symbol *sym_FindExactSymbol(std::string const &symName) {
auto search = symbols.find(symName); auto search = symbols.find(symName);
return search != symbols.end() ? &search->second : nullptr; return search != symbols.end() ? &search->second : nullptr;
@@ -310,21 +302,22 @@ Symbol *sym_RedefEqu(std::string const &symName, int32_t value) {
* of the string are enough: sym_AddString("M_PI"s, "3.1415"). This is the same * of the string are enough: sym_AddString("M_PI"s, "3.1415"). This is the same
* as ``` M_PI EQUS "3.1415" ``` * as ``` M_PI EQUS "3.1415" ```
*/ */
Symbol *sym_AddString(std::string const &symName, char const *value) { Symbol *sym_AddString(std::string const &symName, std::shared_ptr<std::string> str) {
Symbol *sym = createNonrelocSymbol(symName, false); Symbol *sym = createNonrelocSymbol(symName, false);
if (!sym) if (!sym)
return nullptr; return nullptr;
assignStringSymbol(*sym, value); sym->type = SYM_EQUS;
sym->data = str;
return sym; return sym;
} }
Symbol *sym_RedefString(std::string const &symName, char const *value) { Symbol *sym_RedefString(std::string const &symName, std::shared_ptr<std::string> str) {
Symbol *sym = sym_FindExactSymbol(symName); Symbol *sym = sym_FindExactSymbol(symName);
if (!sym) if (!sym)
return sym_AddString(symName, value); return sym_AddString(symName, str);
if (sym->type != SYM_EQUS) { if (sym->type != SYM_EQUS) {
if (sym->isDefined()) if (sym->isDefined())
@@ -339,9 +332,7 @@ Symbol *sym_RedefString(std::string const &symName, char const *value) {
} }
updateSymbolFilename(*sym); updateSymbolFilename(*sym);
// FIXME: this leaks the previous `sym->getEqus()`, but this can't delete it because the sym->data = str;
// expansion may be redefining itself.
assignStringSymbol(*sym, value);
return sym; return sym;
} }
@@ -564,7 +555,8 @@ void sym_Init(time_t now) {
_RSSymbol = sym_AddVar("_RS"s, 0); _RSSymbol = sym_AddVar("_RS"s, 0);
_RSSymbol->isBuiltin = true; _RSSymbol->isBuiltin = true;
sym_AddString("__RGBDS_VERSION__"s, get_package_version_string())->isBuiltin = true; sym_AddString("__RGBDS_VERSION__"s, std::make_shared<std::string>(get_package_version_string()))
->isBuiltin = true;
sym_AddEqu("__RGBDS_MAJOR__"s, PACKAGE_VERSION_MAJOR)->isBuiltin = true; sym_AddEqu("__RGBDS_MAJOR__"s, PACKAGE_VERSION_MAJOR)->isBuiltin = true;
sym_AddEqu("__RGBDS_MINOR__"s, PACKAGE_VERSION_MINOR)->isBuiltin = true; sym_AddEqu("__RGBDS_MINOR__"s, PACKAGE_VERSION_MINOR)->isBuiltin = true;
sym_AddEqu("__RGBDS_PATCH__"s, PACKAGE_VERSION_PATCH)->isBuiltin = true; sym_AddEqu("__RGBDS_PATCH__"s, PACKAGE_VERSION_PATCH)->isBuiltin = true;
@@ -598,10 +590,16 @@ void sym_Init(time_t now) {
time_utc time_utc
); );
sym_AddString("__TIME__"s, savedTIME)->isBuiltin = true; sym_AddString("__TIME__"s, std::make_shared<std::string>(savedTIME))->isBuiltin = true;
sym_AddString("__DATE__"s, savedDATE)->isBuiltin = true; sym_AddString("__DATE__"s, std::make_shared<std::string>(savedDATE))->isBuiltin = true;
sym_AddString("__ISO_8601_LOCAL__"s, savedTIMESTAMP_ISO8601_LOCAL)->isBuiltin = true; sym_AddString(
sym_AddString("__ISO_8601_UTC__"s, savedTIMESTAMP_ISO8601_UTC)->isBuiltin = true; "__ISO_8601_LOCAL__"s,
std::make_shared<std::string>(savedTIMESTAMP_ISO8601_LOCAL)
)->isBuiltin = true;
sym_AddString(
"__ISO_8601_UTC__"s,
std::make_shared<std::string>(savedTIMESTAMP_ISO8601_UTC)
)->isBuiltin = true;
sym_AddEqu("__UTC_YEAR__"s, time_utc->tm_year + 1900)->isBuiltin = true; sym_AddEqu("__UTC_YEAR__"s, time_utc->tm_year + 1900)->isBuiltin = true;
sym_AddEqu("__UTC_MONTH__"s, time_utc->tm_mon + 1)->isBuiltin = true; sym_AddEqu("__UTC_MONTH__"s, time_utc->tm_mon + 1)->isBuiltin = true;

View File

@@ -23,7 +23,7 @@ warning: unique-id.asm(14) -> unique-id.asm::m(8): [-Wuser]
_u4! _u4!
while expanding symbol "warn_unique" while expanding symbol "warn_unique"
error: unique-id.asm(15): error: unique-id.asm(15):
Macro argument '\@' not defined '\@' cannot be used outside of a macro or REPT/FOR block
while expanding symbol "warn_unique" while expanding symbol "warn_unique"
warning: unique-id.asm(15): [-Wuser] warning: unique-id.asm(15): [-Wuser]
! !