From cfa0adf295a33c14de4972c1bb8dfeebe53dca62 Mon Sep 17 00:00:00 2001 From: Rangi <35663410+Rangi42@users.noreply.github.com> Date: Mon, 25 May 2026 16:22:29 -0400 Subject: [PATCH] Intern strings used as identifiers (for labels, constants, macros, charmaps, etc) (#1980) --- Makefile | 1 + docs/ARCHITECTURE.md | 3 + include/asm/actions.hpp | 7 +- include/asm/charmap.hpp | 9 +-- include/asm/fstack.hpp | 7 +- include/asm/intern.hpp | 34 +++++++++ include/asm/lexer.hpp | 4 +- include/asm/rpn.hpp | 10 +-- include/asm/section.hpp | 3 +- include/asm/symbol.hpp | 41 ++++++----- src/CMakeLists.txt | 1 + src/asm/actions.cpp | 5 +- src/asm/charmap.cpp | 24 ++++--- src/asm/fstack.cpp | 17 +++-- src/asm/intern.cpp | 45 ++++++++++++ src/asm/lexer.cpp | 82 +++++++++++++--------- src/asm/main.cpp | 4 +- src/asm/output.cpp | 5 +- src/asm/parser.y | 74 ++++++++++---------- src/asm/rpn.cpp | 30 ++++---- src/asm/section.cpp | 4 +- src/asm/symbol.cpp | 149 +++++++++++++++++++++------------------- 22 files changed, 341 insertions(+), 218 deletions(-) create mode 100644 include/asm/intern.hpp create mode 100644 src/asm/intern.cpp diff --git a/Makefile b/Makefile index 80a15c6a..ac0f0ac9 100644 --- a/Makefile +++ b/Makefile @@ -65,6 +65,7 @@ rgbasm_obj := \ src/asm/fixpoint.o \ src/asm/format.o \ src/asm/fstack.o \ + src/asm/intern.o \ src/asm/lexer.o \ src/asm/macro.o \ src/asm/main.o \ diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 313fa32f..55d59839 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -180,6 +180,9 @@ These files have been copied ("vendored") from external authors and adapted for - **`fstack.cpp`:** Functions and data related to "fstack" nodes (the contents of top-level or `INCLUDE`d files, macro expansions, or `REPT`/`FOR` loop iterations) and their "contexts" (metadata that is only relevant while a node's content is being lexed and parsed). This file *owns* the `Context`s in its `contextStack` collection. 
Each of those `Context`s *owns* its `LexerState`, and *refers* to its `FileStackNode`, `uniqueIDStr`, and `macroArgs`. Each `FileStackNode` also *references* its `parent`. +- **`intern.cpp`:** + [Interned strings](https://en.wikipedia.org/wiki/String_interning), which are used for the names of keywords, symbols (labels, constants, variables, string constants, macros, etc), and charmaps. + This file *owns* the strings in its `internedStrings` collection, which its `internedIndexes` collection *references* by view and index. - **`lexer.cpp`:** Functions and data related to [lexing](https://en.wikipedia.org/wiki/Lexical_analysis) assembly source code into tokens, which can then be parsed. This file maintains static `lexerState` and `lexerStateEOL` pointers to `LexerState`s from the `Context`s in `fstack.cpp`. diff --git a/include/asm/actions.hpp b/include/asm/actions.hpp index 72d4173b..8133f122 100644 --- a/include/asm/actions.hpp +++ b/include/asm/actions.hpp @@ -13,7 +13,8 @@ #include "linkdefs.hpp" // AssertionType, RPNCommand -#include "asm/rpn.hpp" // Expression +#include "asm/intern.hpp" // InternedStr +#include "asm/rpn.hpp" // Expression struct AlignmentSpec { uint8_t alignment; @@ -54,8 +55,8 @@ std::string act_StringFormat( std::string const &spec, std::vector> const &args ); -std::string act_SectionName(std::string const &symName); +std::string act_SectionName(InternedStr symName); -void act_CompoundAssignment(std::string const &symName, RPNCommand op, int32_t constValue); +void act_CompoundAssignment(InternedStr symName, RPNCommand op, int32_t constValue); #endif // RGBDS_ASM_ACTIONS_HPP diff --git a/include/asm/charmap.hpp b/include/asm/charmap.hpp index 6bedaeeb..d3a4ccc1 100644 --- a/include/asm/charmap.hpp +++ b/include/asm/charmap.hpp @@ -10,13 +10,14 @@ #include #include +#include "asm/intern.hpp" + void charmap_Init(); bool charmap_ForEach( - void (*mapFunc)(std::string const &), - void (*charFunc)(std::string const &, std::vector) + void 
(*mapFunc)(InternedStr), void (*charFunc)(std::string const &, std::vector) ); -void charmap_New(std::string const &name, std::string const *baseName); -void charmap_Set(std::string const &name); +void charmap_New(InternedStr name, InternedStr const *baseName); +void charmap_Set(InternedStr name); void charmap_Push(); void charmap_Pop(); void charmap_CheckStack(); diff --git a/include/asm/fstack.hpp b/include/asm/fstack.hpp index defb9d2e..8b22a243 100644 --- a/include/asm/fstack.hpp +++ b/include/asm/fstack.hpp @@ -15,6 +15,7 @@ #include "linkdefs.hpp" +#include "asm/intern.hpp" #include "asm/lexer.hpp" struct FileStackNode { @@ -68,12 +69,10 @@ bool fstk_FailedOnMissingInclude(); bool yywrap(); bool fstk_RunInclude(std::string const &path, bool isQuiet); -void fstk_RunMacro( - std::string const ¯oName, std::shared_ptr macroArgs, bool isQuiet -); +void fstk_RunMacro(InternedStr macroName, std::shared_ptr macroArgs, bool isQuiet); void fstk_RunRept(uint32_t count, int32_t reptLineNo, ContentSpan const &span, bool isQuiet); void fstk_RunFor( - std::string const &symName, + InternedStr symName, int32_t start, int32_t stop, int32_t step, diff --git a/include/asm/intern.hpp b/include/asm/intern.hpp new file mode 100644 index 00000000..3518ee74 --- /dev/null +++ b/include/asm/intern.hpp @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: MIT + +#ifndef RGBDS_ASM_INTERN_HPP +#define RGBDS_ASM_INTERN_HPP + +#include +#include +#include +#include // hash + +class InternedStr { + size_t index; + +public: + constexpr InternedStr() : index(static_cast(-1)) {} + explicit constexpr InternedStr(size_t index_) : index(index_) {} + + std::string const &str() const; + char const *c_str() const { return str().c_str(); } + + bool operator==(InternedStr const &rhs) const { return index == rhs.index; } + + template + friend struct std::hash; +}; + +template<> +struct std::hash { + size_t operator()(InternedStr const &str) const { return std::hash{}(str.index); } +}; + +InternedStr 
intern(std::string_view str); + +#endif // RGBDS_ASM_INTERN_HPP diff --git a/include/asm/lexer.hpp b/include/asm/lexer.hpp index deabb2d7..8492ef11 100644 --- a/include/asm/lexer.hpp +++ b/include/asm/lexer.hpp @@ -14,6 +14,8 @@ #include "platform.hpp" // SSIZE_MAX +#include "asm/intern.hpp" + // This value is a compromise between `LexerState` allocation performance when reading the entire // file works, and buffering performance when it doesn't (e.g. when piping a file into RGBASM). static constexpr size_t LEXER_BUF_SIZE = 64; @@ -32,7 +34,7 @@ enum LexerMode { }; struct Expansion { - std::optional name; + std::optional name; std::shared_ptr contents; size_t offset; // Cursor into `contents` diff --git a/include/asm/rpn.hpp b/include/asm/rpn.hpp index b04c1b44..2a0a341c 100644 --- a/include/asm/rpn.hpp +++ b/include/asm/rpn.hpp @@ -10,16 +10,18 @@ #include "linkdefs.hpp" +#include "asm/intern.hpp" + struct Symbol; struct RPNValue { RPNCommand command; // The RPN_* command ID - std::variant data; // Data after the ID, if any + std::variant data; // Data after the ID, if any RPNValue(RPNCommand cmd); RPNValue(RPNCommand cmd, uint8_t val); RPNValue(RPNCommand cmd, uint32_t val); - RPNValue(RPNCommand cmd, std::string const &name); + RPNValue(RPNCommand cmd, InternedStr name); void appendEncoded(std::vector &buffer) const; }; @@ -40,8 +42,8 @@ struct Expression { bool isDiffConstant(Symbol const *symName) const; void makeNumber(uint32_t value); - void makeSymbol(std::string const &symName); - void makeBankSymbol(std::string const &symName); + void makeSymbol(InternedStr symName); + void makeBankSymbol(InternedStr symName); void makeBankSection(std::string const §Name); void makeSizeOfSection(std::string const §Name); void makeStartOfSection(std::string const §Name); diff --git a/include/asm/section.hpp b/include/asm/section.hpp index 0c5bcd53..544a2110 100644 --- a/include/asm/section.hpp +++ b/include/asm/section.hpp @@ -11,6 +11,7 @@ #include #include +#include 
"intern.hpp" #include "linkdefs.hpp" struct Expression; @@ -107,6 +108,6 @@ void sect_PushSection(); void sect_PopSection(); void sect_CheckStack(); -std::string sect_PushSectionFragmentLiteral(); +InternedStr sect_PushSectionFragmentLiteral(); #endif // RGBDS_ASM_SECTION_HPP diff --git a/include/asm/symbol.hpp b/include/asm/symbol.hpp index acbdd42a..886a80e4 100644 --- a/include/asm/symbol.hpp +++ b/include/asm/symbol.hpp @@ -10,6 +10,7 @@ #include #include +#include "asm/intern.hpp" #include "asm/lexer.hpp" #include "asm/section.hpp" @@ -26,7 +27,7 @@ struct Symbol; // Forward declaration for `sym_IsPC` bool sym_IsPC(Symbol const *sym); // Forward declaration for `getSection` struct Symbol { - std::string name; + InternedStr name; SymbolType type; bool isBuiltin; bool isExported; // Not relevant for SYM_MACRO or SYM_EQUS @@ -68,36 +69,34 @@ struct Symbol { uint32_t getConstantValue() const; }; -bool sym_IsDotScope(std::string const &symName); +bool sym_IsDotScope(InternedStr symName); void sym_ForEach(void (*callback)(Symbol &)); -Symbol *sym_AddLocalLabel(std::string const &symName); -Symbol *sym_AddLabel(std::string const &symName); +Symbol *sym_AddLocalLabel(InternedStr symName); +Symbol *sym_AddLabel(InternedStr symName); Symbol *sym_AddAnonLabel(); -std::string sym_MakeAnonLabelName(uint32_t ofs, bool neg); -void sym_Export(std::string const &symName); -Symbol *sym_AddEqu(std::string const &symName, int32_t value); -Symbol *sym_RedefEqu(std::string const &symName, int32_t value); -Symbol *sym_AddVar(std::string const &symName, int32_t value); +InternedStr sym_MakeAnonLabelName(uint32_t ofs, bool neg); +void sym_Export(InternedStr symName); +Symbol *sym_AddEqu(InternedStr symName, int32_t value); +Symbol *sym_RedefEqu(InternedStr symName, int32_t value); +Symbol *sym_AddVar(InternedStr symName, int32_t value); int32_t sym_GetRSValue(); void sym_SetRSValue(int32_t value); // Find a symbol by exact name, bypassing expansion checks -Symbol 
*sym_FindExactSymbol(std::string const &symName); +Symbol *sym_FindExactSymbol(InternedStr symName); // Find a symbol, possibly scoped, by name -Symbol *sym_FindScopedSymbol(std::string const &symName); +Symbol *sym_FindScopedSymbol(InternedStr symName); // Find a scoped symbol by name; do not return `@` or `_NARG` when they have no value -Symbol *sym_FindScopedValidSymbol(std::string const &symName); +Symbol *sym_FindScopedValidSymbol(InternedStr symName); Symbol const *sym_GetPC(); -Symbol *sym_AddMacro( - std::string const &symName, int32_t defLineNo, ContentSpan const &span, bool isQuiet -); -Symbol *sym_Ref(std::string const &symName); -Symbol *sym_AddString(std::string const &symName, std::shared_ptr value); -Symbol *sym_RedefString(std::string const &symName, std::shared_ptr value); -void sym_Purge(std::string const &symName); -bool sym_IsPurgedExact(std::string const &symName); -bool sym_IsPurgedScoped(std::string const &symName); +Symbol *sym_AddMacro(InternedStr symName, int32_t defLineNo, ContentSpan const &span, bool isQuiet); +Symbol *sym_Ref(InternedStr symName); +Symbol *sym_AddString(InternedStr symName, std::shared_ptr value); +Symbol *sym_RedefString(InternedStr symName, std::shared_ptr value); +void sym_Purge(InternedStr symName); +bool sym_IsPurgedExact(InternedStr symName); +bool sym_IsPurgedScoped(InternedStr symName); void sym_Init(time_t now); // Functions to save and restore the current label scopes. 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f35c87a3..0c808877 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -38,6 +38,7 @@ add_executable(rgbasm $ "asm/fixpoint.cpp" "asm/format.cpp" "asm/fstack.cpp" + "asm/intern.cpp" "asm/lexer.cpp" "asm/macro.cpp" "asm/main.cpp" diff --git a/src/asm/actions.cpp b/src/asm/actions.cpp index eb545c4f..c4a758ba 100644 --- a/src/asm/actions.cpp +++ b/src/asm/actions.cpp @@ -22,6 +22,7 @@ #include "asm/charmap.hpp" #include "asm/format.hpp" #include "asm/fstack.hpp" +#include "asm/intern.hpp" #include "asm/lexer.hpp" #include "asm/output.hpp" #include "asm/rpn.hpp" // Expression @@ -607,7 +608,7 @@ std::string act_StringFormat( return str; } -std::string act_SectionName(std::string const &symName) { +std::string act_SectionName(InternedStr symName) { Symbol *sym = sym_FindScopedValidSymbol(symName); if (!sym) { if (sym_IsPurgedScoped(symName)) { @@ -625,7 +626,7 @@ std::string act_SectionName(std::string const &symName) { return section->name; } -void act_CompoundAssignment(std::string const &symName, RPNCommand op, int32_t constValue) { +void act_CompoundAssignment(InternedStr symName, RPNCommand op, int32_t constValue) { Expression oldExpr, constExpr, newExpr; oldExpr.makeSymbol(symName); constExpr.makeNumber(constValue); diff --git a/src/asm/charmap.cpp b/src/asm/charmap.cpp index 5ac22fa8..6e000ad4 100644 --- a/src/asm/charmap.cpp +++ b/src/asm/charmap.cpp @@ -21,9 +21,10 @@ #include "itertools.hpp" // InsertionOrderedMap #include "util.hpp" +#include "asm/intern.hpp" #include "asm/warning.hpp" -#define DEFAULT_CHARMAP_NAME "main" +static InternedStr mainCharmapName; static bool compareNode(std::pair edge, char c) { return edge.first < c; @@ -50,7 +51,7 @@ struct CharmapNode { }; struct Charmap { - std::string name; + InternedStr name; std::vector nodes; // Trie of mappings (first node is reserved for the root node) size_t nextIndexOrAdd(size_t nodeIdx, char c) { @@ -88,18 +89,18 @@ bool forEachChar( 
return true; } -static InsertionOrderedMap charmaps; +static InsertionOrderedMap charmaps; static Charmap *currentCharmap; static std::stack charmapStack; void charmap_Init() { - charmap_New(DEFAULT_CHARMAP_NAME, nullptr); + mainCharmapName = intern("main"); + charmap_New(mainCharmapName, nullptr); } bool charmap_ForEach( - void (*mapFunc)(std::string const &), - void (*charFunc)(std::string const &, std::vector) + void (*mapFunc)(InternedStr), void (*charFunc)(std::string const &, std::vector) ) { for (Charmap const &charmap : charmaps) { std::map mappings; @@ -116,7 +117,7 @@ bool charmap_ForEach( return !charmaps.empty(); } -void charmap_New(std::string const &name, std::string const *baseName) { +void charmap_New(InternedStr name, InternedStr const *baseName) { std::optional baseIdx = std::nullopt; if (baseName != nullptr) { @@ -143,7 +144,7 @@ void charmap_New(std::string const &name, std::string const *baseName) { currentCharmap = &charmap; } -void charmap_Set(std::string const &name) { +void charmap_Set(InternedStr name) { if (auto index = charmaps.findIndex(name); index) { currentCharmap = &charmaps[*index]; } else { @@ -294,11 +295,12 @@ size_t charmap_ConvertNext(std::string_view &input, std::vector *output // Warn if this character is not mapped but any others are if (int firstChar = input[inputIdx]; charmap.nodes.size() > 1) { warning(WARNING_UNMAPPED_CHAR_1, "Unmapped character %s", printChar(firstChar)); - } else if (charmap.name != DEFAULT_CHARMAP_NAME) { + } else if (charmap.name != mainCharmapName) { warning( WARNING_UNMAPPED_CHAR_2, - "Unmapped character %s not in `" DEFAULT_CHARMAP_NAME "` charmap", - printChar(firstChar) + "Unmapped character %s not in `%s` charmap", + printChar(firstChar), + mainCharmapName.c_str() ); } diff --git a/src/asm/fstack.cpp b/src/asm/fstack.cpp index 3ae73961..7d363bc8 100644 --- a/src/asm/fstack.cpp +++ b/src/asm/fstack.cpp @@ -25,6 +25,7 @@ #include "platform.hpp" // strncasecmp #include "verbosity.hpp" +#include 
"asm/intern.hpp" #include "asm/lexer.hpp" #include "asm/macro.hpp" #include "asm/main.hpp" @@ -47,7 +48,7 @@ struct Context { bool isForLoop = false; int32_t forValue = 0; int32_t forStep = 0; - std::string forName{}; + InternedStr forName{}; }; static std::stack contextStack; @@ -309,7 +310,7 @@ static void } } fileInfoName.append(NODE_SEPARATOR); - fileInfoName.append(macro.name); + fileInfoName.append(macro.name.str()); auto fileInfo = std::make_shared(NODE_MACRO, fileInfoName, isQuiet); assume(!contextStack.empty()); // The top level context cannot be a MACRO @@ -389,9 +390,7 @@ bool fstk_RunInclude(std::string const &path, bool isQuiet) { return fstk_FileError(path, "`INCLUDE`"); } -void fstk_RunMacro( - std::string const ¯oName, std::shared_ptr macroArgs, bool isQuiet -) { +void fstk_RunMacro(InternedStr macroName, std::shared_ptr macroArgs, bool isQuiet) { auto makeSuggestion = [¯oName, ¯oArgs]() -> std::optional { std::shared_ptr arg = macroArgs->getArg(1); if (!arg) { @@ -402,14 +401,14 @@ void fstk_RunMacro( static char const *types[] = {"EQUS", "EQU", "RB", "RW", "RL", "="}; for (char const *type : types) { if (strncasecmp(str, type, strlen(type)) == 0) { - return "\"DEF "s + macroName + " " + type + " ...\""; + return "\"DEF "s + macroName.str() + " " + type + " ...\""; } } if (strncasecmp(str, "SET", literal_strlen("SET")) == 0) { - return "\"DEF "s + macroName + " = ...\""; + return "\"DEF "s + macroName.str() + " = ...\""; } if (str[0] == ':') { - return "a label \""s + macroName + (str[1] == ':' ? "::" : ":") + "\""; + return "a label \""s + macroName.str() + (str[1] == ':' ? 
"::" : ":") + "\""; } return std::nullopt; @@ -439,7 +438,7 @@ void fstk_RunRept(uint32_t count, int32_t reptLineNo, ContentSpan const &span, b } void fstk_RunFor( - std::string const &symName, + InternedStr symName, int32_t start, int32_t stop, int32_t step, diff --git a/src/asm/intern.cpp b/src/asm/intern.cpp new file mode 100644 index 00000000..efe6ce23 --- /dev/null +++ b/src/asm/intern.cpp @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: MIT + +#include "asm/intern.hpp" + +#include +#include // equal_to +#include +#include +#include +#include +#include + +#include "helpers.hpp" // assume +#include "verbosity.hpp" + +// Avoid `std::string` allocations when looking up heterogeneous values in `internedIndexes` +struct StringHash { + using is_transparent = void; + + size_t operator()(std::string_view str) const { return std::hash{}(str); } +}; + +// Use a `deque` not a `vector` to prevent reallocation so `internedIndexes` keys stay valid +static std::deque internedStrings; +// Keys are views of values in `internedStrings`; values are their corresponding indexes +static std::unordered_map> internedIndexes; + +std::string const &InternedStr::str() const { + assume(index != static_cast(-1)); + return internedStrings[index]; +} + +InternedStr intern(std::string_view str) { + if (auto search = internedIndexes.find(str); search != internedIndexes.end()) { + return InternedStr(search->second); + } + + size_t index = internedStrings.size(); + std::string &interned = internedStrings.emplace_back(str); + internedIndexes.emplace(interned, index); + + verbosePrint(VERB_TRACE, "Interned string \"%s\"\n", interned.c_str()); + + return InternedStr(index); +} diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index 9ca0bc67..2e2d6200 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -34,6 +34,7 @@ #include "asm/format.hpp" #include "asm/fstack.hpp" +#include "asm/intern.hpp" #include "asm/macro.hpp" #include "asm/main.hpp" #include "asm/rpn.hpp" @@ -47,7 +48,7 @@ struct 
Token { int type; - std::variant value; + std::variant value; Token() : type(T_(NUMBER)), value(std::monostate{}) { assume( @@ -63,15 +64,15 @@ struct Token { } Token(int type_, uint32_t value_) : type(type_), value(value_) { assume(type == T_(NUMBER)); } Token(int type_, std::string const &value_) : type(type_), value(std::move(value_)) { - assume( - type == T_(STRING) || type == T_(CHARACTER) || type == T_(SYMBOL) || type == T_(LABEL) - || type == T_(LOCAL) || type == T_(ANON) || type == T_(QMACRO) - ); + assume(type == T_(STRING) || type == T_(CHARACTER)); } Token(int type_, std::string &&value_) : type(type_), value(std::move(value_)) { + assume(type == T_(STRING) || type == T_(CHARACTER)); + } + Token(int type_, InternedStr value_) : type(type_), value(value_) { assume( - type == T_(STRING) || type == T_(CHARACTER) || type == T_(SYMBOL) || type == T_(LABEL) - || type == T_(LOCAL) || type == T_(ANON) || type == T_(QMACRO) + type == T_(SYMBOL) || type == T_(LABEL) || type == T_(LOCAL) || type == T_(ANON) + || type == T_(QMACRO) ); } }; @@ -519,7 +520,7 @@ void lexer_ToggleStringExpansion(bool enable) { // Functions for the actual lexer to obtain characters -static void beginExpansion(std::shared_ptr str, std::optional name) { +static void beginExpansion(std::shared_ptr str, std::optional name) { if (name) { lexer_CheckRecursionDepth(); } @@ -582,11 +583,13 @@ static uint32_t readBracketedMacroArgNum() { } } - std::string symName; + std::string builder; for (; continuesIdentifier(c); c = nextChar()) { - symName += c; + builder += c; } + InternedStr symName = intern(builder); + if (Symbol const *sym = sym_FindScopedValidSymbol(symName); !sym) { if (sym_IsPurgedScoped(symName)) { error("Bracketed symbol `%s` does not exist; it was purged", symName.c_str()); @@ -950,7 +953,7 @@ static void discardLineContinuation() { // Functions to read tokenizable values -static std::string readAnonLabelRef(char c) { +static InternedStr readAnonLabelRef(char c) { assume(c == '+' 
|| c == '-'); // We come here having already peeked at one char, so no need to do it again @@ -1229,7 +1232,7 @@ static uint32_t readGfxConstant() { static Token readIdentifier(char firstChar, bool raw) { assume(startsIdentifier(firstChar)); - std::string identifier(1, firstChar); + std::string builder(1, firstChar); bool keywordBeforeLocal = false; int tokenType = firstChar == '.' ? T_(LOCAL) : T_(SYMBOL); @@ -1238,19 +1241,21 @@ static Token readIdentifier(char firstChar, bool raw) { // If the char was a dot, the identifier is a local label if (c == '.') { // Check for a keyword before a non-raw local label - if (!raw && tokenType != T_(LOCAL) && keywords.find(identifier) != keywords.end()) { + if (!raw && tokenType != T_(LOCAL) && keywords.find(builder) != keywords.end()) { keywordBeforeLocal = true; } tokenType = T_(LOCAL); } - identifier += c; + builder += c; } + InternedStr identifier = intern(builder); + // Check for a keyword if the identifier is not raw and not a local label if (!raw && tokenType != T_(LOCAL)) { - if (auto search = keywords.find(identifier); search != keywords.end()) { + if (auto search = keywords.find(builder); search != keywords.end()) { return Token(search->second); } } @@ -1278,7 +1283,7 @@ static std::pair> readInterpolation fatal("Recursion limit (%zu) exceeded", options.maxRecursionDepth); } - std::string identifier; + std::string builder; FormatSpec fmt{}; bool invalid = false; @@ -1299,15 +1304,15 @@ static std::pair> readInterpolation break; } else if (c == ':' && !fmt.isParsed()) { // Format spec, only once shiftChar(); - size_t n = fmt.parseSpec(identifier.c_str()); - if (!fmt.isValid() || n != identifier.length()) { - error("Invalid interpolation format spec \"%s\"", identifier.c_str()); + size_t n = fmt.parseSpec(builder.c_str()); + if (!fmt.isValid() || n != builder.length()) { + error("Invalid interpolation format spec \"%s\"", builder.c_str()); invalid = true; } - identifier.clear(); // Now that format has been set, 
restart at beginning of string. + builder.clear(); // Now that format has been set, restart at beginning of string. } else { shiftChar(); - identifier += c; + builder += c; } } @@ -1315,19 +1320,21 @@ static std::pair> readInterpolation return {nullptr, nullptr}; // Don't allow invalid interpolation to occur. } - if (identifier.starts_with('#')) { + if (builder.starts_with('#')) { // Skip a '#' raw symbol prefix, but after expanding any nested interpolations. - identifier.erase(0, 1); - } else if (keywords.find(identifier) != keywords.end()) { + builder.erase(0, 1); + } else if (keywords.find(builder) != keywords.end()) { // Don't allow symbols that alias keywords without a '#' prefix. error( "Interpolated symbol `%s` is a reserved keyword; add a '#' prefix to use it as a raw " "symbol", - identifier.c_str() + builder.c_str() ); return {nullptr, nullptr}; } + InternedStr identifier = intern(builder); + if (Symbol const *sym = sym_FindScopedValidSymbol(identifier); !sym || !sym->isDefined()) { if (sym_IsPurgedScoped(identifier)) { error("Interpolated symbol `%s` does not exist; it was purged", identifier.c_str()); @@ -1902,9 +1909,9 @@ static Token yylex_NORMAL() { return token; } - // `token` is either a `SYMBOL` or a `LOCAL`, and both have a `std::string` value. - assume(std::holds_alternative(token.value)); - std::string const &identifier = std::get(token.value); + // `token` is either a `SYMBOL` or a `LOCAL`, and both have an `InternedStr` value. 
+ assume(std::holds_alternative(token.value)); + InternedStr identifier = std::get(token.value); // Raw symbols and local symbols cannot be string expansions if (!raw && token.type == T_(SYMBOL) && lexerState->enableStringExpansions) { @@ -2116,13 +2123,13 @@ static Token skipToLeadingKeyword( if (c == EOF) { return Token(T_(YYEOF)); } else if (isLetter(c)) { - std::string keyword(1, c); + std::string builder(1, c); shiftFn(); for (c = peekFn(); continuesIdentifier(c); c = peekFn()) { - keyword += c; + builder += c; shiftFn(); } - if (auto search = keywords.find(keyword); search != keywords.end()) { + if (auto search = keywords.find(builder); search != keywords.end()) { finalizeFn(); return Token(search->second); } @@ -2347,6 +2354,19 @@ yy::parser::symbol_type yylex() { } // LCOV_EXCL_STOP return yy::parser::symbol_type(token.type, std::get(token.value)); + } else if (std::holds_alternative(token.value)) { + // LCOV_EXCL_START + if (checkVerbosity(VERB_TRACE)) { + style_Set(stderr, STYLE_MAGENTA, false); + fprintf( + stderr, "Lexed `%s` token (interned ", yy::parser::symbol_type(token.type).name() + ); + verboseOutputString(std::get(token.value).str()); + fputs(")\n", stderr); + style_Reset(stderr); + } + // LCOV_EXCL_STOP + return yy::parser::symbol_type(token.type, std::get(token.value)); } else { // LCOV_EXCL_START verbosePrint(VERB_TRACE, "Lexed `%s` token\n", yy::parser::symbol_type(token.type).name()); diff --git a/src/asm/main.cpp b/src/asm/main.cpp index 38979f33..25ac3ff5 100644 --- a/src/asm/main.cpp +++ b/src/asm/main.cpp @@ -190,9 +190,9 @@ static void parseArg(int ch, char *arg) { char *equals = strchr(arg, '='); if (equals) { *equals = '\0'; - sym_AddString(arg, std::make_shared(equals + 1)); + sym_AddString(intern(arg), std::make_shared(equals + 1)); } else { - sym_AddString(arg, std::make_shared("1")); + sym_AddString(intern(arg), std::make_shared("1")); } break; } diff --git a/src/asm/output.cpp b/src/asm/output.cpp index f49fc0c1..e3aa3c16 100644 
--- a/src/asm/output.cpp +++ b/src/asm/output.cpp @@ -20,6 +20,7 @@ #include "asm/charmap.hpp" #include "asm/fstack.hpp" +#include "asm/intern.hpp" #include "asm/lexer.hpp" #include "asm/main.hpp" #include "asm/rpn.hpp" @@ -107,7 +108,7 @@ static void writeSection(Section const §, FILE *file) { } static void writeSymbol(Symbol const &sym, FILE *file) { - putString(sym.name, file); + putString(sym.name.str(), file); if (!sym.isDefined()) { putc(SYMTYPE_IMPORT, file); } else { @@ -338,7 +339,7 @@ static bool dumpCharmaps(FILE *file) { // Characters are ordered by charmap, then by definition order return charmap_ForEach( - [](std::string const &name) { fprintf(charmapFile, "newcharmap %s\n", name.c_str()); }, + [](InternedStr name) { fprintf(charmapFile, "newcharmap %s\n", name.c_str()); }, [](std::string const &mapping, std::vector value) { fputs("charmap \"", charmapFile); dumpString(mapping, charmapFile); diff --git a/src/asm/parser.y b/src/asm/parser.y index 3dc2f9e2..1f801952 100644 --- a/src/asm/parser.y +++ b/src/asm/parser.y @@ -59,7 +59,7 @@ yy::parser::symbol_type yylex(); // Provided by lexer.cpp static auto handleSymbolByType( - std::string const &symName, + InternedStr symName, std::invocable auto numCallback, std::invocable auto strCallback ) { @@ -324,11 +324,11 @@ %token NUMBER "number" %token STRING "string" %token CHARACTER "character" -%token SYMBOL "symbol" -%token LABEL "label" -%token LOCAL "local label" -%token ANON "anonymous label" -%token QMACRO "quiet macro" +%token SYMBOL "symbol" +%token LABEL "label" +%token LOCAL "local label" +%token ANON "anonymous label" +%token QMACRO "quiet macro" /******************** Data types ********************/ @@ -355,23 +355,23 @@ %type string_literal %type strcat_args // Strings used for identifiers -%type def_id -%type redef_id -%type def_numeric -%type def_equ -%type redef_equ -%type def_set -%type def_rb -%type def_rw -%type def_rl -%type def_equs -%type redef_equs -%type scoped_sym +%type def_id +%type 
redef_id +%type def_numeric +%type def_equ +%type redef_equ +%type def_set +%type def_rb +%type def_rw +%type def_rl +%type def_equs +%type redef_equs +%type scoped_sym // `scoped_sym_no_anon` exists because anonymous labels usually count as "scoped symbols", but some // contexts treat anonymous labels and other labels/symbols differently, e.g. `purge` or `export`. -%type scoped_sym_no_anon -%type fragment_literal -%type fragment_literal_name +%type scoped_sym_no_anon +%type fragment_literal +%type fragment_literal_name // SM83 instruction parameters %type reg_r @@ -399,7 +399,7 @@ %type > ds_args %type for_args %type > macro_args -%type > purge_args +%type > purge_args %type sect_attrs %type sect_mod %type sect_type @@ -507,7 +507,7 @@ def_id: lexer_ToggleStringExpansion(false); } SYMBOL { lexer_ToggleStringExpansion(true); - $$ = std::move($3); + $$ = $3; } ; @@ -516,7 +516,7 @@ redef_id: lexer_ToggleStringExpansion(false); } SYMBOL { lexer_ToggleStringExpansion(true); - $$ = std::move($3); + $$ = $3; } ; @@ -912,40 +912,40 @@ endu: def_equ: def_id POP_EQU iconst { - $$ = std::move($1); + $$ = $1; sym_AddEqu($$, $3); } ; redef_equ: redef_id POP_EQU iconst { - $$ = std::move($1); + $$ = $1; sym_RedefEqu($$, $3); } ; def_set: def_id POP_EQUAL iconst { - $$ = std::move($1); + $$ = $1; sym_AddVar($$, $3); } | redef_id POP_EQUAL iconst { - $$ = std::move($1); + $$ = $1; sym_AddVar($$, $3); } | def_id compound_eq iconst { - $$ = std::move($1); + $$ = $1; act_CompoundAssignment($$, $2, $3); } | redef_id compound_eq iconst { - $$ = std::move($1); + $$ = $1; act_CompoundAssignment($$, $2, $3); } ; def_rb: def_id POP_RB rs_uconst { - $$ = std::move($1); + $$ = $1; uint32_t rs = sym_GetRSValue(); sym_AddEqu($$, rs); sym_SetRSValue(rs + $3); @@ -954,7 +954,7 @@ def_rb: def_rw: def_id POP_RW rs_uconst { - $$ = std::move($1); + $$ = $1; uint32_t rs = sym_GetRSValue(); sym_AddEqu($$, rs); sym_SetRSValue(rs + 2 * $3); @@ -963,7 +963,7 @@ def_rw: def_rl: def_id SM83_RL rs_uconst 
{ - $$ = std::move($1); + $$ = $1; uint32_t rs = sym_GetRSValue(); sym_AddEqu($$, rs); sym_SetRSValue(rs + 4 * $3); @@ -972,14 +972,14 @@ def_rl: def_equs: def_id POP_EQUS string { - $$ = std::move($1); + $$ = $1; sym_AddString($$, std::make_shared($3)); } ; redef_equs: redef_id POP_EQUS string { - $$ = std::move($1); + $$ = $1; sym_RedefString($$, std::make_shared($3)); } ; @@ -988,7 +988,7 @@ purge: POP_PURGE { lexer_ToggleStringExpansion(false); } purge_args trailing_comma { - for (std::string &arg : $3) { + for (InternedStr arg : $3) { sym_Purge(arg); } lexer_ToggleStringExpansion(true); @@ -1254,7 +1254,7 @@ reloc_16bit: fragment_literal: LBRACKS fragment_literal_name asm_file RBRACKS { sect_PopSection(); - $$ = std::move($2); + $$ = $2; } ; diff --git a/src/asm/rpn.cpp b/src/asm/rpn.cpp index addc6b6f..c251e63e 100644 --- a/src/asm/rpn.cpp +++ b/src/asm/rpn.cpp @@ -37,7 +37,7 @@ Symbol const *Expression::symbolOf() const { if (rpn.size() != 1 || rpn[0].command != RPN_SYM) { return nullptr; } - return sym_FindScopedSymbol(std::get(rpn[0].data)); + return sym_FindScopedSymbol(std::get(rpn[0].data)); } bool Expression::isDiffConstant(Symbol const *sym) const { @@ -58,7 +58,7 @@ void Expression::makeNumber(uint32_t value) { data = static_cast(value); } -void Expression::makeSymbol(std::string const &symName) { +void Expression::makeSymbol(InternedStr symName) { assume(rpn.empty()); if (Symbol *sym = sym_FindScopedSymbol(symName); sym_IsPC(sym) && !sect_GetSymbolSection()) { error("PC has no value outside of a section"); @@ -69,8 +69,8 @@ void Expression::makeSymbol(std::string const &symName) { } else if (!sym || !sym->isConstant()) { data = sym_IsPC(sym) ? "PC is not constant at assembly time" : (sym && sym->isDefined() - ? "`"s + symName + "` is not constant at assembly time" - : "undefined symbol `"s + symName + "`") + ? "`"s + symName.str() + "` is not constant at assembly time" + : "undefined symbol `"s + symName.str() + "`") + (sym_IsPurgedScoped(symName) ? 
"; it was purged" : ""); sym = sym_Ref(symName); rpn.emplace_back(RPN_SYM, sym->name); @@ -79,7 +79,7 @@ void Expression::makeSymbol(std::string const &symName) { } } -void Expression::makeBankSymbol(std::string const &symName) { +void Expression::makeBankSymbol(InternedStr symName) { assume(rpn.empty()); if (Symbol const *sym = sym_FindScopedSymbol(symName); sym_IsPC(sym)) { // The @ symbol is treated differently. @@ -103,8 +103,8 @@ void Expression::makeBankSymbol(std::string const &symName) { data = static_cast(sym->getSection()->bank); } else { data = sym_IsPurgedScoped(symName) - ? "`"s + symName + "`'s bank is not known; it was purged" - : "`"s + symName + "`'s bank is not known"; + ? "`"s + symName.str() + "`'s bank is not known; it was purged" + : "`"s + symName.str() + "`'s bank is not known"; rpn.emplace_back(RPN_BANK_SYM, sym->name); } } @@ -116,7 +116,7 @@ void Expression::makeBankSection(std::string const &sectName) { data = static_cast(sect->bank); } else { data = "Section \""s + sectName + "\"'s bank is not known"; - rpn.emplace_back(RPN_BANK_SECT, sectName); + rpn.emplace_back(RPN_BANK_SECT, intern(sectName)); } } @@ -126,7 +126,7 @@ void Expression::makeSizeOfSection(std::string const &sectName) { data = static_cast(sect->size); } else { data = "Section \""s + sectName + "\"'s size is not known"; - rpn.emplace_back(RPN_SIZEOF_SECT, sectName); + rpn.emplace_back(RPN_SIZEOF_SECT, intern(sectName)); } } @@ -136,7 +136,7 @@ void Expression::makeStartOfSection(std::string const &sectName) { data = static_cast(sect->org); } else { data = "Section \""s + sectName + "\"'s start is not known"; - rpn.emplace_back(RPN_STARTOF_SECT, sectName); + rpn.emplace_back(RPN_STARTOF_SECT, intern(sectName)); } } @@ -550,7 +550,7 @@ RPNValue::RPNValue(RPNCommand cmd, uint32_t val) : command(cmd), data(val) { assume(cmd == RPN_CONST); } -RPNValue::RPNValue(RPNCommand cmd, std::string const &name) : command(cmd), data(name) { +RPNValue::RPNValue(RPNCommand cmd, InternedStr name) : 
command(cmd), data(name) { assume( cmd == RPN_SYM || cmd == RPN_BANK_SYM || cmd == RPN_BANK_SECT || cmd == RPN_SIZEOF_SECT || cmd == RPN_STARTOF_SECT @@ -576,9 +576,9 @@ void RPNValue::appendEncoded(std::vector &buffer) const { case RPN_SYM: case RPN_BANK_SYM: { // The command ID is followed by a four-byte symbol ID - assume(std::holds_alternative(data)); + assume(std::holds_alternative(data)); // The symbol name is always written expanded - Symbol *sym = sym_FindExactSymbol(std::get(data)); + Symbol *sym = sym_FindExactSymbol(std::get(data)); out_RegisterSymbol(*sym); // Ensure that `sym->ID` is set buffer.push_back(sym->ID & 0xFF); buffer.push_back(sym->ID >> 8); @@ -591,8 +591,8 @@ void RPNValue::appendEncoded(std::vector &buffer) const { case RPN_SIZEOF_SECT: case RPN_STARTOF_SECT: { // The command ID is followed by a NUL-terminated section name string - assume(std::holds_alternative(data)); - std::string const &name = std::get(data); + assume(std::holds_alternative(data)); + std::string const &name = std::get(data).str(); buffer.reserve(buffer.size() + name.length() + 1); buffer.insert(buffer.end(), RANGE(name)); buffer.push_back('\0'); diff --git a/src/asm/section.cpp b/src/asm/section.cpp index f6ec4e74..7d6d5d70 100644 --- a/src/asm/section.cpp +++ b/src/asm/section.cpp @@ -1158,7 +1158,7 @@ void sect_EndSection() { sym_ResetCurrentLabelScopes(); } -std::string sect_PushSectionFragmentLiteral() { +InternedStr sect_PushSectionFragmentLiteral() { static uint64_t nextFragmentLiteralID = 0; // Like `requireCodeSection` but fatal @@ -1192,5 +1192,5 @@ std::string sect_PushSectionFragmentLiteral() { currentSection = sect; // Return a symbol ID to use for the address of this section fragment - return "$"s + std::to_string(nextFragmentLiteralID++); + return intern("$"s + std::to_string(nextFragmentLiteralID++)); } diff --git a/src/asm/symbol.cpp b/src/asm/symbol.cpp index 61ef06a4..1dd9b645 100644 --- a/src/asm/symbol.cpp +++ b/src/asm/symbol.cpp @@ -22,6 +22,7 @@ 
#include "version.hpp" #include "asm/fstack.hpp" +#include "asm/intern.hpp" #include "asm/lexer.hpp" #include "asm/macro.hpp" #include "asm/main.hpp" @@ -31,8 +32,8 @@ using namespace std::literals; -static std::unordered_map symbols; -static std::unordered_set purgedSymbols; +static std::unordered_map symbols; +static std::unordered_set purgedSymbols; static Symbol const *globalScope = nullptr; // Current section's global label scope static Symbol const *localScope = nullptr; // Current section's local label scope @@ -44,6 +45,10 @@ static Symbol *globalScopeSymbol; static Symbol *localScopeSymbol; static Symbol *RSSymbol; +static InternedStr PCName; +static InternedStr globalScopeName; +static InternedStr localScopeName; + static char savedTIME[256]; static char savedDATE[256]; static char savedTIMESTAMP_ISO8601_LOCAL[256]; @@ -53,10 +58,10 @@ bool sym_IsPC(Symbol const *sym) { return sym == PCSymbol; } -bool sym_IsDotScope(std::string const &symName) { +bool sym_IsDotScope(InternedStr symName) { // Label scopes `.` and `..` are the only nonlocal identifiers that start with a dot. // Three or more dots are considered a nonsensical local label. - return symName == "." 
|| symName == ".."; + return symName == globalScopeName || symName == localScopeName; } void sym_ForEach(void (*callback)(Symbol &)) { @@ -76,9 +81,9 @@ static int32_t NARGCallback() { static std::shared_ptr SCOPECallback() { if (localScope) { - return std::make_shared(".."); + return std::make_shared(localScopeName.str()); } else if (globalScope) { - return std::make_shared("."); + return std::make_shared(globalScopeName.str()); } else { if (!sect_GetSymbolSection()) { error("`__SCOPE__` has no value outside of a section"); @@ -92,7 +97,7 @@ static std::shared_ptr globalScopeCallback() { error("`.` has no value outside of a label scope"); return std::make_shared(""); } - return std::make_shared(globalScope->name); + return std::make_shared(globalScope->name.str()); } static std::shared_ptr localScopeCallback() { @@ -100,7 +105,7 @@ static std::shared_ptr localScopeCallback() { error("`..` has no value outside of a local label scope"); return std::make_shared(""); } - return std::make_shared(localScope->name); + return std::make_shared(localScope->name.str()); } static int32_t PCCallback() { @@ -219,13 +224,13 @@ static void redefinedError(Symbol const &sym) { } } -static void assumeAlreadyExpanded(std::string const &symName) { +static void assumeAlreadyExpanded(InternedStr symName) { // Either the symbol name is `Global.local` or entirely '.'s (for scopes `.` and `..`), // but cannot be unqualified `.local` or more than two '.'s - assume(!symName.starts_with('.') || sym_IsDotScope(symName)); + assume(!symName.str().starts_with('.') || sym_IsDotScope(symName)); } -static Symbol &createSymbol(std::string const &symName) { +static Symbol &createSymbol(InternedStr symName) { assumeAlreadyExpanded(symName); static uint32_t nextDefIndex = 0; @@ -245,13 +250,13 @@ static Symbol &createSymbol(std::string const &symName) { return sym; } -static bool isAutoScoped(std::string const &symName) { +static bool isAutoScoped(InternedStr symName) { // `globalScope` should be global 
if it's defined - assume(!globalScope || globalScope->name.find('.') == std::string::npos); + assume(!globalScope || globalScope->name.str().find('.') == std::string::npos); // `localScope` should be qualified local if it's defined - assume(!localScope || localScope->name.find('.') != std::string::npos); + assume(!localScope || localScope->name.str().find('.') != std::string::npos); - size_t dotPos = symName.find('.'); + size_t dotPos = symName.str().find('.'); // If there are no dots, it's not a local label if (dotPos == std::string::npos) { @@ -264,12 +269,12 @@ static bool isAutoScoped(std::string const &symName) { } // Check for nothing after the dot - if (dotPos == symName.length() - 1) { + if (dotPos == symName.str().length() - 1) { fatal("`%s` is a nonsensical reference to an empty local label", symName.c_str()); } // Check for more than one dot - if (symName.find('.', dotPos + 1) != std::string::npos) { + if (symName.str().find('.', dotPos + 1) != std::string::npos) { fatal("`%s` is a nonsensical reference to a nested local label", symName.c_str()); } @@ -286,22 +291,22 @@ static bool isAutoScoped(std::string const &symName) { return true; } -static std::string expandedSymName(std::string const &symName) { - return isAutoScoped(symName) ? globalScope->name + symName : symName; +static InternedStr expandedSymName(InternedStr symName) { + return isAutoScoped(symName) ? intern(globalScope->name.str() + symName.str()) : symName; } -Symbol *sym_FindExactSymbol(std::string const &symName) { +Symbol *sym_FindExactSymbol(InternedStr symName) { assumeAlreadyExpanded(symName); auto search = symbols.find(symName); return search != symbols.end() ? 
&search->second : nullptr; } -Symbol *sym_FindScopedSymbol(std::string const &symName) { +Symbol *sym_FindScopedSymbol(InternedStr symName) { return sym_FindExactSymbol(expandedSymName(symName)); } -Symbol *sym_FindScopedValidSymbol(std::string const &symName) { +Symbol *sym_FindScopedValidSymbol(InternedStr symName) { Symbol *sym = sym_FindScopedSymbol(symName); // `@` has no value outside of a section @@ -332,7 +337,7 @@ Symbol const *sym_GetPC() { return PCSymbol; } -void sym_Purge(std::string const &symName) { +void sym_Purge(InternedStr symName) { Symbol *sym = sym_FindScopedValidSymbol(symName); if (!sym) { @@ -363,13 +368,13 @@ void sym_Purge(std::string const &symName) { } } -bool sym_IsPurgedExact(std::string const &symName) { +bool sym_IsPurgedExact(InternedStr symName) { assumeAlreadyExpanded(symName); return purgedSymbols.find(symName) != purgedSymbols.end(); } -bool sym_IsPurgedScoped(std::string const &symName) { +bool sym_IsPurgedScoped(InternedStr symName) { return sym_IsPurgedExact(expandedSymName(symName)); } @@ -405,9 +410,9 @@ void sym_SetCurrentLabelScopes(std::pair newScop localScope = std::get<1>(newScopes); // `globalScope` should be global if it's defined - assume(!globalScope || globalScope->name.find('.') == std::string::npos); + assume(!globalScope || globalScope->name.str().find('.') == std::string::npos); // `localScope` should be qualified local if it's defined - assume(!localScope || localScope->name.find('.') != std::string::npos); + assume(!localScope || localScope->name.str().find('.') != std::string::npos); } void sym_ResetCurrentLabelScopes() { @@ -415,7 +420,7 @@ void sym_ResetCurrentLabelScopes() { localScope = nullptr; } -static Symbol *createNonrelocSymbol(std::string const &symName, bool numeric) { +static Symbol *createNonrelocSymbol(InternedStr symName, bool numeric) { Symbol *sym = sym_FindExactSymbol(symName); if (!sym) { @@ -436,7 +441,7 @@ static Symbol *createNonrelocSymbol(std::string const &symName, bool numeric) { 
return sym; } -Symbol *sym_AddEqu(std::string const &symName, int32_t value) { +Symbol *sym_AddEqu(InternedStr symName, int32_t value) { Symbol *sym = createNonrelocSymbol(symName, true); if (!sym) { @@ -449,7 +454,7 @@ Symbol *sym_AddEqu(std::string const &symName, int32_t value) { return sym; } -Symbol *sym_RedefEqu(std::string const &symName, int32_t value) { +Symbol *sym_RedefEqu(InternedStr symName, int32_t value) { Symbol *sym = sym_FindExactSymbol(symName); if (!sym) { @@ -471,7 +476,7 @@ Symbol *sym_RedefEqu(std::string const &symName, int32_t value) { return sym; } -Symbol *sym_AddString(std::string const &symName, std::shared_ptr str) { +Symbol *sym_AddString(InternedStr symName, std::shared_ptr str) { Symbol *sym = createNonrelocSymbol(symName, false); if (!sym) { @@ -483,7 +488,7 @@ Symbol *sym_AddString(std::string const &symName, std::shared_ptr s return sym; } -Symbol *sym_RedefString(std::string const &symName, std::shared_ptr str) { +Symbol *sym_RedefString(InternedStr symName, std::shared_ptr str) { Symbol *sym = sym_FindExactSymbol(symName); if (!sym) { @@ -511,7 +516,7 @@ Symbol *sym_RedefString(std::string const &symName, std::shared_ptr return sym; } -Symbol *sym_AddVar(std::string const &symName, int32_t value) { +Symbol *sym_AddVar(InternedStr symName, int32_t value) { Symbol *sym = sym_FindExactSymbol(symName); if (!sym) { @@ -529,7 +534,7 @@ Symbol *sym_AddVar(std::string const &symName, int32_t value) { return sym; } -static Symbol *addLabel(std::string const &symName) { +static Symbol *addLabel(InternedStr symName) { assumeAlreadyExpanded(symName); Symbol *sym = sym_FindExactSymbol(symName); @@ -546,7 +551,7 @@ static Symbol *addLabel(std::string const &symName) { sym->type = SYM_LABEL; sym->data = static_cast(sect_GetSymbolOffset()); // Don't export anonymous labels - if (options.exportAll && !symName.starts_with('!')) { + if (options.exportAll && !symName.str().starts_with('!')) { sym->isExported = true; } sym->section = 
sect_GetSymbolSection(); @@ -558,9 +563,9 @@ static Symbol *addLabel(std::string const &symName) { return sym; } -Symbol *sym_AddLocalLabel(std::string const &symName) { +Symbol *sym_AddLocalLabel(InternedStr symName) { // The symbol name should be local, qualified or not - assume(symName.find('.') != std::string::npos); + assume(symName.str().find('.') != std::string::npos); Symbol *sym = addLabel(expandedSymName(symName)); @@ -571,9 +576,9 @@ Symbol *sym_AddLocalLabel(std::string const &symName) { return sym; } -Symbol *sym_AddLabel(std::string const &symName) { +Symbol *sym_AddLabel(InternedStr symName) { // The symbol name should be global - assume(symName.find('.') == std::string::npos); + assume(symName.str().find('.') == std::string::npos); Symbol *sym = addLabel(symName); @@ -596,12 +601,12 @@ Symbol *sym_AddAnonLabel() { // LCOV_EXCL_STOP } - std::string anon = sym_MakeAnonLabelName(0, true); // The direction is important! + InternedStr anon = sym_MakeAnonLabelName(0, true); // The direction is important! 
++anonLabelID; return addLabel(anon); } -std::string sym_MakeAnonLabelName(uint32_t ofs, bool neg) { +InternedStr sym_MakeAnonLabelName(uint32_t ofs, bool neg) { uint32_t id = 0; if (neg) { @@ -632,11 +637,11 @@ std::string sym_MakeAnonLabelName(uint32_t ofs, bool neg) { } } - return "!"s + std::to_string(id); + return intern("!"s + std::to_string(id)); } -void sym_Export(std::string const &symName) { - if (symName.starts_with('!')) { +void sym_Export(InternedStr symName) { + if (symName.str().starts_with('!')) { // LCOV_EXCL_START // The parser does not accept anonymous labels for an `EXPORT` directive error("Cannot export anonymous label"); @@ -655,9 +660,8 @@ void sym_Export(std::string const &symName) { sym->isExported = true; } -Symbol *sym_AddMacro( - std::string const &symName, int32_t defLineNo, ContentSpan const &span, bool isQuiet -) { +Symbol * + sym_AddMacro(InternedStr symName, int32_t defLineNo, ContentSpan const &span, bool isQuiet) { Symbol *sym = createNonrelocSymbol(symName, false); if (!sym) { @@ -678,7 +682,7 @@ Symbol *sym_AddMacro( // Flag that a symbol is referenced in an RPN expression // and create it if it doesn't exist yet -Symbol *sym_Ref(std::string const &symName) { +Symbol *sym_Ref(InternedStr symName) { Symbol *sym = sym_FindScopedSymbol(symName); if (!sym) { @@ -691,41 +695,46 @@ Symbol *sym_Ref(std::string const &symName) { // Define the built-in symbols void sym_Init(time_t now) { - PCSymbol = &createSymbol("@"s); + PCName = intern("@"); + PCSymbol = &createSymbol(PCName); PCSymbol->type = SYM_LABEL; PCSymbol->data = PCCallback; PCSymbol->isBuiltin = true; - NARGSymbol = &createSymbol("_NARG"s); + NARGSymbol = &createSymbol(intern("_NARG")); NARGSymbol->type = SYM_EQU; NARGSymbol->data = NARGCallback; NARGSymbol->isBuiltin = true; - globalScopeSymbol = &createSymbol("."s); + globalScopeName = intern("."); + globalScopeSymbol = &createSymbol(globalScopeName); globalScopeSymbol->type = SYM_EQUS; globalScopeSymbol->data = 
globalScopeCallback; globalScopeSymbol->isBuiltin = true; - localScopeSymbol = &createSymbol(".."s); + localScopeName = intern(".."); + localScopeSymbol = &createSymbol(localScopeName); localScopeSymbol->type = SYM_EQUS; localScopeSymbol->data = localScopeCallback; localScopeSymbol->isBuiltin = true; - SCOPESymbol = &createSymbol("__SCOPE__"s); + SCOPESymbol = &createSymbol(intern("__SCOPE__")); SCOPESymbol->type = SYM_EQUS; SCOPESymbol->data = SCOPECallback; SCOPESymbol->isBuiltin = true; - RSSymbol = sym_AddVar("_RS"s, 0); + RSSymbol = sym_AddVar(intern("_RS"), 0); RSSymbol->isBuiltin = true; - sym_AddString("__RGBDS_VERSION__"s, std::make_shared(get_package_version_string())) + sym_AddString( + intern("__RGBDS_VERSION__"), std::make_shared(get_package_version_string()) + ) ->isBuiltin = true; - sym_AddEqu("__RGBDS_MAJOR__"s, PACKAGE_VERSION_MAJOR)->isBuiltin = true; - sym_AddEqu("__RGBDS_MINOR__"s, PACKAGE_VERSION_MINOR)->isBuiltin = true; - sym_AddEqu("__RGBDS_PATCH__"s, PACKAGE_VERSION_PATCH)->isBuiltin = true; + sym_AddEqu(intern("__RGBDS_MAJOR__"), PACKAGE_VERSION_MAJOR)->isBuiltin = true; + sym_AddEqu(intern("__RGBDS_MINOR__"), PACKAGE_VERSION_MINOR)->isBuiltin = true; + sym_AddEqu(intern("__RGBDS_PATCH__"), PACKAGE_VERSION_PATCH)->isBuiltin = true; #ifdef PACKAGE_VERSION_RC - sym_AddEqu("__RGBDS_RC__"s, PACKAGE_VERSION_RC)->isBuiltin = true; + sym_AddEqu(intern("__RGBDS_RC__"), PACKAGE_VERSION_RC)->isBuiltin = true; #endif // LCOV_EXCL_START @@ -756,7 +765,7 @@ void sym_Init(time_t now) { time_utc ); - Symbol *timeSymbol = &createSymbol("__TIME__"s); + Symbol *timeSymbol = &createSymbol(intern("__TIME__")); timeSymbol->type = SYM_EQUS; timeSymbol->data = []() { warning(WARNING_OBSOLETE, "`__TIME__` is deprecated; use `__ISO_8601_LOCAL__`"); @@ -764,7 +773,7 @@ void sym_Init(time_t now) { }; timeSymbol->isBuiltin = true; - Symbol *dateSymbol = &createSymbol("__DATE__"s); + Symbol *dateSymbol = &createSymbol(intern("__DATE__")); dateSymbol->type = SYM_EQUS; 
dateSymbol->data = []() { warning(WARNING_OBSOLETE, "`__DATE__` is deprecated; use `__ISO_8601_LOCAL__`"); @@ -773,16 +782,18 @@ void sym_Init(time_t now) { dateSymbol->isBuiltin = true; sym_AddString( - "__ISO_8601_LOCAL__"s, std::make_shared(savedTIMESTAMP_ISO8601_LOCAL) + intern("__ISO_8601_LOCAL__"), std::make_shared(savedTIMESTAMP_ISO8601_LOCAL) ) ->isBuiltin = true; - sym_AddString("__ISO_8601_UTC__"s, std::make_shared(savedTIMESTAMP_ISO8601_UTC)) + sym_AddString( + intern("__ISO_8601_UTC__"), std::make_shared(savedTIMESTAMP_ISO8601_UTC) + ) ->isBuiltin = true; - sym_AddEqu("__UTC_YEAR__"s, time_utc->tm_year + 1900)->isBuiltin = true; - sym_AddEqu("__UTC_MONTH__"s, time_utc->tm_mon + 1)->isBuiltin = true; - sym_AddEqu("__UTC_DAY__"s, time_utc->tm_mday)->isBuiltin = true; - sym_AddEqu("__UTC_HOUR__"s, time_utc->tm_hour)->isBuiltin = true; - sym_AddEqu("__UTC_MINUTE__"s, time_utc->tm_min)->isBuiltin = true; - sym_AddEqu("__UTC_SECOND__"s, time_utc->tm_sec)->isBuiltin = true; + sym_AddEqu(intern("__UTC_YEAR__"), time_utc->tm_year + 1900)->isBuiltin = true; + sym_AddEqu(intern("__UTC_MONTH__"), time_utc->tm_mon + 1)->isBuiltin = true; + sym_AddEqu(intern("__UTC_DAY__"), time_utc->tm_mday)->isBuiltin = true; + sym_AddEqu(intern("__UTC_HOUR__"), time_utc->tm_hour)->isBuiltin = true; + sym_AddEqu(intern("__UTC_MINUTE__"), time_utc->tm_min)->isBuiltin = true; + sym_AddEqu(intern("__UTC_SECOND__"), time_utc->tm_sec)->isBuiltin = true; }