From a71e4086a2deb2810003a42ce6e553667b1c3c07 Mon Sep 17 00:00:00 2001 From: Sylvie <35663410+Rangi42@users.noreply.github.com> Date: Sat, 2 Mar 2024 13:21:28 -0500 Subject: [PATCH] Use `std::string_view` for macro bodies (#1326) This removes the last use of `strdup` This required making a lot of related pointers be `const`. That in turn conflicted with the need to `munmap()` a pointer eventually, which was similar to the need to eventually `free()` an `Expansion`'s contents, so I used the same solution of a `union`. That lets us normally use the `const` pointer for `const` correctness, and the non-`const` one for the not-really- mutating destruction cases. --- include/asm/fstack.hpp | 4 ++-- include/asm/lexer.hpp | 12 ++++++++---- include/asm/symbol.hpp | 24 ++++++++---------------- include/platform.hpp | 5 ----- src/asm/fstack.cpp | 8 ++++---- src/asm/lexer.cpp | 23 ++++++++++++----------- src/asm/symbol.cpp | 30 +++++++++++++----------------- 7 files changed, 47 insertions(+), 59 deletions(-) diff --git a/include/asm/fstack.hpp b/include/asm/fstack.hpp index 7fa6a4f4..71761e92 100644 --- a/include/asm/fstack.hpp +++ b/include/asm/fstack.hpp @@ -61,9 +61,9 @@ std::string *fstk_FindFile(char const *path); bool yywrap(); void fstk_RunInclude(char const *path); void fstk_RunMacro(char const *macroName, MacroArgs *args); -void fstk_RunRept(uint32_t count, int32_t reptLineNo, char *body, size_t size); +void fstk_RunRept(uint32_t count, int32_t reptLineNo, char const *body, size_t size); void fstk_RunFor(char const *symName, int32_t start, int32_t stop, int32_t step, - int32_t reptLineNo, char *body, size_t size); + int32_t reptLineNo, char const *body, size_t size); void fstk_StopRept(); bool fstk_Break(); diff --git a/include/asm/lexer.hpp b/include/asm/lexer.hpp index 0bcfb95a..0da3aafd 100644 --- a/include/asm/lexer.hpp +++ b/include/asm/lexer.hpp @@ -30,7 +30,7 @@ struct Expansion { std::optional name; union { char const *unowned; - char *owned; + char *owned; // Non-`const` only so it can be `free()`d } contents; size_t size; // Length of the contents size_t offset; // Cursor into the contents @@ -43,7 +43,10 @@ struct IfStackEntry { }; struct MmappedLexerState { - char *ptr; // Technically `const` during the lexer's execution + union { + char const *unreferenced; + char *referenced; // Non-`const` only so it can be `munmap()`ped + } ptr; size_t size; size_t offset; bool isReferenced; // If a macro in this file requires not unmapping it @@ -121,7 +124,8 @@ static inline void lexer_SetGfxDigits(char const digits[4]) // `path` is referenced, but not held onto..! bool lexer_OpenFile(LexerState &state, char const *path); -void lexer_OpenFileView(LexerState &state, char const *path, char *buf, size_t size, uint32_t lineNo); +void lexer_OpenFileView(LexerState &state, char const *path, char const *buf, size_t size, + uint32_t lineNo); void lexer_RestartRept(uint32_t lineNo); void lexer_CleanupState(LexerState &state); void lexer_Init(); @@ -138,7 +142,7 @@ void lexer_ReachELSEBlock(); struct CaptureBody { uint32_t lineNo; - char *body; + char const *body; size_t size; }; diff --git a/include/asm/symbol.hpp b/include/asm/symbol.hpp index dc1c353e..2278217b 100644 --- a/include/asm/symbol.hpp +++ b/include/asm/symbol.hpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "asm/section.hpp" @@ -23,12 +24,6 @@ enum SymbolType { SYM_REF // Forward reference to a label }; -// Only used in an anonymous union by `Symbol` -struct strValue { - size_t size; - char *value; -}; - struct Symbol; // For the `sym_IsPC` forward declaration bool sym_IsPC(Symbol const *sym); // For the inline `getSection` method @@ -43,14 +38,11 @@ struct Symbol { bool hasCallback; union { - // If isNumeric() - int32_t value; - int32_t (*numCallback)(); // If hasCallback - // For SYM_MACRO - strValue macro; - // For SYM_EQUS - strValue equs; - char const *(*strCallback)(); // If hasCallback + int32_t value; // If isNumeric() + int32_t (*numCallback)(); // If isNumeric() and hasCallback + std::string_view *macro; // For SYM_MACRO + std::string *equs; // For SYM_EQUS + char const *(*strCallback)(); // For SYM_EQUS if hasCallback }; uint32_t ID; // ID of the symbol in the object file (-1 if none) @@ -71,7 +63,7 @@ struct Symbol { int32_t getValue() const; uint32_t getConstantValue() const; - char const *getStringValue() const { return hasCallback ? strCallback() : equs.value; } + char const *getStringValue() const { return hasCallback ? strCallback() : equs->c_str(); } }; void sym_ForEach(void (*func)(Symbol *)); @@ -94,7 +86,7 @@ Symbol *sym_FindScopedSymbol(char const *symName); // Find a scoped symbol by name; do not return `@` or `_NARG` when they have no value Symbol *sym_FindScopedValidSymbol(char const *symName); Symbol const *sym_GetPC(); -Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char *body, size_t size); +Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char const *body, size_t size); Symbol *sym_Ref(char const *symName); Symbol *sym_AddString(char const *symName, char const *value); Symbol *sym_RedefString(char const *symName, char const *value); diff --git a/include/platform.hpp b/include/platform.hpp index 7483aaec..cf38cd97 100644 --- a/include/platform.hpp +++ b/include/platform.hpp @@ -14,11 +14,6 @@ # include #endif -// MSVC has deprecated strdup in favor of _strdup -#ifdef _MSC_VER -# define strdup _strdup -#endif - // MSVC prefixes the names of S_* macros with underscores, // and doesn't define any S_IS* macros; define them ourselves #ifdef _MSC_VER diff --git a/src/asm/fstack.cpp b/src/asm/fstack.cpp index 4597d8e7..bea78f7a 100644 --- a/src/asm/fstack.cpp +++ b/src/asm/fstack.cpp @@ -396,14 +396,14 @@ void fstk_RunMacro(char const *macroName, MacroArgs *args) Context &context = newContext(fileInfo); - lexer_OpenFileView(context.lexerState, "MACRO", macro->macro.value, macro->macro.size, + lexer_OpenFileView(context.lexerState, "MACRO", macro->macro->data(), macro->macro->size(), macro->fileLine); lexer_SetStateAtEOL(&context.lexerState); context.uniqueID = macro_UseNewUniqueID(); macro_UseNewArgs(args); } -static bool newReptContext(int32_t reptLineNo, char *body, size_t size) +static bool newReptContext(int32_t reptLineNo, char const *body, size_t size) { uint32_t reptDepth = contextStack.top().fileInfo->type == NODE_REPT ? contextStack.top().fileInfo->iters().size() @@ -432,7 +432,7 @@ static bool newReptContext(int32_t reptLineNo, char *body, size_t size) return true; } -void fstk_RunRept(uint32_t count, int32_t reptLineNo, char *body, size_t size) +void fstk_RunRept(uint32_t count, int32_t reptLineNo, char const *body, size_t size) { if (count == 0) return; @@ -443,7 +443,7 @@ void fstk_RunRept(uint32_t count, int32_t reptLineNo, char *body, size_t size) } void fstk_RunFor(char const *symName, int32_t start, int32_t stop, int32_t step, - int32_t reptLineNo, char *body, size_t size) + int32_t reptLineNo, char const *body, size_t size) { Symbol *sym = sym_AddVar(symName, start); diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index 0400b112..61240de4 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -389,9 +389,9 @@ bool lexer_OpenFile(LexerState &state, char const *path) if (!isStdin && fileInfo.st_size > 0) { // Try using `mmap` for better performance - // Important: do NOT assign to `state.mmap.ptr` directly, to avoid a cast that may - // alter an eventual `MAP_FAILED` value. It would also invalidate `state.cbuf.fd`, - // being on the other side of the union. + // Important: do NOT assign to `state.mmap.ptr.referenced` directly, to avoid a + // cast that may alter an eventual `MAP_FAILED` value. It would also invalidate + // `state.cbuf.fd`, being on the other side of the union. void *mappingAddr; mapFile(mappingAddr, state.cbuf.fd, state.path, fileInfo.st_size); @@ -404,7 +404,7 @@ bool lexer_OpenFile(LexerState &state, char const *path) state.isMmapped = true; state.mmap.isReferenced = false; // By default, a state isn't referenced - state.mmap.ptr = (char *)mappingAddr; + state.mmap.ptr.referenced = (char *)mappingAddr; assert(fileInfo.st_size >= 0); state.mmap.size = (size_t)fileInfo.st_size; state.mmap.offset = 0; @@ -433,12 +433,13 @@ bool lexer_OpenFile(LexerState &state, char const *path) return true; } -void lexer_OpenFileView(LexerState &state, char const *path, char *buf, size_t size, uint32_t lineNo) +void lexer_OpenFileView(LexerState &state, char const *path, char const *buf, size_t size, + uint32_t lineNo) { state.path = path; // Used to report read errors in `peekInternal` state.isFile = false; state.isMmapped = true; // It's not *really* mmap()ed, but it behaves the same - state.mmap.ptr = buf; + state.mmap.ptr.unreferenced = buf; state.mmap.size = size; state.mmap.offset = 0; @@ -471,7 +472,7 @@ void lexer_CleanupState(LexerState &state) if (!state.isMmapped) close(state.cbuf.fd); else if (state.isFile && !state.mmap.isReferenced) - munmap(state.mmap.ptr, state.mmap.size); + munmap(state.mmap.ptr.referenced, state.mmap.size); } void lexer_SetMode(enum LexerMode mode) @@ -669,10 +670,10 @@ static int peekInternal(uint8_t distance) PRIu8 " >= %u)\n", distance, LEXER_BUF_SIZE); if (lexerState->isMmapped) { - if (lexerState->mmap.offset + distance >= lexerState->mmap.size) - return EOF; + size_t index = lexerState->mmap.offset + distance; - return (unsigned char)lexerState->mmap.ptr[lexerState->mmap.offset + distance]; + return index < lexerState->mmap.size ? + (uint8_t)lexerState->mmap.ptr.unreferenced[index] : EOF; } if (lexerState->cbuf.nbChars <= distance) { @@ -2290,7 +2291,7 @@ static void startCapture(CaptureBody *capture) capture->lineNo = lexer_GetLineNo(); if (lexerState->isMmapped && lexerState->expansions.empty()) { - capture->body = &lexerState->mmap.ptr[lexerState->mmap.offset]; + capture->body = &lexerState->mmap.ptr.unreferenced[lexerState->mmap.offset]; } else { lexerState->captureCapacity = 128; // The initial size will be twice that assert(lexerState->captureBuf == nullptr); diff --git a/src/asm/symbol.cpp b/src/asm/symbol.cpp index 023d6d65..909ddc9e 100644 --- a/src/asm/symbol.cpp +++ b/src/asm/symbol.cpp @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: MIT */ -// Symboltable and macroargs stuff - #include #include #include @@ -11,6 +9,7 @@ #include #include #include +#include #include #include "asm/fixpoint.hpp" @@ -25,7 +24,6 @@ #include "error.hpp" #include "helpers.hpp" -#include "platform.hpp" // strdup #include "version.hpp" std::map symbols; @@ -141,14 +139,10 @@ static void fullSymbolName(char *output, size_t outputSize, static void assignStringSymbol(Symbol *sym, char const *value) { - char *string = strdup(value); - - if (!string) - fatalerror("No memory for string equate: %s\n", strerror(errno)); - sym->type = SYM_EQUS; - sym->equs.value = string; - sym->equs.size = strlen(string); + sym->equs = new(std::nothrow) std::string(value); + if (!sym->equs) + fatalerror("No memory for string equate: %s\n", strerror(errno)); } Symbol *sym_FindExactSymbol(char const *symName) @@ -215,8 +209,8 @@ void sym_Purge(std::string const &symName) if (sym->name == labelScope) sym_SetCurrentSymbolScope(nullptr); - // FIXME: this leaks sym->equs.value for SYM_EQUS and sym->macro.value for SYM_MACRO, - // but this can't free either of them because the expansion may be purging itself. + // FIXME: this leaks `sym->equs` for SYM_EQUS and `sym->macro` for SYM_MACRO, but + // this can't delete either of them because the expansion may be purging itself. symbols.erase(sym->name); // TODO: ideally, also unref the file stack nodes } @@ -379,8 +373,8 @@ Symbol *sym_RedefString(char const *symName, char const *value) } updateSymbolFilename(sym); - // FIXME: this leaks the previous `sym->equs.value`, but this can't `free(sym->equs.value)` - // because the expansion may be redefining itself. + // FIXME: this leaks the previous `sym->equs`, but this can't delete it because the + // expansion may be redefining itself. assignStringSymbol(sym, value); return sym; @@ -544,7 +538,7 @@ void sym_Export(char const *symName) } // Add a macro definition -Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char *body, size_t size) +Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char const *body, size_t size) { Symbol *sym = createNonrelocSymbol(symName, false); @@ -552,8 +546,10 @@ Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char *body, size_t return nullptr; sym->type = SYM_MACRO; - sym->macro.size = size; - sym->macro.value = body; + sym->macro = new(std::nothrow) std::string_view(body, size); + if (!sym->macro) + fatalerror("No memory for macro: %s\n", strerror(errno)); + setSymbolFilename(sym); // TODO: is this really necessary? // The symbol is created at the line after the `endm`, // override this with the actual definition line