Use std::string_view for macro bodies (#1326)

This removes the last use of `strdup`

This required making a lot of related pointers be `const`.
That in turn conflicted with the need to `munmap()` a pointer
eventually, which was similar to the need to eventually `free()`
an `Expansion`'s contents, so I used the same solution of a
`union`. That lets us normally use the `const` pointer for
`const` correctness, and the non-`const` one for the not-really-
mutating destruction cases.
This commit is contained in:
Sylvie
2024-03-02 13:21:28 -05:00
committed by GitHub
parent 2069a95e0f
commit a71e4086a2
7 changed files with 47 additions and 59 deletions

View File

@@ -61,9 +61,9 @@ std::string *fstk_FindFile(char const *path);
bool yywrap(); bool yywrap();
void fstk_RunInclude(char const *path); void fstk_RunInclude(char const *path);
void fstk_RunMacro(char const *macroName, MacroArgs *args); void fstk_RunMacro(char const *macroName, MacroArgs *args);
void fstk_RunRept(uint32_t count, int32_t reptLineNo, char *body, size_t size); void fstk_RunRept(uint32_t count, int32_t reptLineNo, char const *body, size_t size);
void fstk_RunFor(char const *symName, int32_t start, int32_t stop, int32_t step, void fstk_RunFor(char const *symName, int32_t start, int32_t stop, int32_t step,
int32_t reptLineNo, char *body, size_t size); int32_t reptLineNo, char const *body, size_t size);
void fstk_StopRept(); void fstk_StopRept();
bool fstk_Break(); bool fstk_Break();

View File

@@ -30,7 +30,7 @@ struct Expansion {
std::optional<std::string> name; std::optional<std::string> name;
union { union {
char const *unowned; char const *unowned;
char *owned; char *owned; // Non-`const` only so it can be `free()`d
} contents; } contents;
size_t size; // Length of the contents size_t size; // Length of the contents
size_t offset; // Cursor into the contents size_t offset; // Cursor into the contents
@@ -43,7 +43,10 @@ struct IfStackEntry {
}; };
struct MmappedLexerState { struct MmappedLexerState {
char *ptr; // Technically `const` during the lexer's execution union {
char const *unreferenced;
char *referenced; // Non-`const` only so it can be `munmap()`ped
} ptr;
size_t size; size_t size;
size_t offset; size_t offset;
bool isReferenced; // If a macro in this file requires not unmapping it bool isReferenced; // If a macro in this file requires not unmapping it
@@ -121,7 +124,8 @@ static inline void lexer_SetGfxDigits(char const digits[4])
// `path` is referenced, but not held onto..! // `path` is referenced, but not held onto..!
bool lexer_OpenFile(LexerState &state, char const *path); bool lexer_OpenFile(LexerState &state, char const *path);
void lexer_OpenFileView(LexerState &state, char const *path, char *buf, size_t size, uint32_t lineNo); void lexer_OpenFileView(LexerState &state, char const *path, char const *buf, size_t size,
uint32_t lineNo);
void lexer_RestartRept(uint32_t lineNo); void lexer_RestartRept(uint32_t lineNo);
void lexer_CleanupState(LexerState &state); void lexer_CleanupState(LexerState &state);
void lexer_Init(); void lexer_Init();
@@ -138,7 +142,7 @@ void lexer_ReachELSEBlock();
struct CaptureBody { struct CaptureBody {
uint32_t lineNo; uint32_t lineNo;
char *body; char const *body;
size_t size; size_t size;
}; };

View File

@@ -6,6 +6,7 @@
#include <stdint.h> #include <stdint.h>
#include <string> #include <string>
#include <string.h> #include <string.h>
#include <string_view>
#include <time.h> #include <time.h>
#include "asm/section.hpp" #include "asm/section.hpp"
@@ -23,12 +24,6 @@ enum SymbolType {
SYM_REF // Forward reference to a label SYM_REF // Forward reference to a label
}; };
// Only used in an anonymous union by `Symbol`
struct strValue {
size_t size;
char *value;
};
struct Symbol; // For the `sym_IsPC` forward declaration struct Symbol; // For the `sym_IsPC` forward declaration
bool sym_IsPC(Symbol const *sym); // For the inline `getSection` method bool sym_IsPC(Symbol const *sym); // For the inline `getSection` method
@@ -43,14 +38,11 @@ struct Symbol {
bool hasCallback; bool hasCallback;
union { union {
// If isNumeric() int32_t value; // If isNumeric()
int32_t value; int32_t (*numCallback)(); // If isNumeric() and hasCallback
int32_t (*numCallback)(); // If hasCallback std::string_view *macro; // For SYM_MACRO
// For SYM_MACRO std::string *equs; // For SYM_EQUS
strValue macro; char const *(*strCallback)(); // For SYM_EQUS if hasCallback
// For SYM_EQUS
strValue equs;
char const *(*strCallback)(); // If hasCallback
}; };
uint32_t ID; // ID of the symbol in the object file (-1 if none) uint32_t ID; // ID of the symbol in the object file (-1 if none)
@@ -71,7 +63,7 @@ struct Symbol {
int32_t getValue() const; int32_t getValue() const;
uint32_t getConstantValue() const; uint32_t getConstantValue() const;
char const *getStringValue() const { return hasCallback ? strCallback() : equs.value; } char const *getStringValue() const { return hasCallback ? strCallback() : equs->c_str(); }
}; };
void sym_ForEach(void (*func)(Symbol *)); void sym_ForEach(void (*func)(Symbol *));
@@ -94,7 +86,7 @@ Symbol *sym_FindScopedSymbol(char const *symName);
// Find a scoped symbol by name; do not return `@` or `_NARG` when they have no value // Find a scoped symbol by name; do not return `@` or `_NARG` when they have no value
Symbol *sym_FindScopedValidSymbol(char const *symName); Symbol *sym_FindScopedValidSymbol(char const *symName);
Symbol const *sym_GetPC(); Symbol const *sym_GetPC();
Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char *body, size_t size); Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char const *body, size_t size);
Symbol *sym_Ref(char const *symName); Symbol *sym_Ref(char const *symName);
Symbol *sym_AddString(char const *symName, char const *value); Symbol *sym_AddString(char const *symName, char const *value);
Symbol *sym_RedefString(char const *symName, char const *value); Symbol *sym_RedefString(char const *symName, char const *value);

View File

@@ -14,11 +14,6 @@
# include <strings.h> # include <strings.h>
#endif #endif
// MSVC has deprecated strdup in favor of _strdup
#ifdef _MSC_VER
# define strdup _strdup
#endif
// MSVC prefixes the names of S_* macros with underscores, // MSVC prefixes the names of S_* macros with underscores,
// and doesn't define any S_IS* macros; define them ourselves // and doesn't define any S_IS* macros; define them ourselves
#ifdef _MSC_VER #ifdef _MSC_VER

View File

@@ -396,14 +396,14 @@ void fstk_RunMacro(char const *macroName, MacroArgs *args)
Context &context = newContext(fileInfo); Context &context = newContext(fileInfo);
lexer_OpenFileView(context.lexerState, "MACRO", macro->macro.value, macro->macro.size, lexer_OpenFileView(context.lexerState, "MACRO", macro->macro->data(), macro->macro->size(),
macro->fileLine); macro->fileLine);
lexer_SetStateAtEOL(&context.lexerState); lexer_SetStateAtEOL(&context.lexerState);
context.uniqueID = macro_UseNewUniqueID(); context.uniqueID = macro_UseNewUniqueID();
macro_UseNewArgs(args); macro_UseNewArgs(args);
} }
static bool newReptContext(int32_t reptLineNo, char *body, size_t size) static bool newReptContext(int32_t reptLineNo, char const *body, size_t size)
{ {
uint32_t reptDepth = contextStack.top().fileInfo->type == NODE_REPT uint32_t reptDepth = contextStack.top().fileInfo->type == NODE_REPT
? contextStack.top().fileInfo->iters().size() ? contextStack.top().fileInfo->iters().size()
@@ -432,7 +432,7 @@ static bool newReptContext(int32_t reptLineNo, char *body, size_t size)
return true; return true;
} }
void fstk_RunRept(uint32_t count, int32_t reptLineNo, char *body, size_t size) void fstk_RunRept(uint32_t count, int32_t reptLineNo, char const *body, size_t size)
{ {
if (count == 0) if (count == 0)
return; return;
@@ -443,7 +443,7 @@ void fstk_RunRept(uint32_t count, int32_t reptLineNo, char *body, size_t size)
} }
void fstk_RunFor(char const *symName, int32_t start, int32_t stop, int32_t step, void fstk_RunFor(char const *symName, int32_t start, int32_t stop, int32_t step,
int32_t reptLineNo, char *body, size_t size) int32_t reptLineNo, char const *body, size_t size)
{ {
Symbol *sym = sym_AddVar(symName, start); Symbol *sym = sym_AddVar(symName, start);

View File

@@ -389,9 +389,9 @@ bool lexer_OpenFile(LexerState &state, char const *path)
if (!isStdin && fileInfo.st_size > 0) { if (!isStdin && fileInfo.st_size > 0) {
// Try using `mmap` for better performance // Try using `mmap` for better performance
// Important: do NOT assign to `state.mmap.ptr` directly, to avoid a cast that may // Important: do NOT assign to `state.mmap.ptr.referenced` directly, to avoid a
// alter an eventual `MAP_FAILED` value. It would also invalidate `state.cbuf.fd`, // cast that may alter an eventual `MAP_FAILED` value. It would also invalidate
// being on the other side of the union. // `state.cbuf.fd`, being on the other side of the union.
void *mappingAddr; void *mappingAddr;
mapFile(mappingAddr, state.cbuf.fd, state.path, fileInfo.st_size); mapFile(mappingAddr, state.cbuf.fd, state.path, fileInfo.st_size);
@@ -404,7 +404,7 @@ bool lexer_OpenFile(LexerState &state, char const *path)
state.isMmapped = true; state.isMmapped = true;
state.mmap.isReferenced = false; // By default, a state isn't referenced state.mmap.isReferenced = false; // By default, a state isn't referenced
state.mmap.ptr = (char *)mappingAddr; state.mmap.ptr.referenced = (char *)mappingAddr;
assert(fileInfo.st_size >= 0); assert(fileInfo.st_size >= 0);
state.mmap.size = (size_t)fileInfo.st_size; state.mmap.size = (size_t)fileInfo.st_size;
state.mmap.offset = 0; state.mmap.offset = 0;
@@ -433,12 +433,13 @@ bool lexer_OpenFile(LexerState &state, char const *path)
return true; return true;
} }
void lexer_OpenFileView(LexerState &state, char const *path, char *buf, size_t size, uint32_t lineNo) void lexer_OpenFileView(LexerState &state, char const *path, char const *buf, size_t size,
uint32_t lineNo)
{ {
state.path = path; // Used to report read errors in `peekInternal` state.path = path; // Used to report read errors in `peekInternal`
state.isFile = false; state.isFile = false;
state.isMmapped = true; // It's not *really* mmap()ed, but it behaves the same state.isMmapped = true; // It's not *really* mmap()ed, but it behaves the same
state.mmap.ptr = buf; state.mmap.ptr.unreferenced = buf;
state.mmap.size = size; state.mmap.size = size;
state.mmap.offset = 0; state.mmap.offset = 0;
@@ -471,7 +472,7 @@ void lexer_CleanupState(LexerState &state)
if (!state.isMmapped) if (!state.isMmapped)
close(state.cbuf.fd); close(state.cbuf.fd);
else if (state.isFile && !state.mmap.isReferenced) else if (state.isFile && !state.mmap.isReferenced)
munmap(state.mmap.ptr, state.mmap.size); munmap(state.mmap.ptr.referenced, state.mmap.size);
} }
void lexer_SetMode(enum LexerMode mode) void lexer_SetMode(enum LexerMode mode)
@@ -669,10 +670,10 @@ static int peekInternal(uint8_t distance)
PRIu8 " >= %u)\n", distance, LEXER_BUF_SIZE); PRIu8 " >= %u)\n", distance, LEXER_BUF_SIZE);
if (lexerState->isMmapped) { if (lexerState->isMmapped) {
if (lexerState->mmap.offset + distance >= lexerState->mmap.size) size_t index = lexerState->mmap.offset + distance;
return EOF;
return (unsigned char)lexerState->mmap.ptr[lexerState->mmap.offset + distance]; return index < lexerState->mmap.size ?
(uint8_t)lexerState->mmap.ptr.unreferenced[index] : EOF;
} }
if (lexerState->cbuf.nbChars <= distance) { if (lexerState->cbuf.nbChars <= distance) {
@@ -2290,7 +2291,7 @@ static void startCapture(CaptureBody *capture)
capture->lineNo = lexer_GetLineNo(); capture->lineNo = lexer_GetLineNo();
if (lexerState->isMmapped && lexerState->expansions.empty()) { if (lexerState->isMmapped && lexerState->expansions.empty()) {
capture->body = &lexerState->mmap.ptr[lexerState->mmap.offset]; capture->body = &lexerState->mmap.ptr.unreferenced[lexerState->mmap.offset];
} else { } else {
lexerState->captureCapacity = 128; // The initial size will be twice that lexerState->captureCapacity = 128; // The initial size will be twice that
assert(lexerState->captureBuf == nullptr); assert(lexerState->captureBuf == nullptr);

View File

@@ -1,7 +1,5 @@
/* SPDX-License-Identifier: MIT */ /* SPDX-License-Identifier: MIT */
// Symboltable and macroargs stuff
#include <assert.h> #include <assert.h>
#include <errno.h> #include <errno.h>
#include <inttypes.h> #include <inttypes.h>
@@ -11,6 +9,7 @@
#include <stdio.h> #include <stdio.h>
#include <string> #include <string>
#include <string.h> #include <string.h>
#include <string_view>
#include <time.h> #include <time.h>
#include "asm/fixpoint.hpp" #include "asm/fixpoint.hpp"
@@ -25,7 +24,6 @@
#include "error.hpp" #include "error.hpp"
#include "helpers.hpp" #include "helpers.hpp"
#include "platform.hpp" // strdup
#include "version.hpp" #include "version.hpp"
std::map<std::string, Symbol> symbols; std::map<std::string, Symbol> symbols;
@@ -141,14 +139,10 @@ static void fullSymbolName(char *output, size_t outputSize,
static void assignStringSymbol(Symbol *sym, char const *value) static void assignStringSymbol(Symbol *sym, char const *value)
{ {
char *string = strdup(value);
if (!string)
fatalerror("No memory for string equate: %s\n", strerror(errno));
sym->type = SYM_EQUS; sym->type = SYM_EQUS;
sym->equs.value = string; sym->equs = new(std::nothrow) std::string(value);
sym->equs.size = strlen(string); if (!sym->equs)
fatalerror("No memory for string equate: %s\n", strerror(errno));
} }
Symbol *sym_FindExactSymbol(char const *symName) Symbol *sym_FindExactSymbol(char const *symName)
@@ -215,8 +209,8 @@ void sym_Purge(std::string const &symName)
if (sym->name == labelScope) if (sym->name == labelScope)
sym_SetCurrentSymbolScope(nullptr); sym_SetCurrentSymbolScope(nullptr);
// FIXME: this leaks sym->equs.value for SYM_EQUS and sym->macro.value for SYM_MACRO, // FIXME: this leaks `sym->equs` for SYM_EQUS and `sym->macro` for SYM_MACRO, but
// but this can't free either of them because the expansion may be purging itself. // this can't delete either of them because the expansion may be purging itself.
symbols.erase(sym->name); symbols.erase(sym->name);
// TODO: ideally, also unref the file stack nodes // TODO: ideally, also unref the file stack nodes
} }
@@ -379,8 +373,8 @@ Symbol *sym_RedefString(char const *symName, char const *value)
} }
updateSymbolFilename(sym); updateSymbolFilename(sym);
// FIXME: this leaks the previous `sym->equs.value`, but this can't `free(sym->equs.value)` // FIXME: this leaks the previous `sym->equs`, but this can't delete it because the
// because the expansion may be redefining itself. // expansion may be redefining itself.
assignStringSymbol(sym, value); assignStringSymbol(sym, value);
return sym; return sym;
@@ -544,7 +538,7 @@ void sym_Export(char const *symName)
} }
// Add a macro definition // Add a macro definition
Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char *body, size_t size) Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char const *body, size_t size)
{ {
Symbol *sym = createNonrelocSymbol(symName, false); Symbol *sym = createNonrelocSymbol(symName, false);
@@ -552,8 +546,10 @@ Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char *body, size_t
return nullptr; return nullptr;
sym->type = SYM_MACRO; sym->type = SYM_MACRO;
sym->macro.size = size; sym->macro = new(std::nothrow) std::string_view(body, size);
sym->macro.value = body; if (!sym->macro)
fatalerror("No memory for macro: %s\n", strerror(errno));
setSymbolFilename(sym); // TODO: is this really necessary? setSymbolFilename(sym); // TODO: is this really necessary?
// The symbol is created at the line after the `endm`, // The symbol is created at the line after the `endm`,
// override this with the actual definition line // override this with the actual definition line