Use std::string_view for macro bodies (#1326)

This removes the last use of `strdup`

This required making a lot of related pointers be `const`.
That in turn conflicted with the need to `munmap()` a pointer
eventually, which was similar to the need to eventually `free()`
an `Expansion`'s contents, so I used the same solution of a
`union`. That lets us normally use the `const` pointer for
`const` correctness, and the non-`const` one for the not-really-
mutating destruction cases.
This commit is contained in:
Sylvie
2024-03-02 13:21:28 -05:00
committed by GitHub
parent 2069a95e0f
commit a71e4086a2
7 changed files with 47 additions and 59 deletions

View File

@@ -61,9 +61,9 @@ std::string *fstk_FindFile(char const *path);
bool yywrap();
void fstk_RunInclude(char const *path);
void fstk_RunMacro(char const *macroName, MacroArgs *args);
void fstk_RunRept(uint32_t count, int32_t reptLineNo, char *body, size_t size);
void fstk_RunRept(uint32_t count, int32_t reptLineNo, char const *body, size_t size);
void fstk_RunFor(char const *symName, int32_t start, int32_t stop, int32_t step,
int32_t reptLineNo, char *body, size_t size);
int32_t reptLineNo, char const *body, size_t size);
void fstk_StopRept();
bool fstk_Break();

View File

@@ -30,7 +30,7 @@ struct Expansion {
std::optional<std::string> name;
union {
char const *unowned;
char *owned;
char *owned; // Non-`const` only so it can be `free()`d
} contents;
size_t size; // Length of the contents
size_t offset; // Cursor into the contents
@@ -43,7 +43,10 @@ struct IfStackEntry {
};
struct MmappedLexerState {
char *ptr; // Technically `const` during the lexer's execution
union {
char const *unreferenced;
char *referenced; // Non-`const` only so it can be `munmap()`ped
} ptr;
size_t size;
size_t offset;
bool isReferenced; // If a macro in this file requires not unmapping it
@@ -121,7 +124,8 @@ static inline void lexer_SetGfxDigits(char const digits[4])
// `path` is referenced, but not held onto..!
bool lexer_OpenFile(LexerState &state, char const *path);
void lexer_OpenFileView(LexerState &state, char const *path, char *buf, size_t size, uint32_t lineNo);
void lexer_OpenFileView(LexerState &state, char const *path, char const *buf, size_t size,
uint32_t lineNo);
void lexer_RestartRept(uint32_t lineNo);
void lexer_CleanupState(LexerState &state);
void lexer_Init();
@@ -138,7 +142,7 @@ void lexer_ReachELSEBlock();
struct CaptureBody {
uint32_t lineNo;
char *body;
char const *body;
size_t size;
};

View File

@@ -6,6 +6,7 @@
#include <stdint.h>
#include <string>
#include <string.h>
#include <string_view>
#include <time.h>
#include "asm/section.hpp"
@@ -23,12 +24,6 @@ enum SymbolType {
SYM_REF // Forward reference to a label
};
// Only used in an anonymous union by `Symbol`
struct strValue {
size_t size;
char *value;
};
struct Symbol; // For the `sym_IsPC` forward declaration
bool sym_IsPC(Symbol const *sym); // For the inline `getSection` method
@@ -43,14 +38,11 @@ struct Symbol {
bool hasCallback;
union {
// If isNumeric()
int32_t value;
int32_t (*numCallback)(); // If hasCallback
// For SYM_MACRO
strValue macro;
// For SYM_EQUS
strValue equs;
char const *(*strCallback)(); // If hasCallback
int32_t value; // If isNumeric()
int32_t (*numCallback)(); // If isNumeric() and hasCallback
std::string_view *macro; // For SYM_MACRO
std::string *equs; // For SYM_EQUS
char const *(*strCallback)(); // For SYM_EQUS if hasCallback
};
uint32_t ID; // ID of the symbol in the object file (-1 if none)
@@ -71,7 +63,7 @@ struct Symbol {
int32_t getValue() const;
uint32_t getConstantValue() const;
char const *getStringValue() const { return hasCallback ? strCallback() : equs.value; }
char const *getStringValue() const { return hasCallback ? strCallback() : equs->c_str(); }
};
void sym_ForEach(void (*func)(Symbol *));
@@ -94,7 +86,7 @@ Symbol *sym_FindScopedSymbol(char const *symName);
// Find a scoped symbol by name; do not return `@` or `_NARG` when they have no value
Symbol *sym_FindScopedValidSymbol(char const *symName);
Symbol const *sym_GetPC();
Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char *body, size_t size);
Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char const *body, size_t size);
Symbol *sym_Ref(char const *symName);
Symbol *sym_AddString(char const *symName, char const *value);
Symbol *sym_RedefString(char const *symName, char const *value);

View File

@@ -14,11 +14,6 @@
# include <strings.h>
#endif
// MSVC has deprecated strdup in favor of _strdup
#ifdef _MSC_VER
# define strdup _strdup
#endif
// MSVC prefixes the names of S_* macros with underscores,
// and doesn't define any S_IS* macros; define them ourselves
#ifdef _MSC_VER

View File

@@ -396,14 +396,14 @@ void fstk_RunMacro(char const *macroName, MacroArgs *args)
Context &context = newContext(fileInfo);
lexer_OpenFileView(context.lexerState, "MACRO", macro->macro.value, macro->macro.size,
lexer_OpenFileView(context.lexerState, "MACRO", macro->macro->data(), macro->macro->size(),
macro->fileLine);
lexer_SetStateAtEOL(&context.lexerState);
context.uniqueID = macro_UseNewUniqueID();
macro_UseNewArgs(args);
}
static bool newReptContext(int32_t reptLineNo, char *body, size_t size)
static bool newReptContext(int32_t reptLineNo, char const *body, size_t size)
{
uint32_t reptDepth = contextStack.top().fileInfo->type == NODE_REPT
? contextStack.top().fileInfo->iters().size()
@@ -432,7 +432,7 @@ static bool newReptContext(int32_t reptLineNo, char *body, size_t size)
return true;
}
void fstk_RunRept(uint32_t count, int32_t reptLineNo, char *body, size_t size)
void fstk_RunRept(uint32_t count, int32_t reptLineNo, char const *body, size_t size)
{
if (count == 0)
return;
@@ -443,7 +443,7 @@ void fstk_RunRept(uint32_t count, int32_t reptLineNo, char *body, size_t size)
}
void fstk_RunFor(char const *symName, int32_t start, int32_t stop, int32_t step,
int32_t reptLineNo, char *body, size_t size)
int32_t reptLineNo, char const *body, size_t size)
{
Symbol *sym = sym_AddVar(symName, start);

View File

@@ -389,9 +389,9 @@ bool lexer_OpenFile(LexerState &state, char const *path)
if (!isStdin && fileInfo.st_size > 0) {
// Try using `mmap` for better performance
// Important: do NOT assign to `state.mmap.ptr` directly, to avoid a cast that may
// alter an eventual `MAP_FAILED` value. It would also invalidate `state.cbuf.fd`,
// being on the other side of the union.
// Important: do NOT assign to `state.mmap.ptr.referenced` directly, to avoid a
// cast that may alter an eventual `MAP_FAILED` value. It would also invalidate
// `state.cbuf.fd`, being on the other side of the union.
void *mappingAddr;
mapFile(mappingAddr, state.cbuf.fd, state.path, fileInfo.st_size);
@@ -404,7 +404,7 @@ bool lexer_OpenFile(LexerState &state, char const *path)
state.isMmapped = true;
state.mmap.isReferenced = false; // By default, a state isn't referenced
state.mmap.ptr = (char *)mappingAddr;
state.mmap.ptr.referenced = (char *)mappingAddr;
assert(fileInfo.st_size >= 0);
state.mmap.size = (size_t)fileInfo.st_size;
state.mmap.offset = 0;
@@ -433,12 +433,13 @@ bool lexer_OpenFile(LexerState &state, char const *path)
return true;
}
void lexer_OpenFileView(LexerState &state, char const *path, char *buf, size_t size, uint32_t lineNo)
void lexer_OpenFileView(LexerState &state, char const *path, char const *buf, size_t size,
uint32_t lineNo)
{
state.path = path; // Used to report read errors in `peekInternal`
state.isFile = false;
state.isMmapped = true; // It's not *really* mmap()ed, but it behaves the same
state.mmap.ptr = buf;
state.mmap.ptr.unreferenced = buf;
state.mmap.size = size;
state.mmap.offset = 0;
@@ -471,7 +472,7 @@ void lexer_CleanupState(LexerState &state)
if (!state.isMmapped)
close(state.cbuf.fd);
else if (state.isFile && !state.mmap.isReferenced)
munmap(state.mmap.ptr, state.mmap.size);
munmap(state.mmap.ptr.referenced, state.mmap.size);
}
void lexer_SetMode(enum LexerMode mode)
@@ -669,10 +670,10 @@ static int peekInternal(uint8_t distance)
PRIu8 " >= %u)\n", distance, LEXER_BUF_SIZE);
if (lexerState->isMmapped) {
if (lexerState->mmap.offset + distance >= lexerState->mmap.size)
return EOF;
size_t index = lexerState->mmap.offset + distance;
return (unsigned char)lexerState->mmap.ptr[lexerState->mmap.offset + distance];
return index < lexerState->mmap.size ?
(uint8_t)lexerState->mmap.ptr.unreferenced[index] : EOF;
}
if (lexerState->cbuf.nbChars <= distance) {
@@ -2290,7 +2291,7 @@ static void startCapture(CaptureBody *capture)
capture->lineNo = lexer_GetLineNo();
if (lexerState->isMmapped && lexerState->expansions.empty()) {
capture->body = &lexerState->mmap.ptr[lexerState->mmap.offset];
capture->body = &lexerState->mmap.ptr.unreferenced[lexerState->mmap.offset];
} else {
lexerState->captureCapacity = 128; // The initial size will be twice that
assert(lexerState->captureBuf == nullptr);

View File

@@ -1,7 +1,5 @@
/* SPDX-License-Identifier: MIT */
// Symboltable and macroargs stuff
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
@@ -11,6 +9,7 @@
#include <stdio.h>
#include <string>
#include <string.h>
#include <string_view>
#include <time.h>
#include "asm/fixpoint.hpp"
@@ -25,7 +24,6 @@
#include "error.hpp"
#include "helpers.hpp"
#include "platform.hpp" // strdup
#include "version.hpp"
std::map<std::string, Symbol> symbols;
@@ -141,14 +139,10 @@ static void fullSymbolName(char *output, size_t outputSize,
static void assignStringSymbol(Symbol *sym, char const *value)
{
char *string = strdup(value);
if (!string)
fatalerror("No memory for string equate: %s\n", strerror(errno));
sym->type = SYM_EQUS;
sym->equs.value = string;
sym->equs.size = strlen(string);
sym->equs = new(std::nothrow) std::string(value);
if (!sym->equs)
fatalerror("No memory for string equate: %s\n", strerror(errno));
}
Symbol *sym_FindExactSymbol(char const *symName)
@@ -215,8 +209,8 @@ void sym_Purge(std::string const &symName)
if (sym->name == labelScope)
sym_SetCurrentSymbolScope(nullptr);
// FIXME: this leaks sym->equs.value for SYM_EQUS and sym->macro.value for SYM_MACRO,
// but this can't free either of them because the expansion may be purging itself.
// FIXME: this leaks `sym->equs` for SYM_EQUS and `sym->macro` for SYM_MACRO, but
// this can't delete either of them because the expansion may be purging itself.
symbols.erase(sym->name);
// TODO: ideally, also unref the file stack nodes
}
@@ -379,8 +373,8 @@ Symbol *sym_RedefString(char const *symName, char const *value)
}
updateSymbolFilename(sym);
// FIXME: this leaks the previous `sym->equs.value`, but this can't `free(sym->equs.value)`
// because the expansion may be redefining itself.
// FIXME: this leaks the previous `sym->equs`, but this can't delete it because the
// expansion may be redefining itself.
assignStringSymbol(sym, value);
return sym;
@@ -544,7 +538,7 @@ void sym_Export(char const *symName)
}
// Add a macro definition
Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char *body, size_t size)
Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char const *body, size_t size)
{
Symbol *sym = createNonrelocSymbol(symName, false);
@@ -552,8 +546,10 @@ Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char *body, size_t
return nullptr;
sym->type = SYM_MACRO;
sym->macro.size = size;
sym->macro.value = body;
sym->macro = new(std::nothrow) std::string_view(body, size);
if (!sym->macro)
fatalerror("No memory for macro: %s\n", strerror(errno));
setSymbolFilename(sym); // TODO: is this really necessary?
// The symbol is created at the line after the `endm`,
// override this with the actual definition line