Use std::string_view for macro bodies (#1326)

This removes the last use of `strdup`

This required making a lot of related pointers be `const`.
That in turn conflicted with the need to `munmap()` a pointer
eventually, which was similar to the need to eventually `free()`
an `Expansion`'s contents, so I used the same solution of a
`union`. That lets us normally use the `const` pointer for
`const` correctness, and the non-`const` one for the not-really-
mutating destruction cases.
This commit is contained in:
Sylvie
2024-03-02 13:21:28 -05:00
committed by GitHub
parent 2069a95e0f
commit a71e4086a2
7 changed files with 47 additions and 59 deletions

View File

@@ -396,14 +396,14 @@ void fstk_RunMacro(char const *macroName, MacroArgs *args)
Context &context = newContext(fileInfo);
lexer_OpenFileView(context.lexerState, "MACRO", macro->macro.value, macro->macro.size,
lexer_OpenFileView(context.lexerState, "MACRO", macro->macro->data(), macro->macro->size(),
macro->fileLine);
lexer_SetStateAtEOL(&context.lexerState);
context.uniqueID = macro_UseNewUniqueID();
macro_UseNewArgs(args);
}
static bool newReptContext(int32_t reptLineNo, char *body, size_t size)
static bool newReptContext(int32_t reptLineNo, char const *body, size_t size)
{
uint32_t reptDepth = contextStack.top().fileInfo->type == NODE_REPT
? contextStack.top().fileInfo->iters().size()
@@ -432,7 +432,7 @@ static bool newReptContext(int32_t reptLineNo, char *body, size_t size)
return true;
}
void fstk_RunRept(uint32_t count, int32_t reptLineNo, char *body, size_t size)
void fstk_RunRept(uint32_t count, int32_t reptLineNo, char const *body, size_t size)
{
if (count == 0)
return;
@@ -443,7 +443,7 @@ void fstk_RunRept(uint32_t count, int32_t reptLineNo, char *body, size_t size)
}
void fstk_RunFor(char const *symName, int32_t start, int32_t stop, int32_t step,
int32_t reptLineNo, char *body, size_t size)
int32_t reptLineNo, char const *body, size_t size)
{
Symbol *sym = sym_AddVar(symName, start);

View File

@@ -389,9 +389,9 @@ bool lexer_OpenFile(LexerState &state, char const *path)
if (!isStdin && fileInfo.st_size > 0) {
// Try using `mmap` for better performance
// Important: do NOT assign to `state.mmap.ptr` directly, to avoid a cast that may
// alter an eventual `MAP_FAILED` value. It would also invalidate `state.cbuf.fd`,
// being on the other side of the union.
// Important: do NOT assign to `state.mmap.ptr.referenced` directly, to avoid a
// cast that may alter an eventual `MAP_FAILED` value. It would also invalidate
// `state.cbuf.fd`, being on the other side of the union.
void *mappingAddr;
mapFile(mappingAddr, state.cbuf.fd, state.path, fileInfo.st_size);
@@ -404,7 +404,7 @@ bool lexer_OpenFile(LexerState &state, char const *path)
state.isMmapped = true;
state.mmap.isReferenced = false; // By default, a state isn't referenced
state.mmap.ptr = (char *)mappingAddr;
state.mmap.ptr.referenced = (char *)mappingAddr;
assert(fileInfo.st_size >= 0);
state.mmap.size = (size_t)fileInfo.st_size;
state.mmap.offset = 0;
@@ -433,12 +433,13 @@ bool lexer_OpenFile(LexerState &state, char const *path)
return true;
}
void lexer_OpenFileView(LexerState &state, char const *path, char *buf, size_t size, uint32_t lineNo)
void lexer_OpenFileView(LexerState &state, char const *path, char const *buf, size_t size,
uint32_t lineNo)
{
state.path = path; // Used to report read errors in `peekInternal`
state.isFile = false;
state.isMmapped = true; // It's not *really* mmap()ed, but it behaves the same
state.mmap.ptr = buf;
state.mmap.ptr.unreferenced = buf;
state.mmap.size = size;
state.mmap.offset = 0;
@@ -471,7 +472,7 @@ void lexer_CleanupState(LexerState &state)
if (!state.isMmapped)
close(state.cbuf.fd);
else if (state.isFile && !state.mmap.isReferenced)
munmap(state.mmap.ptr, state.mmap.size);
munmap(state.mmap.ptr.referenced, state.mmap.size);
}
void lexer_SetMode(enum LexerMode mode)
@@ -669,10 +670,10 @@ static int peekInternal(uint8_t distance)
PRIu8 " >= %u)\n", distance, LEXER_BUF_SIZE);
if (lexerState->isMmapped) {
if (lexerState->mmap.offset + distance >= lexerState->mmap.size)
return EOF;
size_t index = lexerState->mmap.offset + distance;
return (unsigned char)lexerState->mmap.ptr[lexerState->mmap.offset + distance];
return index < lexerState->mmap.size ?
(uint8_t)lexerState->mmap.ptr.unreferenced[index] : EOF;
}
if (lexerState->cbuf.nbChars <= distance) {
@@ -2290,7 +2291,7 @@ static void startCapture(CaptureBody *capture)
capture->lineNo = lexer_GetLineNo();
if (lexerState->isMmapped && lexerState->expansions.empty()) {
capture->body = &lexerState->mmap.ptr[lexerState->mmap.offset];
capture->body = &lexerState->mmap.ptr.unreferenced[lexerState->mmap.offset];
} else {
lexerState->captureCapacity = 128; // The initial size will be twice that
assert(lexerState->captureBuf == nullptr);

View File

@@ -1,7 +1,5 @@
/* SPDX-License-Identifier: MIT */
// Symboltable and macroargs stuff
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
@@ -11,6 +9,7 @@
#include <stdio.h>
#include <string>
#include <string.h>
#include <string_view>
#include <time.h>
#include "asm/fixpoint.hpp"
@@ -25,7 +24,6 @@
#include "error.hpp"
#include "helpers.hpp"
#include "platform.hpp" // strdup
#include "version.hpp"
std::map<std::string, Symbol> symbols;
@@ -141,14 +139,10 @@ static void fullSymbolName(char *output, size_t outputSize,
static void assignStringSymbol(Symbol *sym, char const *value)
{
char *string = strdup(value);
if (!string)
fatalerror("No memory for string equate: %s\n", strerror(errno));
sym->type = SYM_EQUS;
sym->equs.value = string;
sym->equs.size = strlen(string);
sym->equs = new(std::nothrow) std::string(value);
if (!sym->equs)
fatalerror("No memory for string equate: %s\n", strerror(errno));
}
Symbol *sym_FindExactSymbol(char const *symName)
@@ -215,8 +209,8 @@ void sym_Purge(std::string const &symName)
if (sym->name == labelScope)
sym_SetCurrentSymbolScope(nullptr);
// FIXME: this leaks sym->equs.value for SYM_EQUS and sym->macro.value for SYM_MACRO,
// but this can't free either of them because the expansion may be purging itself.
// FIXME: this leaks `sym->equs` for SYM_EQUS and `sym->macro` for SYM_MACRO, but
// this can't delete either of them because the expansion may be purging itself.
symbols.erase(sym->name);
// TODO: ideally, also unref the file stack nodes
}
@@ -379,8 +373,8 @@ Symbol *sym_RedefString(char const *symName, char const *value)
}
updateSymbolFilename(sym);
// FIXME: this leaks the previous `sym->equs.value`, but this can't `free(sym->equs.value)`
// because the expansion may be redefining itself.
// FIXME: this leaks the previous `sym->equs`, but this can't delete it because the
// expansion may be redefining itself.
assignStringSymbol(sym, value);
return sym;
@@ -544,7 +538,7 @@ void sym_Export(char const *symName)
}
// Add a macro definition
Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char *body, size_t size)
Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char const *body, size_t size)
{
Symbol *sym = createNonrelocSymbol(symName, false);
@@ -552,8 +546,10 @@ Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char *body, size_t
return nullptr;
sym->type = SYM_MACRO;
sym->macro.size = size;
sym->macro.value = body;
sym->macro = new(std::nothrow) std::string_view(body, size);
if (!sym->macro)
fatalerror("No memory for macro: %s\n", strerror(errno));
setSymbolFilename(sym); // TODO: is this really necessary?
// The symbol is created at the line after the `endm`,
// override this with the actual definition line