Use content-specific destructors for lexer state

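Also rename the `LexerState` content structs from `*LexerState` to `*Content` (e.g. `MmappedLexerState` becomes `MmappedContent`, and likewise for the `Viewed` and `Buffered` variants).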
This commit is contained in:
Rangi42
2024-03-26 11:55:30 -04:00
committed by Sylvie
parent 8b6ae994b1
commit a167d23d01
2 changed files with 46 additions and 51 deletions

View File

@@ -41,24 +41,32 @@ struct IfStackEntry {
bool reachedElseBlock; // Whether an ELSE block ran already bool reachedElseBlock; // Whether an ELSE block ran already
}; };
struct MmappedLexerState { struct BufferedContent {
int fd;
size_t index = 0; // Read index into the buffer
char buf[LEXER_BUF_SIZE] = {}; // Circular buffer
size_t nbChars = 0; // Number of "fresh" chars in the buffer
BufferedContent(int fd_) : fd(fd_) {}
~BufferedContent();
};
struct MmappedContent {
char *ptr; char *ptr;
size_t size; size_t size;
size_t offset; size_t offset = 0;
bool isReferenced; // If a macro in this file requires not unmapping it bool isReferenced = false; // If a macro in this file requires not unmapping it
MmappedContent(char *ptr_, size_t size_) : ptr(ptr_), size(size_) {}
~MmappedContent();
}; };
struct ViewedLexerState { struct ViewedContent {
char const *ptr; char const *ptr;
size_t size; size_t size;
size_t offset; size_t offset = 0;
};
struct BufferedLexerState { ViewedContent(char const *ptr_, size_t size_) : ptr(ptr_), size(size_) {}
int fd;
size_t index; // Read index into the buffer
char buf[LEXER_BUF_SIZE]; // Circular buffer
size_t nbChars; // Number of "fresh" chars in the buffer
}; };
struct LexerState { struct LexerState {
@@ -82,19 +90,10 @@ struct LexerState {
bool expandStrings; bool expandStrings;
std::deque<Expansion> expansions; // Front is the innermost current expansion std::deque<Expansion> expansions; // Front is the innermost current expansion
std::variant<std::monostate, MmappedLexerState, ViewedLexerState, BufferedLexerState> content; std::variant<std::monostate, MmappedContent, ViewedContent, BufferedContent> content;
LexerState() = default;
LexerState(LexerState &&) = default;
LexerState(LexerState const &) = delete;
// This destructor unmaps or closes the content file if applicable.
// As such, lexer states should not be copyable.
~LexerState(); ~LexerState();
LexerState &operator=(LexerState &&) = default;
LexerState &operator=(LexerState const &) = delete;
void setAsCurrentState(); void setAsCurrentState();
bool setFileAsNextState(std::string const &filePath, bool updateStateNow); bool setFileAsNextState(std::string const &filePath, bool updateStateNow);
void setViewAsNextState(char const *name, char const *buf, size_t size, uint32_t lineNo_); void setViewAsNextState(char const *name, char const *buf, size_t size, uint32_t lineNo_);

View File

@@ -1,7 +1,6 @@
/* SPDX-License-Identifier: MIT */ /* SPDX-License-Identifier: MIT */
#include "asm/lexer.hpp" #include "asm/lexer.hpp"
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/types.h> #include <sys/types.h>
@@ -388,7 +387,7 @@ void LexerState::setAsCurrentState() {
bool LexerState::setFileAsNextState(std::string const &filePath, bool updateStateNow) { bool LexerState::setFileAsNextState(std::string const &filePath, bool updateStateNow) {
if (filePath == "-") { if (filePath == "-") {
path = "<stdin>"; path = "<stdin>";
content = BufferedLexerState{.fd = STDIN_FILENO, .index = 0, .buf = {}, .nbChars = 0}; content.emplace<BufferedContent>(STDIN_FILENO);
if (verbose) if (verbose)
printf("Opening stdin\n"); printf("Opening stdin\n");
} else { } else {
@@ -414,12 +413,7 @@ bool LexerState::setFileAsNextState(std::string const &filePath, bool updateStat
if (mappingAddr != MAP_FAILED) { if (mappingAddr != MAP_FAILED) {
close(fd); close(fd);
content = MmappedLexerState{ content.emplace<MmappedContent>((char *)mappingAddr, (size_t)statBuf.st_size);
.ptr = (char *)mappingAddr,
.size = (size_t)statBuf.st_size,
.offset = 0,
.isReferenced = false,
};
if (verbose) if (verbose)
printf("File \"%s\" is mmap()ped\n", path.c_str()); printf("File \"%s\" is mmap()ped\n", path.c_str());
isMmapped = true; isMmapped = true;
@@ -428,7 +422,7 @@ bool LexerState::setFileAsNextState(std::string const &filePath, bool updateStat
if (!isMmapped) { if (!isMmapped) {
// Sometimes mmap() fails or isn't available, so have a fallback // Sometimes mmap() fails or isn't available, so have a fallback
content = BufferedLexerState{.fd = fd, .index = 0, .buf = {}, .nbChars = 0}; content.emplace<BufferedContent>(fd);
if (verbose) { if (verbose) {
if (statBuf.st_size == 0) { if (statBuf.st_size == 0) {
printf("File \"%s\" is empty\n", path.c_str()); printf("File \"%s\" is empty\n", path.c_str());
@@ -453,15 +447,15 @@ void LexerState::setViewAsNextState(
char const *name, char const *buf, size_t size, uint32_t lineNo_ char const *name, char const *buf, size_t size, uint32_t lineNo_
) { ) {
path = name; // Used to report read errors in `peekInternal` path = name; // Used to report read errors in `peekInternal`
content = ViewedLexerState{.ptr = buf, .size = size, .offset = 0}; content.emplace<ViewedContent>(buf, size);
clear(lineNo_); clear(lineNo_);
lexerStateEOL = this; lexerStateEOL = this;
} }
void lexer_RestartRept(uint32_t lineNo) { void lexer_RestartRept(uint32_t lineNo) {
if (auto *mmap = std::get_if<MmappedLexerState>(&lexerState->content); mmap) { if (auto *mmap = std::get_if<MmappedContent>(&lexerState->content); mmap) {
mmap->offset = 0; mmap->offset = 0;
} else if (auto *view = std::get_if<ViewedLexerState>(&lexerState->content); view) { } else if (auto *view = std::get_if<ViewedContent>(&lexerState->content); view) {
view->offset = 0; view->offset = 0;
} }
lexerState->clear(lineNo); lexerState->clear(lineNo);
@@ -480,13 +474,15 @@ LexerState::~LexerState() {
// It could be argued that deleting a state that's scheduled for EOF could simply clear // It could be argued that deleting a state that's scheduled for EOF could simply clear
// `lexerStateEOL`, but there's currently no situation in which this should happen. // `lexerStateEOL`, but there's currently no situation in which this should happen.
assert(this != lexerStateEOL); assert(this != lexerStateEOL);
}
if (auto *mmap = std::get_if<MmappedLexerState>(&content); mmap) { BufferedContent::~BufferedContent() {
if (!mmap->isReferenced) close(fd);
munmap(mmap->ptr, mmap->size); }
} else if (auto *cbuf = std::get_if<BufferedLexerState>(&content); cbuf) {
close(cbuf->fd); MmappedContent::~MmappedContent() {
} if (!isReferenced)
munmap(ptr, size);
} }
void lexer_SetMode(LexerMode mode) { void lexer_SetMode(LexerMode mode) {
@@ -635,7 +631,7 @@ static std::shared_ptr<std::string> readMacroArg(char name) {
} }
} }
static size_t readInternal(BufferedLexerState &cbuf, size_t bufIndex, size_t nbChars) { static size_t readInternal(BufferedContent &cbuf, size_t bufIndex, size_t nbChars) {
// This buffer overflow made me lose WEEKS of my life. Never again. // This buffer overflow made me lose WEEKS of my life. Never again.
assert(bufIndex + nbChars <= LEXER_BUF_SIZE); assert(bufIndex + nbChars <= LEXER_BUF_SIZE);
ssize_t nbReadChars = read(cbuf.fd, &cbuf.buf[bufIndex], nbChars); ssize_t nbReadChars = read(cbuf.fd, &cbuf.buf[bufIndex], nbChars);
@@ -665,17 +661,17 @@ static int peekInternal(uint8_t distance) {
LEXER_BUF_SIZE LEXER_BUF_SIZE
); );
if (auto *mmap = std::get_if<MmappedLexerState>(&lexerState->content); mmap) { if (auto *mmap = std::get_if<MmappedContent>(&lexerState->content); mmap) {
if (size_t idx = mmap->offset + distance; idx < mmap->size) if (size_t idx = mmap->offset + distance; idx < mmap->size)
return (uint8_t)mmap->ptr[idx]; return (uint8_t)mmap->ptr[idx];
return EOF; return EOF;
} else if (auto *view = std::get_if<ViewedLexerState>(&lexerState->content); view) { } else if (auto *view = std::get_if<ViewedContent>(&lexerState->content); view) {
if (size_t idx = view->offset + distance; idx < view->size) if (size_t idx = view->offset + distance; idx < view->size)
return (uint8_t)view->ptr[idx]; return (uint8_t)view->ptr[idx];
return EOF; return EOF;
} else { } else {
assert(std::holds_alternative<BufferedLexerState>(lexerState->content)); assert(std::holds_alternative<BufferedContent>(lexerState->content));
auto &cbuf = std::get<BufferedLexerState>(lexerState->content); auto &cbuf = std::get<BufferedContent>(lexerState->content);
if (cbuf.nbChars > distance) if (cbuf.nbChars > distance)
return (uint8_t)cbuf.buf[(cbuf.index + distance) % LEXER_BUF_SIZE]; return (uint8_t)cbuf.buf[(cbuf.index + distance) % LEXER_BUF_SIZE];
@@ -789,13 +785,13 @@ restart:
} else { } else {
// Advance within the file contents // Advance within the file contents
lexerState->colNo++; lexerState->colNo++;
if (auto *mmap = std::get_if<MmappedLexerState>(&lexerState->content); mmap) { if (auto *mmap = std::get_if<MmappedContent>(&lexerState->content); mmap) {
mmap->offset++; mmap->offset++;
} else if (auto *view = std::get_if<ViewedLexerState>(&lexerState->content); view) { } else if (auto *view = std::get_if<ViewedContent>(&lexerState->content); view) {
view->offset++; view->offset++;
} else { } else {
assert(std::holds_alternative<BufferedLexerState>(lexerState->content)); assert(std::holds_alternative<BufferedContent>(lexerState->content));
auto &cbuf = std::get<BufferedLexerState>(lexerState->content); auto &cbuf = std::get<BufferedContent>(lexerState->content);
assert(cbuf.index < LEXER_BUF_SIZE); assert(cbuf.index < LEXER_BUF_SIZE);
cbuf.index++; cbuf.index++;
if (cbuf.index == LEXER_BUF_SIZE) if (cbuf.index == LEXER_BUF_SIZE)
@@ -2205,10 +2201,10 @@ void CaptureBody::startCapture() {
lexerState->disableInterpolation = true; lexerState->disableInterpolation = true;
lineNo = lexer_GetLineNo(); lineNo = lexer_GetLineNo();
if (auto *mmap = std::get_if<MmappedLexerState>(&lexerState->content); if (auto *mmap = std::get_if<MmappedContent>(&lexerState->content);
mmap && lexerState->expansions.empty()) { mmap && lexerState->expansions.empty()) {
body = &mmap->ptr[mmap->offset]; body = &mmap->ptr[mmap->offset];
} else if (auto *view = std::get_if<ViewedLexerState>(&lexerState->content); } else if (auto *view = std::get_if<ViewedContent>(&lexerState->content);
view && lexerState->expansions.empty()) { view && lexerState->expansions.empty()) {
body = &view->ptr[view->offset]; body = &view->ptr[view->offset];
} else { } else {
@@ -2299,7 +2295,7 @@ CaptureBody lexer_CaptureMacroBody() {
capture.startCapture(); capture.startCapture();
// If the file is `mmap`ed, we need not to unmap it to keep access to the macro // If the file is `mmap`ed, we need not to unmap it to keep access to the macro
if (auto *mmap = std::get_if<MmappedLexerState>(&lexerState->content); mmap) if (auto *mmap = std::get_if<MmappedContent>(&lexerState->content); mmap)
mmap->isReferenced = true; mmap->isReferenced = true;
int c = EOF; int c = EOF;