mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-20 10:12:06 +00:00
Use std::variant for lexer mmap/buffer state (#1328)
This commit is contained in:
@@ -6,6 +6,7 @@
|
|||||||
#include <deque>
|
#include <deque>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <variant>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "platform.hpp" // SSIZE_MAX
|
#include "platform.hpp" // SSIZE_MAX
|
||||||
@@ -44,15 +45,18 @@ struct IfStackEntry {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct MmappedLexerState {
|
struct MmappedLexerState {
|
||||||
union {
|
char *ptr;
|
||||||
char const *unreferenced;
|
|
||||||
char *referenced; // Non-`const` only so it can be `munmap()`ped
|
|
||||||
} ptr;
|
|
||||||
size_t size;
|
size_t size;
|
||||||
size_t offset;
|
size_t offset;
|
||||||
bool isReferenced; // If a macro in this file requires not unmapping it
|
bool isReferenced; // If a macro in this file requires not unmapping it
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ViewedLexerState {
|
||||||
|
char const *ptr;
|
||||||
|
size_t size;
|
||||||
|
size_t offset;
|
||||||
|
};
|
||||||
|
|
||||||
struct BufferedLexerState {
|
struct BufferedLexerState {
|
||||||
int fd;
|
int fd;
|
||||||
size_t index; // Read index into the buffer
|
size_t index; // Read index into the buffer
|
||||||
@@ -63,16 +67,6 @@ struct BufferedLexerState {
|
|||||||
struct LexerState {
|
struct LexerState {
|
||||||
char const *path;
|
char const *path;
|
||||||
|
|
||||||
// mmap()-dependent IO state
|
|
||||||
bool isMmapped;
|
|
||||||
union {
|
|
||||||
MmappedLexerState mmap; // If mmap()ed
|
|
||||||
BufferedLexerState cbuf; // Otherwise
|
|
||||||
};
|
|
||||||
|
|
||||||
// Common state
|
|
||||||
bool isFile;
|
|
||||||
|
|
||||||
enum LexerMode mode;
|
enum LexerMode mode;
|
||||||
bool atLineStart;
|
bool atLineStart;
|
||||||
uint32_t lineNo;
|
uint32_t lineNo;
|
||||||
@@ -90,6 +84,13 @@ struct LexerState {
|
|||||||
size_t macroArgScanDistance; // Max distance already scanned for macro args
|
size_t macroArgScanDistance; // Max distance already scanned for macro args
|
||||||
bool expandStrings;
|
bool expandStrings;
|
||||||
std::deque<Expansion> expansions; // Front is the innermost current expansion
|
std::deque<Expansion> expansions; // Front is the innermost current expansion
|
||||||
|
|
||||||
|
std::variant<
|
||||||
|
std::monostate,
|
||||||
|
MmappedLexerState,
|
||||||
|
ViewedLexerState,
|
||||||
|
BufferedLexerState
|
||||||
|
> content;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern LexerState *lexerState;
|
extern LexerState *lexerState;
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
#include <variant>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#ifndef _MSC_VER
|
#ifndef _MSC_VER
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
@@ -369,64 +370,68 @@ void lexer_ReachELSEBlock()
|
|||||||
|
|
||||||
bool lexer_OpenFile(LexerState &state, char const *path)
|
bool lexer_OpenFile(LexerState &state, char const *path)
|
||||||
{
|
{
|
||||||
bool isStdin = !strcmp(path, "-");
|
if (!strcmp(path, "-")) {
|
||||||
|
state.path = "<stdin>";
|
||||||
|
state.content = BufferedLexerState{
|
||||||
|
.fd = STDIN_FILENO,
|
||||||
|
.index = 0,
|
||||||
|
.buf = {},
|
||||||
|
.nbChars = 0
|
||||||
|
};
|
||||||
|
if (verbose)
|
||||||
|
printf("Opening stdin\n");
|
||||||
|
} else {
|
||||||
struct stat fileInfo;
|
struct stat fileInfo;
|
||||||
|
if (stat(path, &fileInfo) != 0) {
|
||||||
// Give stdin a nicer file name
|
|
||||||
if (isStdin)
|
|
||||||
path = "<stdin>";
|
|
||||||
if (!isStdin && stat(path, &fileInfo) != 0) {
|
|
||||||
error("Failed to stat file \"%s\": %s\n", path, strerror(errno));
|
error("Failed to stat file \"%s\": %s\n", path, strerror(errno));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
state.path = path;
|
state.path = path;
|
||||||
state.isFile = true;
|
|
||||||
state.cbuf.fd = isStdin ? STDIN_FILENO : open(path, O_RDONLY);
|
int fd = open(path, O_RDONLY);
|
||||||
if (state.cbuf.fd < 0) {
|
if (fd < 0) {
|
||||||
error("Failed to open file \"%s\": %s\n", path, strerror(errno));
|
error("Failed to open file \"%s\": %s\n", path, strerror(errno));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
state.isMmapped = false; // By default, assume it won't be mmap()ed
|
|
||||||
if (!isStdin && fileInfo.st_size > 0) {
|
bool isMmapped = false;
|
||||||
|
|
||||||
|
if (fileInfo.st_size > 0) {
|
||||||
// Try using `mmap` for better performance
|
// Try using `mmap` for better performance
|
||||||
|
|
||||||
// Important: do NOT assign to `state.mmap.ptr.referenced` directly, to avoid a
|
|
||||||
// cast that may alter an eventual `MAP_FAILED` value. It would also invalidate
|
|
||||||
// `state.cbuf.fd`, being on the other side of the union.
|
|
||||||
void *mappingAddr;
|
void *mappingAddr;
|
||||||
|
mapFile(mappingAddr, fd, state.path, fileInfo.st_size);
|
||||||
|
|
||||||
mapFile(mappingAddr, state.cbuf.fd, state.path, fileInfo.st_size);
|
if (mappingAddr != MAP_FAILED) {
|
||||||
if (mappingAddr == MAP_FAILED) {
|
close(fd);
|
||||||
// If mmap()ing failed, try again using another method (below)
|
state.content = MmappedLexerState{
|
||||||
state.isMmapped = false;
|
.ptr = (char *)mappingAddr,
|
||||||
} else {
|
.size = (size_t)fileInfo.st_size,
|
||||||
// IMPORTANT: the `union` mandates this is accessed before other members!
|
.offset = 0,
|
||||||
close(state.cbuf.fd);
|
.isReferenced = false
|
||||||
|
};
|
||||||
state.isMmapped = true;
|
|
||||||
state.mmap.isReferenced = false; // By default, a state isn't referenced
|
|
||||||
state.mmap.ptr.referenced = (char *)mappingAddr;
|
|
||||||
assert(fileInfo.st_size >= 0);
|
|
||||||
state.mmap.size = (size_t)fileInfo.st_size;
|
|
||||||
state.mmap.offset = 0;
|
|
||||||
|
|
||||||
if (verbose)
|
if (verbose)
|
||||||
printf("File %s successfully mmap()ped\n", path);
|
printf("File \"%s\" is mmap()ped\n", path);
|
||||||
|
isMmapped = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!state.isMmapped) {
|
|
||||||
|
if (!isMmapped) {
|
||||||
// Sometimes mmap() fails or isn't available, so have a fallback
|
// Sometimes mmap() fails or isn't available, so have a fallback
|
||||||
|
state.content = BufferedLexerState{
|
||||||
|
.fd = fd,
|
||||||
|
.index = 0,
|
||||||
|
.buf = {},
|
||||||
|
.nbChars = 0
|
||||||
|
};
|
||||||
if (verbose) {
|
if (verbose) {
|
||||||
if (isStdin)
|
if (fileInfo.st_size == 0) {
|
||||||
printf("Opening stdin\n");
|
printf("File \"%s\" is empty\n", path);
|
||||||
else if (fileInfo.st_size == 0)
|
} else {
|
||||||
printf("File %s is empty\n", path);
|
printf("File \"%s\" is opened; errno reports: %s\n",
|
||||||
else
|
|
||||||
printf("File %s opened as regular, errno reports \"%s\"\n",
|
|
||||||
path, strerror(errno));
|
path, strerror(errno));
|
||||||
}
|
}
|
||||||
state.cbuf.index = 0;
|
}
|
||||||
state.cbuf.nbChars = 0;
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
initState(state);
|
initState(state);
|
||||||
@@ -438,19 +443,22 @@ void lexer_OpenFileView(LexerState &state, char const *path, char const *buf, si
|
|||||||
uint32_t lineNo)
|
uint32_t lineNo)
|
||||||
{
|
{
|
||||||
state.path = path; // Used to report read errors in `peekInternal`
|
state.path = path; // Used to report read errors in `peekInternal`
|
||||||
state.isFile = false;
|
state.content = ViewedLexerState{
|
||||||
state.isMmapped = true; // It's not *really* mmap()ed, but it behaves the same
|
.ptr = buf,
|
||||||
state.mmap.ptr.unreferenced = buf;
|
.size = size,
|
||||||
state.mmap.size = size;
|
.offset = 0
|
||||||
state.mmap.offset = 0;
|
};
|
||||||
|
|
||||||
initState(state);
|
initState(state);
|
||||||
state.lineNo = lineNo; // Will be incremented at first line start
|
state.lineNo = lineNo; // Will be incremented at first line start
|
||||||
}
|
}
|
||||||
|
|
||||||
void lexer_RestartRept(uint32_t lineNo)
|
void lexer_RestartRept(uint32_t lineNo)
|
||||||
{
|
{
|
||||||
lexerState->mmap.offset = 0;
|
std::visit(Visitor{
|
||||||
|
[](MmappedLexerState &mmap) { mmap.offset = 0; },
|
||||||
|
[](ViewedLexerState &view) { view.offset = 0; },
|
||||||
|
[](auto &) {},
|
||||||
|
}, lexerState->content);
|
||||||
initState(*lexerState);
|
initState(*lexerState);
|
||||||
lexerState->lineNo = lineNo;
|
lexerState->lineNo = lineNo;
|
||||||
}
|
}
|
||||||
@@ -470,10 +478,16 @@ void lexer_CleanupState(LexerState &state)
|
|||||||
// `lexerStateEOL`, but there's currently no situation in which this should happen.
|
// `lexerStateEOL`, but there's currently no situation in which this should happen.
|
||||||
assert(&state != lexerStateEOL);
|
assert(&state != lexerStateEOL);
|
||||||
|
|
||||||
if (!state.isMmapped)
|
std::visit(Visitor{
|
||||||
close(state.cbuf.fd);
|
[](MmappedLexerState &mmap) {
|
||||||
else if (state.isFile && !state.mmap.isReferenced)
|
if (!mmap.isReferenced)
|
||||||
munmap(state.mmap.ptr.referenced, state.mmap.size);
|
munmap(mmap.ptr, mmap.size);
|
||||||
|
},
|
||||||
|
[](BufferedLexerState &cbuf) {
|
||||||
|
close(cbuf.fd);
|
||||||
|
},
|
||||||
|
[](auto &) {},
|
||||||
|
}, state.content);
|
||||||
}
|
}
|
||||||
|
|
||||||
void lexer_SetMode(enum LexerMode mode)
|
void lexer_SetMode(enum LexerMode mode)
|
||||||
@@ -628,11 +642,11 @@ static char const *readMacroArg(char name)
|
|||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t readInternal(size_t bufIndex, size_t nbChars)
|
static size_t readInternal(BufferedLexerState &cbuf, size_t bufIndex, size_t nbChars)
|
||||||
{
|
{
|
||||||
// This buffer overflow made me lose WEEKS of my life. Never again.
|
// This buffer overflow made me lose WEEKS of my life. Never again.
|
||||||
assert(bufIndex + nbChars <= LEXER_BUF_SIZE);
|
assert(bufIndex + nbChars <= LEXER_BUF_SIZE);
|
||||||
ssize_t nbReadChars = read(lexerState->cbuf.fd, &lexerState->cbuf.buf[bufIndex], nbChars);
|
ssize_t nbReadChars = read(cbuf.fd, &cbuf.buf[bufIndex], nbChars);
|
||||||
|
|
||||||
if (nbReadChars == -1)
|
if (nbReadChars == -1)
|
||||||
fatalerror("Error while reading \"%s\": %s\n", lexerState->path, strerror(errno));
|
fatalerror("Error while reading \"%s\": %s\n", lexerState->path, strerror(errno));
|
||||||
@@ -657,26 +671,33 @@ static int peekInternal(uint8_t distance)
|
|||||||
fatalerror("Internal lexer error: buffer has insufficient size for peeking (%"
|
fatalerror("Internal lexer error: buffer has insufficient size for peeking (%"
|
||||||
PRIu8 " >= %u)\n", distance, LEXER_BUF_SIZE);
|
PRIu8 " >= %u)\n", distance, LEXER_BUF_SIZE);
|
||||||
|
|
||||||
if (lexerState->isMmapped) {
|
return std::visit(Visitor{
|
||||||
size_t index = lexerState->mmap.offset + distance;
|
[&distance](MmappedLexerState &mmap) -> int {
|
||||||
|
if (size_t idx = mmap.offset + distance; idx < mmap.size)
|
||||||
|
return (uint8_t)mmap.ptr[idx];
|
||||||
|
return EOF;
|
||||||
|
},
|
||||||
|
[&distance](ViewedLexerState &view) -> int {
|
||||||
|
if (size_t idx = view.offset + distance; idx < view.size)
|
||||||
|
return (uint8_t)view.ptr[idx];
|
||||||
|
return EOF;
|
||||||
|
},
|
||||||
|
[&distance](BufferedLexerState &cbuf) -> int {
|
||||||
|
if (cbuf.nbChars > distance)
|
||||||
|
return (uint8_t)cbuf.buf[(cbuf.index + distance) % LEXER_BUF_SIZE];
|
||||||
|
|
||||||
return index < lexerState->mmap.size ?
|
|
||||||
(uint8_t)lexerState->mmap.ptr.unreferenced[index] : EOF;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (lexerState->cbuf.nbChars <= distance) {
|
|
||||||
// Buffer isn't full enough, read some chars in
|
// Buffer isn't full enough, read some chars in
|
||||||
size_t target = LEXER_BUF_SIZE - lexerState->cbuf.nbChars; // Aim: making the buf full
|
size_t target = LEXER_BUF_SIZE - cbuf.nbChars; // Aim: making the buf full
|
||||||
|
|
||||||
// Compute the index we'll start writing to
|
// Compute the index we'll start writing to
|
||||||
size_t writeIndex = (lexerState->cbuf.index + lexerState->cbuf.nbChars) % LEXER_BUF_SIZE;
|
size_t writeIndex = (cbuf.index + cbuf.nbChars) % LEXER_BUF_SIZE;
|
||||||
|
|
||||||
// If the range to fill passes over the buffer wrapping point, we need two reads
|
// If the range to fill passes over the buffer wrapping point, we need two reads
|
||||||
if (writeIndex + target > LEXER_BUF_SIZE) {
|
if (writeIndex + target > LEXER_BUF_SIZE) {
|
||||||
size_t nbExpectedChars = LEXER_BUF_SIZE - writeIndex;
|
size_t nbExpectedChars = LEXER_BUF_SIZE - writeIndex;
|
||||||
size_t nbReadChars = readInternal(writeIndex, nbExpectedChars);
|
size_t nbReadChars = readInternal(cbuf, writeIndex, nbExpectedChars);
|
||||||
|
|
||||||
lexerState->cbuf.nbChars += nbReadChars;
|
cbuf.nbChars += nbReadChars;
|
||||||
|
|
||||||
writeIndex += nbReadChars;
|
writeIndex += nbReadChars;
|
||||||
if (writeIndex == LEXER_BUF_SIZE)
|
if (writeIndex == LEXER_BUF_SIZE)
|
||||||
@@ -688,14 +709,18 @@ static int peekInternal(uint8_t distance)
|
|||||||
target = 0;
|
target = 0;
|
||||||
}
|
}
|
||||||
if (target != 0)
|
if (target != 0)
|
||||||
lexerState->cbuf.nbChars += readInternal(writeIndex, target);
|
cbuf.nbChars += readInternal(cbuf, writeIndex, target);
|
||||||
|
|
||||||
|
if (cbuf.nbChars > distance)
|
||||||
|
return (uint8_t)cbuf.buf[(cbuf.index + distance) % LEXER_BUF_SIZE];
|
||||||
|
|
||||||
// If there aren't enough chars even after refilling, give up
|
// If there aren't enough chars even after refilling, give up
|
||||||
if (lexerState->cbuf.nbChars <= distance)
|
return EOF;
|
||||||
|
},
|
||||||
|
[](std::monostate) -> int {
|
||||||
return EOF;
|
return EOF;
|
||||||
}
|
}
|
||||||
|
}, lexerState->content);
|
||||||
return (unsigned char)lexerState->cbuf.buf[(lexerState->cbuf.index + distance) % LEXER_BUF_SIZE];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// forward declarations for peek
|
// forward declarations for peek
|
||||||
@@ -775,16 +800,23 @@ restart:
|
|||||||
} else {
|
} else {
|
||||||
// Advance within the file contents
|
// Advance within the file contents
|
||||||
lexerState->colNo++;
|
lexerState->colNo++;
|
||||||
if (lexerState->isMmapped) {
|
std::visit(Visitor{
|
||||||
lexerState->mmap.offset++;
|
[](MmappedLexerState &mmap) {
|
||||||
} else {
|
mmap.offset++;
|
||||||
assert(lexerState->cbuf.index < LEXER_BUF_SIZE);
|
},
|
||||||
lexerState->cbuf.index++;
|
[](ViewedLexerState &view) {
|
||||||
if (lexerState->cbuf.index == LEXER_BUF_SIZE)
|
view.offset++;
|
||||||
lexerState->cbuf.index = 0; // Wrap around if necessary
|
},
|
||||||
assert(lexerState->cbuf.nbChars > 0);
|
[](BufferedLexerState &cbuf) {
|
||||||
lexerState->cbuf.nbChars--;
|
assert(cbuf.index < LEXER_BUF_SIZE);
|
||||||
}
|
cbuf.index++;
|
||||||
|
if (cbuf.index == LEXER_BUF_SIZE)
|
||||||
|
cbuf.index = 0; // Wrap around if necessary
|
||||||
|
assert(cbuf.nbChars > 0);
|
||||||
|
cbuf.nbChars--;
|
||||||
|
},
|
||||||
|
[](std::monostate) {}
|
||||||
|
}, lexerState->content);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2273,15 +2305,24 @@ static void startCapture(CaptureBody &capture)
|
|||||||
lexerState->disableInterpolation = true;
|
lexerState->disableInterpolation = true;
|
||||||
|
|
||||||
capture.lineNo = lexer_GetLineNo();
|
capture.lineNo = lexer_GetLineNo();
|
||||||
|
capture.body = std::visit(Visitor{
|
||||||
|
[](MmappedLexerState &mmap) -> char const * {
|
||||||
|
return lexerState->expansions.empty() ? &mmap.ptr[mmap.offset] : nullptr;
|
||||||
|
},
|
||||||
|
[](ViewedLexerState &view) -> char const * {
|
||||||
|
return lexerState->expansions.empty() ? &view.ptr[view.offset] : nullptr;
|
||||||
|
},
|
||||||
|
[](auto &) -> char const * {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
}, lexerState->content);
|
||||||
|
|
||||||
if (lexerState->isMmapped && lexerState->expansions.empty()) {
|
if (capture.body == nullptr) {
|
||||||
capture.body = &lexerState->mmap.ptr.unreferenced[lexerState->mmap.offset];
|
// Indicates to retrieve the capture buffer when done capturing
|
||||||
} else {
|
|
||||||
assert(lexerState->captureBuf == nullptr);
|
assert(lexerState->captureBuf == nullptr);
|
||||||
lexerState->captureBuf = new(std::nothrow) std::vector<char>();
|
lexerState->captureBuf = new(std::nothrow) std::vector<char>();
|
||||||
if (!lexerState->captureBuf)
|
if (!lexerState->captureBuf)
|
||||||
fatalerror("Failed to allocate capture buffer: %s\n", strerror(errno));
|
fatalerror("Failed to allocate capture buffer: %s\n", strerror(errno));
|
||||||
capture.body = nullptr; // Indicate to retrieve the capture buffer when done capturing
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2362,8 +2403,8 @@ bool lexer_CaptureMacroBody(CaptureBody &capture)
|
|||||||
startCapture(capture);
|
startCapture(capture);
|
||||||
|
|
||||||
// If the file is `mmap`ed, we need not to unmap it to keep access to the macro
|
// If the file is `mmap`ed, we need not to unmap it to keep access to the macro
|
||||||
if (lexerState->isMmapped)
|
if (MmappedLexerState *mmap = std::get_if<MmappedLexerState>(&lexerState->content); mmap)
|
||||||
lexerState->mmap.isReferenced = true;
|
mmap->isReferenced = true;
|
||||||
|
|
||||||
int c = EOF;
|
int c = EOF;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user