Use automatic allocation for lexer states

Lexer states are now owned by fstack contexts
This commit is contained in:
Rangi42
2024-02-26 13:53:52 -05:00
committed by Sylvie
parent 514044496f
commit 31836967fa
4 changed files with 159 additions and 184 deletions

View File

@@ -3,17 +3,94 @@
#ifndef RGBDS_ASM_LEXER_H
#define RGBDS_ASM_LEXER_H
#include <deque>
#include <stack>
#include "platform.hpp" // SSIZE_MAX
#define MAXSTRLEN 255
struct LexerState;
#define LEXER_BUF_SIZE 42 // TODO: determine a sane value for this
// The buffer needs to be large enough for the maximum `peekInternal` lookahead distance
static_assert(LEXER_BUF_SIZE > 1, "Lexer buffer size is too small");
// This caps the size of buffer reads, and according to POSIX, passing more than SSIZE_MAX is UB
static_assert(LEXER_BUF_SIZE <= SSIZE_MAX, "Lexer buffer size is too large");
enum LexerMode {
LEXER_NORMAL,
LEXER_RAW,
LEXER_SKIP_TO_ELIF,
LEXER_SKIP_TO_ENDC,
LEXER_SKIP_TO_ENDR,
NB_LEXER_MODES
};
struct Expansion {
char *name;
union {
char const *unowned;
char *owned;
} contents;
size_t size; // Length of the contents
size_t offset; // Cursor into the contents
bool owned; // Whether or not to free contents when this expansion is freed
};
struct IfStackEntry {
bool ranIfBlock; // Whether an IF/ELIF/ELSE block ran already
bool reachedElseBlock; // Whether an ELSE block ran already
};
struct MmappedLexerState {
char *ptr; // Technically `const` during the lexer's execution
size_t size;
size_t offset;
bool isReferenced; // If a macro in this file requires not unmapping it
};
struct BufferedLexerState {
int fd;
size_t index; // Read index into the buffer
char buf[LEXER_BUF_SIZE]; // Circular buffer
size_t nbChars; // Number of "fresh" chars in the buffer
};
struct LexerState {
char const *path;
// mmap()-dependent IO state
bool isMmapped;
union {
struct MmappedLexerState mmap; // If mmap()ed
struct BufferedLexerState cbuf; // Otherwise
};
// Common state
bool isFile;
enum LexerMode mode;
bool atLineStart;
uint32_t lineNo;
uint32_t colNo;
int lastToken;
std::stack<struct IfStackEntry> *ifStack;
bool capturing; // Whether the text being lexed should be captured
size_t captureSize; // Amount of text captured
char *captureBuf; // Buffer to send the captured text to if non-NULL
size_t captureCapacity; // Size of the buffer above
bool disableMacroArgs;
bool disableInterpolation;
size_t macroArgScanDistance; // Max distance already scanned for macro args
bool expandStrings;
std::deque<struct Expansion> *expansions; // Front is the innermost current expansion
};
extern struct LexerState *lexerState;
extern struct LexerState *lexerStateEOL;
static inline struct LexerState *lexer_GetState(void)
{
return lexerState;
}
static inline void lexer_SetState(struct LexerState *state)
{
lexerState = state;
@@ -42,21 +119,12 @@ static inline void lexer_SetGfxDigits(char const digits[4])
}
// `path` is referenced, but not held onto..!
struct LexerState *lexer_OpenFile(char const *path);
struct LexerState *lexer_OpenFileView(char const *path, char *buf, size_t size, uint32_t lineNo);
bool lexer_OpenFile(struct LexerState &state, char const *path);
void lexer_OpenFileView(struct LexerState &state, char const *path, char *buf, size_t size,
uint32_t lineNo);
void lexer_RestartRept(uint32_t lineNo);
void lexer_DeleteState(struct LexerState *state);
void lexer_DeleteState(struct LexerState &state);
void lexer_Init(void);
enum LexerMode {
LEXER_NORMAL,
LEXER_RAW,
LEXER_SKIP_TO_ELIF,
LEXER_SKIP_TO_ENDC,
LEXER_SKIP_TO_ENDR,
NB_LEXER_MODES
};
void lexer_SetMode(enum LexerMode mode);
void lexer_ToggleStringExpansion(bool enable);

View File

@@ -46,6 +46,7 @@
# define SSIZE_MAX INT_MAX
#else
# include <fcntl.h>
# include <limits.h>
# include <unistd.h>
#endif

View File

@@ -24,7 +24,7 @@
struct Context {
struct FileStackNode *fileInfo;
struct LexerState *lexerState;
struct LexerState lexerState;
uint32_t uniqueID;
struct MacroArgs *macroArgs; // Macro args are *saved* here
uint32_t nbReptIters;
@@ -252,7 +252,7 @@ bool yywrap(void)
// Free the FOR symbol name
free(oldContext.forName);
lexer_SetState(contextStack.top().lexerState);
lexer_SetState(&contextStack.top().lexerState);
macro_SetUniqueID(contextStack.top().uniqueID);
return false;
@@ -312,10 +312,9 @@ void fstk_RunInclude(char const *path)
uint32_t uniqueID = contextStack.top().uniqueID;
struct Context &context = newContext(fileInfo);
context.lexerState = lexer_OpenFile(fileInfo->name().c_str());
if (!context.lexerState)
if (!lexer_OpenFile(context.lexerState, fileInfo->name().c_str()))
fatalerror("Failed to set up lexer for file include\n");
lexer_SetStateAtEOL(context.lexerState);
lexer_SetStateAtEOL(&context.lexerState);
// We're back at top-level, so most things are reset,
// but not the unique ID, since INCLUDE may be inside a
// MACRO or REPT/FOR loop
@@ -348,10 +347,9 @@ static void runPreIncludeFile(void)
struct Context &context = newContext(fileInfo);
context.lexerState = lexer_OpenFile(fileInfo->name().c_str());
if (!context.lexerState)
if (!lexer_OpenFile(context.lexerState, fileInfo->name().c_str()))
fatalerror("Failed to set up lexer for file include\n");
lexer_SetState(context.lexerState);
lexer_SetState(&context.lexerState);
// We're back at top-level, so most things are reset
context.uniqueID = macro_UndefUniqueID();
}
@@ -404,11 +402,10 @@ void fstk_RunMacro(char const *macroName, struct MacroArgs *args)
fileInfoName.append(macro->name);
struct Context &context = newContext(fileInfo);
context.lexerState = lexer_OpenFileView("MACRO", macro->macro.value, macro->macro.size,
lexer_OpenFileView(context.lexerState, "MACRO", macro->macro.value, macro->macro.size,
macro->fileLine);
if (!context.lexerState)
fatalerror("Failed to set up lexer for macro invocation\n");
lexer_SetStateAtEOL(context.lexerState);
lexer_SetStateAtEOL(&context.lexerState);
context.uniqueID = macro_UseNewUniqueID();
macro_UseNewArgs(args);
}
@@ -436,10 +433,8 @@ static bool newReptContext(int32_t reptLineNo, char *body, size_t size)
// Correct our line number, which currently points to the `ENDR` line
context.fileInfo->lineNo = reptLineNo;
context.lexerState = lexer_OpenFileView("REPT", body, size, reptLineNo);
if (!context.lexerState)
fatalerror("Failed to set up lexer for REPT block\n");
lexer_SetStateAtEOL(context.lexerState);
lexer_OpenFileView(context.lexerState, "REPT", body, size, reptLineNo);
lexer_SetStateAtEOL(&context.lexerState);
context.uniqueID = macro_UseNewUniqueID();
return true;
}
@@ -517,28 +512,24 @@ void fstk_NewRecursionDepth(size_t newDepth)
void fstk_Init(char const *mainPath, size_t maxDepth)
{
struct LexerState *state = lexer_OpenFile(mainPath);
struct Context &context = contextStack.emplace();
if (!state)
if (!lexer_OpenFile(context.lexerState, mainPath))
fatalerror("Failed to open main file\n");
lexer_SetState(state);
char const *fileName = lexer_GetFileName();
lexer_SetState(&context.lexerState);
struct FileStackNode *fileInfo = new(std::nothrow) struct FileStackNode();
if (!fileInfo)
fatalerror("Failed to allocate memory for main file info: %s\n", strerror(errno));
fileInfo->type = NODE_FILE;
fileInfo->data = fileName;
fileInfo->data = lexer_GetFileName();
// lineNo and nbReptIters are unused on the top-level context
fileInfo->parent = NULL;
fileInfo->lineNo = 0; // This still gets written to the object file, so init it
fileInfo->referenced = false;
struct Context &context = contextStack.emplace();
context.fileInfo = fileInfo;
context.lexerState = state;
context.uniqueID = macro_UndefUniqueID();
context.nbReptIters = 0;
context.forValue = 0;

View File

@@ -285,98 +285,29 @@ static bool isWhitespace(int c)
return c == ' ' || c == '\t';
}
#define LEXER_BUF_SIZE 42 // TODO: determine a sane value for this
// The buffer needs to be large enough for the maximum `peekInternal` lookahead distance
static_assert(LEXER_BUF_SIZE > 1, "Lexer buffer size is too small");
// This caps the size of buffer reads, and according to POSIX, passing more than SSIZE_MAX is UB
static_assert(LEXER_BUF_SIZE <= SSIZE_MAX, "Lexer buffer size is too large");
struct Expansion {
char *name;
union {
char const *unowned;
char *owned;
} contents;
size_t size; // Length of the contents
size_t offset; // Cursor into the contents
bool owned; // Whether or not to free contents when this expansion is freed
};
struct IfStackEntry {
bool ranIfBlock; // Whether an IF/ELIF/ELSE block ran already
bool reachedElseBlock; // Whether an ELSE block ran already
};
struct MmappedLexerState {
char *ptr; // Technically `const` during the lexer's execution
size_t size;
size_t offset;
bool isReferenced; // If a macro in this file requires not unmapping it
};
struct BufferedLexerState {
int fd;
size_t index; // Read index into the buffer
char buf[LEXER_BUF_SIZE]; // Circular buffer
size_t nbChars; // Number of "fresh" chars in the buffer
};
struct LexerState {
char const *path;
// mmap()-dependent IO state
bool isMmapped;
union {
struct MmappedLexerState mmap; // If mmap()ed
struct BufferedLexerState cbuf; // Otherwise
};
// Common state
bool isFile;
enum LexerMode mode;
bool atLineStart;
uint32_t lineNo;
uint32_t colNo;
int lastToken;
std::stack<struct IfStackEntry> *ifStack;
bool capturing; // Whether the text being lexed should be captured
size_t captureSize; // Amount of text captured
char *captureBuf; // Buffer to send the captured text to if non-NULL
size_t captureCapacity; // Size of the buffer above
bool disableMacroArgs;
bool disableInterpolation;
size_t macroArgScanDistance; // Max distance already scanned for macro args
bool expandStrings;
std::deque<struct Expansion> *expansions; // Front is the innermost current expansion
};
struct LexerState *lexerState = NULL;
struct LexerState *lexerStateEOL = NULL;
static void initState(struct LexerState *state)
static void initState(struct LexerState &state)
{
state->mode = LEXER_NORMAL;
state->atLineStart = true; // yylex() will init colNo due to this
state->lastToken = T_EOF;
state.mode = LEXER_NORMAL;
state.atLineStart = true; // yylex() will init colNo due to this
state.lastToken = T_EOF;
state->ifStack = new(std::nothrow) std::stack<struct IfStackEntry>();
if (!state->ifStack)
state.ifStack = new(std::nothrow) std::stack<struct IfStackEntry>();
if (!state.ifStack)
fatalerror("Unable to allocate new IF stack: %s\n", strerror(errno));
state->capturing = false;
state->captureBuf = NULL;
state.capturing = false;
state.captureBuf = NULL;
state->disableMacroArgs = false;
state->disableInterpolation = false;
state->macroArgScanDistance = 0;
state->expandStrings = true;
state.disableMacroArgs = false;
state.disableInterpolation = false;
state.macroArgScanDistance = 0;
state.expandStrings = true;
state->expansions = new(std::nothrow) std::deque<struct Expansion>();
if (!state->expansions)
state.expansions = new(std::nothrow) std::deque<struct Expansion>();
if (!state.expansions)
fatalerror("Unable to allocate new expansion stack: %s\n", strerror(errno));
}
@@ -424,61 +355,54 @@ void lexer_ReachELSEBlock(void)
lexerState->ifStack->top().reachedElseBlock = true;
}
struct LexerState *lexer_OpenFile(char const *path)
bool lexer_OpenFile(struct LexerState &state, char const *path)
{
bool isStdin = !strcmp(path, "-");
struct LexerState *state = (struct LexerState *)malloc(sizeof(*state));
struct stat fileInfo;
// Give stdin a nicer file name
if (isStdin)
path = "<stdin>";
if (!state) {
error("Failed to allocate memory for lexer state: %s\n", strerror(errno));
return NULL;
}
if (!isStdin && stat(path, &fileInfo) != 0) {
error("Failed to stat file \"%s\": %s\n", path, strerror(errno));
free(state);
return NULL;
return false;
}
state->path = path;
state->isFile = true;
state->cbuf.fd = isStdin ? STDIN_FILENO : open(path, O_RDONLY);
if (state->cbuf.fd < 0) {
state.path = path;
state.isFile = true;
state.cbuf.fd = isStdin ? STDIN_FILENO : open(path, O_RDONLY);
if (state.cbuf.fd < 0) {
error("Failed to open file \"%s\": %s\n", path, strerror(errno));
free(state);
return NULL;
return false;
}
state->isMmapped = false; // By default, assume it won't be mmap()ed
state.isMmapped = false; // By default, assume it won't be mmap()ed
if (!isStdin && fileInfo.st_size > 0) {
// Try using `mmap` for better performance
// Important: do NOT assign to `state->mmap.ptr` directly, to avoid a cast that may
// alter an eventual `MAP_FAILED` value. It would also invalidate `state->cbuf.fd`,
// Important: do NOT assign to `state.mmap.ptr` directly, to avoid a cast that may
// alter an eventual `MAP_FAILED` value. It would also invalidate `state.cbuf.fd`,
// being on the other side of the union.
void *mappingAddr;
mapFile(mappingAddr, state->cbuf.fd, state->path, fileInfo.st_size);
mapFile(mappingAddr, state.cbuf.fd, state.path, fileInfo.st_size);
if (mappingAddr == MAP_FAILED) {
// If mmap()ing failed, try again using another method (below)
state->isMmapped = false;
state.isMmapped = false;
} else {
// IMPORTANT: the `union` mandates this is accessed before other members!
close(state->cbuf.fd);
close(state.cbuf.fd);
state->isMmapped = true;
state->mmap.isReferenced = false; // By default, a state isn't referenced
state->mmap.ptr = (char *)mappingAddr;
state.isMmapped = true;
state.mmap.isReferenced = false; // By default, a state isn't referenced
state.mmap.ptr = (char *)mappingAddr;
assert(fileInfo.st_size >= 0);
state->mmap.size = (size_t)fileInfo.st_size;
state->mmap.offset = 0;
state.mmap.size = (size_t)fileInfo.st_size;
state.mmap.offset = 0;
if (verbose)
printf("File %s successfully mmap()ped\n", path);
}
}
if (!state->isMmapped) {
if (!state.isMmapped) {
// Sometimes mmap() fails or isn't available, so have a fallback
if (verbose) {
if (isStdin)
@@ -489,44 +413,36 @@ struct LexerState *lexer_OpenFile(char const *path)
printf("File %s opened as regular, errno reports \"%s\"\n",
path, strerror(errno));
}
state->cbuf.index = 0;
state->cbuf.nbChars = 0;
state.cbuf.index = 0;
state.cbuf.nbChars = 0;
}
initState(state);
state->lineNo = 0; // Will be incremented at first line start
return state;
state.lineNo = 0; // Will be incremented at first line start
return true;
}
struct LexerState *lexer_OpenFileView(char const *path, char *buf, size_t size, uint32_t lineNo)
void lexer_OpenFileView(struct LexerState &state, char const *path, char *buf, size_t size, uint32_t lineNo)
{
struct LexerState *state = (struct LexerState *)malloc(sizeof(*state));
if (!state) {
error("Failed to allocate memory for lexer state: %s\n", strerror(errno));
return NULL;
}
state->path = path; // Used to report read errors in `peekInternal`
state->isFile = false;
state->isMmapped = true; // It's not *really* mmap()ed, but it behaves the same
state->mmap.ptr = buf;
state->mmap.size = size;
state->mmap.offset = 0;
state.path = path; // Used to report read errors in `peekInternal`
state.isFile = false;
state.isMmapped = true; // It's not *really* mmap()ed, but it behaves the same
state.mmap.ptr = buf;
state.mmap.size = size;
state.mmap.offset = 0;
initState(state);
state->lineNo = lineNo; // Will be incremented at first line start
return state;
state.lineNo = lineNo; // Will be incremented at first line start
}
void lexer_RestartRept(uint32_t lineNo)
{
lexerState->mmap.offset = 0;
initState(lexerState);
initState(*lexerState);
lexerState->lineNo = lineNo;
}
void lexer_DeleteState(struct LexerState *state)
void lexer_DeleteState(struct LexerState &state)
{
// A big chunk of the lexer state soundness is the file stack ("fstack").
// Each context in the fstack has its own *unique* lexer state; thus, we always guarantee
@@ -539,15 +455,14 @@ void lexer_DeleteState(struct LexerState *state)
// This assertion checks that this doesn't happen again.
// It could be argued that deleting a state that's scheduled for EOF could simply clear
// `lexerStateEOL`, but there's currently no situation in which this should happen.
assert(state != lexerStateEOL);
assert(&state != lexerStateEOL);
if (!state->isMmapped)
close(state->cbuf.fd);
else if (state->isFile && !state->mmap.isReferenced)
munmap(state->mmap.ptr, state->mmap.size);
delete state->ifStack;
delete state->expansions;
free(state);
if (!state.isMmapped)
close(state.cbuf.fd);
else if (state.isFile && !state.mmap.isReferenced)
munmap(state.mmap.ptr, state.mmap.size);
delete state.ifStack;
delete state.expansions;
}
struct KeywordDictNode {