Some refactoring, renaming, and debug assertions for clarity and safety

- Add more `assume()` checks in the lexer
- Replace double negative "`disable* = false`" with "`enable* = true`"
- Use the "`std::deque<> *Stack`" naming convention for deques that rely on
  `.push_front()` and `.pop_front()`
This commit is contained in:
Rangi42
2026-04-27 18:47:06 +02:00
parent 9fd0c0297f
commit 3c7488c131
5 changed files with 40 additions and 40 deletions
+1 -1
View File
@@ -183,7 +183,7 @@ These files have been copied ("vendored") from external authors and adapted for
- **`lexer.cpp`:** - **`lexer.cpp`:**
Functions and data related to [lexing](https://en.wikipedia.org/wiki/Lexical_analysis) assembly source code into tokens, which can then be parsed. Functions and data related to [lexing](https://en.wikipedia.org/wiki/Lexical_analysis) assembly source code into tokens, which can then be parsed.
This file maintains static `lexerState` and `lexerStateEOL` pointers to `LexerState`s from the `Context`s in `fstack.cpp`. This file maintains static `lexerState` and `lexerStateEOL` pointers to `LexerState`s from the `Context`s in `fstack.cpp`.
Each `LexerState` *owns* its `content` and its `expansions`' content. Each `Expansion` (the contents of an `{interpolation}` or macro argument) in turn *owns* its `contents`. Each `LexerState` *owns* its `content` and its `expansionStack`'s content. Each `Expansion` (the contents of an `{interpolation}` or macro argument) in turn *owns* its `contents`.
The lexer and parser are interdependent: when the parser reaches certain tokens, it changes the lexer's mode, which affects how characters get lexed into tokens. For example, when the parser reaches a macro name, it changes the lexer to "raw" mode, which lexes the rest of the line as a sequence of string arguments to the macro. The lexer and parser are interdependent: when the parser reaches certain tokens, it changes the lexer's mode, which affects how characters get lexed into tokens. For example, when the parser reaches a macro name, it changes the lexer to "raw" mode, which lexes the rest of the line as a sequence of string arguments to the macro.
- **`macro.cpp`:** - **`macro.cpp`:**
`MacroArgs` methods related to macro arguments. Each `MacroArgs` *references* its arguments' contents. `MacroArgs` methods related to macro arguments. Each `MacroArgs` *references* its arguments' contents.
+4 -4
View File
@@ -87,16 +87,16 @@ struct LexerState {
int lastToken; int lastToken;
int nextToken; int nextToken;
std::deque<IfStackEntry> ifStack; std::deque<IfStackEntry> ifStack; // Front is the innermost `IF` block
bool capturing; // Whether the text being lexed should be captured bool capturing; // Whether the text being lexed should be captured
size_t captureSize; // Amount of text captured size_t captureSize; // Amount of text captured
std::shared_ptr<std::vector<char>> captureBuf; // Buffer to send the captured text to if set std::shared_ptr<std::vector<char>> captureBuf; // Buffer to send the captured text to if set
bool disableExpansions; bool enableExpansions;
bool enableStringExpansions;
size_t expansionScanDistance; // Max distance already scanned for expansions size_t expansionScanDistance; // Max distance already scanned for expansions
bool expandStrings; std::deque<Expansion> expansionStack; // Front is the innermost current expansion
std::deque<Expansion> expansions; // Front is the innermost current expansion
std::variant<std::monostate, ViewedContent, BufferedContent> content; std::variant<std::monostate, ViewedContent, BufferedContent> content;
+1 -1
View File
@@ -36,7 +36,7 @@ ReversedIterable<IterableT> reversed(IterableT &&_iterable) {
// A map from `std::string` keys to `ItemT` items, iterable in the order the items were inserted. // A map from `std::string` keys to `ItemT` items, iterable in the order the items were inserted.
template<typename ItemT> template<typename ItemT>
class InsertionOrderedMap { class InsertionOrderedMap {
std::deque<ItemT> list; std::deque<ItemT> list; // `deque` does not invalidate item references
std::unordered_map<std::string, size_t> map; // Indexes into `list` std::unordered_map<std::string, size_t> map; // Indexes into `list`
public: public:
+5 -5
View File
@@ -54,7 +54,7 @@ static std::stack<Context> contextStack;
// The first include path for `fstk_FindFile` to try is none at all // The first include path for `fstk_FindFile` to try is none at all
static std::vector<std::string> includePaths = {""}; // -I static std::vector<std::string> includePaths = {""}; // -I
static std::deque<std::string> preIncludeNames; // -P static std::deque<std::string> preIncludeStack; // -P
static bool failedOnMissingInclude = false; static bool failedOnMissingInclude = false;
void FileStackNode::printBacktrace(uint32_t curLineNo) const { void FileStackNode::printBacktrace(uint32_t curLineNo) const {
@@ -124,9 +124,9 @@ void fstk_VerboseOutputConfig() {
} }
} }
// -P/--preinclude // -P/--preinclude
if (!preIncludeNames.empty()) { if (!preIncludeStack.empty()) {
fputs("\tPreincluded files:\n", stderr); fputs("\tPreincluded files:\n", stderr);
for (std::string const &name : preIncludeNames) { for (std::string const &name : preIncludeStack) {
fprintf(stderr, "\t - %s\n", name.c_str()); fprintf(stderr, "\t - %s\n", name.c_str());
} }
} }
@@ -168,7 +168,7 @@ void fstk_AddIncludePath(std::string const &path) {
} }
void fstk_AddPreIncludeFile(std::string const &path) { void fstk_AddPreIncludeFile(std::string const &path) {
preIncludeNames.emplace_front(path); preIncludeStack.emplace_front(path);
} }
static bool isValidFilePath(std::string const &path) { static bool isValidFilePath(std::string const &path) {
@@ -497,7 +497,7 @@ void fstk_NewRecursionDepth(size_t newDepth) {
bool fstk_Init(std::string const &mainPath) { bool fstk_Init(std::string const &mainPath) {
newFileContext(mainPath, false, true); newFileContext(mainPath, false, true);
for (std::string const &name : preIncludeNames) { for (std::string const &name : preIncludeStack) {
if (std::optional<std::string> fullPath = fstk_FindFile(name); fullPath) { if (std::optional<std::string> fullPath = fstk_FindFile(name); fullPath) {
newFileContext(*fullPath, false, false); newFileContext(*fullPath, false, false);
} else if (fstk_FileError(name, "pre-included")) { } else if (fstk_FileError(name, "pre-included")) {
+28 -28
View File
@@ -277,18 +277,18 @@ void LexerState::clear(uint32_t lineNo_) {
capturing = false; capturing = false;
captureBuf = nullptr; captureBuf = nullptr;
disableExpansions = false; enableExpansions = true;
enableStringExpansions = true;
expansionScanDistance = 0; expansionScanDistance = 0;
expandStrings = true;
expansions.clear(); expansionStack.clear();
lineNo = lineNo_; // Will be incremented at next line start lineNo = lineNo_; // Will be incremented at next line start
} }
static void nextLine() { static void nextLine() {
// Newlines read within an expansion should not increase the line count // Newlines read within an expansion should not increase the line count
if (lexerState->expansions.empty()) { if (lexerState->expansionStack.empty()) {
++lexerState->lineNo; ++lexerState->lineNo;
} }
} }
@@ -444,6 +444,7 @@ void BufferedContent::advance() {
} }
void BufferedContent::refill() { void BufferedContent::refill() {
assume(size <= std::size(buf));
size_t target = std::size(buf) - size; // Aim: making the buf full size_t target = std::size(buf) - size; // Aim: making the buf full
// Compute the index we'll start writing to // Compute the index we'll start writing to
@@ -482,6 +483,7 @@ size_t BufferedContent::readMore(size_t startIndex, size_t nbChars) {
} }
size += nbReadChars; size += nbReadChars;
assume(size <= std::size(buf));
// `nbReadChars` cannot be negative, so it's fine to cast to `size_t` // `nbReadChars` cannot be negative, so it's fine to cast to `size_t`
return static_cast<size_t>(nbReadChars); return static_cast<size_t>(nbReadChars);
@@ -492,7 +494,7 @@ void lexer_SetMode(LexerMode mode) {
} }
void lexer_ToggleStringExpansion(bool enable) { void lexer_ToggleStringExpansion(bool enable) {
lexerState->expandStrings = enable; lexerState->enableStringExpansions = enable;
} }
// Functions for the actual lexer to obtain characters // Functions for the actual lexer to obtain characters
@@ -507,11 +509,11 @@ static void beginExpansion(std::shared_ptr<std::string> str, std::optional<std::
return; return;
} }
lexerState->expansions.push_front({.name = name, .contents = str, .offset = 0}); lexerState->expansionStack.push_front({.name = name, .contents = str, .offset = 0});
} }
void lexer_CheckRecursionDepth() { void lexer_CheckRecursionDepth() {
if (lexerState->expansions.size() > options.maxRecursionDepth + 1) { if (lexerState->expansionStack.size() > options.maxRecursionDepth + 1) {
fatal("Recursion limit (%zu) exceeded", options.maxRecursionDepth); fatal("Recursion limit (%zu) exceeded", options.maxRecursionDepth);
} }
} }
@@ -528,9 +530,9 @@ static int nextChar();
static uint32_t readDecimalNumber(int initial); static uint32_t readDecimalNumber(int initial);
static uint32_t readBracketedMacroArgNum() { static uint32_t readBracketedMacroArgNum() {
bool disableExpansions = lexerState->disableExpansions; bool enableExpansions = lexerState->enableExpansions;
lexerState->disableExpansions = false; lexerState->enableExpansions = true;
Defer restoreExpansions{[&] { lexerState->disableExpansions = disableExpansions; }}; Defer restoreExpansions{[&] { lexerState->enableExpansions = enableExpansions; }};
int32_t num = 0; int32_t num = 0;
int c = peek(); int c = peek();
@@ -606,8 +608,7 @@ static std::shared_ptr<std::string> readMacroArg() {
error("`\\@` cannot be used outside of a macro or loop (`REPT`/`FOR` block)"); error("`\\@` cannot be used outside of a macro or loop (`REPT`/`FOR` block)");
} }
return str; return str;
} else if (c == '#') { } else if (MacroArgs const *macroArgs = fstk_GetCurrentMacroArgs(); c == '#') {
MacroArgs *macroArgs = fstk_GetCurrentMacroArgs();
if (!macroArgs) { if (!macroArgs) {
error("`\\#` cannot be used outside of a macro"); error("`\\#` cannot be used outside of a macro");
return nullptr; return nullptr;
@@ -623,7 +624,6 @@ static std::shared_ptr<std::string> readMacroArg() {
return nullptr; return nullptr;
} }
MacroArgs *macroArgs = fstk_GetCurrentMacroArgs();
if (!macroArgs) { if (!macroArgs) {
error("`\\<%" PRIu32 ">` cannot be used outside of a macro", num); error("`\\<%" PRIu32 ">` cannot be used outside of a macro", num);
return nullptr; return nullptr;
@@ -637,7 +637,6 @@ static std::shared_ptr<std::string> readMacroArg() {
} else { } else {
assume(c >= '1' && c <= '9'); assume(c >= '1' && c <= '9');
MacroArgs *macroArgs = fstk_GetCurrentMacroArgs();
if (!macroArgs) { if (!macroArgs) {
error("`\\%c` cannot be used outside of a macro", c); error("`\\%c` cannot be used outside of a macro", c);
return nullptr; return nullptr;
@@ -653,7 +652,7 @@ static std::shared_ptr<std::string> readMacroArg() {
int LexerState::peekChar() { int LexerState::peekChar() {
// This is `.peekCharAhead()` modified for zero lookahead distance // This is `.peekCharAhead()` modified for zero lookahead distance
for (Expansion &exp : expansions) { for (Expansion &exp : expansionStack) {
if (exp.offset < exp.size()) { if (exp.offset < exp.size()) {
return static_cast<uint8_t>((*exp.contents)[exp.offset]); return static_cast<uint8_t>((*exp.contents)[exp.offset]);
} }
@@ -683,7 +682,7 @@ int LexerState::peekCharAhead() {
// We only need one character of lookahead, for macro arguments // We only need one character of lookahead, for macro arguments
uint8_t distance = 1; uint8_t distance = 1;
for (Expansion &exp : expansions) { for (Expansion &exp : expansionStack) {
// An expansion that has reached its end will have `exp.offset` == `exp.size()`, // An expansion that has reached its end will have `exp.offset` == `exp.size()`,
// and `.peekCharAhead()` will continue with its parent // and `.peekCharAhead()` will continue with its parent
assume(exp.offset <= exp.size()); assume(exp.offset <= exp.size());
@@ -728,7 +727,7 @@ static int peek() {
++lexerState->expansionScanDistance; // Do not consider again ++lexerState->expansionScanDistance; // Do not consider again
if (lexerState->disableExpansions) { if (!lexerState->enableExpansions) {
return c; return c;
} else if (c == '\\') { } else if (c == '\\') {
// If character is a backslash, check for a macro arg // If character is a backslash, check for a macro arg
@@ -770,15 +769,16 @@ static void shiftChar() {
++lexerState->captureSize; ++lexerState->captureSize;
} }
assume(lexerState->expansionScanDistance > 0);
--lexerState->expansionScanDistance; --lexerState->expansionScanDistance;
for (;;) { for (;;) {
if (!lexerState->expansions.empty()) { if (!lexerState->expansionStack.empty()) {
// Advance within the current expansion // Advance within the current expansion
if (Expansion &exp = lexerState->expansions.front(); exp.advance()) { if (Expansion &exp = lexerState->expansionStack.front(); exp.advance()) {
// When advancing would go past an expansion's end, // When advancing would go past an expansion's end,
// move up to its parent and try again to advance // move up to its parent and try again to advance
lexerState->expansions.pop_front(); lexerState->expansionStack.pop_front();
continue; continue;
} }
} else { } else {
@@ -834,8 +834,8 @@ static void handleCRLF(int c) {
} }
static auto scopedDisableExpansions() { static auto scopedDisableExpansions() {
lexerState->disableExpansions = true; lexerState->enableExpansions = false;
return Defer{[&] { lexerState->disableExpansions = false; }}; return Defer{[&] { lexerState->enableExpansions = true; }};
} }
// "Services" provided by the lexer to the rest of the program // "Services" provided by the lexer to the rest of the program
@@ -849,7 +849,7 @@ void lexer_TraceStringExpansions() {
return; return;
} }
for (Expansion &exp : lexerState->expansions) { for (Expansion &exp : lexerState->expansionStack) {
// Only print EQUS expansions, not string args // Only print EQUS expansions, not string args
if (exp.name) { if (exp.name) {
style_Set(stderr, STYLE_CYAN, false); style_Set(stderr, STYLE_CYAN, false);
@@ -1439,11 +1439,11 @@ static void appendCharInLiteral(std::string &str, int c) {
// Symbol interpolation // Symbol interpolation
if (c == '{') { if (c == '{') {
// We'll be exiting the string/character scope, so re-enable expansions // We'll be exiting the string/character scope, so re-enable expansions
lexerState->disableExpansions = false; lexerState->enableExpansions = true;
if (auto interp = readInterpolation(0); interp.second) { if (auto interp = readInterpolation(0); interp.second) {
appendExpandedString(str, *interp.second); appendExpandedString(str, *interp.second);
} }
lexerState->disableExpansions = true; lexerState->enableExpansions = false;
return; return;
} }
@@ -1956,7 +1956,7 @@ static Token yylex_NORMAL() {
std::string const &identifier = std::get<std::string>(token.value); std::string const &identifier = std::get<std::string>(token.value);
// Raw symbols and local symbols cannot be string expansions // Raw symbols and local symbols cannot be string expansions
if (!raw && token.type == T_(SYMBOL) && lexerState->expandStrings) { if (!raw && token.type == T_(SYMBOL) && lexerState->enableStringExpansions) {
// Attempt string expansion // Attempt string expansion
if (Symbol const *sym = sym_FindExactSymbol(identifier); if (Symbol const *sym = sym_FindExactSymbol(identifier);
sym && sym->type == SYM_EQUS) { sym && sym->type == SYM_EQUS) {
@@ -2162,7 +2162,7 @@ finish: // Can't `break` out of a nested `for`-`switch`
// tokens that would change how these constructs were captured or skipped, if // tokens that would change how these constructs were captured or skipped, if
// they had been produced during the capture/skip non-evaluating phase. // they had been produced during the capture/skip non-evaluating phase.
static Token skipToLeadingKeyword() { static Token skipToLeadingKeyword() {
assume(lexerState->disableExpansions); assume(!lexerState->enableExpansions);
for (;;) { for (;;) {
if (lexerState->atLineStart) { if (lexerState->atLineStart) {
@@ -2321,7 +2321,7 @@ static Capture makeCapture(char const *name, CallbackFnT callback) {
.lineNo = lexer_GetLineNo(), .span = {.ptr = nullptr, .size = 0} .lineNo = lexer_GetLineNo(), .span = {.ptr = nullptr, .size = 0}
}; };
if (std::holds_alternative<ViewedContent>(lexerState->content) if (std::holds_alternative<ViewedContent>(lexerState->content)
&& lexerState->expansions.empty()) { && lexerState->expansionStack.empty()) {
auto &view = std::get<ViewedContent>(lexerState->content); auto &view = std::get<ViewedContent>(lexerState->content);
capture.span.ptr = view.makeSharedContentPtr(); capture.span.ptr = view.makeSharedContentPtr();
} else { } else {