Use a custom generic tagged union Either instead of std::variant for efficiency (#1476)

* Implement custom generic tagged union `Either`

This should be more efficient than `std::variant`, while still
keeping runtime safety, since it `assert`s whenever a value is retrieved with `get`.

* Use `Either` for RPN expressions

* Use `Either` for file stack node data

* Use `Either` for `File` buffer

* Use `Either` for `STRFMT` args

* Use `Either` for RGBLINK symbol values

* Support an equivalent of `std::monostate` for `Either`

* Use `Either` for lexer tokens

* Use `Either` for symbol values

* Use `Either` for lexer mmap/buffer state
This commit is contained in:
Sylvie
2024-08-20 15:19:11 -04:00
committed by GitHub
parent 7d98b9a900
commit 57c3d74b9e
17 changed files with 303 additions and 174 deletions

View File

@@ -48,28 +48,8 @@ static std::vector<std::string> includePaths = {""};
static std::string preIncludeName;
std::vector<uint32_t> &FileStackNode::iters() {
assume(std::holds_alternative<std::vector<uint32_t>>(data));
return std::get<std::vector<uint32_t>>(data);
}
std::vector<uint32_t> const &FileStackNode::iters() const {
assume(std::holds_alternative<std::vector<uint32_t>>(data));
return std::get<std::vector<uint32_t>>(data);
}
std::string &FileStackNode::name() {
assume(std::holds_alternative<std::string>(data));
return std::get<std::string>(data);
}
std::string const &FileStackNode::name() const {
assume(std::holds_alternative<std::string>(data));
return std::get<std::string>(data);
}
std::string const &FileStackNode::dump(uint32_t curLineNo) const {
if (std::holds_alternative<std::vector<uint32_t>>(data)) {
if (data.holds<std::vector<uint32_t>>()) {
assume(parent); // REPT nodes use their parent's name
std::string const &lastName = parent->dump(lineNo);
fputs(" -> ", stderr);

View File

@@ -106,10 +106,10 @@ using namespace std::literals;
struct Token {
int type;
std::variant<std::monostate, uint32_t, std::string> value;
Either<uint32_t, std::string> value;
Token() : type(T_(NUMBER)), value(std::monostate{}) {}
Token(int type_) : type(type_), value(std::monostate{}) {}
Token() : type(T_(NUMBER)), value() {}
Token(int type_) : type(type_), value() {}
Token(int type_, uint32_t value_) : type(type_), value(value_) {}
Token(int type_, std::string const &value_) : type(type_), value(value_) {}
Token(int type_, std::string &&value_) : type(type_), value(value_) {}
@@ -464,8 +464,8 @@ void LexerState::setViewAsNextState(char const *name, ContentSpan const &span, u
}
void lexer_RestartRept(uint32_t lineNo) {
if (auto *view = std::get_if<ViewedContent>(&lexerState->content); view) {
view->offset = 0;
if (lexerState->content.holds<ViewedContent>()) {
lexerState->content.get<ViewedContent>().offset = 0;
}
lexerState->clear(lineNo);
}
@@ -696,12 +696,12 @@ int LexerState::peekChar() {
return (uint8_t)(*exp.contents)[exp.offset];
}
if (auto *view = std::get_if<ViewedContent>(&content); view) {
if (view->offset < view->span.size)
return (uint8_t)view->span.ptr[view->offset];
if (content.holds<ViewedContent>()) {
auto &view = content.get<ViewedContent>();
if (view.offset < view.span.size)
return (uint8_t)view.span.ptr[view.offset];
} else {
assume(std::holds_alternative<BufferedContent>(content));
auto &cbuf = std::get<BufferedContent>(content);
auto &cbuf = content.get<BufferedContent>();
if (cbuf.size == 0)
cbuf.refill();
assume(cbuf.offset < LEXER_BUF_SIZE);
@@ -726,12 +726,12 @@ int LexerState::peekCharAhead() {
distance -= exp.size() - exp.offset;
}
if (auto *view = std::get_if<ViewedContent>(&content); view) {
if (view->offset + distance < view->span.size)
return (uint8_t)view->span.ptr[view->offset + distance];
if (content.holds<ViewedContent>()) {
auto &view = content.get<ViewedContent>();
if (view.offset + distance < view.span.size)
return (uint8_t)view.span.ptr[view.offset + distance];
} else {
assume(std::holds_alternative<BufferedContent>(content));
auto &cbuf = std::get<BufferedContent>(content);
auto &cbuf = content.get<BufferedContent>();
assume(distance < LEXER_BUF_SIZE);
if (cbuf.size <= distance)
cbuf.refill();
@@ -815,12 +815,10 @@ restart:
} else {
// Advance within the file contents
lexerState->colNo++;
if (auto *view = std::get_if<ViewedContent>(&lexerState->content); view) {
view->offset++;
if (lexerState->content.holds<ViewedContent>()) {
lexerState->content.get<ViewedContent>().offset++;
} else {
assume(std::holds_alternative<BufferedContent>(lexerState->content));
auto &cbuf = std::get<BufferedContent>(lexerState->content);
cbuf.advance();
lexerState->content.get<BufferedContent>().advance();
}
}
}
@@ -1796,12 +1794,12 @@ static Token yylex_NORMAL() {
return token;
// `token` is either an `ID` or a `LOCAL_ID`, and both have a `std::string` value.
assume(std::holds_alternative<std::string>(token.value));
assume(token.value.holds<std::string>());
// Local symbols cannot be string expansions
if (token.type == T_(ID) && lexerState->expandStrings) {
// Attempt string expansion
Symbol const *sym = sym_FindExactSymbol(std::get<std::string>(token.value));
Symbol const *sym = sym_FindExactSymbol(token.value.get<std::string>());
if (sym && sym->type == SYM_EQUS) {
std::shared_ptr<std::string> str = sym->getEqus();
@@ -2176,17 +2174,22 @@ yy::parser::symbol_type yylex() {
Token token = lexerModeFuncs[lexerState->mode]();
// Captures end at their buffer's boundary no matter what
if (token.type == T_(YYEOF) && !lexerState->capturing)
token = Token(T_(EOB));
if (token.type == T_(YYEOF) && !lexerState->capturing) {
// Doing `token = Token(T_(EOB));` here would be valid but redundant, because YYEOF and EOB
// both have the same empty value. Furthermore, g++ 11.4.0 was giving a false-positive
// '-Wmaybe-uninitialized' warning for `Token::value.Either<...>::_tag` that way.
// (This was on a developer's local machine; GitHub Actions CI's g++ was not warning.)
token.type = T_(EOB);
}
lexerState->lastToken = token.type;
lexerState->atLineStart = token.type == T_(NEWLINE) || token.type == T_(EOB);
if (auto *numValue = std::get_if<uint32_t>(&token.value); numValue) {
return yy::parser::symbol_type(token.type, *numValue);
} else if (auto *strValue = std::get_if<std::string>(&token.value); strValue) {
return yy::parser::symbol_type(token.type, *strValue);
if (token.value.holds<uint32_t>()) {
return yy::parser::symbol_type(token.type, token.value.get<uint32_t>());
} else if (token.value.holds<std::string>()) {
return yy::parser::symbol_type(token.type, token.value.get<std::string>());
} else {
assume(std::holds_alternative<std::monostate>(token.value));
assume(token.value.empty());
return yy::parser::symbol_type(token.type);
}
}
@@ -2202,10 +2205,10 @@ static Capture startCapture() {
lexerState->captureSize = 0;
uint32_t lineNo = lexer_GetLineNo();
if (auto *view = std::get_if<ViewedContent>(&lexerState->content);
view && lexerState->expansions.empty()) {
if (lexerState->content.holds<ViewedContent>() && lexerState->expansions.empty()) {
auto &view = lexerState->content.get<ViewedContent>();
return {
.lineNo = lineNo, .span = {.ptr = view->makeSharedContentPtr(), .size = 0}
.lineNo = lineNo, .span = {.ptr = view.makeSharedContentPtr(), .size = 0}
};
} else {
assume(lexerState->captureBuf == nullptr);

View File

@@ -7,7 +7,6 @@
%code requires {
#include <stdint.h>
#include <string>
#include <variant>
#include <vector>
#include "asm/lexer.hpp"
@@ -15,6 +14,7 @@
#include "asm/rpn.hpp"
#include "asm/section.hpp"
#include "either.hpp"
#include "linkdefs.hpp"
struct AlignmentSpec {
@@ -30,7 +30,7 @@
struct StrFmtArgList {
std::string format;
std::vector<std::variant<uint32_t, std::string>> args;
std::vector<Either<uint32_t, std::string>> args;
StrFmtArgList() = default;
StrFmtArgList(StrFmtArgList &&) = default;
@@ -81,8 +81,7 @@
std::string_view str, std::string const &old, std::string const &rep
);
static std::string strfmt(
std::string const &spec,
std::vector<std::variant<uint32_t, std::string>> const &args
std::string const &spec, std::vector<Either<uint32_t, std::string>> const &args
);
static void compoundAssignment(std::string const &symName, RPNCommand op, int32_t constValue);
static void failAssert(AssertionType type);
@@ -2642,8 +2641,7 @@ static std::string strrpl(std::string_view str, std::string const &old, std::str
}
static std::string strfmt(
std::string const &spec,
std::vector<std::variant<uint32_t, std::string>> const &args
std::string const &spec, std::vector<Either<uint32_t, std::string>> const &args
) {
std::string str;
size_t argIndex = 0;
@@ -2684,12 +2682,10 @@ static std::string strfmt(
} else if (argIndex >= args.size()) {
// Will warn after formatting is done.
str += '%';
} else if (auto *n = std::get_if<uint32_t>(&args[argIndex]); n) {
fmt.appendNumber(str, *n);
} else if (args[argIndex].holds<uint32_t>()) {
fmt.appendNumber(str, args[argIndex].get<uint32_t>());
} else {
assume(std::holds_alternative<std::string>(args[argIndex]));
auto &s = std::get<std::string>(args[argIndex]);
fmt.appendString(str, s);
fmt.appendString(str, args[argIndex].get<std::string>());
}
argIndex++;

View File

@@ -19,11 +19,6 @@
using namespace std::literals;
int32_t Expression::value() const {
assume(std::holds_alternative<int32_t>(data));
return std::get<int32_t>(data);
}
void Expression::clear() {
data = 0;
isSymbol = false;
@@ -44,7 +39,7 @@ uint8_t *Expression::reserveSpace(uint32_t size, uint32_t patchSize) {
int32_t Expression::getConstVal() const {
if (!isKnown()) {
error("Expected constant expression: %s\n", std::get<std::string>(data).c_str());
error("Expected constant expression: %s\n", data.get<std::string>().c_str());
return 0;
}
return value();

View File

@@ -46,28 +46,8 @@ FILE *linkerScript;
static uint32_t nbErrors = 0;
std::vector<uint32_t> &FileStackNode::iters() {
assume(std::holds_alternative<std::vector<uint32_t>>(data));
return std::get<std::vector<uint32_t>>(data);
}
std::vector<uint32_t> const &FileStackNode::iters() const {
assume(std::holds_alternative<std::vector<uint32_t>>(data));
return std::get<std::vector<uint32_t>>(data);
}
std::string &FileStackNode::name() {
assume(std::holds_alternative<std::string>(data));
return std::get<std::string>(data);
}
std::string const &FileStackNode::name() const {
assume(std::holds_alternative<std::string>(data));
return std::get<std::string>(data);
}
std::string const &FileStackNode::dump(uint32_t curLineNo) const {
if (std::holds_alternative<std::vector<uint32_t>>(data)) {
if (data.holds<std::vector<uint32_t>>()) {
assume(parent); // REPT nodes use their parent's name
std::string const &lastName = parent->dump(lineNo);
fputs(" -> ", stderr);

View File

@@ -503,7 +503,7 @@ void obj_ReadFile(char const *fileName, unsigned int fileID) {
// object file. It's better than nothing.
nodes[fileID].push_back({
.type = NODE_FILE,
.data = fileName,
.data = Either<std::vector<uint32_t>, std::string>(fileName),
.parent = nullptr,
.lineNo = 0,
});
@@ -566,8 +566,8 @@ void obj_ReadFile(char const *fileName, unsigned int fileID) {
if (symbol.type == SYMTYPE_EXPORT)
sym_AddSymbol(symbol);
if (auto *label = std::get_if<Label>(&symbol.data); label)
nbSymPerSect[label->sectionID]++;
if (symbol.data.holds<Label>())
nbSymPerSect[symbol.data.get<Label>().sectionID]++;
}
// This file's sections, stored in a table to link symbols to them
@@ -605,12 +605,11 @@ void obj_ReadFile(char const *fileName, unsigned int fileID) {
// Give symbols' section pointers to their sections
for (uint32_t i = 0; i < nbSymbols; i++) {
if (auto *label = std::get_if<Label>(&fileSymbols[i].data); label) {
Section *section = fileSections[label->sectionID].get();
label->section = section;
if (fileSymbols[i].data.holds<Label>()) {
Label &label = fileSymbols[i].data.get<Label>();
label.section = fileSections[label.sectionID].get();
// Give the section a pointer to the symbol as well
linkSymToSect(fileSymbols[i], *section);
linkSymToSect(fileSymbols[i], *label.section);
}
}
@@ -622,13 +621,15 @@ void obj_ReadFile(char const *fileName, unsigned int fileID) {
// This has to run **after** all the `sect_AddSection()` calls,
// so that `sect_GetSection()` will work
for (uint32_t i = 0; i < nbSymbols; i++) {
if (auto *label = std::get_if<Label>(&fileSymbols[i].data); label) {
if (Section *section = label->section; section->modifier != SECTION_NORMAL) {
if (section->modifier == SECTION_FRAGMENT)
if (fileSymbols[i].data.holds<Label>()) {
Label &label = fileSymbols[i].data.get<Label>();
if (Section *section = label.section; section->modifier != SECTION_NORMAL) {
if (section->modifier == SECTION_FRAGMENT) {
// Add the fragment's offset to the symbol's (`section->offset` is computed by `sect_AddSection`)
label->offset += section->offset;
label.offset += section->offset;
}
// Associate the symbol with the main section, not the "component" one
label->section = sect_GetSection(section->name);
label.section = sect_GetSection(section->name);
}
}
}

View File

@@ -564,16 +564,16 @@ static void writeSym() {
constants.clear();
sym_ForEach([](Symbol &sym) {
// Symbols are already limited to the exported ones
if (std::holds_alternative<int32_t>(sym.data))
if (sym.data.holds<int32_t>())
constants.push_back(&sym);
});
// Numeric constants are ordered by value, then by name
std::sort(RANGE(constants), [](Symbol *sym1, Symbol *sym2) -> bool {
int32_t val1 = std::get<int32_t>(sym1->data), val2 = std::get<int32_t>(sym2->data);
int32_t val1 = sym1->data.get<int32_t>(), val2 = sym2->data.get<int32_t>();
return val1 != val2 ? val1 < val2 : sym1->name < sym2->name;
});
for (Symbol *sym : constants) {
int32_t val = std::get<int32_t>(sym->data);
int32_t val = sym->data.get<int32_t>();
int width = val < 0x100 ? 2 : val < 0x10000 ? 4 : 8;
fprintf(symFile, "%0*" PRIx32 " %s\n", width, val, sym->name.c_str());
}

View File

@@ -5,7 +5,6 @@
#include <deque>
#include <inttypes.h>
#include <stdint.h>
#include <variant>
#include <vector>
#include "helpers.hpp" // assume, clz, ctz
@@ -230,8 +229,8 @@ static int32_t computeRPNExpr(Patch const &patch, std::vector<Symbol> const &fil
);
isError = true;
value = 1;
} else if (auto *label = std::get_if<Label>(&symbol->data); label) {
value = label->section->bank;
} else if (symbol->data.holds<Label>()) {
value = symbol->data.get<Label>().section->bank;
} else {
error(
patch.src,
@@ -390,11 +389,11 @@ static int32_t computeRPNExpr(Patch const &patch, std::vector<Symbol> const &fil
fileSymbols[value].name.c_str()
);
isError = true;
} else if (auto *label = std::get_if<Label>(&symbol->data); label) {
value = label->section->org + label->offset;
} else if (symbol->data.holds<Label>()) {
Label const &label = symbol->data.get<Label>();
value = label.section->org + label.offset;
} else {
assume(std::holds_alternative<int32_t>(symbol->data));
value = std::get<int32_t>(symbol->data);
value = symbol->data.get<int32_t>();
}
}
break;

View File

@@ -8,7 +8,6 @@
#include <stdint.h>
#include <string.h>
#include <tuple>
#include <variant>
#include "helpers.hpp" // assume
#include "linkdefs.hpp"
@@ -395,10 +394,11 @@ void sdobj_ReadFile(FileStackNode const &where, FILE *file, std::vector<Symbol>
// The same symbol can only be defined twice if neither
// definition is in a floating section
auto checkSymbol = [](Symbol const &sym) -> std::tuple<Section *, int32_t> {
if (auto *label = std::get_if<Label>(&sym.data); label)
return {label->section, label->offset};
assume(std::holds_alternative<int32_t>(sym.data));
return {nullptr, std::get<int32_t>(sym.data)};
if (sym.data.holds<Label>()) {
Label const &label = sym.data.get<Label>();
return {label.section, label.offset};
}
return {nullptr, sym.data.get<int32_t>()};
};
auto [symbolSection, symbolValue] = checkSymbol(symbol);
auto [otherSection, otherValue] = checkSymbol(*other);
@@ -876,13 +876,15 @@ void sdobj_ReadFile(FileStackNode const &where, FILE *file, std::vector<Symbol>
// This has to run **after** all the `sect_AddSection()` calls,
// so that `sect_GetSection()` will work
for (Symbol &sym : fileSymbols) {
if (auto *label = std::get_if<Label>(&sym.data); label) {
if (Section *section = label->section; section->modifier != SECTION_NORMAL) {
if (section->modifier == SECTION_FRAGMENT)
if (sym.data.holds<Label>()) {
Label &label = sym.data.get<Label>();
if (Section *section = label.section; section->modifier != SECTION_NORMAL) {
if (section->modifier == SECTION_FRAGMENT) {
// Add the fragment's offset to the symbol's (`section->offset` is computed by `sect_AddSection`)
label->offset += section->offset;
label.offset += section->offset;
}
// Associate the symbol with the main section, not the "component" one
label->section = sect_GetSection(section->name);
label.section = sect_GetSection(section->name);
}
}
}

View File

@@ -12,16 +12,6 @@
std::unordered_map<std::string, Symbol *> symbols;
Label &Symbol::label() {
assume(std::holds_alternative<Label>(data));
return std::get<Label>(data);
}
Label const &Symbol::label() const {
assume(std::holds_alternative<Label>(data));
return std::get<Label>(data);
}
void sym_ForEach(void (*callback)(Symbol &)) {
for (auto &it : symbols)
callback(*it.second);
@@ -29,8 +19,9 @@ void sym_ForEach(void (*callback)(Symbol &)) {
void sym_AddSymbol(Symbol &symbol) {
Symbol *other = sym_GetSymbol(symbol.name);
auto *symValue = std::get_if<int32_t>(&symbol.data);
auto *otherValue = other ? std::get_if<int32_t>(&other->data) : nullptr;
int32_t *symValue = symbol.data.holds<int32_t>() ? &symbol.data.get<int32_t>() : nullptr;
int32_t *otherValue =
other && other->data.holds<int32_t>() ? &other->data.get<int32_t>() : nullptr;
// Check if the symbol already exists with a different value
if (other && !(symValue && otherValue && *symValue == *otherValue)) {