Use a custom generic tagged union Either instead of std::variant for efficiency (#1476)

* Implement custom generic tagged union `Either`

This should be more efficient than `std::variant`, while still
keeping runtime safety as it `assert`s when `get`ting values.

* Use `Either` for RPN expressions

* Use `Either` for file stack node data

* Use `Either` for `File` buffer

* Use `Either` for `STRFMT` args

* Use `Either` for RGBLINK symbol values

* Support an equivalent of `std::monostate` for `Either`

* Use `Either` for lexer tokens

* Use `Either` for symbol values

* Use `Either` for lexer mmap/buffer state
This commit is contained in:
Sylvie
2024-08-20 15:19:11 -04:00
committed by GitHub
parent 7d98b9a900
commit 57c3d74b9e
17 changed files with 303 additions and 174 deletions

View File

@@ -106,10 +106,10 @@ using namespace std::literals;
struct Token {
int type;
std::variant<std::monostate, uint32_t, std::string> value;
Either<uint32_t, std::string> value;
Token() : type(T_(NUMBER)), value(std::monostate{}) {}
Token(int type_) : type(type_), value(std::monostate{}) {}
Token() : type(T_(NUMBER)), value() {}
Token(int type_) : type(type_), value() {}
Token(int type_, uint32_t value_) : type(type_), value(value_) {}
Token(int type_, std::string const &value_) : type(type_), value(value_) {}
Token(int type_, std::string &&value_) : type(type_), value(value_) {}
@@ -464,8 +464,8 @@ void LexerState::setViewAsNextState(char const *name, ContentSpan const &span, u
}
void lexer_RestartRept(uint32_t lineNo) {
if (auto *view = std::get_if<ViewedContent>(&lexerState->content); view) {
view->offset = 0;
if (lexerState->content.holds<ViewedContent>()) {
lexerState->content.get<ViewedContent>().offset = 0;
}
lexerState->clear(lineNo);
}
@@ -696,12 +696,12 @@ int LexerState::peekChar() {
return (uint8_t)(*exp.contents)[exp.offset];
}
if (auto *view = std::get_if<ViewedContent>(&content); view) {
if (view->offset < view->span.size)
return (uint8_t)view->span.ptr[view->offset];
if (content.holds<ViewedContent>()) {
auto &view = content.get<ViewedContent>();
if (view.offset < view.span.size)
return (uint8_t)view.span.ptr[view.offset];
} else {
assume(std::holds_alternative<BufferedContent>(content));
auto &cbuf = std::get<BufferedContent>(content);
auto &cbuf = content.get<BufferedContent>();
if (cbuf.size == 0)
cbuf.refill();
assume(cbuf.offset < LEXER_BUF_SIZE);
@@ -726,12 +726,12 @@ int LexerState::peekCharAhead() {
distance -= exp.size() - exp.offset;
}
if (auto *view = std::get_if<ViewedContent>(&content); view) {
if (view->offset + distance < view->span.size)
return (uint8_t)view->span.ptr[view->offset + distance];
if (content.holds<ViewedContent>()) {
auto &view = content.get<ViewedContent>();
if (view.offset + distance < view.span.size)
return (uint8_t)view.span.ptr[view.offset + distance];
} else {
assume(std::holds_alternative<BufferedContent>(content));
auto &cbuf = std::get<BufferedContent>(content);
auto &cbuf = content.get<BufferedContent>();
assume(distance < LEXER_BUF_SIZE);
if (cbuf.size <= distance)
cbuf.refill();
@@ -815,12 +815,10 @@ restart:
} else {
// Advance within the file contents
lexerState->colNo++;
if (auto *view = std::get_if<ViewedContent>(&lexerState->content); view) {
view->offset++;
if (lexerState->content.holds<ViewedContent>()) {
lexerState->content.get<ViewedContent>().offset++;
} else {
assume(std::holds_alternative<BufferedContent>(lexerState->content));
auto &cbuf = std::get<BufferedContent>(lexerState->content);
cbuf.advance();
lexerState->content.get<BufferedContent>().advance();
}
}
}
@@ -1796,12 +1794,12 @@ static Token yylex_NORMAL() {
return token;
// `token` is either an `ID` or a `LOCAL_ID`, and both have a `std::string` value.
assume(std::holds_alternative<std::string>(token.value));
assume(token.value.holds<std::string>());
// Local symbols cannot be string expansions
if (token.type == T_(ID) && lexerState->expandStrings) {
// Attempt string expansion
Symbol const *sym = sym_FindExactSymbol(std::get<std::string>(token.value));
Symbol const *sym = sym_FindExactSymbol(token.value.get<std::string>());
if (sym && sym->type == SYM_EQUS) {
std::shared_ptr<std::string> str = sym->getEqus();
@@ -2176,17 +2174,22 @@ yy::parser::symbol_type yylex() {
Token token = lexerModeFuncs[lexerState->mode]();
// Captures end at their buffer's boundary no matter what
if (token.type == T_(YYEOF) && !lexerState->capturing)
token = Token(T_(EOB));
if (token.type == T_(YYEOF) && !lexerState->capturing) {
// Doing `token = Token(T_(EOB));` here would be valid but redundant, because YYEOF and EOB
// both have the same empty value. Furthermore, g++ 11.4.0 was giving a false-positive
// '-Wmaybe-uninitialized' warning for `Token::value.Either<...>::_tag` that way.
// (This was on a developer's local machine; GitHub Actions CI's g++ was not warning.)
token.type = T_(EOB);
}
lexerState->lastToken = token.type;
lexerState->atLineStart = token.type == T_(NEWLINE) || token.type == T_(EOB);
if (auto *numValue = std::get_if<uint32_t>(&token.value); numValue) {
return yy::parser::symbol_type(token.type, *numValue);
} else if (auto *strValue = std::get_if<std::string>(&token.value); strValue) {
return yy::parser::symbol_type(token.type, *strValue);
if (token.value.holds<uint32_t>()) {
return yy::parser::symbol_type(token.type, token.value.get<uint32_t>());
} else if (token.value.holds<std::string>()) {
return yy::parser::symbol_type(token.type, token.value.get<std::string>());
} else {
assume(std::holds_alternative<std::monostate>(token.value));
assume(token.value.empty());
return yy::parser::symbol_type(token.type);
}
}
@@ -2202,10 +2205,10 @@ static Capture startCapture() {
lexerState->captureSize = 0;
uint32_t lineNo = lexer_GetLineNo();
if (auto *view = std::get_if<ViewedContent>(&lexerState->content);
view && lexerState->expansions.empty()) {
if (lexerState->content.holds<ViewedContent>() && lexerState->expansions.empty()) {
auto &view = lexerState->content.get<ViewedContent>();
return {
.lineNo = lineNo, .span = {.ptr = view->makeSharedContentPtr(), .size = 0}
.lineNo = lineNo, .span = {.ptr = view.makeSharedContentPtr(), .size = 0}
};
} else {
assume(lexerState->captureBuf == nullptr);