Use a custom generic tagged union Either instead of std::variant for efficiency (#1476)

* Implement custom generic tagged union `Either`

This should be more efficient than `std::variant`, while still
keeping runtime safety as it `assert`s when `get`ting values.

* Use `Either` for RPN expressions

* Use `Either` for file stack node data

* Use `Either` for `File` buffer

* Use `Either` for `STRFMT` args

* Use `Either` for RGBLINK symbol values

* Support an equivalent of `std::monostate` for `Either`

* Use `Either` for lexer tokens

* Use `Either` for symbol values

* Use `Either` for lexer mmap/buffer state
This commit is contained in:
Sylvie
2024-08-20 15:19:11 -04:00
committed by GitHub
parent 7d98b9a900
commit 57c3d74b9e
17 changed files with 303 additions and 174 deletions

View File

@@ -10,16 +10,16 @@
#include <stdint.h>
#include <stdio.h>
#include <string>
#include <variant>
#include <vector>
#include "either.hpp"
#include "linkdefs.hpp"
#include "asm/lexer.hpp"
struct FileStackNode {
FileStackNodeType type;
std::variant<
Either<
std::vector<uint32_t>, // NODE_REPT
std::string // NODE_FILE, NODE_MACRO
>
@@ -34,13 +34,13 @@ struct FileStackNode {
uint32_t ID = -1;
// REPT iteration counts since last named node, in reverse depth order
std::vector<uint32_t> &iters();
std::vector<uint32_t> const &iters() const;
std::vector<uint32_t> &iters() { return data.get<std::vector<uint32_t>>(); }
std::vector<uint32_t> const &iters() const { return data.get<std::vector<uint32_t>>(); }
// File name for files, file::macro name for macros
std::string &name();
std::string const &name() const;
std::string &name() { return data.get<std::string>(); }
std::string const &name() const { return data.get<std::string>(); }
FileStackNode(FileStackNodeType type_, std::variant<std::vector<uint32_t>, std::string> data_)
FileStackNode(FileStackNodeType type_, Either<std::vector<uint32_t>, std::string> data_)
: type(type_), data(data_){};
std::string const &dump(uint32_t curLineNo) const;

View File

@@ -8,9 +8,9 @@
#include <optional>
#include <stdint.h>
#include <string>
#include <variant>
#include <vector>
#include "either.hpp"
#include "platform.hpp" // SSIZE_MAX
// This value is a compromise between `LexerState` allocation performance when `mmap` works, and
@@ -98,7 +98,7 @@ struct LexerState {
bool expandStrings;
std::deque<Expansion> expansions; // Front is the innermost current expansion
std::variant<std::monostate, ViewedContent, BufferedContent> content;
Either<ViewedContent, BufferedContent> content;
~LexerState();

View File

@@ -5,18 +5,19 @@
#include <stdint.h>
#include <string>
#include <variant>
#include <vector>
#include "either.hpp"
#include "linkdefs.hpp"
struct Symbol;
struct Expression {
std::variant<
int32_t, // If the expression's value is known, it's here
std::string // Why the expression is not known, if it isn't
> data = 0;
Either<
int32_t, // If the expression's value is known, it's here
std::string // Why the expression is not known, if it isn't
>
data = 0;
bool isSymbol = false; // Whether the expression represents a symbol suitable for const diffing
std::vector<uint8_t> rpn{}; // Bytes serializing the RPN expression
uint32_t rpnPatchSize = 0; // Size the expression will take in the object file
@@ -30,8 +31,12 @@ struct Expression {
Expression &operator=(Expression &&) = default;
bool isKnown() const { return std::holds_alternative<int32_t>(data); }
int32_t value() const;
bool isKnown() const {
return data.holds<int32_t>();
}
int32_t value() const {
return data.get<int32_t>();
}
int32_t getConstVal() const;
Symbol const *symbolOf() const;

176
include/either.hpp Normal file
View File

@@ -0,0 +1,176 @@
/* SPDX-License-Identifier: MIT */
#ifndef RGBDS_EITHER_HPP
#define RGBDS_EITHER_HPP
#include <type_traits>
#include <utility>
#include "helpers.hpp" // assume
template<typename T1, typename T2>
union Either {
typedef T1 type1;
typedef T2 type2;
private:
template<typename T, unsigned V>
struct Field {
constexpr static unsigned tag_value = V;
unsigned tag = tag_value;
T value;
Field() : value() {}
Field(T &value_) : value(value_) {}
Field(T const &value_) : value(value_) {}
Field(T &&value_) : value(std::move(value_)) {}
};
// The `_tag` unifies with the first `tag` member of each `struct`.
constexpr static unsigned nulltag = 0;
unsigned _tag = nulltag;
Field<T1, 1> _t1;
Field<T2, 2> _t2;
// Value accessors; the function parameters are dummies for overload resolution.
// Only used to implement `field()` below.
auto &pick(T1 *) { return _t1; }
auto const &pick(T1 *) const { return _t1; }
auto &pick(T2 *) { return _t2; }
auto const &pick(T2 *) const { return _t2; }
// Generic field accessors; for internal use only.
template<typename T>
auto &field() {
return pick((T *)nullptr);
}
template<typename T>
auto const &field() const {
return pick((T *)nullptr);
}
public:
// Equivalent of `std::monostate` for `std::variant`s.
Either() : _tag() {}
// These constructors cannot be generic over the value type, because that would prevent
// constructible values from being inferred, e.g. a `const char *` string literal for an
// `std::string` field value.
Either(T1 &value) : _t1(value) {}
Either(T2 &value) : _t2(value) {}
Either(T1 const &value) : _t1(value) {}
Either(T2 const &value) : _t2(value) {}
Either(T1 &&value) : _t1(std::move(value)) {}
Either(T2 &&value) : _t2(std::move(value)) {}
// Destructor manually calls the appropriate value destructor.
~Either() {
if (_tag == _t1.tag_value) {
_t1.value.~T1();
} else if (_tag == _t2.tag_value) {
_t2.value.~T2();
}
}
// Copy assignment operators for each possible value.
Either &operator=(T1 const &value) {
_t1.tag = _t1.tag_value;
new (&_t1.value) T1(value);
return *this;
}
Either &operator=(T2 const &value) {
_t2.tag = _t2.tag_value;
new (&_t2.value) T2(value);
return *this;
}
// Move assignment operators for each possible value.
Either &operator=(T1 &&value) {
_t1.tag = _t1.tag_value;
new (&_t1.value) T1(std::move(value));
return *this;
}
Either &operator=(T2 &&value) {
_t2.tag = _t2.tag_value;
new (&_t2.value) T2(std::move(value));
return *this;
}
// Copy assignment operator from another `Either`.
Either &operator=(Either other) {
if (other._tag == other._t1.tag_value) {
*this = other._t1.value;
} else if (other._tag == other._t2.tag_value) {
*this = other._t2.value;
} else {
_tag = nulltag;
}
return *this;
}
// Copy constructor from another `Either`; implemented in terms of value assignment operators.
Either(Either const &other) {
if (other._tag == other._t1.tag_value) {
*this = other._t1.value;
} else if (other._tag == other._t2.tag_value) {
*this = other._t2.value;
} else {
_tag = nulltag;
}
}
// Move constructor from another `Either`; implemented in terms of value assignment operators.
Either(Either &&other) {
if (other._tag == other._t1.tag_value) {
*this = std::move(other._t1.value);
} else if (other._tag == other._t2.tag_value) {
*this = std::move(other._t2.value);
} else {
_tag = nulltag;
}
}
// Equivalent of `.emplace<T>()` for `std::variant`s.
template<typename T, typename... Args>
void emplace(Args &&...args) {
this->~Either();
if constexpr (std::is_same_v<T, T1>) {
_t1.tag = _t1.tag_value;
new (&_t1.value) T1(std::forward<Args>(args)...);
} else if constexpr (std::is_same_v<T, T2>) {
_t2.tag = _t2.tag_value;
new (&_t2.value) T2(std::forward<Args>(args)...);
} else {
_tag = nulltag;
}
}
// Equivalent of `std::holds_alternative<std::monostate>()` for `std::variant`s.
bool empty() const { return _tag == nulltag; }
// Equivalent of `std::holds_alternative<T>()` for `std::variant`s.
template<typename T>
bool holds() const {
if constexpr (std::is_same_v<T, T1>) {
return _tag == _t1.tag_value;
} else if constexpr (std::is_same_v<T, T2>) {
return _tag == _t2.tag_value;
} else {
return false;
}
}
// Equivalent of `std::get<T>()` for `std::variant`s.
template<typename T>
auto &get() {
assume(holds<T>());
return field<T>().value;
}
template<typename T>
auto const &get() const {
assume(holds<T>());
return field<T>().value;
}
};
#endif // RGBDS_EITHER_HPP

View File

@@ -10,8 +10,8 @@
#include <streambuf>
#include <string.h>
#include <string>
#include <variant>
#include "either.hpp"
#include "helpers.hpp" // assume
#include "platform.hpp"
@@ -19,7 +19,7 @@
class File {
// Construct a `std::streambuf *` by default, since it's probably lighter than a `filebuf`.
std::variant<std::streambuf *, std::filebuf> _file;
Either<std::streambuf *, std::filebuf> _file;
public:
File() {}
@@ -31,7 +31,8 @@ public:
*/
File *open(std::string const &path, std::ios_base::openmode mode) {
if (path != "-") {
return _file.emplace<std::filebuf>().open(path, mode) ? this : nullptr;
_file.emplace<std::filebuf>();
return _file.get<std::filebuf>().open(path, mode) ? this : nullptr;
} else if (mode & std::ios_base::in) {
assume(!(mode & std::ios_base::out));
_file.emplace<std::streambuf *>(std::cin.rdbuf());
@@ -49,8 +50,8 @@ public:
return this;
}
std::streambuf &operator*() {
auto *file = std::get_if<std::filebuf>(&_file);
return file ? *file : *std::get<std::streambuf *>(_file);
return _file.holds<std::filebuf>() ? _file.get<std::filebuf>()
: *_file.get<std::streambuf *>();
}
std::streambuf const &operator*() const {
// The non-`const` version does not perform any modifications, so it's okay.
@@ -63,22 +64,23 @@ public:
}
File *close() {
if (auto *file = std::get_if<std::filebuf>(&_file); file) {
if (_file.holds<std::filebuf>()) {
// This is called by the destructor, and an explicit `close` shouldn't close twice.
std::filebuf fileBuf = std::move(_file.get<std::filebuf>());
_file.emplace<std::streambuf *>(nullptr);
if (file->close() != nullptr) {
if (fileBuf.close() != nullptr) {
return this;
}
} else if (std::get<std::streambuf *>(_file) != nullptr) {
} else if (_file.get<std::streambuf *>() != nullptr) {
return this;
}
return nullptr;
}
char const *c_str(std::string const &path) const {
return std::holds_alternative<std::filebuf>(_file) ? path.c_str()
: std::get<std::streambuf *>(_file) == std::cin.rdbuf() ? "<stdin>"
: "<stdout>";
return _file.holds<std::filebuf>() ? path.c_str()
: _file.get<std::streambuf *>() == std::cin.rdbuf() ? "<stdin>"
: "<stdout>";
}
};

View File

@@ -6,9 +6,9 @@
#include <stdint.h>
#include <stdio.h>
#include <string>
#include <variant>
#include <vector>
#include "either.hpp"
#include "linkdefs.hpp"
// Variables related to CLI options
@@ -38,8 +38,7 @@ extern bool disablePadding;
struct FileStackNode {
FileStackNodeType type;
std::variant<
std::monostate, // Default constructed; `.type` and `.data` must be set manually
Either<
std::vector<uint32_t>, // NODE_REPT
std::string // NODE_FILE, NODE_MACRO
>
@@ -50,11 +49,11 @@ struct FileStackNode {
uint32_t lineNo;
// REPT iteration counts since last named node, in reverse depth order
std::vector<uint32_t> &iters();
std::vector<uint32_t> const &iters() const;
std::vector<uint32_t> &iters() { return data.get<std::vector<uint32_t>>(); }
std::vector<uint32_t> const &iters() const { return data.get<std::vector<uint32_t>>(); }
// File name for files, file::macro name for macros
std::string &name();
std::string const &name() const;
std::string &name() { return data.get<std::string>(); }
std::string const &name() const { return data.get<std::string>(); }
std::string const &dump(uint32_t curLineNo) const;
};

View File

@@ -7,8 +7,8 @@
#include <stdint.h>
#include <string>
#include <variant>
#include "either.hpp"
#include "linkdefs.hpp"
struct FileStackNode;
@@ -28,14 +28,14 @@ struct Symbol {
char const *objFileName;
FileStackNode const *src;
int32_t lineNo;
std::variant<
Either<
int32_t, // Constants just have a numeric value
Label // Label values refer to an offset within a specific section
>
data;
Label &label();
Label const &label() const;
Label &label() { return data.get<Label>(); }
Label const &label() const { return data.get<Label>(); }
};
void sym_ForEach(void (*callback)(Symbol &));