mirror of
https://github.com/gbdev/rgbds.git
synced 2026-05-08 10:59:36 +00:00
Keep trie edges sorted for O(log N) lookup
This commit is contained in:
+31
-22
@@ -22,28 +22,46 @@
|
|||||||
|
|
||||||
#include "asm/warning.hpp"
|
#include "asm/warning.hpp"
|
||||||
|
|
||||||
// Charmaps are stored using a structure known as "trie".
|
static bool compareNode(std::pair<char, size_t> edge, char c) {
|
||||||
// Essentially a tree, where each node stores a single character's worth of info:
|
return edge.first < c;
|
||||||
// whether there exists a mapping that ends at the current character,
|
}
|
||||||
|
|
||||||
struct CharmapNode {
|
struct CharmapNode {
|
||||||
std::vector<int32_t> value; // The mapped value, if there exists a mapping that ends here
|
// The mapped value, if there exists a mapping that ends here; empty for non-terminal nodes.
|
||||||
|
std::vector<int32_t> value;
|
||||||
|
// Trie edges, pairing each next character with an index into the parent `Charmap`'s `nodes`.
|
||||||
|
// Sorted by character. Indexes must be nonzero.
|
||||||
// These MUST be indexes and not pointers, because pointers get invalidated by reallocation!
|
// These MUST be indexes and not pointers, because pointers get invalidated by reallocation!
|
||||||
std::vector<std::pair<char, size_t>> next; // Indexes of where to go next, must be nonzero
|
std::vector<std::pair<char, size_t>> children;
|
||||||
|
|
||||||
bool isTerminal() const { return !value.empty(); }
|
bool isTerminal() const { return !value.empty(); }
|
||||||
|
|
||||||
size_t nextIndex(char c) const {
|
size_t nextIndex(char c) const {
|
||||||
auto nextPair = std::find_if(RANGE(next), [c](std::pair<char, size_t> pair) {
|
if (auto pos = std::lower_bound(RANGE(children), c, compareNode);
|
||||||
return pair.first == c;
|
pos != children.end() && pos->first == c) {
|
||||||
});
|
assume(pos->second != 0);
|
||||||
assume(nextPair == next.end() || nextPair->second != 0);
|
return pos->second;
|
||||||
return nextPair != next.end() ? nextPair->second : 0;
|
}
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Charmap {
|
struct Charmap {
|
||||||
std::string name;
|
std::string name;
|
||||||
std::vector<CharmapNode> nodes; // first node is reserved for the root node
|
std::vector<CharmapNode> nodes; // Trie of mappings (first node is reserved for the root node)
|
||||||
|
|
||||||
|
size_t nextIndexOrAdd(size_t nodeIdx, char c) {
|
||||||
|
std::vector<std::pair<char, size_t>> &children = nodes[nodeIdx].children;
|
||||||
|
if (auto pos = std::lower_bound(RANGE(children), c, compareNode);
|
||||||
|
pos != children.end() && pos->first == c) {
|
||||||
|
assume(pos->second != 0);
|
||||||
|
return pos->second;
|
||||||
|
} else {
|
||||||
|
auto nextIdx = children.emplace(pos, c, nodes.size());
|
||||||
|
nodes.emplace_back();
|
||||||
|
return nextIdx->second;
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Traverse the trie depth-first to derive the character mappings in definition order
|
// Traverse the trie depth-first to derive the character mappings in definition order
|
||||||
@@ -58,7 +76,7 @@ bool forEachChar(Charmap const &charmap, CallbackFnT callback) {
|
|||||||
if (node.isTerminal() && !callback(nodeIdx, mapping)) {
|
if (node.isTerminal() && !callback(nodeIdx, mapping)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (auto const &[c, nextIdx] : node.next) {
|
for (auto const &[c, nextIdx] : node.children) {
|
||||||
assume(nextIdx);
|
assume(nextIdx);
|
||||||
prefixes.push({nextIdx, mapping + c});
|
prefixes.push({nextIdx, mapping + c});
|
||||||
}
|
}
|
||||||
@@ -155,16 +173,7 @@ void charmap_Add(std::string const &mapping, std::vector<int32_t> &&value) {
|
|||||||
size_t nodeIdx = 0;
|
size_t nodeIdx = 0;
|
||||||
|
|
||||||
for (char c : mapping) {
|
for (char c : mapping) {
|
||||||
size_t nextIdx = charmap.nodes[nodeIdx].nextIndex(c);
|
nodeIdx = charmap.nextIndexOrAdd(nodeIdx, c);
|
||||||
|
|
||||||
if (!nextIdx) {
|
|
||||||
// Switch to and zero-init the new node
|
|
||||||
nextIdx = charmap.nodes.size();
|
|
||||||
charmap.nodes[nodeIdx].next.emplace_back(c, nextIdx);
|
|
||||||
charmap.nodes.emplace_back();
|
|
||||||
}
|
|
||||||
|
|
||||||
nodeIdx = nextIdx;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
CharmapNode &node = charmap.nodes[nodeIdx];
|
CharmapNode &node = charmap.nodes[nodeIdx];
|
||||||
|
|||||||
Reference in New Issue
Block a user