Replace charmaps' fixed-size 256-value array with a vector of pairs

This should save memory at the cost of O(n) instead of O(1) access,
which should be okay because the `next` vector is generally small.
This commit is contained in:
Rangi
2026-04-16 18:51:23 -04:00
committed by Rangi
parent ac2a022124
commit eb9e9c0f33
+19 -14
View File
@@ -2,6 +2,7 @@
#include "asm/charmap.hpp" #include "asm/charmap.hpp"
#include <algorithm>
#include <map> #include <map>
#include <optional> #include <optional>
#include <stack> #include <stack>
@@ -27,9 +28,17 @@
struct CharmapNode { struct CharmapNode {
std::vector<int32_t> value; // The mapped value, if there exists a mapping that ends here std::vector<int32_t> value; // The mapped value, if there exists a mapping that ends here
// These MUST be indexes and not pointers, because pointers get invalidated by reallocation! // These MUST be indexes and not pointers, because pointers get invalidated by reallocation!
size_t next[256]; // Indexes of where to go next, 0 = nowhere std::vector<std::pair<char, size_t>> next; // Indexes of where to go next, must be nonzero
bool isTerminal() const { return !value.empty(); } bool isTerminal() const { return !value.empty(); }
size_t nextIndex(char c) const {
auto nextPair = std::find_if(RANGE(next), [c](std::pair<char, size_t> pair) {
return pair.first == c;
});
assume(nextPair == next.end() || nextPair->second != 0);
return nextPair != next.end() ? nextPair->second : 0;
}
}; };
struct Charmap { struct Charmap {
@@ -49,10 +58,9 @@ bool forEachChar(Charmap const &charmap, CallbackFnT callback) {
if (node.isTerminal() && !callback(nodeIdx, mapping)) { if (node.isTerminal() && !callback(nodeIdx, mapping)) {
return false; return false;
} }
for (unsigned c = 0; c < std::size(node.next); ++c) { for (auto const &[c, nextIdx] : node.next) {
if (size_t nextIdx = node.next[c]; nextIdx) { assume(nextIdx);
prefixes.push({nextIdx, mapping + static_cast<char>(c)}); prefixes.push({nextIdx, mapping + c});
}
} }
} }
return true; return true;
@@ -147,15 +155,12 @@ void charmap_Add(std::string const &mapping, std::vector<int32_t> &&value) {
size_t nodeIdx = 0; size_t nodeIdx = 0;
for (char c : mapping) { for (char c : mapping) {
size_t &nextIdxRef = charmap.nodes[nodeIdx].next[static_cast<uint8_t>(c)]; size_t nextIdx = charmap.nodes[nodeIdx].nextIndex(c);
size_t nextIdx = nextIdxRef;
if (!nextIdx) { if (!nextIdx) {
// Switch to and zero-init the new node // Switch to and zero-init the new node
nextIdxRef = charmap.nodes.size(); nextIdx = charmap.nodes.size();
nextIdx = nextIdxRef; charmap.nodes[nodeIdx].next.emplace_back(c, nextIdx);
// This may reallocate `charmap.nodes` and invalidate `nextIdxRef`,
// which is why we keep the actual value in `nextIdx`
charmap.nodes.emplace_back(); charmap.nodes.emplace_back();
} }
@@ -176,7 +181,7 @@ bool charmap_HasChar(std::string const &mapping) {
size_t nodeIdx = 0; size_t nodeIdx = 0;
for (char c : mapping) { for (char c : mapping) {
nodeIdx = charmap.nodes[nodeIdx].next[static_cast<uint8_t>(c)]; nodeIdx = charmap.nodes[nodeIdx].nextIndex(c);
if (!nodeIdx) { if (!nodeIdx) {
return false; return false;
@@ -191,7 +196,7 @@ static CharmapNode const *charmapEntry(std::string const &mapping) {
size_t nodeIdx = 0; size_t nodeIdx = 0;
for (char c : mapping) { for (char c : mapping) {
nodeIdx = charmap.nodes[nodeIdx].next[static_cast<uint8_t>(c)]; nodeIdx = charmap.nodes[nodeIdx].nextIndex(c);
if (!nodeIdx) { if (!nodeIdx) {
return nullptr; return nullptr;
@@ -231,7 +236,7 @@ size_t charmap_ConvertNext(std::string_view &input, std::vector<int32_t> *output
size_t inputIdx = 0; size_t inputIdx = 0;
for (size_t nodeIdx = 0; inputIdx < input.length();) { for (size_t nodeIdx = 0; inputIdx < input.length();) {
nodeIdx = charmap.nodes[nodeIdx].next[static_cast<uint8_t>(input[inputIdx])]; nodeIdx = charmap.nodes[nodeIdx].nextIndex(input[inputIdx]);
if (!nodeIdx) { if (!nodeIdx) {
break; break;