Use automatic allocation for charmaps

This commit is contained in:
Rangi42
2024-02-22 19:50:28 -05:00
committed by Sylvie
parent 1b8e588961
commit a4ed7e1d18
3 changed files with 93 additions and 135 deletions

View File

@@ -7,8 +7,7 @@
#define DEFAULT_CHARMAP_NAME "main" #define DEFAULT_CHARMAP_NAME "main"
struct Charmap *charmap_New(char const *name, char const *baseName); void charmap_New(char const *name, char const *baseName);
void charmap_Cleanup(void);
void charmap_Set(char const *name); void charmap_Set(char const *name);
void charmap_Push(void); void charmap_Push(void);
void charmap_Pop(void); void charmap_Pop(void);

View File

@@ -21,7 +21,7 @@
// Charmaps are stored using a structure known as a "trie". // Charmaps are stored using a structure known as a "trie".
// Essentially a tree, where each node stores a single character's worth of info: // Essentially a tree, where each node stores a single character's worth of info:
// whether there exists a mapping that ends at the current character, // whether there exists a mapping that ends at the current character,
struct Charnode { struct CharmapNode {
bool isTerminal; // Whether there exists a mapping that ends here bool isTerminal; // Whether there exists a mapping that ends here
uint8_t value; // If the above is true, its corresponding value uint8_t value; // If the above is true, its corresponding value
// These MUST be indexes and not pointers, because pointers get invalidated by `realloc`! // These MUST be indexes and not pointers, because pointers get invalidated by `realloc`!
@@ -29,78 +29,43 @@ struct Charnode {
}; };
struct Charmap { struct Charmap {
char *name; std::string name;
std::vector<struct Charnode> *nodes; // first node is reserved for the root node std::vector<struct CharmapNode> nodes; // first node is reserved for the root node
}; };
static std::map<std::string, struct Charmap *> charmaps; static std::map<std::string, struct Charmap> charmaps;
// Store pointers to `charmaps` values, so that there is only one pointer to the memory block static struct Charmap *currentCharmap;
// that gets reallocated. std::stack<struct Charmap *> charmapStack;
static struct Charmap **currentCharmap;
std::stack<struct Charmap **> charmapStack; void charmap_New(char const *name, char const *baseName)
static struct Charmap *charmap_Get(char const *name)
{
auto search = charmaps.find(name);
return search != charmaps.end() ? search->second : NULL;
}
static void initNode(struct Charnode *node)
{
node->isTerminal = false;
memset(node->next, 0, sizeof(node->next));
}
struct Charmap *charmap_New(char const *name, char const *baseName)
{ {
struct Charmap *base = NULL; struct Charmap *base = NULL;
if (baseName != NULL) { if (baseName != NULL) {
base = charmap_Get(baseName); auto search = charmaps.find(baseName);
if (base == NULL) if (search == charmaps.end())
error("Base charmap '%s' doesn't exist\n", baseName); error("Base charmap '%s' doesn't exist\n", baseName);
else
base = &search->second;
} }
struct Charmap *charmap = charmap_Get(name); if (charmaps.find(name) != charmaps.end()) {
if (charmap) {
error("Charmap '%s' already exists\n", name); error("Charmap '%s' already exists\n", name);
return charmap; return;
} }
// Init the new charmap's fields // Init the new charmap's fields
charmap = (struct Charmap *)malloc(sizeof(*charmap)); struct Charmap &charmap = charmaps[name];
if (charmap)
charmap->nodes = new(std::nothrow) std::vector<struct Charnode>();
if (!charmap || !charmap->nodes)
fatalerror("Failed to create charmap: %s\n", strerror(errno));
if (base) { if (base)
*charmap->nodes = *base->nodes; // Copies `base->nodes` charmap.nodes = base->nodes; // Copies `base->nodes`
} else { else
charmap->nodes->emplace_back(); charmap.nodes.emplace_back(); // Zero-init the root node
initNode(&charmap->nodes->back()); // Init the root node charmap.name = name;
}
charmap->name = strdup(name);
charmaps[charmap->name] = charmap; currentCharmap = &charmap;
currentCharmap = &charmaps[charmap->name];
return charmap;
}
void charmap_Cleanup(void)
{
for (auto &it : charmaps) {
struct Charmap *charmap = it.second;
free(charmap->name);
free(charmap);
}
charmaps.clear();
} }
void charmap_Set(char const *name) void charmap_Set(char const *name)
@@ -131,45 +96,47 @@ void charmap_Pop(void)
void charmap_Add(char *mapping, uint8_t value) void charmap_Add(char *mapping, uint8_t value)
{ {
struct Charmap *charmap = *currentCharmap; struct Charmap &charmap = *currentCharmap;
struct Charnode *node = &charmap->nodes->front(); size_t nodeIdx = 0;
for (uint8_t c; *mapping; mapping++) { for (; *mapping; mapping++) {
c = *mapping - 1; size_t &nextIdxRef = charmap.nodes[nodeIdx].next[(uint8_t)*mapping - 1];
size_t nextIdx = nextIdxRef;
if (node->next[c]) { if (!nextIdx) {
node = &(*charmap->nodes)[node->next[c]]; // Switch to and zero-init the new node
} else { nextIdxRef = charmap.nodes.size();
// Register next available node nextIdx = nextIdxRef;
node->next[c] = charmap->nodes->size(); // This may reallocate `charmap.nodes` and invalidate `nextIdxRef`,
// which is why we keep the actual value in `nextIdx`
// Switch to and init new node charmap.nodes.emplace_back();
node = &charmap->nodes->emplace_back();
initNode(node);
}
} }
if (node->isTerminal) nodeIdx = nextIdx;
}
struct CharmapNode &node = charmap.nodes[nodeIdx];
if (node.isTerminal)
warning(WARNING_CHARMAP_REDEF, "Overriding charmap mapping\n"); warning(WARNING_CHARMAP_REDEF, "Overriding charmap mapping\n");
node->isTerminal = true; node.isTerminal = true;
node->value = value; node.value = value;
} }
bool charmap_HasChar(char const *input) bool charmap_HasChar(char const *input)
{ {
struct Charmap const *charmap = *currentCharmap; struct Charmap const &charmap = *currentCharmap;
struct Charnode const *node = &charmap->nodes->front(); size_t nodeIdx = 0;
for (; *input; input++) { for (; *input; input++) {
size_t next = node->next[(uint8_t)*input - 1]; nodeIdx = charmap.nodes[nodeIdx].next[(uint8_t)*input - 1];
if (!next) if (!nodeIdx)
return false; return false;
node = &(*charmap->nodes)[next];
} }
return node->isTerminal; return charmap.nodes[nodeIdx].isTerminal;
} }
size_t charmap_Convert(char const *input, uint8_t *output) size_t charmap_Convert(char const *input, uint8_t *output)
@@ -188,35 +155,33 @@ size_t charmap_ConvertNext(char const **input, uint8_t **output)
// For that, advance through the trie with each character read. // For that, advance through the trie with each character read.
// If that would lead to a dead end, rewind characters until the last match, and output. // If that would lead to a dead end, rewind characters until the last match, and output.
// If no match, read a UTF-8 codepoint and output that. // If no match, read a UTF-8 codepoint and output that.
struct Charmap const *charmap = *currentCharmap; struct Charmap const &charmap = *currentCharmap;
struct Charnode const *node = &charmap->nodes->front(); size_t matchIdx = 0;
struct Charnode const *match = NULL;
size_t rewindDistance = 0; size_t rewindDistance = 0;
for (;;) { for (size_t nodeIdx = 0; **input;) {
uint8_t c = (uint8_t)**input - 1; nodeIdx = charmap.nodes[nodeIdx].next[(uint8_t)**input - 1];
if (**input && node->next[c]) { if (!nodeIdx)
// Consume that char break;
(*input)++;
rewindDistance++;
// Advance to next node (index starts at 1) (*input)++; // Consume that char
node = &(*charmap->nodes)[node->next[c]];
if (node->isTerminal) { if (charmap.nodes[nodeIdx].isTerminal) {
// This node matches, register it matchIdx = nodeIdx; // This node matches, register it
match = node;
rewindDistance = 0; // If no longer match is found, rewind here rewindDistance = 0; // If no longer match is found, rewind here
} else {
rewindDistance++;
}
} }
} else { // We are at a dead end (either because we reached the end of input, or of the trie),
// We are at a dead end (either because we reached the end of input, or of // so rewind up to the last match, and output.
// the trie), so rewind up to the last match, and output.
*input -= rewindDistance; // This will rewind all the way if no match found *input -= rewindDistance; // This will rewind all the way if no match found
if (match) { // A match was found, use it if (matchIdx) { // A match was found, use it
if (output) if (output)
*(*output)++ = match->value; *(*output)++ = charmap.nodes[matchIdx].value;
return 1; return 1;
@@ -234,13 +199,12 @@ size_t charmap_ConvertNext(char const **input, uint8_t **output)
*output += codepointLen; *output += codepointLen;
// Warn if this character is not mapped but any others are // Warn if this character is not mapped but any others are
if (charmap->nodes->size() > 1) if (charmap.nodes.size() > 1)
warning(WARNING_UNMAPPED_CHAR_1, warning(WARNING_UNMAPPED_CHAR_1, "Unmapped character %s\n",
"Unmapped character %s\n", printChar(firstChar)); printChar(firstChar));
else if (strcmp(charmap->name, DEFAULT_CHARMAP_NAME)) else if (charmap.name != DEFAULT_CHARMAP_NAME)
warning(WARNING_UNMAPPED_CHAR_2, warning(WARNING_UNMAPPED_CHAR_2, "Unmapped character %s not in "
"Unmapped character %s not in " DEFAULT_CHARMAP_NAME DEFAULT_CHARMAP_NAME " charmap\n", printChar(firstChar));
" charmap\n", printChar(firstChar));
return codepointLen; return codepointLen;
@@ -248,5 +212,3 @@ size_t charmap_ConvertNext(char const **input, uint8_t **output)
return 0; return 0;
} }
} }
}
}

View File

@@ -412,9 +412,6 @@ int main(int argc, char *argv[])
if (yyparse() != 0 && nbErrors == 0) if (yyparse() != 0 && nbErrors == 0)
nbErrors = 1; nbErrors = 1;
// Free all charmaps (they're not needed after parsing)
charmap_Cleanup();
if (dependfile) if (dependfile)
fclose(dependfile); fclose(dependfile);
free(targetFileName); free(targetFileName);