mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-20 18:22:07 +00:00
Improve charmap structure with trie
The charmap previously used brute-force comparison to convert the strings in source files: every given string was compared against all of the strings in the charmap, which was very costly in huge projects. To improve this, I changed its structure into a trie, a data structure widely used in string processing. Conversion is now much faster than before.
This commit is contained in:
@@ -13,14 +13,25 @@
|
|||||||
|
|
||||||
#define MAXCHARMAPS 512
|
#define MAXCHARMAPS 512
|
||||||
#define CHARMAPLENGTH 16
|
#define CHARMAPLENGTH 16
|
||||||
|
#define MAXCHARNODES (MAXCHARMAPS * CHARMAPLENGTH + 1)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A node for trie structure.
|
||||||
|
*/
|
||||||
|
struct Charnode {
|
||||||
|
uint8_t code; /* the value in a key-value pair. */
|
||||||
|
uint8_t isCode; /* has one if it's a code node, not just a bridge node. */
|
||||||
|
struct Charnode *next[256]; /* each index representing the next possible character from its current state. */
|
||||||
|
};
|
||||||
|
|
||||||
struct Charmap {
|
struct Charmap {
|
||||||
int32_t count;
|
int32_t charCount; /* user-side count. */
|
||||||
char input[MAXCHARMAPS][CHARMAPLENGTH + 1];
|
int32_t nodeCount; /* node-side count. */
|
||||||
char output[MAXCHARMAPS];
|
struct Charnode nodes[MAXCHARNODES]; /* first node is reserved for the root node in charmap. */
|
||||||
};
|
};
|
||||||
|
|
||||||
int32_t readUTF8Char(char *destination, char *source);
|
int32_t readUTF8Char(char *destination, char *source);
|
||||||
|
|
||||||
int32_t charmap_Add(char *input, uint8_t output);
|
int32_t charmap_Add(char *input, uint8_t output);
|
||||||
int32_t charmap_Convert(char **input);
|
int32_t charmap_Convert(char **input);
|
||||||
|
|
||||||
|
|||||||
@@ -42,11 +42,10 @@ int32_t readUTF8Char(char *dest, char *src)
|
|||||||
int32_t charmap_Add(char *input, uint8_t output)
|
int32_t charmap_Add(char *input, uint8_t output)
|
||||||
{
|
{
|
||||||
int32_t i;
|
int32_t i;
|
||||||
size_t input_length;
|
uint8_t v;
|
||||||
char temp1i[CHARMAPLENGTH + 1], temp2i[CHARMAPLENGTH + 1];
|
|
||||||
char temp1o = 0, temp2o = 0;
|
|
||||||
|
|
||||||
struct Charmap *charmap;
|
struct Charmap *charmap;
|
||||||
|
struct Charnode *curr_node, *temp_node;
|
||||||
|
|
||||||
if (pCurrentSection) {
|
if (pCurrentSection) {
|
||||||
if (pCurrentSection->charmap) {
|
if (pCurrentSection->charmap) {
|
||||||
@@ -55,91 +54,109 @@ int32_t charmap_Add(char *input, uint8_t output)
|
|||||||
charmap = calloc(1, sizeof(struct Charmap));
|
charmap = calloc(1, sizeof(struct Charmap));
|
||||||
if (charmap == NULL)
|
if (charmap == NULL)
|
||||||
fatalerror("Not enough memory for charmap");
|
fatalerror("Not enough memory for charmap");
|
||||||
|
|
||||||
pCurrentSection->charmap = charmap;
|
pCurrentSection->charmap = charmap;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
charmap = &globalCharmap;
|
charmap = &globalCharmap;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (charmap->count > MAXCHARMAPS || strlen(input) > CHARMAPLENGTH)
|
if (charmap->charCount >= MAXCHARMAPS || strlen(input) > CHARMAPLENGTH)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
input_length = strlen(input);
|
curr_node = &charmap->nodes[0];
|
||||||
if (input_length > 1) {
|
|
||||||
i = 0;
|
for (i = 0; (v = (uint8_t)input[i]); i++) {
|
||||||
while (i < charmap->count + 1) {
|
if (curr_node->next[v]) {
|
||||||
if (input_length > strlen(charmap->input[i])) {
|
curr_node = curr_node->next[v];
|
||||||
memcpy(temp1i, charmap->input[i],
|
|
||||||
CHARMAPLENGTH + 1);
|
|
||||||
memcpy(charmap->input[i], input, input_length);
|
|
||||||
temp1o = charmap->output[i];
|
|
||||||
charmap->output[i] = output;
|
|
||||||
i++;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
while (i < charmap->count + 1) {
|
|
||||||
memcpy(temp2i, charmap->input[i], CHARMAPLENGTH + 1);
|
|
||||||
memcpy(charmap->input[i], temp1i, CHARMAPLENGTH + 1);
|
|
||||||
memcpy(temp1i, temp2i, CHARMAPLENGTH + 1);
|
|
||||||
temp2o = charmap->output[i];
|
|
||||||
charmap->output[i] = temp1o;
|
|
||||||
temp1o = temp2o;
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
memcpy(charmap->input[charmap->count + 1], temp1i,
|
|
||||||
CHARMAPLENGTH + 1);
|
|
||||||
charmap->output[charmap->count + 1] = temp1o;
|
|
||||||
} else {
|
} else {
|
||||||
memcpy(charmap->input[charmap->count], input, input_length);
|
temp_node = &charmap->nodes[charmap->nodeCount + 1];
|
||||||
charmap->output[charmap->count] = output;
|
|
||||||
|
curr_node->next[v] = temp_node;
|
||||||
|
curr_node = temp_node;
|
||||||
|
|
||||||
|
++charmap->nodeCount;
|
||||||
}
|
}
|
||||||
return ++charmap->count;
|
}
|
||||||
|
|
||||||
|
/* prevent duplicated keys by accepting only first key-value pair. */
|
||||||
|
if (curr_node->isCode)
|
||||||
|
return charmap->charCount;
|
||||||
|
|
||||||
|
curr_node->code = output;
|
||||||
|
curr_node->isCode = 1;
|
||||||
|
|
||||||
|
return ++charmap->charCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t charmap_Convert(char **input)
|
int32_t charmap_Convert(char **input)
|
||||||
{
|
{
|
||||||
struct Charmap *charmap;
|
struct Charmap *charmap;
|
||||||
|
struct Charnode *charnode;
|
||||||
|
|
||||||
char outchar[CHARMAPLENGTH + 1];
|
char *output;
|
||||||
char *buffer;
|
char outchar[8];
|
||||||
int32_t i, j, length;
|
|
||||||
|
int32_t i, match, length;
|
||||||
|
uint8_t v, foundCode;
|
||||||
|
|
||||||
if (pCurrentSection && pCurrentSection->charmap)
|
if (pCurrentSection && pCurrentSection->charmap)
|
||||||
charmap = pCurrentSection->charmap;
|
charmap = pCurrentSection->charmap;
|
||||||
else
|
else
|
||||||
charmap = &globalCharmap;
|
charmap = &globalCharmap;
|
||||||
|
|
||||||
buffer = malloc(strlen(*input));
|
output = malloc(strlen(*input));
|
||||||
if (buffer == NULL)
|
if (output == NULL)
|
||||||
fatalerror("Not enough memory for buffer");
|
fatalerror("Not enough memory for buffer");
|
||||||
|
|
||||||
length = 0;
|
length = 0;
|
||||||
|
|
||||||
while (**input) {
|
while (**input) {
|
||||||
j = 0;
|
charnode = &charmap->nodes[0];
|
||||||
for (i = 0; i < charmap->count; i++) {
|
|
||||||
j = strlen(charmap->input[i]);
|
/*
|
||||||
if (memcmp(*input, charmap->input[i], j) == 0) {
|
* find the longest valid match which has been registered in charmap.
|
||||||
outchar[0] = charmap->output[i];
|
* note that there could be either multiple matches or no match.
|
||||||
outchar[1] = 0;
|
* and it possibly takes the longest match between them,
|
||||||
|
* which means that it ignores partial matches shorter than the longest one.
|
||||||
|
*/
|
||||||
|
for (i = match = 0; (v = (*input)[i]);) {
|
||||||
|
if (!charnode->next[v])
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
charnode = charnode->next[v];
|
||||||
|
i++;
|
||||||
|
|
||||||
|
if (charnode->isCode) {
|
||||||
|
match = i;
|
||||||
|
foundCode = charnode->code;
|
||||||
}
|
}
|
||||||
j = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!j)
|
if (match) {
|
||||||
j = readUTF8Char(outchar, *input);
|
output[length] = foundCode;
|
||||||
|
|
||||||
if (!outchar[0]) {
|
length += 1;
|
||||||
buffer[length++] = 0;
|
|
||||||
} else {
|
} else {
|
||||||
for (i = 0; outchar[i]; i++)
|
/*
|
||||||
buffer[length++] = outchar[i];
|
* put a utf-8 character
|
||||||
|
* if failed to find a match.
|
||||||
|
*/
|
||||||
|
match = readUTF8Char(outchar, *input);
|
||||||
|
|
||||||
|
if (match) {
|
||||||
|
memcpy(output + length, *input, match);
|
||||||
|
} else {
|
||||||
|
output[length] = 0;
|
||||||
|
match = 1;
|
||||||
}
|
}
|
||||||
*input += j;
|
|
||||||
|
length += match;
|
||||||
}
|
}
|
||||||
*input = buffer;
|
|
||||||
|
*input += match;
|
||||||
|
}
|
||||||
|
|
||||||
|
*input = output;
|
||||||
|
|
||||||
return length;
|
return length;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user