mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-21 02:32:06 +00:00
Merge pull request #360 from jidoc01/master
Improve charmap structure with trie.
This commit is contained in:
@@ -13,14 +13,25 @@
|
|||||||
|
|
||||||
#define MAXCHARMAPS 512
|
#define MAXCHARMAPS 512
|
||||||
#define CHARMAPLENGTH 16
|
#define CHARMAPLENGTH 16
|
||||||
|
#define MAXCHARNODES (MAXCHARMAPS * CHARMAPLENGTH + 1)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A node of the trie structure.
|
||||||
|
*/
|
||||||
|
struct Charnode {
|
||||||
|
uint8_t code; /* the value in a key-value pair. */
|
||||||
|
uint8_t isCode; /* has one if it's a code node, not just a bridge node. */
|
||||||
|
struct Charnode *next[256]; /* each index representing the next possible character from its current state. */
|
||||||
|
};
|
||||||
|
|
||||||
struct Charmap {
|
struct Charmap {
|
||||||
int32_t count;
|
int32_t charCount; /* user-side count. */
|
||||||
char input[MAXCHARMAPS][CHARMAPLENGTH + 1];
|
int32_t nodeCount; /* node-side count. */
|
||||||
char output[MAXCHARMAPS];
|
struct Charnode nodes[MAXCHARNODES]; /* first node is reserved for the root node in charmap. */
|
||||||
};
|
};
|
||||||
|
|
||||||
int32_t readUTF8Char(char *destination, char *source);
|
int32_t readUTF8Char(char *destination, char *source);
|
||||||
|
|
||||||
int32_t charmap_Add(char *input, uint8_t output);
|
int32_t charmap_Add(char *input, uint8_t output);
|
||||||
int32_t charmap_Convert(char **input);
|
int32_t charmap_Convert(char **input);
|
||||||
|
|
||||||
|
|||||||
@@ -42,11 +42,10 @@ int32_t readUTF8Char(char *dest, char *src)
|
|||||||
int32_t charmap_Add(char *input, uint8_t output)
|
int32_t charmap_Add(char *input, uint8_t output)
|
||||||
{
|
{
|
||||||
int32_t i;
|
int32_t i;
|
||||||
size_t input_length;
|
uint8_t v;
|
||||||
char temp1i[CHARMAPLENGTH + 1], temp2i[CHARMAPLENGTH + 1];
|
|
||||||
char temp1o = 0, temp2o = 0;
|
|
||||||
|
|
||||||
struct Charmap *charmap;
|
struct Charmap *charmap;
|
||||||
|
struct Charnode *curr_node, *temp_node;
|
||||||
|
|
||||||
if (pCurrentSection) {
|
if (pCurrentSection) {
|
||||||
if (pCurrentSection->charmap) {
|
if (pCurrentSection->charmap) {
|
||||||
@@ -55,91 +54,109 @@ int32_t charmap_Add(char *input, uint8_t output)
|
|||||||
charmap = calloc(1, sizeof(struct Charmap));
|
charmap = calloc(1, sizeof(struct Charmap));
|
||||||
if (charmap == NULL)
|
if (charmap == NULL)
|
||||||
fatalerror("Not enough memory for charmap");
|
fatalerror("Not enough memory for charmap");
|
||||||
|
|
||||||
pCurrentSection->charmap = charmap;
|
pCurrentSection->charmap = charmap;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
charmap = &globalCharmap;
|
charmap = &globalCharmap;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (charmap->count > MAXCHARMAPS || strlen(input) > CHARMAPLENGTH)
|
if (charmap->charCount >= MAXCHARMAPS || strlen(input) > CHARMAPLENGTH)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
input_length = strlen(input);
|
curr_node = &charmap->nodes[0];
|
||||||
if (input_length > 1) {
|
|
||||||
i = 0;
|
for (i = 0; (v = (uint8_t)input[i]); i++) {
|
||||||
while (i < charmap->count + 1) {
|
if (curr_node->next[v]) {
|
||||||
if (input_length > strlen(charmap->input[i])) {
|
curr_node = curr_node->next[v];
|
||||||
memcpy(temp1i, charmap->input[i],
|
|
||||||
CHARMAPLENGTH + 1);
|
|
||||||
memcpy(charmap->input[i], input, input_length);
|
|
||||||
temp1o = charmap->output[i];
|
|
||||||
charmap->output[i] = output;
|
|
||||||
i++;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
while (i < charmap->count + 1) {
|
|
||||||
memcpy(temp2i, charmap->input[i], CHARMAPLENGTH + 1);
|
|
||||||
memcpy(charmap->input[i], temp1i, CHARMAPLENGTH + 1);
|
|
||||||
memcpy(temp1i, temp2i, CHARMAPLENGTH + 1);
|
|
||||||
temp2o = charmap->output[i];
|
|
||||||
charmap->output[i] = temp1o;
|
|
||||||
temp1o = temp2o;
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
memcpy(charmap->input[charmap->count + 1], temp1i,
|
|
||||||
CHARMAPLENGTH + 1);
|
|
||||||
charmap->output[charmap->count + 1] = temp1o;
|
|
||||||
} else {
|
} else {
|
||||||
memcpy(charmap->input[charmap->count], input, input_length);
|
temp_node = &charmap->nodes[charmap->nodeCount + 1];
|
||||||
charmap->output[charmap->count] = output;
|
|
||||||
|
curr_node->next[v] = temp_node;
|
||||||
|
curr_node = temp_node;
|
||||||
|
|
||||||
|
++charmap->nodeCount;
|
||||||
}
|
}
|
||||||
return ++charmap->count;
|
}
|
||||||
|
|
||||||
|
/* prevent duplicate keys by accepting only the first key-value pair. */
|
||||||
|
if (curr_node->isCode)
|
||||||
|
return charmap->charCount;
|
||||||
|
|
||||||
|
curr_node->code = output;
|
||||||
|
curr_node->isCode = 1;
|
||||||
|
|
||||||
|
return ++charmap->charCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t charmap_Convert(char **input)
|
int32_t charmap_Convert(char **input)
|
||||||
{
|
{
|
||||||
struct Charmap *charmap;
|
struct Charmap *charmap;
|
||||||
|
struct Charnode *charnode;
|
||||||
|
|
||||||
char outchar[CHARMAPLENGTH + 1];
|
char *output;
|
||||||
char *buffer;
|
char outchar[8];
|
||||||
int32_t i, j, length;
|
|
||||||
|
int32_t i, match, length;
|
||||||
|
uint8_t v, foundCode;
|
||||||
|
|
||||||
if (pCurrentSection && pCurrentSection->charmap)
|
if (pCurrentSection && pCurrentSection->charmap)
|
||||||
charmap = pCurrentSection->charmap;
|
charmap = pCurrentSection->charmap;
|
||||||
else
|
else
|
||||||
charmap = &globalCharmap;
|
charmap = &globalCharmap;
|
||||||
|
|
||||||
buffer = malloc(strlen(*input));
|
output = malloc(strlen(*input));
|
||||||
if (buffer == NULL)
|
if (output == NULL)
|
||||||
fatalerror("Not enough memory for buffer");
|
fatalerror("Not enough memory for buffer");
|
||||||
|
|
||||||
length = 0;
|
length = 0;
|
||||||
|
|
||||||
while (**input) {
|
while (**input) {
|
||||||
j = 0;
|
charnode = &charmap->nodes[0];
|
||||||
for (i = 0; i < charmap->count; i++) {
|
|
||||||
j = strlen(charmap->input[i]);
|
/*
|
||||||
if (memcmp(*input, charmap->input[i], j) == 0) {
|
* find the longest valid match which has been registered in charmap.
|
||||||
outchar[0] = charmap->output[i];
|
* note that there could be either multiple matches or no match.
|
||||||
outchar[1] = 0;
|
* when there are several matches, the longest one is taken,
|
||||||
|
* which means that it ignores partial matches shorter than the longest one.
|
||||||
|
*/
|
||||||
|
for (i = match = 0; (v = (*input)[i]);) {
|
||||||
|
if (!charnode->next[v])
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
charnode = charnode->next[v];
|
||||||
|
i++;
|
||||||
|
|
||||||
|
if (charnode->isCode) {
|
||||||
|
match = i;
|
||||||
|
foundCode = charnode->code;
|
||||||
}
|
}
|
||||||
j = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!j)
|
if (match) {
|
||||||
j = readUTF8Char(outchar, *input);
|
output[length] = foundCode;
|
||||||
|
|
||||||
if (!outchar[0]) {
|
length += 1;
|
||||||
buffer[length++] = 0;
|
|
||||||
} else {
|
} else {
|
||||||
for (i = 0; outchar[i]; i++)
|
/*
|
||||||
buffer[length++] = outchar[i];
|
* copy the input's UTF-8 character to the output as-is
|
||||||
|
* if no match was found.
|
||||||
|
*/
|
||||||
|
match = readUTF8Char(outchar, *input);
|
||||||
|
|
||||||
|
if (match) {
|
||||||
|
memcpy(output + length, *input, match);
|
||||||
|
} else {
|
||||||
|
output[length] = 0;
|
||||||
|
match = 1;
|
||||||
}
|
}
|
||||||
*input += j;
|
|
||||||
|
length += match;
|
||||||
}
|
}
|
||||||
*input = buffer;
|
|
||||||
|
*input += match;
|
||||||
|
}
|
||||||
|
|
||||||
|
*input = output;
|
||||||
|
|
||||||
return length;
|
return length;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user