diff --git a/include/asm/charmap.h b/include/asm/charmap.h index 18509d85..87052648 100644 --- a/include/asm/charmap.h +++ b/include/asm/charmap.h @@ -13,14 +13,25 @@ #define MAXCHARMAPS 512 #define CHARMAPLENGTH 16 +#define MAXCHARNODES (MAXCHARMAPS * CHARMAPLENGTH + 1) + +/* + * A node for trie structure. + */ +struct Charnode { + uint8_t code; /* the value in a key-value pair. */ + uint8_t isCode; /* has one if it's a code node, not just a bridge node. */ + struct Charnode *next[256]; /* each index representing the next possible character from its current state. */ +}; struct Charmap { - int32_t count; - char input[MAXCHARMAPS][CHARMAPLENGTH + 1]; - char output[MAXCHARMAPS]; + int32_t charCount; /* user-side count. */ + int32_t nodeCount; /* node-side count. */ + struct Charnode nodes[MAXCHARNODES]; /* first node is reserved for the root node in charmap. */ }; int32_t readUTF8Char(char *destination, char *source); + int32_t charmap_Add(char *input, uint8_t output); int32_t charmap_Convert(char **input); diff --git a/src/asm/charmap.c b/src/asm/charmap.c index 6efc2bf5..6f54bc97 100644 --- a/src/asm/charmap.c +++ b/src/asm/charmap.c @@ -42,11 +42,10 @@ int32_t readUTF8Char(char *dest, char *src) int32_t charmap_Add(char *input, uint8_t output) { int32_t i; - size_t input_length; - char temp1i[CHARMAPLENGTH + 1], temp2i[CHARMAPLENGTH + 1]; - char temp1o = 0, temp2o = 0; + uint8_t v; - struct Charmap *charmap; + struct Charmap *charmap; + struct Charnode *curr_node, *temp_node; if (pCurrentSection) { if (pCurrentSection->charmap) { @@ -55,91 +54,109 @@ int32_t charmap_Add(char *input, uint8_t output) charmap = calloc(1, sizeof(struct Charmap)); if (charmap == NULL) fatalerror("Not enough memory for charmap"); - pCurrentSection->charmap = charmap; } } else { charmap = &globalCharmap; } - if (charmap->count > MAXCHARMAPS || strlen(input) > CHARMAPLENGTH) + if (charmap->charCount >= MAXCHARMAPS || strlen(input) > CHARMAPLENGTH) return -1; - input_length = strlen(input); - if (input_length > 1) { - i = 0; - while (i < charmap->count + 1) { - if (input_length > strlen(charmap->input[i])) { - memcpy(temp1i, charmap->input[i], - CHARMAPLENGTH + 1); - memcpy(charmap->input[i], input, input_length); - temp1o = charmap->output[i]; - charmap->output[i] = output; - i++; - break; - } - i++; + curr_node = &charmap->nodes[0]; + + for (i = 0; (v = (uint8_t)input[i]); i++) { + if (curr_node->next[v]) { + curr_node = curr_node->next[v]; + } else { + temp_node = &charmap->nodes[charmap->nodeCount + 1]; + + curr_node->next[v] = temp_node; + curr_node = temp_node; + + ++charmap->nodeCount; } - while (i < charmap->count + 1) { - memcpy(temp2i, charmap->input[i], CHARMAPLENGTH + 1); - memcpy(charmap->input[i], temp1i, CHARMAPLENGTH + 1); - memcpy(temp1i, temp2i, CHARMAPLENGTH + 1); - temp2o = charmap->output[i]; - charmap->output[i] = temp1o; - temp1o = temp2o; - i++; - } - memcpy(charmap->input[charmap->count + 1], temp1i, - CHARMAPLENGTH + 1); - charmap->output[charmap->count + 1] = temp1o; - } else { - memcpy(charmap->input[charmap->count], input, input_length); - charmap->output[charmap->count] = output; } - return ++charmap->count; + + /* prevent duplicated keys by accepting only first key-value pair. */ + if (curr_node->isCode) + return charmap->charCount; + + curr_node->code = output; + curr_node->isCode = 1; + + return ++charmap->charCount; } int32_t charmap_Convert(char **input) { - struct Charmap *charmap; + struct Charmap *charmap; + struct Charnode *charnode; - char outchar[CHARMAPLENGTH + 1]; - char *buffer; - int32_t i, j, length; + char *output; + char outchar[8]; + + int32_t i, match, length; + uint8_t v, foundCode; if (pCurrentSection && pCurrentSection->charmap) charmap = pCurrentSection->charmap; else charmap = &globalCharmap; - buffer = malloc(strlen(*input)); - if (buffer == NULL) + output = malloc(strlen(*input)); + if (output == NULL) fatalerror("Not enough memory for buffer"); length = 0; + while (**input) { - j = 0; - for (i = 0; i < charmap->count; i++) { - j = strlen(charmap->input[i]); - if (memcmp(*input, charmap->input[i], j) == 0) { - outchar[0] = charmap->output[i]; - outchar[1] = 0; + charnode = &charmap->nodes[0]; + + /* + * find the longest valid match which has been registered in charmap. + * note that there could be either multiple matches or no match. + * and it possibly takes the longest match between them, + * which means that it ignores partial matches shorter than the longest one. + */ + for (i = match = 0; (v = (*input)[i]);) { + if (!charnode->next[v]) break; + + charnode = charnode->next[v]; + i++; + + if (charnode->isCode) { + match = i; + foundCode = charnode->code; } - j = 0; } - if (!j) - j = readUTF8Char(outchar, *input); + if (match) { + output[length] = foundCode; - if (!outchar[0]) { - buffer[length++] = 0; + length += 1; } else { - for (i = 0; outchar[i]; i++) - buffer[length++] = outchar[i]; + /* + * put a utf-8 character + * if failed to find a match. + */ + match = readUTF8Char(outchar, *input); + + if (match) { + memcpy(output + length, *input, match); + } else { + output[length] = 0; + match = 1; + } + + length += match; } - *input += j; + + *input += match; } - *input = buffer; + + *input = output; + return length; }