mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-20 18:22:07 +00:00
Rewrite charmap system
Avoid allocating a *ton* of data per charmap. Stop relying on uninitialized data in charmap nodes. Only initialize charmap nodes lazily.
This commit is contained in:
@@ -11,38 +11,12 @@
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "asm/symbol.h"
|
||||
|
||||
#define MAXCHARMAPS 512
|
||||
#define CHARMAPLENGTH 16
|
||||
#define MAXCHARNODES (MAXCHARMAPS * CHARMAPLENGTH + 1)
|
||||
|
||||
/*
|
||||
* A node for trie structure.
|
||||
*/
|
||||
struct Charnode {
|
||||
uint8_t code; /* the value in a key-value pair. */
|
||||
uint8_t isCode; /* has 1 if it's a code node, not just a bridge node. */
|
||||
struct Charnode *next[256]; /* each index representing the next possible
|
||||
* character from its current state.
|
||||
*/
|
||||
};
|
||||
|
||||
struct Charmap {
|
||||
char name[MAXSYMLEN + 1];
|
||||
int32_t charCount; /* user-side count. */
|
||||
int32_t nodeCount; /* node-side count. */
|
||||
struct Charnode nodes[MAXCHARNODES]; /* first node is reserved for the
|
||||
* root node in charmap.
|
||||
*/
|
||||
};
|
||||
|
||||
void charmap_InitMain(void);
|
||||
struct Charmap *charmap_New(const char *name, const char *baseName);
|
||||
void charmap_Delete(struct Charmap *charmap);
|
||||
void charmap_Set(const char *name);
|
||||
void charmap_Push(void);
|
||||
void charmap_Pop(void);
|
||||
int32_t charmap_Add(char *input, uint8_t output);
|
||||
int32_t charmap_Convert(char **input);
|
||||
void charmap_Add(char *mapping, uint8_t value);
|
||||
size_t charmap_Convert(char const *input, uint8_t *output);
|
||||
|
||||
#endif /* RGBDS_ASM_CHARMAP_H */
|
||||
|
||||
@@ -12,6 +12,6 @@
|
||||
#include <stdint.h>
|
||||
|
||||
uint32_t calchash(const char *s);
|
||||
int32_t readUTF8Char(char *dest, char *src);
|
||||
size_t readUTF8Char(uint8_t *dest, char const *src);
|
||||
|
||||
#endif /* RGBDS_UTIL_H */
|
||||
|
||||
@@ -16,9 +16,9 @@ extern unsigned int nbErrors;
|
||||
enum WarningID {
|
||||
WARNING_ASSERT, /* Assertions */
|
||||
WARNING_BUILTIN_ARG, /* Invalid args to builtins */
|
||||
WARNING_CHARMAP_REDEF, /* Charmap entry re-definition */
|
||||
WARNING_DIV, /* Division undefined behavior */
|
||||
WARNING_EMPTY_DATA_DIRECTIVE,
|
||||
/* `db`, `dw` or `dl` with no directive in ROM */
|
||||
WARNING_EMPTY_DATA_DIRECTIVE, /* `db`, `dw` or `dl` with no directive in ROM */
|
||||
WARNING_EMPTY_ENTRY, /* Empty entry in `db`, `dw` or `dl` */
|
||||
WARNING_LARGE_CONSTANT, /* Constants too large */
|
||||
WARNING_LONG_STR, /* String too long for internal buffers */
|
||||
|
||||
@@ -96,7 +96,7 @@ size_t symvaluetostring(char *dest, size_t maxLength, char *symName,
|
||||
return length;
|
||||
}
|
||||
|
||||
static uint32_t str2int2(char *s, int32_t length)
|
||||
static uint32_t str2int2(uint8_t *s, int32_t length)
|
||||
{
|
||||
int32_t i;
|
||||
uint32_t r = 0;
|
||||
@@ -104,7 +104,7 @@ static uint32_t str2int2(char *s, int32_t length)
|
||||
i = length < 4 ? 0 : length - 4;
|
||||
while (i < length) {
|
||||
r <<= 8;
|
||||
r |= (uint8_t)s[i];
|
||||
r |= s[i];
|
||||
i++;
|
||||
}
|
||||
|
||||
@@ -1023,9 +1023,7 @@ incbin : T_POP_INCBIN string {
|
||||
charmap : T_POP_CHARMAP string ',' const {
|
||||
if ($4 < INT8_MIN || $4 > UINT8_MAX)
|
||||
warning(WARNING_TRUNCATION, "Expression must be 8-bit\n");
|
||||
|
||||
if (charmap_Add($2, (uint8_t)$4) == -1)
|
||||
error("Error adding new charmap mapping: %s\n", strerror(errno));
|
||||
charmap_Add($2, (uint8_t)$4);
|
||||
}
|
||||
;
|
||||
|
||||
@@ -1132,11 +1130,11 @@ constlist_8bit_entry : /* empty */ {
|
||||
}
|
||||
| reloc_8bit_no_str { out_RelByte(&$1); }
|
||||
| string {
|
||||
char *s = $1;
|
||||
int32_t length = charmap_Convert(&s);
|
||||
uint8_t *output = malloc(strlen($1)); /* Cannot be larger than that */
|
||||
int32_t length = charmap_Convert($1, output);
|
||||
|
||||
out_AbsByteGroup((uint8_t*)s, length);
|
||||
free(s);
|
||||
out_AbsByteGroup(output, length);
|
||||
free(output);
|
||||
}
|
||||
;
|
||||
|
||||
@@ -1189,11 +1187,11 @@ reloc_16bit : relocexpr {
|
||||
|
||||
relocexpr : relocexpr_no_str
|
||||
| string {
|
||||
char *s = $1;
|
||||
int32_t length = charmap_Convert(&s);
|
||||
uint32_t r = str2int2(s, length);
|
||||
uint8_t *output = malloc(strlen($1)); /* Cannot be longer than that */
|
||||
int32_t length = charmap_Convert($1, output);
|
||||
uint32_t r = str2int2(output, length);
|
||||
|
||||
free(s);
|
||||
free(output);
|
||||
rpn_Number(&$$, r);
|
||||
}
|
||||
;
|
||||
|
||||
@@ -22,18 +22,36 @@
|
||||
|
||||
#include "hashmap.h"
|
||||
|
||||
#define CHARMAP_HASH_SIZE (1 << 9)
|
||||
/*
|
||||
* Charmaps are stored using a structure known as "trie".
|
||||
* Essentially a tree, where each node stores a single character's worth of info:
|
||||
* whether there exists a mapping that ends at the current character (and, if so, its value), and where to go next for each possible following character.
|
||||
*/
|
||||
struct Charnode {
|
||||
bool isTerminal; /* Whether there exists a mapping that ends here */
|
||||
uint8_t value; /* If the above is true, its corresponding value */
|
||||
/* This MUST be indexes and not pointers, because pointers get invalidated by `realloc`!! */
|
||||
size_t next[255]; /* Indexes of where to go next, 0 = nowhere */
|
||||
};
|
||||
|
||||
#define INITIAL_CAPACITY 32
|
||||
|
||||
struct Charmap {
|
||||
char *name;
|
||||
size_t usedNodes; /* How many nodes are being used */
|
||||
size_t capacity; /* How many nodes have been allocated */
|
||||
struct Charnode nodes[]; /* first node is reserved for the root node */
|
||||
};
|
||||
|
||||
static HashMap charmaps;
|
||||
|
||||
static struct Charmap *currentCharmap;
|
||||
|
||||
struct CharmapStackEntry {
|
||||
struct Charmap *charmap;
|
||||
struct CharmapStackEntry *next;
|
||||
};
|
||||
|
||||
static HashMap charmaps;
|
||||
|
||||
static struct Charmap *mainCharmap;
|
||||
static struct Charmap *currentCharmap;
|
||||
|
||||
struct CharmapStackEntry *charmapStack;
|
||||
|
||||
static inline struct Charmap *charmap_Get(const char *name)
|
||||
@@ -41,16 +59,21 @@ static inline struct Charmap *charmap_Get(const char *name)
|
||||
return hash_GetElement(charmaps, name);
|
||||
}
|
||||
|
||||
static void CopyNode(struct Charmap *dest,
|
||||
const struct Charmap *src,
|
||||
int nodeIdx)
|
||||
static inline struct Charmap *resizeCharmap(struct Charmap *map, size_t capacity)
|
||||
{
|
||||
dest->nodes[nodeIdx].code = src->nodes[nodeIdx].code;
|
||||
dest->nodes[nodeIdx].isCode = src->nodes[nodeIdx].isCode;
|
||||
for (int i = 0; i < 256; i++)
|
||||
if (src->nodes[nodeIdx].next[i])
|
||||
dest->nodes[nodeIdx].next[i] = dest->nodes +
|
||||
(src->nodes[nodeIdx].next[i] - src->nodes);
|
||||
struct Charmap *new = realloc(map, sizeof(*map) + sizeof(*map->nodes) * capacity);
|
||||
|
||||
if (!new)
|
||||
fatalerror("Failed to %s charmap: %s\n",
|
||||
map ? "create" : "resize", strerror(errno));
|
||||
new->capacity = capacity;
|
||||
return new;
|
||||
}
|
||||
|
||||
static inline void initNode(struct Charnode *node)
|
||||
{
|
||||
node->isTerminal = false;
|
||||
memset(node->next, 0, sizeof(node->next));
|
||||
}
|
||||
|
||||
struct Charmap *charmap_New(const char *name, const char *baseName)
|
||||
@@ -66,28 +89,23 @@ struct Charmap *charmap_New(const char *name, const char *baseName)
|
||||
|
||||
struct Charmap *charmap = charmap_Get(name);
|
||||
|
||||
if (charmap != NULL) {
|
||||
if (charmap) {
|
||||
error("Charmap '%s' already exists\n", name);
|
||||
return NULL;
|
||||
return charmap;
|
||||
}
|
||||
|
||||
charmap = malloc(sizeof(*charmap));
|
||||
if (charmap == NULL)
|
||||
fatalerror("Failed to create charmap: %s\n", strerror(errno));
|
||||
|
||||
/* Init the new charmap's fields */
|
||||
snprintf(charmap->name, sizeof(charmap->name), "%s", name);
|
||||
if (base != NULL) {
|
||||
charmap->charCount = base->charCount;
|
||||
charmap->nodeCount = base->nodeCount;
|
||||
if (base) {
|
||||
charmap = resizeCharmap(NULL, base->capacity);
|
||||
charmap->usedNodes = base->usedNodes;
|
||||
|
||||
for (int i = 0; i < MAXCHARNODES; i++)
|
||||
CopyNode(charmap, base, i);
|
||||
memcpy(charmap->nodes, base->nodes, sizeof(base->nodes[0]) * charmap->usedNodes);
|
||||
} else {
|
||||
charmap->charCount = 0;
|
||||
charmap->nodeCount = 0;
|
||||
memset(charmap->nodes, 0, sizeof(charmap->nodes));
|
||||
charmap = resizeCharmap(NULL, INITIAL_CAPACITY);
|
||||
charmap->usedNodes = 1;
|
||||
initNode(&charmap->nodes[0]); /* Init the root node */
|
||||
}
|
||||
charmap->name = strdup(name);
|
||||
|
||||
hash_AddElement(charmaps, charmap->name, charmap);
|
||||
currentCharmap = charmap;
|
||||
@@ -95,6 +113,12 @@ struct Charmap *charmap_New(const char *name, const char *baseName)
|
||||
return charmap;
|
||||
}
|
||||
|
||||
void charmap_Delete(struct Charmap *charmap)
|
||||
{
|
||||
free(charmap->name);
|
||||
free(charmap);
|
||||
}
|
||||
|
||||
void charmap_Set(const char *name)
|
||||
{
|
||||
struct Charmap *charmap = charmap_Get(name);
|
||||
@@ -109,9 +133,9 @@ void charmap_Push(void)
|
||||
{
|
||||
struct CharmapStackEntry *stackEntry;
|
||||
|
||||
stackEntry = malloc(sizeof(struct CharmapStackEntry));
|
||||
stackEntry = malloc(sizeof(*stackEntry));
|
||||
if (stackEntry == NULL)
|
||||
fatalerror("No memory for charmap stack\n");
|
||||
fatalerror("Failed to alloc charmap stack entry: %s\n", strerror(errno));
|
||||
|
||||
stackEntry->charmap = currentCharmap;
|
||||
stackEntry->next = charmapStack;
|
||||
@@ -121,8 +145,10 @@ void charmap_Push(void)
|
||||
|
||||
void charmap_Pop(void)
|
||||
{
|
||||
if (charmapStack == NULL)
|
||||
fatalerror("No entries in the charmap stack\n");
|
||||
if (charmapStack == NULL) {
|
||||
error("No entries in the charmap stack\n");
|
||||
return;
|
||||
}
|
||||
|
||||
struct CharmapStackEntry *top = charmapStack;
|
||||
|
||||
@@ -131,109 +157,86 @@ void charmap_Pop(void)
|
||||
free(top);
|
||||
}
|
||||
|
||||
void charmap_InitMain(void)
|
||||
void charmap_Add(char *mapping, uint8_t value)
|
||||
{
|
||||
mainCharmap = charmap_New("main", NULL);
|
||||
}
|
||||
struct Charnode *node = ¤tCharmap->nodes[0];
|
||||
|
||||
int32_t charmap_Add(char *input, uint8_t output)
|
||||
{
|
||||
int32_t i;
|
||||
uint8_t v;
|
||||
for (uint8_t c; *mapping; mapping++) {
|
||||
c = *mapping - 1;
|
||||
|
||||
struct Charmap *charmap = currentCharmap;
|
||||
struct Charnode *curr_node, *temp_node;
|
||||
|
||||
curr_node = &charmap->nodes[0];
|
||||
|
||||
for (i = 0; (v = (uint8_t)input[i]); i++) {
|
||||
if (curr_node->next[v]) {
|
||||
curr_node = curr_node->next[v];
|
||||
if (node->next[c]) {
|
||||
node = ¤tCharmap->nodes[node->next[c]];
|
||||
} else {
|
||||
temp_node = &charmap->nodes[charmap->nodeCount + 1];
|
||||
/* Register next available node */
|
||||
node->next[c] = currentCharmap->usedNodes;
|
||||
/* If no more nodes are available, get new ones */
|
||||
if (currentCharmap->usedNodes == currentCharmap->capacity) {
|
||||
currentCharmap->capacity *= 2;
|
||||
currentCharmap = resizeCharmap(currentCharmap, currentCharmap->capacity);
|
||||
}
|
||||
|
||||
curr_node->next[v] = temp_node;
|
||||
curr_node = temp_node;
|
||||
|
||||
++charmap->nodeCount;
|
||||
/* Switch to and init new node */
|
||||
node = ¤tCharmap->nodes[currentCharmap->usedNodes++];
|
||||
initNode(node);
|
||||
}
|
||||
}
|
||||
|
||||
/* prevent duplicated keys by accepting only first key-value pair. */
|
||||
if (curr_node->isCode)
|
||||
return charmap->charCount;
|
||||
if (node->isTerminal)
|
||||
warning(WARNING_CHARMAP_REDEF, "Overriding charmap mapping");
|
||||
|
||||
curr_node->code = output;
|
||||
curr_node->isCode = 1;
|
||||
|
||||
return ++charmap->charCount;
|
||||
node->isTerminal = true;
|
||||
node->value = value;
|
||||
}
|
||||
|
||||
int32_t charmap_Convert(char **input)
|
||||
size_t charmap_Convert(char const *input, uint8_t *output)
|
||||
{
|
||||
struct Charmap *charmap = currentCharmap;
|
||||
struct Charnode *charnode;
|
||||
|
||||
char *output;
|
||||
char outchar[8];
|
||||
|
||||
int32_t i, match, length;
|
||||
uint8_t v, foundCode;
|
||||
|
||||
output = malloc(strlen(*input));
|
||||
if (output == NULL)
|
||||
fatalerror("Not enough memory for charmap conversion buffer: %s\n",
|
||||
strerror(errno));
|
||||
|
||||
length = 0;
|
||||
|
||||
while (**input) {
|
||||
charnode = &charmap->nodes[0];
|
||||
|
||||
/*
|
||||
* Find the longest valid match which has been registered in
|
||||
* charmap, possibly yielding multiple or no matches.
|
||||
* The longest match is taken, meaning partial matches shorter
|
||||
* than the longest one are ignored.
|
||||
* The goal is to match the longest mapping possible.
|
||||
* For that, advance through the trie with each character read.
|
||||
* If that would lead to a dead end, rewind characters until the last match, and output.
|
||||
* If no match, read a UTF-8 codepoint and output that.
|
||||
*/
|
||||
for (i = match = 0; (v = (*input)[i]);) {
|
||||
if (!charnode->next[v])
|
||||
size_t outputLen = 0;
|
||||
struct Charnode const *node = ¤tCharmap->nodes[0];
|
||||
struct Charnode const *match = NULL;
|
||||
size_t rewindDistance = 0;
|
||||
|
||||
for (;;) {
|
||||
/* We still want NULs to reach the `else` path, to give a chance to rewind */
|
||||
uint8_t c = *input - 1;
|
||||
|
||||
if (*input && node->next[c]) {
|
||||
input++; /* Consume that char */
|
||||
rewindDistance++;
|
||||
|
||||
node = ¤tCharmap->nodes[node->next[c]];
|
||||
if (node->isTerminal) {
|
||||
match = node;
|
||||
rewindDistance = 0; /* Rewind from after the match */
|
||||
}
|
||||
|
||||
} else {
|
||||
input -= rewindDistance; /* Rewind */
|
||||
rewindDistance = 0;
|
||||
node = ¤tCharmap->nodes[0];
|
||||
|
||||
if (match) { /* Arrived at a dead end with a match found */
|
||||
*output++ = match->value;
|
||||
outputLen++;
|
||||
match = NULL; /* Reset match for next round */
|
||||
|
||||
} else if (*input) { /* No match found */
|
||||
size_t codepointLen = readUTF8Char(output, input);
|
||||
|
||||
input += codepointLen; /* OK because UTF-8 has no NUL in multi-byte chars */
|
||||
output += codepointLen;
|
||||
outputLen += codepointLen;
|
||||
}
|
||||
|
||||
if (!*input)
|
||||
break;
|
||||
|
||||
charnode = charnode->next[v];
|
||||
i++;
|
||||
|
||||
if (charnode->isCode) {
|
||||
match = i;
|
||||
foundCode = charnode->code;
|
||||
}
|
||||
}
|
||||
|
||||
if (match) {
|
||||
output[length] = foundCode;
|
||||
|
||||
length++;
|
||||
} else {
|
||||
/*
|
||||
* put a utf-8 character
|
||||
* if failed to find a match.
|
||||
*/
|
||||
match = readUTF8Char(outchar, *input);
|
||||
|
||||
if (match) {
|
||||
memcpy(output + length, *input, match);
|
||||
} else {
|
||||
output[length] = 0;
|
||||
match = 1;
|
||||
}
|
||||
|
||||
length += match;
|
||||
}
|
||||
|
||||
*input += match;
|
||||
}
|
||||
|
||||
*input = output;
|
||||
|
||||
return length;
|
||||
return outputLen;
|
||||
}
|
||||
|
||||
@@ -538,7 +538,7 @@ int main(int argc, char *argv[])
|
||||
sym_SetExportAll(exportall);
|
||||
fstk_Init(tzMainfile);
|
||||
opt_ParseDefines();
|
||||
charmap_InitMain();
|
||||
charmap_New("main", NULL);
|
||||
|
||||
yy_set_state(LEX_STATE_NORMAL);
|
||||
opt_SetCurrentOptions(&DefaultOptions);
|
||||
|
||||
@@ -203,6 +203,10 @@ Warn about incorrect arguments to built-in functions, such as
|
||||
with indexes outside of the string's bounds.
|
||||
This warning is enabled by
|
||||
.Fl Wall .
|
||||
.It Fl Wcharmap-redef
|
||||
Warn when re-defining a charmap mapping.
|
||||
This warning is enabled by
|
||||
.Fl Wall .
|
||||
.It Fl Wdiv
|
||||
Warn when dividing the smallest negative integer by -1, which yields itself due to integer overflow.
|
||||
.It Fl Wempty-entry
|
||||
|
||||
@@ -27,21 +27,20 @@ uint32_t calchash(const char *s)
|
||||
return hash;
|
||||
}
|
||||
|
||||
int32_t readUTF8Char(char *dest, char *src)
|
||||
size_t readUTF8Char(uint8_t *dest, char const *src)
|
||||
{
|
||||
uint32_t state;
|
||||
uint32_t state = 0;
|
||||
uint32_t codep;
|
||||
int32_t i;
|
||||
size_t i = 0;
|
||||
|
||||
for (i = 0, state = 0;; i++) {
|
||||
for (;;) {
|
||||
if (decode(&state, &codep, (uint8_t)src[i]) == 1)
|
||||
fatalerror("invalid UTF-8 character\n");
|
||||
|
||||
dest[i] = src[i];
|
||||
i++;
|
||||
|
||||
if (state == 0) {
|
||||
dest[++i] = '\0';
|
||||
if (state == 0)
|
||||
return i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@ enum WarningState {
|
||||
static enum WarningState const defaultWarnings[NB_WARNINGS] = {
|
||||
[WARNING_ASSERT] = WARNING_ENABLED,
|
||||
[WARNING_BUILTIN_ARG] = WARNING_DISABLED,
|
||||
[WARNING_CHARMAP_REDEF] = WARNING_DISABLED,
|
||||
[WARNING_DIV] = WARNING_DISABLED,
|
||||
[WARNING_EMPTY_DATA_DIRECTIVE] = WARNING_DISABLED,
|
||||
[WARNING_EMPTY_ENTRY] = WARNING_DISABLED,
|
||||
@@ -68,6 +69,7 @@ static enum WarningState warningState(enum WarningID id)
|
||||
static char const *warningFlags[NB_WARNINGS_ALL] = {
|
||||
"assert",
|
||||
"builtin-args",
|
||||
"charmap-redef",
|
||||
"div",
|
||||
"empty-data-directive",
|
||||
"empty-entry",
|
||||
@@ -92,6 +94,7 @@ enum MetaWarningCommand {
|
||||
/* Warnings that probably indicate an error */
|
||||
static uint8_t const _wallCommands[] = {
|
||||
WARNING_BUILTIN_ARG,
|
||||
WARNING_CHARMAP_REDEF,
|
||||
WARNING_EMPTY_DATA_DIRECTIVE,
|
||||
WARNING_LARGE_CONSTANT,
|
||||
WARNING_LONG_STR,
|
||||
|
||||
@@ -4,3 +4,4 @@ ERROR: multiple-charmaps.asm(102) -> multiple-charmaps.asm::set_(13):
|
||||
Charmap 'map5' doesn't exist
|
||||
ERROR: multiple-charmaps.asm(104) -> multiple-charmaps.asm::pop_(23):
|
||||
No entries in the charmap stack
|
||||
error: Assembly aborted (3 errors)!
|
||||
|
||||
Reference in New Issue
Block a user