diff --git a/Makefile b/Makefile index e3315768..869b4b2d 100644 --- a/Makefile +++ b/Makefile @@ -57,6 +57,7 @@ rgbasm_obj := \ src/asm/output.o \ src/asm/rpn.o \ src/asm/symbol.o \ + src/asm/util.o \ src/extern/err.o \ src/extern/utf8decoder.o \ src/version.o diff --git a/include/asm/charmap.h b/include/asm/charmap.h index 87052648..ae64e875 100644 --- a/include/asm/charmap.h +++ b/include/asm/charmap.h @@ -25,13 +25,16 @@ struct Charnode { }; struct Charmap { + char name[MAXSYMLEN + 1]; int32_t charCount; /* user-side count. */ int32_t nodeCount; /* node-side count. */ struct Charnode nodes[MAXCHARNODES]; /* first node is reserved for the root node in charmap. */ + struct Charmap *next; /* next charmap in hash table bucket */ }; -int32_t readUTF8Char(char *destination, char *source); - +void charmap_InitMain(void); +struct Charmap *charmap_New(const char *name, const char *baseName); +void charmap_Set(const char *name); int32_t charmap_Add(char *input, uint8_t output); int32_t charmap_Convert(char **input); diff --git a/include/asm/main.h b/include/asm/main.h index 49ab7add..39857fe9 100644 --- a/include/asm/main.h +++ b/include/asm/main.h @@ -11,6 +11,7 @@ #include #include +#include #include "helpers.h" diff --git a/include/asm/symbol.h b/include/asm/symbol.h index 58058b85..859d5acf 100644 --- a/include/asm/symbol.h +++ b/include/asm/symbol.h @@ -51,7 +51,7 @@ struct sSymbol { /* Symbol has a constant value, will not be changed during linking */ #define SYMF_CONST 0x200 -uint32_t calchash(char *s); +uint32_t sym_CalcHash(const char *s); void sym_SetExportAll(uint8_t set); void sym_AddLocalReloc(char *tzSym); void sym_AddReloc(char *tzSym); diff --git a/include/asm/util.h b/include/asm/util.h new file mode 100644 index 00000000..6e40fa83 --- /dev/null +++ b/include/asm/util.h @@ -0,0 +1,17 @@ +/* + * This file is part of RGBDS. + * + * Copyright (c) 1997-2019, Carsten Sorensen and RGBDS contributors. 
+ * + * SPDX-License-Identifier: MIT + */ + +#ifndef RGBDS_UTIL_H +#define RGBDS_UTIL_H + +#include <stdint.h> + +uint32_t calchash(const char *s); +int32_t readUTF8Char(char *dest, char *src); + +#endif /* RGBDS_UTIL_H */ diff --git a/src/asm/asmy.y b/src/asm/asmy.y index 8cf48f8f..afb9aa53 100644 --- a/src/asm/asmy.y +++ b/src/asm/asmy.y @@ -26,6 +26,7 @@ #include "asm/output.h" #include "asm/rpn.h" #include "asm/symbol.h" +#include "asm/util.h" #include "extern/utf8decoder.h" @@ -618,6 +619,8 @@ static void strsubUTF8(char *dest, const char *src, uint32_t pos, uint32_t len) %token T_POP_UNION T_POP_NEXTU T_POP_ENDU %token T_POP_INCBIN T_POP_REPT %token T_POP_CHARMAP +%token T_POP_NEWCHARMAP +%token T_POP_SETCHARMAP %token T_POP_SHIFT %token T_POP_ENDR %token T_POP_FAIL @@ -771,6 +774,8 @@ simple_pseudoop : include | endu | incbin | charmap + | newcharmap + | setcharmap | rept | shift | fail @@ -1034,6 +1039,20 @@ charmap : T_POP_CHARMAP string comma string } ; +newcharmap : T_POP_NEWCHARMAP T_ID + { + charmap_New($2, NULL); + } + | T_POP_NEWCHARMAP T_ID comma T_ID + { + charmap_New($2, $4); + } + +setcharmap : T_POP_SETCHARMAP T_ID + { + charmap_Set($2); + } + printt : T_POP_PRINTT string { printf("%s", $2); diff --git a/src/asm/charmap.c b/src/asm/charmap.c index 6f54bc97..092a4be5 100644 --- a/src/asm/charmap.c +++ b/src/asm/charmap.c @@ -6,6 +6,7 @@ * SPDX-License-Identifier: MIT */ +#include <stdbool.h> #include #include #include @@ -15,28 +16,112 @@ #include "asm/charmap.h" #include "asm/main.h" #include "asm/output.h" +#include "asm/util.h" -#include "extern/utf8decoder.h" +#define CHARMAP_HASH_SIZE (1 << 9) -struct Charmap globalCharmap = {0}; +static struct Charmap *tHashedCharmaps[CHARMAP_HASH_SIZE]; -int32_t readUTF8Char(char *dest, char *src) +static struct Charmap *mainCharmap; +static struct Charmap *currentCharmap; + +static void warnSectionCharmap(void) { - uint32_t state; - uint32_t codep; - int32_t i; + static bool warned = false; - for (i = 0, state = 0;; i++) { - if
(decode(&state, &codep, (uint8_t)src[i]) == 1) - fatalerror("invalid UTF-8 character"); + if (warned) + return; - dest[i] = src[i]; + warning("Using 'charmap' within a section when the current charmap is 'main' is deprecated"); + warned = true; +} - if (state == 0) { - dest[++i] = '\0'; - return i; +static uint32_t charmap_CalcHash(const char *s) +{ + return calchash(s) % CHARMAP_HASH_SIZE; +} + +static struct Charmap **charmap_Get(const char *name) +{ + struct Charmap **ppCharmap = &tHashedCharmaps[charmap_CalcHash(name)]; + + while (*ppCharmap != NULL && strcmp((*ppCharmap)->name, name)) + ppCharmap = &(*ppCharmap)->next; + + return ppCharmap; +} + +static void CopyNode(struct Charmap *dest, + const struct Charmap *src, + int nodeIdx) +{ + dest->nodes[nodeIdx].code = src->nodes[nodeIdx].code; + dest->nodes[nodeIdx].isCode = src->nodes[nodeIdx].isCode; + for (int i = 0; i < 256; i++) + if (src->nodes[nodeIdx].next[i]) + dest->nodes[nodeIdx].next[i] = dest->nodes + + (src->nodes[nodeIdx].next[i] - src->nodes); +} + +struct Charmap *charmap_New(const char *name, const char *baseName) +{ + struct Charmap *pBase = NULL; + + if (baseName != NULL) { + struct Charmap **ppBase = charmap_Get(baseName); + + if (*ppBase == NULL) { + yyerror("Base charmap '%s' doesn't exist", baseName); + return NULL; } + + pBase = *ppBase; } + + struct Charmap **ppCharmap = charmap_Get(name); + + if (*ppCharmap != NULL) { + yyerror("Charmap '%s' already exists", name); + return NULL; + } + + *ppCharmap = calloc(1, sizeof(struct Charmap)); + + if (*ppCharmap == NULL) + fatalerror("Not enough memory for charmap"); + + struct Charmap *pCharmap = *ppCharmap; + + snprintf(pCharmap->name, sizeof(pCharmap->name), "%s", name); + + if (pBase != NULL) { + pCharmap->charCount = pBase->charCount; + pCharmap->nodeCount = pBase->nodeCount; + + for (int i = 0; i < MAXCHARNODES; i++) + CopyNode(pCharmap, pBase, i); + } + + currentCharmap = pCharmap; + + return pCharmap; +} + +void charmap_Set(const char 
*name) +{ + struct Charmap **ppCharmap = charmap_Get(name); + + if (*ppCharmap == NULL) { + yyerror("Charmap '%s' doesn't exist", name); + return; + } + + currentCharmap = *ppCharmap; +} + +void charmap_InitMain(void) +{ + mainCharmap = charmap_New("main", NULL); } int32_t charmap_Add(char *input, uint8_t output) @@ -47,7 +132,15 @@ int32_t charmap_Add(char *input, uint8_t output) struct Charmap *charmap; struct Charnode *curr_node, *temp_node; - if (pCurrentSection) { + /* + * If the user tries to define a character mapping inside a section + * and the current global charmap is the "main" one, then a local + * section charmap will be created or modified instead of the global + * one. In other words, the local section charmap can override the + * main global one, but not the others. + */ + if (pCurrentSection && currentCharmap == mainCharmap) { + warnSectionCharmap(); if (pCurrentSection->charmap) { charmap = pCurrentSection->charmap; } else { @@ -57,7 +150,7 @@ int32_t charmap_Add(char *input, uint8_t output) pCurrentSection->charmap = charmap; } } else { - charmap = &globalCharmap; + charmap = currentCharmap; } if (charmap->charCount >= MAXCHARMAPS || strlen(input) > CHARMAPLENGTH) @@ -99,10 +192,18 @@ int32_t charmap_Convert(char **input) int32_t i, match, length; uint8_t v, foundCode; - if (pCurrentSection && pCurrentSection->charmap) + /* + * If there is a local section charmap and the current global charmap + * is the "main" one, the local one is used. Otherwise, the global + * one is used. In other words, the local section charmap can override + * the main global one, but not the others. 
+ */ + if (pCurrentSection && + pCurrentSection->charmap && + currentCharmap == mainCharmap) charmap = pCurrentSection->charmap; else - charmap = &globalCharmap; + charmap = currentCharmap; output = malloc(strlen(*input)); if (output == NULL) diff --git a/src/asm/globlex.c b/src/asm/globlex.c index 64235732..c848aff8 100644 --- a/src/asm/globlex.c +++ b/src/asm/globlex.c @@ -479,6 +479,8 @@ const struct sLexInitString lexer_strings[] = { {"incbin", T_POP_INCBIN}, {"charmap", T_POP_CHARMAP}, + {"newcharmap", T_POP_NEWCHARMAP}, + {"setcharmap", T_POP_SETCHARMAP}, {"fail", T_POP_FAIL}, {"warn", T_POP_WARN}, diff --git a/src/asm/main.c b/src/asm/main.c index c2c79e3e..7a5358bd 100644 --- a/src/asm/main.c +++ b/src/asm/main.c @@ -21,6 +21,7 @@ #include "asm/lexer.h" #include "asm/output.h" #include "asm/main.h" +#include "asm/charmap.h" #include "extern/err.h" @@ -437,6 +438,7 @@ int main(int argc, char *argv[]) sym_SetExportAll(CurrentOptions.exportall); fstk_Init(tzMainfile); opt_ParseDefines(); + charmap_InitMain(); yy_set_state(LEX_STATE_NORMAL); opt_SetCurrentOptions(&DefaultOptions); diff --git a/src/asm/output.c b/src/asm/output.c index b28b6de6..f4d3efad 100644 --- a/src/asm/output.c +++ b/src/asm/output.c @@ -321,7 +321,7 @@ static uint32_t addsymbol(struct sSymbol *pSym) struct PatchSymbol *pPSym, **ppPSym; uint32_t hash; - hash = calchash(pSym->tzName); + hash = sym_CalcHash(pSym->tzName); ppPSym = &(tHashedPatchSymbols[hash]); while ((*ppPSym) != NULL) { diff --git a/src/asm/rgbasm.5 b/src/asm/rgbasm.5 index babe827c..9d5ea4fd 100644 --- a/src/asm/rgbasm.5 +++ b/src/asm/rgbasm.5 @@ -1140,9 +1140,27 @@ CHARMAP "í", 20 CHARMAP "A", 128 .Ed .Pp +It is possible to create multiple character maps and then switch between them +as desired. This can be used to encode debug information in ASCII and use +a different encoding for other purposes, for example. 
Initially, there is +one character map called +.Sy main +and it is automatically selected as the current character map from the +beginning. +.Bl -column "NEWCHARMAP name, basename" +.It Sy Command Ta Sy Meaning +.It Ic NEWCHARMAP Ar name Ta Creates a new, empty character map called +.Ar name No and switches to it . +.It Ic NEWCHARMAP Ar name , basename Ta Creates a new character map called +.Ar name , +copied from character map +.Ar basename , No and switches to it . +.It Ic SETCHARMAP Ar name Ta Switches to character map Ar name . +.El +.Pp .Sy Note: Character maps affect all strings in the file from the point in which they are -defined. +defined, until switching to a different character map. This means that any string that the code may want to print as debug information will also be affected by it. .Pp diff --git a/src/asm/symbol.c b/src/asm/symbol.c index 50e9f180..2498be95 100644 --- a/src/asm/symbol.c +++ b/src/asm/symbol.c @@ -22,6 +22,7 @@ #include "asm/main.h" #include "asm/mymath.h" #include "asm/output.h" +#include "asm/util.h" #include "extern/err.h" @@ -90,16 +91,11 @@ static int32_t getvaluefield(struct sSymbol *sym) } /* - * Calculate the hash value for a string + * Calculate the hash value for a symbol name */ -uint32_t calchash(char *s) +uint32_t sym_CalcHash(const char *s) { - uint32_t hash = 5381; - - while (*s != 0) - hash = (hash * 33) ^ (*s++); - - return hash % HASHSIZE; + return calchash(s) % HASHSIZE; } /* @@ -123,7 +119,7 @@ struct sSymbol *createsymbol(char *s) struct sSymbol **ppsym; uint32_t hash; - hash = calchash(s); + hash = sym_CalcHash(s); ppsym = &(tHashedSymbols[hash]); while ((*ppsym) != NULL) @@ -187,7 +183,7 @@ struct sSymbol **findpsymbol(char *s, struct sSymbol *scope) s); } - hash = calchash(s); + hash = sym_CalcHash(s); ppsym = &(tHashedSymbols[hash]); while ((*ppsym) != NULL) { diff --git a/src/asm/util.c b/src/asm/util.c new file mode 100644 index 00000000..0b13cfba --- /dev/null +++ b/src/asm/util.c @@ -0,0 +1,46 @@ +/* + * This file is part of RGBDS.
+ * + * Copyright (c) 1997-2019, Carsten Sorensen and RGBDS contributors. + * + * SPDX-License-Identifier: MIT + */ + +#include <stdint.h> + +#include "asm/main.h" +#include "asm/util.h" + +#include "extern/utf8decoder.h" + +/* + * Calculate the hash value for a string + */ +uint32_t calchash(const char *s) +{ + uint32_t hash = 5381; + + while (*s != 0) + hash = (hash * 33) ^ (*s++); + + return hash; +} + +int32_t readUTF8Char(char *dest, char *src) +{ + uint32_t state; + uint32_t codep; + int32_t i; + + for (i = 0, state = 0;; i++) { + if (decode(&state, &codep, (uint8_t)src[i]) == 1) + fatalerror("invalid UTF-8 character"); + + dest[i] = src[i]; + + if (state == 0) { + dest[++i] = '\0'; + return i; + } + } +} diff --git a/test/asm/equ-charmap.asm b/test/asm/equ-charmap.asm index a1e8aa87..9caebf20 100644 --- a/test/asm/equ-charmap.asm +++ b/test/asm/equ-charmap.asm @@ -1,4 +1,4 @@ -SECTION "sec", ROM0 charmap "A", 1 +SECTION "sec", ROM0 _A_ EQU "A" db _A_ diff --git a/test/asm/multiple-charmaps.asm b/test/asm/multiple-charmaps.asm new file mode 100644 index 00000000..fb4e5f35 --- /dev/null +++ b/test/asm/multiple-charmaps.asm @@ -0,0 +1,88 @@ +printt "main charmap\n" + +charmap "ab", $0 + +x = "ab" +printt "{x}\n" + +printt "newcharmap map1\n" +newcharmap map1 + +x = "ab" +printt "{x}\n" + +printt "newcharmap map2, main\n" +newcharmap map2, main + +x = "ab" +printt "{x}\n" + +printt "setcharmap map1\n" +setcharmap map1 + +x = "ab" +printt "{x}\n" + +printt "newcharmap map3\n" +newcharmap map3 + +charmap "ab", $1 + +x = "ab" +printt "{x}\n" + +printt "newcharmap map4, map3\n" +newcharmap map4, map3 + +charmap "ab", $1 +charmap "cd", $2 + +x = "ab" +printt "{x}\n" + +x = "cd" +printt "{x}\n" + +printt "setcharmap map3\n" +setcharmap map3 + +x = "ab" +printt "{x}\n" + +x = "cd" +printt "{x}\n" + +printt "setcharmap main\n" +setcharmap main + +SECTION "sec0", ROM0 + +x = "ab" +printt "{x}\n" + +printt "override main charmap\n" +charmap "ef", $3 + +x = "ab" +printt "{x}\n" + +x
= "ef" +printt "{x}\n" + +printt "setcharmap map3\n" +setcharmap map3 + +x = "ab" +printt "{x}\n" + +x = "cd" +printt "{x}\n" + +x = "ef" +printt "{x}\n" + +printt "newcharmap map1\n" +newcharmap map1 + +printt "setcharmap map5\n" +setcharmap map5 diff --git a/test/asm/multiple-charmaps.out b/test/asm/multiple-charmaps.out new file mode 100644 index 00000000..79b502c6 --- /dev/null +++ b/test/asm/multiple-charmaps.out @@ -0,0 +1,34 @@ +warning: multiple-charmaps.asm(64): + Using 'charmap' within a section when the current charmap is 'main' is deprecated +ERROR: multiple-charmaps.asm(85): + Charmap 'map1' already exists +ERROR: multiple-charmaps.asm(88): + Charmap 'map5' doesn't exist +error: Assembly aborted (2 errors)! +main charmap +$0 +newcharmap map1 +$6162 +newcharmap map2, main +$0 +setcharmap map1 +$6162 +newcharmap map3 +$1 +newcharmap map4, map3 +$1 +$2 +setcharmap map3 +$1 +$6364 +setcharmap main +$0 +override main charmap +$6162 +$3 +setcharmap map3 +$1 +$6364 +$6566 +newcharmap map1 +setcharmap map5 diff --git a/test/asm/multiple-charmaps.out.pipe b/test/asm/multiple-charmaps.out.pipe new file mode 100644 index 00000000..03727000 --- /dev/null +++ b/test/asm/multiple-charmaps.out.pipe @@ -0,0 +1,34 @@ +warning: -(64): + Using 'charmap' within a section when the current charmap is 'main' is deprecated +ERROR: -(85): + Charmap 'map1' already exists +ERROR: -(88): + Charmap 'map5' doesn't exist +error: Assembly aborted (2 errors)! +main charmap +$0 +newcharmap map1 +$6162 +newcharmap map2, main +$0 +setcharmap map1 +$6162 +newcharmap map3 +$1 +newcharmap map4, map3 +$1 +$2 +setcharmap map3 +$1 +$6364 +setcharmap main +$0 +override main charmap +$6162 +$3 +setcharmap map3 +$1 +$6364 +$6566 +newcharmap map1 +setcharmap map5