Add support for multiple charmaps

This adds two new directives: newcharmap and setcharmap.
newcharmap creates a new charmap and switches to it.
setcharmap switches to an existing charmap.
This commit is contained in:
dbrotz
2019-08-29 21:54:06 -07:00
parent 12d82eb768
commit e05199ca1e
17 changed files with 395 additions and 33 deletions

View File

@@ -57,6 +57,7 @@ rgbasm_obj := \
src/asm/output.o \
src/asm/rpn.o \
src/asm/symbol.o \
src/asm/util.o \
src/extern/err.o \
src/extern/utf8decoder.o \
src/version.o

View File

@@ -25,13 +25,16 @@ struct Charnode {
};
/*
 * A named character map. Charmaps are looked up by name through a hash
 * table and chained together inside each bucket via `next`.
 */
struct Charmap {
char name[MAXSYMLEN + 1]; /* NUL-terminated name the charmap is looked up by */
int32_t charCount; /* user-side count. */
int32_t nodeCount; /* node-side count. */
struct Charnode nodes[MAXCHARNODES]; /* first node is reserved for the root node in charmap. */
struct Charmap *next; /* next charmap in hash table bucket */
};
int32_t readUTF8Char(char *destination, char *source);
void charmap_InitMain(void);
struct Charmap *charmap_New(const char *name, const char *baseName);
void charmap_Set(const char *name);
int32_t charmap_Add(char *input, uint8_t output);
int32_t charmap_Convert(char **input);

View File

@@ -11,6 +11,7 @@
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include "helpers.h"

View File

@@ -51,7 +51,7 @@ struct sSymbol {
/* Symbol has a constant value, will not be changed during linking */
#define SYMF_CONST 0x200
uint32_t calchash(char *s);
uint32_t sym_CalcHash(const char *s);
void sym_SetExportAll(uint8_t set);
void sym_AddLocalReloc(char *tzSym);
void sym_AddReloc(char *tzSym);

17
include/asm/util.h Normal file
View File

@@ -0,0 +1,17 @@
/*
* This file is part of RGBDS.
*
* Copyright (c) 1997-2019, Carsten Sorensen and RGBDS contributors.
*
* SPDX-License-Identifier: MIT
*/
#ifndef RGBDS_UTIL_H
#define RGBDS_UTIL_H
#include <stdint.h>
uint32_t calchash(const char *s);
int32_t readUTF8Char(char *dest, char *src);
#endif /* RGBDS_UTIL_H */

View File

@@ -26,6 +26,7 @@
#include "asm/output.h"
#include "asm/rpn.h"
#include "asm/symbol.h"
#include "asm/util.h"
#include "extern/utf8decoder.h"
@@ -618,6 +619,8 @@ static void strsubUTF8(char *dest, const char *src, uint32_t pos, uint32_t len)
%token T_POP_UNION T_POP_NEXTU T_POP_ENDU
%token T_POP_INCBIN T_POP_REPT
%token T_POP_CHARMAP
%token T_POP_NEWCHARMAP
%token T_POP_SETCHARMAP
%token T_POP_SHIFT
%token T_POP_ENDR
%token T_POP_FAIL
@@ -771,6 +774,8 @@ simple_pseudoop : include
| endu
| incbin
| charmap
| newcharmap
| setcharmap
| rept
| shift
| fail
@@ -1034,6 +1039,20 @@ charmap : T_POP_CHARMAP string comma string
}
;
/*
 * NEWCHARMAP name [, basename]
 * Creates a charmap called `name` (optionally copied from `basename`);
 * charmap_New() also selects it as the current charmap.
 */
newcharmap	: T_POP_NEWCHARMAP T_ID
		{
			charmap_New($2, NULL);
		}
		| T_POP_NEWCHARMAP T_ID comma T_ID
		{
			charmap_New($2, $4);
		}
;
/*
 * SETCHARMAP name
 * Selects an already existing charmap as the current one.
 */
setcharmap	: T_POP_SETCHARMAP T_ID
		{
			charmap_Set($2);
		}
;
printt : T_POP_PRINTT string
{
printf("%s", $2);

View File

@@ -6,6 +6,7 @@
* SPDX-License-Identifier: MIT
*/
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
@@ -15,28 +16,112 @@
#include "asm/charmap.h"
#include "asm/main.h"
#include "asm/output.h"
#include "asm/util.h"
#include "extern/utf8decoder.h"
#define CHARMAP_HASH_SIZE (1 << 9)
struct Charmap globalCharmap = {0};
static struct Charmap *tHashedCharmaps[CHARMAP_HASH_SIZE];
int32_t readUTF8Char(char *dest, char *src)
static struct Charmap *mainCharmap;
static struct Charmap *currentCharmap;
static void warnSectionCharmap(void)
{
uint32_t state;
uint32_t codep;
int32_t i;
static bool warned = false;
for (i = 0, state = 0;; i++) {
if (decode(&state, &codep, (uint8_t)src[i]) == 1)
fatalerror("invalid UTF-8 character");
if (warned)
return;
dest[i] = src[i];
warning("Using 'charmap' within a section when the current charmap is 'main' is deprecated");
warned = true;
}
if (state == 0) {
dest[++i] = '\0';
return i;
/*
 * Map a charmap name to its bucket index in tHashedCharmaps.
 */
static uint32_t charmap_CalcHash(const char *s)
{
	uint32_t fullHash = calchash(s);

	return fullHash % CHARMAP_HASH_SIZE;
}
/*
 * Find the hash table slot belonging to the charmap called `name`.
 *
 * Returns the address of the pointer that references the matching charmap.
 * When no charmap has that name, the returned slot is the NULL link at the
 * end of the bucket chain, i.e. the place where a new charmap can be
 * inserted.
 */
static struct Charmap **charmap_Get(const char *name)
{
	struct Charmap **ppSlot;

	for (ppSlot = &tHashedCharmaps[charmap_CalcHash(name)];
	     *ppSlot != NULL;
	     ppSlot = &(*ppSlot)->next) {
		if (strcmp((*ppSlot)->name, name) == 0)
			break;
	}

	return ppSlot;
}
static void CopyNode(struct Charmap *dest,
const struct Charmap *src,
int nodeIdx)
{
dest->nodes[nodeIdx].code = src->nodes[nodeIdx].code;
dest->nodes[nodeIdx].isCode = src->nodes[nodeIdx].isCode;
for (int i = 0; i < 256; i++)
if (src->nodes[nodeIdx].next[i])
dest->nodes[nodeIdx].next[i] = dest->nodes +
(src->nodes[nodeIdx].next[i] - src->nodes);
}
struct Charmap *charmap_New(const char *name, const char *baseName)
{
struct Charmap *pBase = NULL;
if (baseName != NULL) {
struct Charmap **ppBase = charmap_Get(baseName);
if (*ppBase == NULL) {
yyerror("Base charmap '%s' doesn't exist", baseName);
return NULL;
}
pBase = *ppBase;
}
struct Charmap **ppCharmap = charmap_Get(name);
if (*ppCharmap != NULL) {
yyerror("Charmap '%s' already exists", name);
return NULL;
}
*ppCharmap = calloc(1, sizeof(struct Charmap));
if (*ppCharmap == NULL)
fatalerror("Not enough memory for charmap");
struct Charmap *pCharmap = *ppCharmap;
snprintf(pCharmap->name, sizeof(pCharmap->name), "%s", name);
if (pBase != NULL) {
pCharmap->charCount = pBase->charCount;
pCharmap->nodeCount = pBase->nodeCount;
for (int i = 0; i < MAXCHARNODES; i++)
CopyNode(pCharmap, pBase, i);
}
currentCharmap = pCharmap;
return pCharmap;
}
void charmap_Set(const char *name)
{
struct Charmap **ppCharmap = charmap_Get(name);
if (*ppCharmap == NULL) {
yyerror("Charmap '%s' doesn't exist", name);
return;
}
currentCharmap = *ppCharmap;
}
void charmap_InitMain(void)
{
mainCharmap = charmap_New("main", NULL);
}
int32_t charmap_Add(char *input, uint8_t output)
@@ -47,7 +132,15 @@ int32_t charmap_Add(char *input, uint8_t output)
struct Charmap *charmap;
struct Charnode *curr_node, *temp_node;
if (pCurrentSection) {
/*
* If the user tries to define a character mapping inside a section
* and the current global charmap is the "main" one, then a local
* section charmap will be created or modified instead of the global
* one. In other words, the local section charmap can override the
* main global one, but not the others.
*/
if (pCurrentSection && currentCharmap == mainCharmap) {
warnSectionCharmap();
if (pCurrentSection->charmap) {
charmap = pCurrentSection->charmap;
} else {
@@ -57,7 +150,7 @@ int32_t charmap_Add(char *input, uint8_t output)
pCurrentSection->charmap = charmap;
}
} else {
charmap = &globalCharmap;
charmap = currentCharmap;
}
if (charmap->charCount >= MAXCHARMAPS || strlen(input) > CHARMAPLENGTH)
@@ -99,10 +192,18 @@ int32_t charmap_Convert(char **input)
int32_t i, match, length;
uint8_t v, foundCode;
if (pCurrentSection && pCurrentSection->charmap)
/*
* If there is a local section charmap and the current global charmap
* is the "main" one, the local one is used. Otherwise, the global
* one is used. In other words, the local section charmap can override
* the main global one, but not the others.
*/
if (pCurrentSection &&
pCurrentSection->charmap &&
currentCharmap == mainCharmap)
charmap = pCurrentSection->charmap;
else
charmap = &globalCharmap;
charmap = currentCharmap;
output = malloc(strlen(*input));
if (output == NULL)

View File

@@ -479,6 +479,8 @@ const struct sLexInitString lexer_strings[] = {
{"incbin", T_POP_INCBIN},
{"charmap", T_POP_CHARMAP},
{"newcharmap", T_POP_NEWCHARMAP},
{"setcharmap", T_POP_SETCHARMAP},
{"fail", T_POP_FAIL},
{"warn", T_POP_WARN},

View File

@@ -21,6 +21,7 @@
#include "asm/lexer.h"
#include "asm/output.h"
#include "asm/main.h"
#include "asm/charmap.h"
#include "extern/err.h"
@@ -437,6 +438,7 @@ int main(int argc, char *argv[])
sym_SetExportAll(CurrentOptions.exportall);
fstk_Init(tzMainfile);
opt_ParseDefines();
charmap_InitMain();
yy_set_state(LEX_STATE_NORMAL);
opt_SetCurrentOptions(&DefaultOptions);

View File

@@ -321,7 +321,7 @@ static uint32_t addsymbol(struct sSymbol *pSym)
struct PatchSymbol *pPSym, **ppPSym;
uint32_t hash;
hash = calchash(pSym->tzName);
hash = sym_CalcHash(pSym->tzName);
ppPSym = &(tHashedPatchSymbols[hash]);
while ((*ppPSym) != NULL) {

View File

@@ -1140,9 +1140,27 @@ CHARMAP "&iacute", 20
CHARMAP "A", 128
.Ed
.Pp
It is possible to create multiple character maps and then switch between them
as desired. This can be used to encode debug information in ASCII and use
a different encoding for other purposes, for example. Initially, there is
one character map called
.Sy main
and it is automatically selected as the current character map from the
beginning.
.Bl -column "NEWCHARMAP name, basename"
.It Sy Command Ta Sy Meaning
.It Ic NEWCHARMAP Ar name Ta Creates a new, empty character map called
.Ic name .
.It Ic NEWCHARMAP Ar name , basename Ta Creates a new character map called
.Ic name ,
copied from character map
.Ic basename .
.It Ic SETCHARMAP Ar name Ta Switch to character map Ic name .
.El
.Pp
.Sy Note:
Character maps affect all strings in the file from the point at which they are
defined.
defined, until switching to a different character map.
This means that any string that the code may want to print as debug information
will also be affected by it.
.Pp

View File

@@ -22,6 +22,7 @@
#include "asm/main.h"
#include "asm/mymath.h"
#include "asm/output.h"
#include "asm/util.h"
#include "extern/err.h"
@@ -90,16 +91,11 @@ static int32_t getvaluefield(struct sSymbol *sym)
}
/*
* Calculate the hash value for a string
* Calculate the hash value for a symbol name
*/
uint32_t calchash(char *s)
uint32_t sym_CalcHash(const char *s)
{
uint32_t hash = 5381;
while (*s != 0)
hash = (hash * 33) ^ (*s++);
return hash % HASHSIZE;
return calchash(s) % HASHSIZE;
}
/*
@@ -123,7 +119,7 @@ struct sSymbol *createsymbol(char *s)
struct sSymbol **ppsym;
uint32_t hash;
hash = calchash(s);
hash = sym_CalcHash(s);
ppsym = &(tHashedSymbols[hash]);
while ((*ppsym) != NULL)
@@ -187,7 +183,7 @@ struct sSymbol **findpsymbol(char *s, struct sSymbol *scope)
s);
}
hash = calchash(s);
hash = sym_CalcHash(s);
ppsym = &(tHashedSymbols[hash]);
while ((*ppsym) != NULL) {

46
src/asm/util.c Normal file
View File

@@ -0,0 +1,46 @@
/*
* This file is part of RGBDS.
*
* Copyright (c) 1997-2019, Carsten Sorensen and RGBDS contributors.
*
* SPDX-License-Identifier: MIT
*/
#include <stdint.h>
#include "asm/main.h"
#include "asm/util.h"
#include "extern/utf8decoder.h"
/*
 * Compute a 32-bit hash of the NUL-terminated string `s`.
 *
 * Starting from 5381, every byte is folded in as hash = (hash * 33) ^ byte.
 * The full 32-bit value is returned; callers reduce it to their own table
 * size.
 */
uint32_t calchash(const char *s)
{
	uint32_t hash = 5381;

	for (const char *p = s; *p != 0; p++)
		hash = (hash * 33) ^ (*p);

	return hash;
}
/*
 * Copy one UTF-8 encoded character from `src` to `dest`.
 *
 * Bytes are fed to decode() one at a time and copied to `dest` until the
 * decoder state returns to 0. `dest` is then NUL-terminated.
 * Returns the number of bytes copied, not counting the terminator.
 * Calls fatalerror() when decode() returns 1, which is treated as an invalid
 * UTF-8 sequence.
 *
 * NOTE(review): `dest` is assumed to be large enough for the character plus
 * the terminating NUL; nothing here checks its size.
 */
int32_t readUTF8Char(char *dest, char *src)
{
	uint32_t state = 0;
	uint32_t codep;
	int32_t count = 0;

	for (;;) {
		if (decode(&state, &codep, (uint8_t)src[count]) == 1)
			fatalerror("invalid UTF-8 character");

		dest[count] = src[count];
		count++;

		/* State 0 again: a complete character has been consumed */
		if (state == 0)
			break;
	}

	dest[count] = '\0';

	return count;
}

View File

@@ -1,4 +1,4 @@
SECTION "sec", ROM0
charmap "A", 1
SECTION "sec", ROM0
_A_ EQU "A"
db _A_

View File

@@ -0,0 +1,88 @@
printt "main charmap\n"
charmap "ab", $0
x = "ab"
printt "{x}\n"
printt "newcharmap map1\n"
newcharmap map1
x = "ab"
printt "{x}\n"
printt "newcharmap map2, main\n"
newcharmap map2, main
x = "ab"
printt "{x}\n"
printt "setcharmap map1\n"
setcharmap map1
x = "ab"
printt "{x}\n"
printt "newcharmap map3\n"
newcharmap map3
charmap "ab", $1
x = "ab"
printt "{x}\n"
printt "newcharmap map4, map3\n"
newcharmap map4, map3
charmap "ab", $1
charmap "cd", $2
x = "ab"
printt "{x}\n"
x = "cd"
printt "{x}\n"
printt "setcharmap map3\n"
setcharmap map3
x = "ab"
printt "{x}\n"
x = "cd"
printt "{x}\n"
printt "setcharmap main\n"
setcharmap main
SECTION "sec0", ROM0
x = "ab"
printt "{x}\n"
printt "override main charmap\n"
charmap "ef", $3
x = "ab"
printt "{x}\n"
x = "ef"
printt "{x}\n"
printt "setcharmap map3\n"
setcharmap map3
x = "ab"
printt "{x}\n"
x = "cd"
printt "{x}\n"
x = "ef"
printt "{x}\n"
printt "newcharmap map1\n"
newcharmap map1
printt "setcharmap map5\n"
setcharmap map5

View File

@@ -0,0 +1,34 @@
warning: multiple-charmaps.asm(64):
Using 'charmap' within a section when the current charmap is 'main' is deprecated
ERROR: multiple-charmaps.asm(85):
Charmap 'map1' already exists
ERROR: multiple-charmaps.asm(88):
Charmap 'map5' doesn't exist
error: Assembly aborted (2 errors)!
main charmap
$0
newcharmap map1
$6162
newcharmap map2, main
$0
setcharmap map1
$6162
newcharmap map3
$1
newcharmap map4, map3
$1
$2
setcharmap map3
$1
$6364
setcharmap main
$0
override main charmap
$6162
$3
setcharmap map3
$1
$6364
$6566
newcharmap map1
setcharmap map5

View File

@@ -0,0 +1,34 @@
warning: -(64):
Using 'charmap' within a section when the current charmap is 'main' is deprecated
ERROR: -(85):
Charmap 'map1' already exists
ERROR: -(88):
Charmap 'map5' doesn't exist
error: Assembly aborted (2 errors)!
main charmap
$0
newcharmap map1
$6162
newcharmap map2, main
$0
setcharmap map1
$6162
newcharmap map3
$1
newcharmap map4, map3
$1
$2
setcharmap map3
$1
$6364
setcharmap main
$0
override main charmap
$6162
$3
setcharmap map3
$1
$6364
$6566
newcharmap map1
setcharmap map5