Merge pull request #403 from dbrotz/multiple-charmaps

Add support for multiple charmaps
This commit is contained in:
Eldred Habert
2019-08-31 04:44:58 +02:00
committed by GitHub
18 changed files with 486 additions and 33 deletions

View File

@@ -70,3 +70,6 @@
# Don't complain when bools are used in structs
--ignore BOOL_MEMBER
# Don't complain about initializing statics (this is specific to the kernel)
--ignore INITIALISED_STATIC

View File

@@ -57,6 +57,7 @@ rgbasm_obj := \
src/asm/output.o \
src/asm/rpn.o \
src/asm/symbol.o \
src/asm/util.o \
src/extern/err.o \
src/extern/utf8decoder.o \
src/version.o

View File

@@ -25,13 +25,18 @@ struct Charnode {
};
/*
 * A named character map.
 * Charmaps are kept in a hash table keyed by name; charmaps whose names fall
 * into the same bucket are chained through `next`.
 */
struct Charmap {
char name[MAXSYMLEN + 1]; /* NUL-terminated charmap name, compared on lookup */
int32_t charCount; /* user-side count. */
int32_t nodeCount; /* node-side count. */
struct Charnode nodes[MAXCHARNODES]; /* first node is reserved for the root node in charmap. */
struct Charmap *next; /* next charmap in hash table bucket */
};
int32_t readUTF8Char(char *destination, char *source);
void charmap_InitMain(void);
struct Charmap *charmap_New(const char *name, const char *baseName);
void charmap_Set(const char *name);
void charmap_Push(void);
void charmap_Pop(void);
int32_t charmap_Add(char *input, uint8_t output);
int32_t charmap_Convert(char **input);

View File

@@ -11,6 +11,7 @@
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include "helpers.h"

View File

@@ -51,7 +51,7 @@ struct sSymbol {
/* Symbol has a constant value, will not be changed during linking */
#define SYMF_CONST 0x200
uint32_t calchash(char *s);
uint32_t sym_CalcHash(const char *s);
void sym_SetExportAll(uint8_t set);
void sym_AddLocalReloc(char *tzSym);
void sym_AddReloc(char *tzSym);

17
include/asm/util.h Normal file
View File

@@ -0,0 +1,17 @@
/*
* This file is part of RGBDS.
*
* Copyright (c) 1997-2019, Carsten Sorensen and RGBDS contributors.
*
* SPDX-License-Identifier: MIT
*/
#ifndef RGBDS_UTIL_H
#define RGBDS_UTIL_H
#include <stdint.h>
/*
 * Calculate the hash value for a NUL-terminated string.
 * The result is not reduced to any table size; callers apply their own modulus.
 */
uint32_t calchash(const char *s);
/*
 * Copy the bytes of one UTF-8 character from src into dest, append a NUL
 * terminator to dest, and return the number of bytes copied (terminator
 * excluded). Raises a fatal error on an invalid UTF-8 sequence.
 * NOTE(review): dest must be large enough for the longest sequence plus the
 * terminator; no size is passed in, so confirm at each call site.
 */
int32_t readUTF8Char(char *dest, char *src);
#endif /* RGBDS_UTIL_H */

View File

@@ -26,6 +26,7 @@
#include "asm/output.h"
#include "asm/rpn.h"
#include "asm/symbol.h"
#include "asm/util.h"
#include "extern/utf8decoder.h"
@@ -618,6 +619,10 @@ static void strsubUTF8(char *dest, const char *src, uint32_t pos, uint32_t len)
%token T_POP_UNION T_POP_NEXTU T_POP_ENDU
%token T_POP_INCBIN T_POP_REPT
%token T_POP_CHARMAP
%token T_POP_NEWCHARMAP
%token T_POP_SETCHARMAP
%token T_POP_PUSHC
%token T_POP_POPC
%token T_POP_SHIFT
%token T_POP_ENDR
%token T_POP_FAIL
@@ -771,6 +776,10 @@ simple_pseudoop : include
| endu
| incbin
| charmap
| newcharmap
| setcharmap
| pushc
| popc
| rept
| shift
| fail
@@ -1034,6 +1043,28 @@ charmap : T_POP_CHARMAP string comma string
}
;
/*
 * NEWCHARMAP name [, basename]
 * Creates a charmap called `name`, optionally copied from `basename`.
 * charmap_New() also makes the new charmap the current one.
 */
newcharmap : T_POP_NEWCHARMAP T_ID
{
charmap_New($2, NULL);
}
| T_POP_NEWCHARMAP T_ID comma T_ID
{
charmap_New($2, $4);
}
;
/* SETCHARMAP name: switch the current charmap to an existing one */
setcharmap : T_POP_SETCHARMAP T_ID
{
charmap_Set($2);
}
;
/* PUSHC: save the current charmap on the charmap stack */
pushc : T_POP_PUSHC { charmap_Push(); }
;
/* POPC: restore the charmap most recently saved by PUSHC */
popc : T_POP_POPC { charmap_Pop(); }
;
printt : T_POP_PRINTT string
{
printf("%s", $2);

View File

@@ -6,6 +6,7 @@
* SPDX-License-Identifier: MIT
*/
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
@@ -15,28 +16,145 @@
#include "asm/charmap.h"
#include "asm/main.h"
#include "asm/output.h"
#include "asm/util.h"
#include "extern/utf8decoder.h"
#define CHARMAP_HASH_SIZE (1 << 9)
struct Charmap globalCharmap = {0};
struct CharmapStackEntry {
struct Charmap *charmap;
struct CharmapStackEntry *next;
};
int32_t readUTF8Char(char *dest, char *src)
static struct Charmap *tHashedCharmaps[CHARMAP_HASH_SIZE];
static struct Charmap *mainCharmap;
static struct Charmap *currentCharmap;
struct CharmapStackEntry *charmapStack;
static void warnSectionCharmap(void)
{
uint32_t state;
uint32_t codep;
int32_t i;
static bool warned = false;
for (i = 0, state = 0;; i++) {
if (decode(&state, &codep, (uint8_t)src[i]) == 1)
fatalerror("invalid UTF-8 character");
if (warned)
return;
dest[i] = src[i];
warning("Using 'charmap' within a section when the current charmap is 'main' is deprecated");
warned = true;
}
if (state == 0) {
dest[++i] = '\0';
return i;
/*
 * Map a charmap name to its bucket index in tHashedCharmaps.
 */
static uint32_t charmap_CalcHash(const char *s)
{
	uint32_t fullHash = calchash(s);

	return fullHash % CHARMAP_HASH_SIZE;
}
/*
 * Find the hash table slot for the charmap called `name`.
 *
 * Returns the address of the pointer that refers to the matching charmap.
 * If no charmap has that name, the returned slot holds NULL and is the end of
 * the bucket's chain, so a new charmap can be linked in by storing through it.
 */
static struct Charmap **charmap_Get(const char *name)
{
	struct Charmap **ppSlot;

	for (ppSlot = &tHashedCharmaps[charmap_CalcHash(name)];
	     *ppSlot != NULL;
	     ppSlot = &(*ppSlot)->next) {
		if (strcmp((*ppSlot)->name, name) == 0)
			break;
	}

	return ppSlot;
}
static void CopyNode(struct Charmap *dest,
const struct Charmap *src,
int nodeIdx)
{
dest->nodes[nodeIdx].code = src->nodes[nodeIdx].code;
dest->nodes[nodeIdx].isCode = src->nodes[nodeIdx].isCode;
for (int i = 0; i < 256; i++)
if (src->nodes[nodeIdx].next[i])
dest->nodes[nodeIdx].next[i] = dest->nodes +
(src->nodes[nodeIdx].next[i] - src->nodes);
}
struct Charmap *charmap_New(const char *name, const char *baseName)
{
struct Charmap *pBase = NULL;
if (baseName != NULL) {
struct Charmap **ppBase = charmap_Get(baseName);
if (*ppBase == NULL) {
yyerror("Base charmap '%s' doesn't exist", baseName);
return NULL;
}
pBase = *ppBase;
}
struct Charmap **ppCharmap = charmap_Get(name);
if (*ppCharmap != NULL) {
yyerror("Charmap '%s' already exists", name);
return NULL;
}
*ppCharmap = calloc(1, sizeof(struct Charmap));
if (*ppCharmap == NULL)
fatalerror("Not enough memory for charmap");
struct Charmap *pCharmap = *ppCharmap;
snprintf(pCharmap->name, sizeof(pCharmap->name), "%s", name);
if (pBase != NULL) {
pCharmap->charCount = pBase->charCount;
pCharmap->nodeCount = pBase->nodeCount;
for (int i = 0; i < MAXCHARNODES; i++)
CopyNode(pCharmap, pBase, i);
}
currentCharmap = pCharmap;
return pCharmap;
}
void charmap_Set(const char *name)
{
struct Charmap **ppCharmap = charmap_Get(name);
if (*ppCharmap == NULL) {
yyerror("Charmap '%s' doesn't exist", name);
return;
}
currentCharmap = *ppCharmap;
}
/*
 * Save the current charmap on top of the charmap stack.
 * The current charmap itself is not changed.
 */
void charmap_Push(void)
{
	struct CharmapStackEntry *pushed = malloc(sizeof(*pushed));

	if (pushed == NULL)
		fatalerror("No memory for charmap stack");

	pushed->next = charmapStack;
	pushed->charmap = currentCharmap;
	charmapStack = pushed;
}
/*
 * Remove the top entry of the charmap stack and make the charmap it recorded
 * the current one. Popping an empty stack is a fatal error.
 */
void charmap_Pop(void)
{
	struct CharmapStackEntry *popped = charmapStack;

	if (popped == NULL)
		fatalerror("No entries in the charmap stack");

	charmapStack = popped->next;
	currentCharmap = popped->charmap;
	free(popped);
}
void charmap_InitMain(void)
{
mainCharmap = charmap_New("main", NULL);
}
int32_t charmap_Add(char *input, uint8_t output)
@@ -47,7 +165,15 @@ int32_t charmap_Add(char *input, uint8_t output)
struct Charmap *charmap;
struct Charnode *curr_node, *temp_node;
if (pCurrentSection) {
/*
* If the user tries to define a character mapping inside a section
* and the current global charmap is the "main" one, then a local
* section charmap will be created or modified instead of the global
* one. In other words, the local section charmap can override the
* main global one, but not the others.
*/
if (pCurrentSection && currentCharmap == mainCharmap) {
warnSectionCharmap();
if (pCurrentSection->charmap) {
charmap = pCurrentSection->charmap;
} else {
@@ -57,7 +183,7 @@ int32_t charmap_Add(char *input, uint8_t output)
pCurrentSection->charmap = charmap;
}
} else {
charmap = &globalCharmap;
charmap = currentCharmap;
}
if (charmap->charCount >= MAXCHARMAPS || strlen(input) > CHARMAPLENGTH)
@@ -99,10 +225,18 @@ int32_t charmap_Convert(char **input)
int32_t i, match, length;
uint8_t v, foundCode;
if (pCurrentSection && pCurrentSection->charmap)
/*
* If there is a local section charmap and the current global charmap
* is the "main" one, the local one is used. Otherwise, the global
* one is used. In other words, the local section charmap can override
* the main global one, but not the others.
*/
if (pCurrentSection &&
pCurrentSection->charmap &&
currentCharmap == mainCharmap)
charmap = pCurrentSection->charmap;
else
charmap = &globalCharmap;
charmap = currentCharmap;
output = malloc(strlen(*input));
if (output == NULL)

View File

@@ -479,6 +479,10 @@ const struct sLexInitString lexer_strings[] = {
{"incbin", T_POP_INCBIN},
{"charmap", T_POP_CHARMAP},
{"newcharmap", T_POP_NEWCHARMAP},
{"setcharmap", T_POP_SETCHARMAP},
{"pushc", T_POP_PUSHC},
{"popc", T_POP_POPC},
{"fail", T_POP_FAIL},
{"warn", T_POP_WARN},

View File

@@ -21,6 +21,7 @@
#include "asm/lexer.h"
#include "asm/output.h"
#include "asm/main.h"
#include "asm/charmap.h"
#include "extern/err.h"
@@ -437,6 +438,7 @@ int main(int argc, char *argv[])
sym_SetExportAll(CurrentOptions.exportall);
fstk_Init(tzMainfile);
opt_ParseDefines();
charmap_InitMain();
yy_set_state(LEX_STATE_NORMAL);
opt_SetCurrentOptions(&DefaultOptions);

View File

@@ -321,7 +321,7 @@ static uint32_t addsymbol(struct sSymbol *pSym)
struct PatchSymbol *pPSym, **ppPSym;
uint32_t hash;
hash = calchash(pSym->tzName);
hash = sym_CalcHash(pSym->tzName);
ppPSym = &(tHashedPatchSymbols[hash]);
while ((*ppPSym) != NULL) {

View File

@@ -1140,9 +1140,30 @@ CHARMAP "&iacute", 20
CHARMAP "A", 128
.Ed
.Pp
It is possible to create multiple character maps and then switch between them
as desired. This can be used to encode debug information in ASCII and use
a different encoding for other purposes, for example. Initially, there is
one character map called
.Sy main
and it is automatically selected as the current character map from the
beginning. There is also a character map stack that can be used to save and
restore which character map is currently active.
.Bl -column "NEWCHARMAP name, basename"
.It Sy Command Ta Sy Meaning
.It Ic NEWCHARMAP Ar name Ta Creates a new, empty character map called
.Ic name
and switches to it.
.It Ic NEWCHARMAP Ar name , basename Ta Creates a new character map called
.Ic name ,
copied from character map
.Ic basename ,
and switches to it.
.It Ic SETCHARMAP Ar name Ta Switch to character map Ic name .
.It Ic PUSHC Ta Push the current character map onto the stack.
.It Ic POPC Ta Pop a character map off the stack and switch to it.
.El
.Pp
.Sy Note:
Character maps affect all strings in the file from the point at which they are
defined.
defined, until switching to a different character map.
This means that any string that the code may want to print as debug information
will also be affected by it.
.Pp

View File

@@ -22,6 +22,7 @@
#include "asm/main.h"
#include "asm/mymath.h"
#include "asm/output.h"
#include "asm/util.h"
#include "extern/err.h"
@@ -90,16 +91,11 @@ static int32_t getvaluefield(struct sSymbol *sym)
}
/*
* Calculate the hash value for a string
* Calculate the hash value for a symbol name
*/
uint32_t calchash(char *s)
uint32_t sym_CalcHash(const char *s)
{
uint32_t hash = 5381;
while (*s != 0)
hash = (hash * 33) ^ (*s++);
return hash % HASHSIZE;
return calchash(s) % HASHSIZE;
}
/*
@@ -123,7 +119,7 @@ struct sSymbol *createsymbol(char *s)
struct sSymbol **ppsym;
uint32_t hash;
hash = calchash(s);
hash = sym_CalcHash(s);
ppsym = &(tHashedSymbols[hash]);
while ((*ppsym) != NULL)
@@ -187,7 +183,7 @@ struct sSymbol **findpsymbol(char *s, struct sSymbol *scope)
s);
}
hash = calchash(s);
hash = sym_CalcHash(s);
ppsym = &(tHashedSymbols[hash]);
while ((*ppsym) != NULL) {

46
src/asm/util.c Normal file
View File

@@ -0,0 +1,46 @@
/*
* This file is part of RGBDS.
*
* Copyright (c) 1997-2019, Carsten Sorensen and RGBDS contributors.
*
* SPDX-License-Identifier: MIT
*/
#include <stdint.h>
#include "asm/main.h"
#include "asm/util.h"
#include "extern/utf8decoder.h"
/*
 * Calculate the hash value for a NUL-terminated string.
 *
 * Starts from 5381 and, for each character, multiplies the running value by
 * 33 and XORs the character in. The result is not reduced to a table size;
 * callers apply their own modulus.
 */
uint32_t calchash(const char *s)
{
	uint32_t hash = 5381;

	for (const char *p = s; *p != '\0'; p++)
		hash = (hash * 33) ^ *p;

	return hash;
}
/*
 * Copy the bytes of one UTF-8 character from `src` to `dest`.
 *
 * Bytes are fed to decode() one at a time and copied until the decoder state
 * returns to 0; `dest` is then NUL-terminated. At least one byte is always
 * consumed, even if it is the NUL terminator of `src`.
 *
 * Returns the number of bytes copied, not counting the terminator written to
 * `dest`. A decode() result of 1 is reported as a fatal
 * "invalid UTF-8 character" error.
 *
 * NOTE(review): `dest` is not bounds-checked here; callers must provide room
 * for the longest sequence plus the terminator.
 */
int32_t readUTF8Char(char *dest, char *src)
{
	uint32_t state = 0;
	uint32_t codep;
	int32_t len = 0;

	do {
		if (decode(&state, &codep, (uint8_t)src[len]) == 1)
			fatalerror("invalid UTF-8 character");

		dest[len] = src[len];
		len++;
	} while (state != 0);

	dest[len] = '\0';

	return len;
}

View File

@@ -1,4 +1,4 @@
SECTION "sec", ROM0
charmap "A", 1
SECTION "sec", ROM0
_A_ EQU "A"
db _A_

View File

@@ -0,0 +1,104 @@
new_: MACRO
IF _NARG > 1
printt "newcharmap \1, \2\n"
newcharmap \1, \2
ELSE
printt "newcharmap \1\n"
newcharmap \1
ENDC
ENDM
set_: MACRO
printt "setcharmap \1\n"
setcharmap \1
ENDM
push_: MACRO
printt "pushc\n"
pushc
ENDM
pop_: MACRO
printt "popc\n"
popc
ENDM
print: MACRO
x = \1
printt "{x}\n"
ENDM
printt "main charmap\n"
charmap "ab", $0
print "ab"
new_ map1
print "ab"
new_ map2, main
print "ab"
set_ map1
print "ab"
new_ map3
charmap "ab", $1
print "ab"
new_ map4, map3
charmap "ab", $1
charmap "cd", $2
print "ab"
print "cd"
set_ map3
print "ab"
print "cd"
set_ main
SECTION "sec0", ROM0
print "ab"
printt "override main charmap\n"
charmap "ef", $3
print "ab"
print "ef"
set_ map1
push_
set_ map2
push_
set_ map3
print "ab"
print "cd"
print "ef"
pop_
print "ab"
pop_
print "ab"
new_ map1
set_ map5
pop_

View File

@@ -0,0 +1,44 @@
warning: multiple-charmaps.asm(75):
Using 'charmap' within a section when the current charmap is 'main' is deprecated
ERROR: multiple-charmaps.asm(100) -> new_(5):
Charmap 'map1' already exists
ERROR: multiple-charmaps.asm(102) -> set_(2):
Charmap 'map5' doesn't exist
ERROR: multiple-charmaps.asm(104) -> pop_(2):
No entries in the charmap stack
main charmap
$0
newcharmap map1
$6162
newcharmap map2, main
$0
setcharmap map1
$6162
newcharmap map3
$1
newcharmap map4, map3
$1
$2
setcharmap map3
$1
$6364
setcharmap main
$0
override main charmap
$6162
$3
setcharmap map1
pushc
setcharmap map2
pushc
setcharmap map3
$1
$6364
$6566
popc
$0
popc
$6162
newcharmap map1
setcharmap map5
popc

View File

@@ -0,0 +1,44 @@
warning: -(75):
Using 'charmap' within a section when the current charmap is 'main' is deprecated
ERROR: -(100) -> new_(5):
Charmap 'map1' already exists
ERROR: -(102) -> set_(2):
Charmap 'map5' doesn't exist
ERROR: -(104) -> pop_(2):
No entries in the charmap stack
main charmap
$0
newcharmap map1
$6162
newcharmap map2, main
$0
setcharmap map1
$6162
newcharmap map3
$1
newcharmap map4, map3
$1
$2
setcharmap map3
$1
$6364
setcharmap main
$0
override main charmap
$6162
$3
setcharmap map1
pushc
setcharmap map2
pushc
setcharmap map3
$1
$6364
$6566
popc
$0
popc
$6162
newcharmap map1
setcharmap map5
popc