mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-20 10:12:06 +00:00
@@ -18,5 +18,6 @@ void charmap_Push(void);
|
||||
void charmap_Pop(void);
|
||||
void charmap_Add(char *mapping, uint8_t value);
|
||||
size_t charmap_Convert(char const *input, uint8_t *output);
|
||||
size_t charmap_ConvertNext(char const **input, uint8_t *output);
|
||||
|
||||
#endif /* RGBDS_ASM_CHARMAP_H */
|
||||
|
||||
@@ -57,7 +57,7 @@ struct CharmapStackEntry {
|
||||
|
||||
struct CharmapStackEntry *charmapStack;
|
||||
|
||||
static struct Charmap *charmap_Get(const char *name)
|
||||
static struct Charmap *charmap_Get(char const *name)
|
||||
{
|
||||
return hash_GetElement(charmaps, name);
|
||||
}
|
||||
@@ -192,6 +192,19 @@ void charmap_Add(char *mapping, uint8_t value)
|
||||
}
|
||||
|
||||
/*
 * Convert a whole NUL-terminated string through the current charmap.
 *
 * Repeatedly calls charmap_ConvertNext(), which consumes one unit of `input`
 * and reports how many bytes it produced, until that function returns 0.
 *
 * @param input  The string to convert
 * @param output Where the converted bytes are written, back to back; no size is
 *               passed in, so this function performs no bounds checking itself
 * @return The total number of bytes produced
 */
size_t charmap_Convert(char const *input, uint8_t *output)
|
||||
{
|
||||
size_t outputLen = 0;
|
||||
|
||||
/* A length of 0 from charmap_ConvertNext() ends the conversion */
for (size_t charLen = charmap_ConvertNext(&input, output); charLen;
|
||||
charLen = charmap_ConvertNext(&input, output)) {
|
||||
/* Step past the bytes just produced so the next unit is appended after them */
output += charLen;
|
||||
outputLen += charLen;
|
||||
}
|
||||
|
||||
return outputLen;
|
||||
}
|
||||
|
||||
size_t charmap_ConvertNext(char const **input, uint8_t *output)
|
||||
{
|
||||
/*
|
||||
* The goal is to match the longest mapping possible.
|
||||
@@ -199,7 +212,6 @@ size_t charmap_Convert(char const *input, uint8_t *output)
|
||||
* If that would lead to a dead end, rewind characters until the last match, and output.
|
||||
* If no match, read a UTF-8 codepoint and output that.
|
||||
*/
|
||||
size_t outputLen = 0;
|
||||
struct Charmap const *charmap = *currentCharmap;
|
||||
struct Charnode const *node = &charmap->nodes[0];
|
||||
struct Charnode const *match = NULL;
|
||||
@@ -207,10 +219,10 @@ size_t charmap_Convert(char const *input, uint8_t *output)
|
||||
|
||||
for (;;) {
|
||||
/* We still want NULs to reach the `else` path, to give a chance to rewind */
|
||||
uint8_t c = *input - 1;
|
||||
uint8_t c = **input - 1;
|
||||
|
||||
if (*input && node->next[c]) {
|
||||
input++; /* Consume that char */
|
||||
if (**input && node->next[c]) {
|
||||
(*input)++; /* Consume that char */
|
||||
rewindDistance++;
|
||||
|
||||
node = &charmap->nodes[node->next[c]];
|
||||
@@ -220,31 +232,32 @@ size_t charmap_Convert(char const *input, uint8_t *output)
|
||||
}
|
||||
|
||||
} else {
|
||||
input -= rewindDistance; /* Rewind */
|
||||
*input -= rewindDistance; /* Rewind */
|
||||
rewindDistance = 0;
|
||||
node = &charmap->nodes[0];
|
||||
|
||||
if (match) { /* Arrived at a dead end with a match found */
|
||||
*output++ = match->value;
|
||||
outputLen++;
|
||||
match = NULL; /* Reset match for next round */
|
||||
if (output)
|
||||
*output = match->value;
|
||||
|
||||
} else if (*input) { /* No match found */
|
||||
size_t codepointLen = readUTF8Char(output, input);
|
||||
return 1;
|
||||
|
||||
if (codepointLen == 0) {
|
||||
} else if (**input) { /* No match found */
|
||||
size_t codepointLen = readUTF8Char(output, *input);
|
||||
|
||||
if (codepointLen == 0)
|
||||
error("Input string is not valid UTF-8!\n");
|
||||
break;
|
||||
}
|
||||
input += codepointLen; /* OK because UTF-8 has no NUL in multi-byte chars */
|
||||
output += codepointLen;
|
||||
outputLen += codepointLen;
|
||||
}
|
||||
|
||||
if (!*input)
|
||||
break;
|
||||
/* OK because UTF-8 has no NUL in multi-byte chars */
|
||||
*input += codepointLen;
|
||||
|
||||
return codepointLen;
|
||||
|
||||
} else { /* End of input */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return outputLen;
|
||||
unreachable_();
|
||||
}
|
||||
|
||||
@@ -210,6 +210,9 @@ static struct KeywordMapping {
|
||||
{"STRRPL", T_OP_STRRPL},
|
||||
{"STRFMT", T_OP_STRFMT},
|
||||
|
||||
{"CHARLEN", T_OP_CHARLEN},
|
||||
{"CHARSUB", T_OP_CHARSUB},
|
||||
|
||||
{"INCLUDE", T_POP_INCLUDE},
|
||||
{"PRINT", T_POP_PRINT},
|
||||
{"PRINTLN", T_POP_PRINTLN},
|
||||
@@ -589,7 +592,7 @@ struct KeywordDictNode {
|
||||
uint16_t children[0x60 - ' '];
|
||||
struct KeywordMapping const *keyword;
|
||||
/* Since the keyword structure is invariant, the min number of nodes is known at compile time */
|
||||
} keywordDict[351] = {0}; /* Make sure to keep this correct when adding keywords! */
|
||||
} keywordDict[357] = {0}; /* Make sure to keep this correct when adding keywords! */
|
||||
|
||||
/* Convert a char into its index into the dict */
|
||||
static uint8_t dictIndex(char c)
|
||||
|
||||
@@ -82,13 +82,12 @@ static char *strrstr(char *s1, char *s2)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static size_t strlenUTF8(const char *s)
|
||||
static size_t strlenUTF8(char const *s)
|
||||
{
|
||||
size_t len = 0;
|
||||
uint32_t state = 0;
|
||||
uint32_t codep = 0;
|
||||
|
||||
while (*s) {
|
||||
for (uint32_t codep = 0; *s; s++) {
|
||||
switch (decode(&state, &codep, *s)) {
|
||||
case 1:
|
||||
fatalerror("STRLEN: Invalid UTF-8 character\n");
|
||||
@@ -97,7 +96,6 @@ static size_t strlenUTF8(const char *s)
|
||||
len++;
|
||||
break;
|
||||
}
|
||||
s++;
|
||||
}
|
||||
|
||||
/* Check for partial code point. */
|
||||
@@ -107,13 +105,12 @@ static size_t strlenUTF8(const char *s)
|
||||
return len;
|
||||
}
|
||||
|
||||
static void strsubUTF8(char *dest, size_t destLen, const char *src, uint32_t pos, uint32_t len)
|
||||
static void strsubUTF8(char *dest, size_t destLen, char const *src, uint32_t pos, uint32_t len)
|
||||
{
|
||||
size_t srcIndex = 0;
|
||||
size_t destIndex = 0;
|
||||
uint32_t state = 0;
|
||||
uint32_t codep = 0;
|
||||
uint32_t curPos = 1;
|
||||
uint32_t curLen = 0;
|
||||
|
||||
if (pos < 1) {
|
||||
@@ -122,7 +119,7 @@ static void strsubUTF8(char *dest, size_t destLen, const char *src, uint32_t pos
|
||||
}
|
||||
|
||||
/* Advance to starting position in source string. */
|
||||
while (src[srcIndex] && curPos < pos) {
|
||||
for (uint32_t curPos = 1; src[srcIndex] && curPos < pos; srcIndex++) {
|
||||
switch (decode(&state, &codep, src[srcIndex])) {
|
||||
case 1:
|
||||
fatalerror("STRSUB: Invalid UTF-8 character\n");
|
||||
@@ -131,7 +128,6 @@ static void strsubUTF8(char *dest, size_t destLen, const char *src, uint32_t pos
|
||||
curPos++;
|
||||
break;
|
||||
}
|
||||
srcIndex++;
|
||||
}
|
||||
|
||||
if (!src[srcIndex] && len)
|
||||
@@ -162,6 +158,42 @@ static void strsubUTF8(char *dest, size_t destLen, const char *src, uint32_t pos
|
||||
dest[destIndex] = '\0';
|
||||
}
|
||||
|
||||
/*
 * Count how many units charmap_ConvertNext() splits `s` into.
 *
 * Each successful call consumes one unit of the string (a charmap mapping, or
 * a UTF-8 codepoint when nothing matches) and counts as one.
 *
 * @param s The NUL-terminated string to measure
 * @return The number of non-zero results obtained before charmap_ConvertNext() returned 0
 */
static size_t charlenUTF8(char const *s)
|
||||
{
|
||||
size_t len;
|
||||
|
||||
/* Only the count matters: a NULL output asks for the converted bytes not to be stored */
for (len = 0; charmap_ConvertNext(&s, NULL); len++)
|
||||
;
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
/*
 * Copy into `dest` the source text of the `pos`-th unit of `src`, where units
 * are delimited by successive calls to charmap_ConvertNext().
 *
 * @param dest Receives the NUL-terminated substring; no size is passed in, so
 *             the copy is not bounds-checked here
 *             (NOTE(review): presumably the caller's buffer is at least as large as `src` - confirm)
 * @param src  The NUL-terminated string to extract from
 * @param pos  1-based index of the unit to extract; 0 triggers a warning and is treated as 1
 */
static void charsubUTF8(char *dest, char const *src, uint32_t pos)
|
||||
{
|
||||
/* Starts non-zero so that the skipping loop below is allowed to run */
size_t charLen = 1;
|
||||
|
||||
if (pos < 1) {
|
||||
warning(WARNING_BUILTIN_ARG, "CHARSUB: Position starts at 1\n");
|
||||
pos = 1;
|
||||
}
|
||||
|
||||
/* Advance to starting position in source string. */
|
||||
/* Skips `pos - 1` units, stopping early once charmap_ConvertNext() returns 0 */
for (uint32_t curPos = 1; charLen && curPos < pos; curPos++)
|
||||
charLen = charmap_ConvertNext(&src, NULL);
|
||||
|
||||
/* Remember where the requested unit begins; the next call moves `src` past it */
char const *start = src;
|
||||
|
||||
/* A result of 0 here means no unit could be read at `pos` */
if (!charmap_ConvertNext(&src, NULL))
|
||||
warning(WARNING_BUILTIN_ARG,
|
||||
"CHARSUB: Position %lu is past the end of the string\n",
|
||||
(unsigned long)pos);
|
||||
|
||||
/* Copy from source to destination. */
|
||||
/* The bytes in [start, src) are the source text of the requested unit */
memcpy(dest, start, src - start);
|
||||
|
||||
dest[src - start] = '\0';
|
||||
}
|
||||
|
||||
static void strrpl(char *dest, size_t destLen, char const *src, char const *old, char const *new)
|
||||
{
|
||||
size_t oldLen = strlen(old);
|
||||
@@ -503,6 +535,9 @@ enum {
|
||||
%token T_OP_STRRPL "STRRPL"
|
||||
%token T_OP_STRFMT "STRFMT"
|
||||
|
||||
%token T_OP_CHARLEN "CHARLEN"
|
||||
%token T_OP_CHARSUB "CHARSUB"
|
||||
|
||||
%token <tzSym> T_LABEL "label"
|
||||
%token <tzSym> T_ID "identifier"
|
||||
%token <tzSym> T_LOCAL_ID "local identifier"
|
||||
@@ -1451,6 +1486,9 @@ relocexpr_no_str : scoped_anon_id { rpn_Symbol(&$$, $1); }
|
||||
| T_OP_STRLEN T_LPAREN string T_RPAREN {
|
||||
rpn_Number(&$$, strlenUTF8($3));
|
||||
}
|
||||
| T_OP_CHARLEN T_LPAREN string T_RPAREN {
|
||||
rpn_Number(&$$, charlenUTF8($3));
|
||||
}
|
||||
| T_LPAREN relocexpr T_RPAREN { $$ = $2; }
|
||||
;
|
||||
|
||||
@@ -1488,6 +1526,9 @@ string : T_STRING
|
||||
| T_OP_STRSUB T_LPAREN string T_COMMA uconst T_COMMA uconst T_RPAREN {
|
||||
strsubUTF8($$, sizeof($$), $3, $5, $7);
|
||||
}
|
||||
| T_OP_CHARSUB T_LPAREN string T_COMMA uconst T_RPAREN {
|
||||
charsubUTF8($$, $3, $5);
|
||||
}
|
||||
| T_OP_STRCAT T_LPAREN T_RPAREN {
|
||||
$$[0] = '\0';
|
||||
}
|
||||
|
||||
@@ -394,11 +394,13 @@ Most of them return a string, however some of these functions actually return an
|
||||
.It Fn STRCMP str1 str2 Ta Returns -1 if Ar str1 No is alphabetically lower than Ar str2 No , zero if they match, 1 if Ar str1 No is greater than Ar str2 .
|
||||
.It Fn STRIN str1 str2 Ta Returns the first position of Ar str2 No in Ar str1 No or zero if it's not present Pq first character is position 1 .
|
||||
.It Fn STRRIN str1 str2 Ta Returns the last position of Ar str2 No in Ar str1 No or zero if it's not present Pq first character is position 1 .
|
||||
.It Fn STRSUB str pos len Ta Returns a substring from Ar str No starting at Ar pos Po first character is position 1 Pc and Ar len No characters long.
|
||||
.It Fn STRSUB str pos len Ta Returns a substring from Ar str No starting at Ar pos No (first character is position 1) and Ar len No characters long.
|
||||
.It Fn STRUPR str Ta Returns Ar str No with all letters in uppercase.
|
||||
.It Fn STRLWR str Ta Returns Ar str No with all letters in lowercase.
|
||||
.It Fn STRRPL str old new Ta Returns Ar str No with each non-overlapping occurrence of the substring Ar old No replaced with Ar new .
|
||||
.It Fn STRFMT fmt args... Ta Returns the string Ar fmt No with each
|
||||
.It Fn CHARLEN str Ta Returns the number of charmap entries in Ar str No with the current charmap.
|
||||
.It Fn CHARSUB str pos Ta Returns the substring for the charmap entry at Ar pos No in Ar str No (first character is position 1) with the current charmap.
|
||||
.Ql %spec
|
||||
pattern replaced by interpolating the format
|
||||
.Ar spec
|
||||
|
||||
@@ -67,6 +67,7 @@ size_t readUTF8Char(uint8_t *dest, char const *src)
|
||||
if (decode(&state, &codep, src[i]) == 1)
|
||||
return 0;
|
||||
|
||||
if (dest)
|
||||
dest[i] = src[i];
|
||||
i++;
|
||||
|
||||
|
||||
25
test/asm/charlen-charsub.asm
Normal file
25
test/asm/charlen-charsub.asm
Normal file
@@ -0,0 +1,25 @@
|
||||
charmap "<NULL>", $00
|
||||
charmap "A", $10
|
||||
charmap "B", $20
|
||||
charmap "C", $30
|
||||
charmap "Bold", $88
|
||||
|
||||
SECTION "test", ROM0
|
||||
|
||||
S EQUS "XBold<NULL>ABC"
|
||||
|
||||
assert CHARLEN("{S}") == 6
|
||||
println CHARSUB("{S}", 2)
|
||||
assert !STRCMP(CHARSUB("{S}", 2), "Bold")
|
||||
assert CHARSUB("{S}", 2) == "Bold" && "Bold" == $88
|
||||
assert CHARSUB("{S}", 1) == $58 ; ASCII "X"
|
||||
db "{S}"
|
||||
|
||||
newcharmap ascii
|
||||
|
||||
assert CHARLEN("{S}") == 14
|
||||
println CHARSUB("{S}", 2)
|
||||
assert !STRCMP(CHARSUB("{S}", 2), "B")
|
||||
assert CHARSUB("{S}", 2) == "B" && "B" == $42 ; ASCII "B"
|
||||
assert CHARSUB("{S}", 1) == $58 ; ASCII "X"
|
||||
db "{S}"
|
||||
0
test/asm/charlen-charsub.err
Normal file
0
test/asm/charlen-charsub.err
Normal file
2
test/asm/charlen-charsub.out
Normal file
2
test/asm/charlen-charsub.out
Normal file
@@ -0,0 +1,2 @@
|
||||
Bold
|
||||
B
|
||||
BIN
test/asm/charlen-charsub.out.bin
Normal file
BIN
test/asm/charlen-charsub.out.bin
Normal file
Binary file not shown.
Reference in New Issue
Block a user