mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-20 10:12:06 +00:00
@@ -18,5 +18,6 @@ void charmap_Push(void);
|
|||||||
void charmap_Pop(void);
|
void charmap_Pop(void);
|
||||||
void charmap_Add(char *mapping, uint8_t value);
|
void charmap_Add(char *mapping, uint8_t value);
|
||||||
size_t charmap_Convert(char const *input, uint8_t *output);
|
size_t charmap_Convert(char const *input, uint8_t *output);
|
||||||
|
size_t charmap_ConvertNext(char const **input, uint8_t *output);
|
||||||
|
|
||||||
#endif /* RGBDS_ASM_CHARMAP_H */
|
#endif /* RGBDS_ASM_CHARMAP_H */
|
||||||
|
|||||||
@@ -57,7 +57,7 @@ struct CharmapStackEntry {
|
|||||||
|
|
||||||
struct CharmapStackEntry *charmapStack;
|
struct CharmapStackEntry *charmapStack;
|
||||||
|
|
||||||
static struct Charmap *charmap_Get(const char *name)
|
static struct Charmap *charmap_Get(char const *name)
|
||||||
{
|
{
|
||||||
return hash_GetElement(charmaps, name);
|
return hash_GetElement(charmaps, name);
|
||||||
}
|
}
|
||||||
@@ -192,6 +192,19 @@ void charmap_Add(char *mapping, uint8_t value)
|
|||||||
}
|
}
|
||||||
|
|
||||||
size_t charmap_Convert(char const *input, uint8_t *output)
|
size_t charmap_Convert(char const *input, uint8_t *output)
|
||||||
|
{
|
||||||
|
size_t outputLen = 0;
|
||||||
|
|
||||||
|
for (size_t charLen = charmap_ConvertNext(&input, output); charLen;
|
||||||
|
charLen = charmap_ConvertNext(&input, output)) {
|
||||||
|
output += charLen;
|
||||||
|
outputLen += charLen;
|
||||||
|
}
|
||||||
|
|
||||||
|
return outputLen;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t charmap_ConvertNext(char const **input, uint8_t *output)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* The goal is to match the longest mapping possible.
|
* The goal is to match the longest mapping possible.
|
||||||
@@ -199,7 +212,6 @@ size_t charmap_Convert(char const *input, uint8_t *output)
|
|||||||
* If that would lead to a dead end, rewind characters until the last match, and output.
|
* If that would lead to a dead end, rewind characters until the last match, and output.
|
||||||
* If no match, read a UTF-8 codepoint and output that.
|
* If no match, read a UTF-8 codepoint and output that.
|
||||||
*/
|
*/
|
||||||
size_t outputLen = 0;
|
|
||||||
struct Charmap const *charmap = *currentCharmap;
|
struct Charmap const *charmap = *currentCharmap;
|
||||||
struct Charnode const *node = &charmap->nodes[0];
|
struct Charnode const *node = &charmap->nodes[0];
|
||||||
struct Charnode const *match = NULL;
|
struct Charnode const *match = NULL;
|
||||||
@@ -207,10 +219,10 @@ size_t charmap_Convert(char const *input, uint8_t *output)
|
|||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
/* We still want NULs to reach the `else` path, to give a chance to rewind */
|
/* We still want NULs to reach the `else` path, to give a chance to rewind */
|
||||||
uint8_t c = *input - 1;
|
uint8_t c = **input - 1;
|
||||||
|
|
||||||
if (*input && node->next[c]) {
|
if (**input && node->next[c]) {
|
||||||
input++; /* Consume that char */
|
(*input)++; /* Consume that char */
|
||||||
rewindDistance++;
|
rewindDistance++;
|
||||||
|
|
||||||
node = &charmap->nodes[node->next[c]];
|
node = &charmap->nodes[node->next[c]];
|
||||||
@@ -220,31 +232,32 @@ size_t charmap_Convert(char const *input, uint8_t *output)
|
|||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
input -= rewindDistance; /* Rewind */
|
*input -= rewindDistance; /* Rewind */
|
||||||
rewindDistance = 0;
|
rewindDistance = 0;
|
||||||
node = &charmap->nodes[0];
|
node = &charmap->nodes[0];
|
||||||
|
|
||||||
if (match) { /* Arrived at a dead end with a match found */
|
if (match) { /* Arrived at a dead end with a match found */
|
||||||
*output++ = match->value;
|
if (output)
|
||||||
outputLen++;
|
*output = match->value;
|
||||||
match = NULL; /* Reset match for next round */
|
|
||||||
|
|
||||||
} else if (*input) { /* No match found */
|
return 1;
|
||||||
size_t codepointLen = readUTF8Char(output, input);
|
|
||||||
|
|
||||||
if (codepointLen == 0) {
|
} else if (**input) { /* No match found */
|
||||||
|
size_t codepointLen = readUTF8Char(output, *input);
|
||||||
|
|
||||||
|
if (codepointLen == 0)
|
||||||
error("Input string is not valid UTF-8!\n");
|
error("Input string is not valid UTF-8!\n");
|
||||||
break;
|
|
||||||
}
|
|
||||||
input += codepointLen; /* OK because UTF-8 has no NUL in multi-byte chars */
|
|
||||||
output += codepointLen;
|
|
||||||
outputLen += codepointLen;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!*input)
|
/* OK because UTF-8 has no NUL in multi-byte chars */
|
||||||
break;
|
*input += codepointLen;
|
||||||
|
|
||||||
|
return codepointLen;
|
||||||
|
|
||||||
|
} else { /* End of input */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return outputLen;
|
unreachable_();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -210,6 +210,9 @@ static struct KeywordMapping {
|
|||||||
{"STRRPL", T_OP_STRRPL},
|
{"STRRPL", T_OP_STRRPL},
|
||||||
{"STRFMT", T_OP_STRFMT},
|
{"STRFMT", T_OP_STRFMT},
|
||||||
|
|
||||||
|
{"CHARLEN", T_OP_CHARLEN},
|
||||||
|
{"CHARSUB", T_OP_CHARSUB},
|
||||||
|
|
||||||
{"INCLUDE", T_POP_INCLUDE},
|
{"INCLUDE", T_POP_INCLUDE},
|
||||||
{"PRINT", T_POP_PRINT},
|
{"PRINT", T_POP_PRINT},
|
||||||
{"PRINTLN", T_POP_PRINTLN},
|
{"PRINTLN", T_POP_PRINTLN},
|
||||||
@@ -589,7 +592,7 @@ struct KeywordDictNode {
|
|||||||
uint16_t children[0x60 - ' '];
|
uint16_t children[0x60 - ' '];
|
||||||
struct KeywordMapping const *keyword;
|
struct KeywordMapping const *keyword;
|
||||||
/* Since the keyword structure is invariant, the min number of nodes is known at compile time */
|
/* Since the keyword structure is invariant, the min number of nodes is known at compile time */
|
||||||
} keywordDict[351] = {0}; /* Make sure to keep this correct when adding keywords! */
|
} keywordDict[357] = {0}; /* Make sure to keep this correct when adding keywords! */
|
||||||
|
|
||||||
/* Convert a char into its index into the dict */
|
/* Convert a char into its index into the dict */
|
||||||
static uint8_t dictIndex(char c)
|
static uint8_t dictIndex(char c)
|
||||||
|
|||||||
@@ -82,13 +82,12 @@ static char *strrstr(char *s1, char *s2)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t strlenUTF8(const char *s)
|
static size_t strlenUTF8(char const *s)
|
||||||
{
|
{
|
||||||
size_t len = 0;
|
size_t len = 0;
|
||||||
uint32_t state = 0;
|
uint32_t state = 0;
|
||||||
uint32_t codep = 0;
|
|
||||||
|
|
||||||
while (*s) {
|
for (uint32_t codep = 0; *s; s++) {
|
||||||
switch (decode(&state, &codep, *s)) {
|
switch (decode(&state, &codep, *s)) {
|
||||||
case 1:
|
case 1:
|
||||||
fatalerror("STRLEN: Invalid UTF-8 character\n");
|
fatalerror("STRLEN: Invalid UTF-8 character\n");
|
||||||
@@ -97,7 +96,6 @@ static size_t strlenUTF8(const char *s)
|
|||||||
len++;
|
len++;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
s++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check for partial code point. */
|
/* Check for partial code point. */
|
||||||
@@ -107,13 +105,12 @@ static size_t strlenUTF8(const char *s)
|
|||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void strsubUTF8(char *dest, size_t destLen, const char *src, uint32_t pos, uint32_t len)
|
static void strsubUTF8(char *dest, size_t destLen, char const *src, uint32_t pos, uint32_t len)
|
||||||
{
|
{
|
||||||
size_t srcIndex = 0;
|
size_t srcIndex = 0;
|
||||||
size_t destIndex = 0;
|
size_t destIndex = 0;
|
||||||
uint32_t state = 0;
|
uint32_t state = 0;
|
||||||
uint32_t codep = 0;
|
uint32_t codep = 0;
|
||||||
uint32_t curPos = 1;
|
|
||||||
uint32_t curLen = 0;
|
uint32_t curLen = 0;
|
||||||
|
|
||||||
if (pos < 1) {
|
if (pos < 1) {
|
||||||
@@ -122,7 +119,7 @@ static void strsubUTF8(char *dest, size_t destLen, const char *src, uint32_t pos
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Advance to starting position in source string. */
|
/* Advance to starting position in source string. */
|
||||||
while (src[srcIndex] && curPos < pos) {
|
for (uint32_t curPos = 1; src[srcIndex] && curPos < pos; srcIndex++) {
|
||||||
switch (decode(&state, &codep, src[srcIndex])) {
|
switch (decode(&state, &codep, src[srcIndex])) {
|
||||||
case 1:
|
case 1:
|
||||||
fatalerror("STRSUB: Invalid UTF-8 character\n");
|
fatalerror("STRSUB: Invalid UTF-8 character\n");
|
||||||
@@ -131,7 +128,6 @@ static void strsubUTF8(char *dest, size_t destLen, const char *src, uint32_t pos
|
|||||||
curPos++;
|
curPos++;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
srcIndex++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!src[srcIndex] && len)
|
if (!src[srcIndex] && len)
|
||||||
@@ -162,6 +158,42 @@ static void strsubUTF8(char *dest, size_t destLen, const char *src, uint32_t pos
|
|||||||
dest[destIndex] = '\0';
|
dest[destIndex] = '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static size_t charlenUTF8(char const *s)
|
||||||
|
{
|
||||||
|
size_t len;
|
||||||
|
|
||||||
|
for (len = 0; charmap_ConvertNext(&s, NULL); len++)
|
||||||
|
;
|
||||||
|
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void charsubUTF8(char *dest, char const *src, uint32_t pos)
|
||||||
|
{
|
||||||
|
size_t charLen = 1;
|
||||||
|
|
||||||
|
if (pos < 1) {
|
||||||
|
warning(WARNING_BUILTIN_ARG, "CHARSUB: Position starts at 1\n");
|
||||||
|
pos = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Advance to starting position in source string. */
|
||||||
|
for (uint32_t curPos = 1; charLen && curPos < pos; curPos++)
|
||||||
|
charLen = charmap_ConvertNext(&src, NULL);
|
||||||
|
|
||||||
|
char const *start = src;
|
||||||
|
|
||||||
|
if (!charmap_ConvertNext(&src, NULL))
|
||||||
|
warning(WARNING_BUILTIN_ARG,
|
||||||
|
"CHARSUB: Position %lu is past the end of the string\n",
|
||||||
|
(unsigned long)pos);
|
||||||
|
|
||||||
|
/* Copy from source to destination. */
|
||||||
|
memcpy(dest, start, src - start);
|
||||||
|
|
||||||
|
dest[src - start] = '\0';
|
||||||
|
}
|
||||||
|
|
||||||
static void strrpl(char *dest, size_t destLen, char const *src, char const *old, char const *new)
|
static void strrpl(char *dest, size_t destLen, char const *src, char const *old, char const *new)
|
||||||
{
|
{
|
||||||
size_t oldLen = strlen(old);
|
size_t oldLen = strlen(old);
|
||||||
@@ -503,6 +535,9 @@ enum {
|
|||||||
%token T_OP_STRRPL "STRRPL"
|
%token T_OP_STRRPL "STRRPL"
|
||||||
%token T_OP_STRFMT "STRFMT"
|
%token T_OP_STRFMT "STRFMT"
|
||||||
|
|
||||||
|
%token T_OP_CHARLEN "CHARLEN"
|
||||||
|
%token T_OP_CHARSUB "CHARSUB"
|
||||||
|
|
||||||
%token <tzSym> T_LABEL "label"
|
%token <tzSym> T_LABEL "label"
|
||||||
%token <tzSym> T_ID "identifier"
|
%token <tzSym> T_ID "identifier"
|
||||||
%token <tzSym> T_LOCAL_ID "local identifier"
|
%token <tzSym> T_LOCAL_ID "local identifier"
|
||||||
@@ -1451,6 +1486,9 @@ relocexpr_no_str : scoped_anon_id { rpn_Symbol(&$$, $1); }
|
|||||||
| T_OP_STRLEN T_LPAREN string T_RPAREN {
|
| T_OP_STRLEN T_LPAREN string T_RPAREN {
|
||||||
rpn_Number(&$$, strlenUTF8($3));
|
rpn_Number(&$$, strlenUTF8($3));
|
||||||
}
|
}
|
||||||
|
| T_OP_CHARLEN T_LPAREN string T_RPAREN {
|
||||||
|
rpn_Number(&$$, charlenUTF8($3));
|
||||||
|
}
|
||||||
| T_LPAREN relocexpr T_RPAREN { $$ = $2; }
|
| T_LPAREN relocexpr T_RPAREN { $$ = $2; }
|
||||||
;
|
;
|
||||||
|
|
||||||
@@ -1488,6 +1526,9 @@ string : T_STRING
|
|||||||
| T_OP_STRSUB T_LPAREN string T_COMMA uconst T_COMMA uconst T_RPAREN {
|
| T_OP_STRSUB T_LPAREN string T_COMMA uconst T_COMMA uconst T_RPAREN {
|
||||||
strsubUTF8($$, sizeof($$), $3, $5, $7);
|
strsubUTF8($$, sizeof($$), $3, $5, $7);
|
||||||
}
|
}
|
||||||
|
| T_OP_CHARSUB T_LPAREN string T_COMMA uconst T_RPAREN {
|
||||||
|
charsubUTF8($$, $3, $5);
|
||||||
|
}
|
||||||
| T_OP_STRCAT T_LPAREN T_RPAREN {
|
| T_OP_STRCAT T_LPAREN T_RPAREN {
|
||||||
$$[0] = '\0';
|
$$[0] = '\0';
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -394,11 +394,13 @@ Most of them return a string, however some of these functions actually return an
|
|||||||
.It Fn STRCMP str1 str2 Ta Returns -1 if Ar str1 No is alphabetically lower than Ar str2 No , zero if they match, 1 if Ar str1 No is greater than Ar str2 .
|
.It Fn STRCMP str1 str2 Ta Returns -1 if Ar str1 No is alphabetically lower than Ar str2 No , zero if they match, 1 if Ar str1 No is greater than Ar str2 .
|
||||||
.It Fn STRIN str1 str2 Ta Returns the first position of Ar str2 No in Ar str1 No or zero if it's not present Pq first character is position 1 .
|
.It Fn STRIN str1 str2 Ta Returns the first position of Ar str2 No in Ar str1 No or zero if it's not present Pq first character is position 1 .
|
||||||
.It Fn STRRIN str1 str2 Ta Returns the last position of Ar str2 No in Ar str1 No or zero if it's not present Pq first character is position 1 .
|
.It Fn STRRIN str1 str2 Ta Returns the last position of Ar str2 No in Ar str1 No or zero if it's not present Pq first character is position 1 .
|
||||||
.It Fn STRSUB str pos len Ta Returns a substring from Ar str No starting at Ar pos Po first character is position 1 Pc and Ar len No characters long.
|
.It Fn STRSUB str pos len Ta Returns a substring from Ar str No starting at Ar pos No (first character is position 1) and Ar len No characters long.
|
||||||
.It Fn STRUPR str Ta Returns Ar str No with all letters in uppercase.
|
.It Fn STRUPR str Ta Returns Ar str No with all letters in uppercase.
|
||||||
.It Fn STRLWR str Ta Returns Ar str No with all letters in lowercase.
|
.It Fn STRLWR str Ta Returns Ar str No with all letters in lowercase.
|
||||||
.It Fn STRRPL str old new Ta Returns Ar str No with each non-overlapping occurrence of the substring Ar old No replaced with Ar new .
|
.It Fn STRRPL str old new Ta Returns Ar str No with each non-overlapping occurrence of the substring Ar old No replaced with Ar new .
|
||||||
.It Fn STRFMT fmt args... Ta Returns the string Ar fmt No with each
|
.It Fn STRFMT fmt args... Ta Returns the string Ar fmt No with each
|
||||||
|
.It Fn CHARLEN str Ta Returns the number of charmap entries in Ar str No with the current charmap.
|
||||||
|
.It Fn CHARSUB str pos Ta Returns the substring for the charmap entry at Ar pos No in Ar str No (first character is position 1) with the current charmap.
|
||||||
.Ql %spec
|
.Ql %spec
|
||||||
pattern replaced by interpolating the format
|
pattern replaced by interpolating the format
|
||||||
.Ar spec
|
.Ar spec
|
||||||
|
|||||||
@@ -67,6 +67,7 @@ size_t readUTF8Char(uint8_t *dest, char const *src)
|
|||||||
if (decode(&state, &codep, src[i]) == 1)
|
if (decode(&state, &codep, src[i]) == 1)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
if (dest)
|
||||||
dest[i] = src[i];
|
dest[i] = src[i];
|
||||||
i++;
|
i++;
|
||||||
|
|
||||||
|
|||||||
25
test/asm/charlen-charsub.asm
Normal file
25
test/asm/charlen-charsub.asm
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
charmap "<NULL>", $00
|
||||||
|
charmap "A", $10
|
||||||
|
charmap "B", $20
|
||||||
|
charmap "C", $30
|
||||||
|
charmap "Bold", $88
|
||||||
|
|
||||||
|
SECTION "test", ROM0
|
||||||
|
|
||||||
|
S EQUS "XBold<NULL>ABC"
|
||||||
|
|
||||||
|
assert CHARLEN("{S}") == 6
|
||||||
|
println CHARSUB("{S}", 2)
|
||||||
|
assert !STRCMP(CHARSUB("{S}", 2), "Bold")
|
||||||
|
assert CHARSUB("{S}", 2) == "Bold" && "Bold" == $88
|
||||||
|
assert CHARSUB("{S}", 1) == $58 ; ASCII "X"
|
||||||
|
db "{S}"
|
||||||
|
|
||||||
|
newcharmap ascii
|
||||||
|
|
||||||
|
assert CHARLEN("{S}") == 14
|
||||||
|
println CHARSUB("{S}", 2)
|
||||||
|
assert !STRCMP(CHARSUB("{S}", 2), "B")
|
||||||
|
assert CHARSUB("{S}", 2) == "B" && "B" == $42 ; ASCII "B"
|
||||||
|
assert CHARSUB("{S}", 1) == $58 ; ASCII "X"
|
||||||
|
db "{S}"
|
||||||
0
test/asm/charlen-charsub.err
Normal file
0
test/asm/charlen-charsub.err
Normal file
2
test/asm/charlen-charsub.out
Normal file
2
test/asm/charlen-charsub.out
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
Bold
|
||||||
|
B
|
||||||
BIN
test/asm/charlen-charsub.out.bin
Normal file
BIN
test/asm/charlen-charsub.out.bin
Normal file
Binary file not shown.
Reference in New Issue
Block a user