mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-20 10:12:06 +00:00
Merge pull request #351 from dbrotz/fix-strsub-strlen
Use code points instead of bytes for STRSUB/STRLEN
This commit is contained in:
@@ -26,6 +26,8 @@
|
||||
#include "asm/rpn.h"
|
||||
#include "asm/symbol.h"
|
||||
|
||||
#include "extern/utf8decoder.h"
|
||||
|
||||
#include "common.h"
|
||||
#include "linkdefs.h"
|
||||
|
||||
@@ -431,6 +433,85 @@ static void updateUnion(void)
|
||||
pPCSymbol->nValue = unionStart[unionIndex];
|
||||
}
|
||||
|
||||
/*
 * Return the length of the NUL-terminated string `s`, measured in UTF-8
 * code points rather than bytes.
 *
 * Every byte is fed to decode(); the count goes up by one each time decode()
 * returns 0 and fatalerror() is raised when it returns 1. Any other return
 * value means a multi-byte sequence is still in progress.
 *
 * If the string ends while the decoder state is non-zero (a truncated
 * multi-byte sequence), fatalerror() is raised as well.
 */
static size_t strlenUTF8(const char *s)
{
	const char *cursor;
	uint32_t decoderState = 0;
	uint32_t codePoint = 0;
	size_t count = 0;

	for (cursor = s; *cursor != '\0'; cursor++) {
		uint32_t result = decode(&decoderState, &codePoint,
					 (uint8_t)*cursor);

		if (result == 1)
			fatalerror("STRLEN: Invalid UTF-8 character");
		else if (result == 0)
			count++;
	}

	/* A non-zero state here means the last code point was cut short. */
	if (decoderState != 0)
		fatalerror("STRLEN: Invalid UTF-8 character");

	return count;
}
|
||||
|
||||
static void strsubUTF8(char *dest, const char *src, uint32_t pos, uint32_t len)
|
||||
{
|
||||
size_t srcIndex = 0;
|
||||
size_t destIndex = 0;
|
||||
uint32_t state = 0;
|
||||
uint32_t codep = 0;
|
||||
uint32_t curPos = 1;
|
||||
uint32_t curLen = 0;
|
||||
|
||||
if (pos < 1) {
|
||||
warning("STRSUB: Position starts at 1");
|
||||
pos = 1;
|
||||
}
|
||||
|
||||
/* Advance to starting position in source string. */
|
||||
while (src[srcIndex] && curPos < pos) {
|
||||
switch (decode(&state, &codep, (uint8_t)src[srcIndex])) {
|
||||
case 1:
|
||||
fatalerror("STRSUB: Invalid UTF-8 character");
|
||||
break;
|
||||
case 0:
|
||||
curPos++;
|
||||
break;
|
||||
}
|
||||
srcIndex++;
|
||||
}
|
||||
|
||||
if (!src[srcIndex])
|
||||
warning("STRSUB: Position %lu is past the end of the string",
|
||||
(unsigned long)pos);
|
||||
|
||||
/* Copy from source to destination. */
|
||||
while (src[srcIndex] && destIndex < MAXSTRLEN && curLen < len) {
|
||||
switch (decode(&state, &codep, (uint8_t)src[srcIndex])) {
|
||||
case 1:
|
||||
fatalerror("STRSUB: Invalid UTF-8 character");
|
||||
break;
|
||||
case 0:
|
||||
curLen++;
|
||||
break;
|
||||
}
|
||||
dest[destIndex++] = src[srcIndex++];
|
||||
}
|
||||
|
||||
if (curLen < len)
|
||||
warning("STRSUB: Length too big: %lu", (unsigned long)len);
|
||||
|
||||
/* Check for partial code point. */
|
||||
if (state != 0)
|
||||
fatalerror("STRSUB: Invalid UTF-8 character");
|
||||
|
||||
dest[destIndex] = 0;
|
||||
}
|
||||
|
||||
%}
|
||||
|
||||
%union
|
||||
@@ -1249,7 +1330,7 @@ relocconst : T_ID
|
||||
else
|
||||
rpn_Number(&$$, 0);
|
||||
}
|
||||
| T_OP_STRLEN '(' string ')' { rpn_Number(&$$, strlen($3)); }
|
||||
| T_OP_STRLEN '(' string ')' { rpn_Number(&$$, strlenUTF8($3)); }
|
||||
| '(' relocconst ')' { $$ = $2; }
|
||||
;
|
||||
|
||||
@@ -1327,7 +1408,7 @@ const : T_ID { constexpr_Symbol(&$$, $1); }
|
||||
else
|
||||
constexpr_Number(&$$, 0);
|
||||
}
|
||||
| T_OP_STRLEN '(' string ')' { constexpr_Number(&$$, strlen($3)); }
|
||||
| T_OP_STRLEN '(' string ')' { constexpr_Number(&$$, strlenUTF8($3)); }
|
||||
| '(' const ')' { $$ = $2; }
|
||||
;
|
||||
|
||||
@@ -1338,14 +1419,7 @@ string : T_STRING
|
||||
}
|
||||
| T_OP_STRSUB '(' string comma uconst comma uconst ')'
|
||||
{
|
||||
uint32_t len = $7;
|
||||
if (len > MAXSTRLEN) {
|
||||
warning("STRSUB: Length too big: %u", len);
|
||||
len = MAXSTRLEN;
|
||||
}
|
||||
|
||||
if (snprintf($$, len + 1, "%s", $3 + $5 - 1) > MAXSTRLEN)
|
||||
warning("STRSUB: String too long '%s'", $$);
|
||||
strsubUTF8($$, $3, $5, $7);
|
||||
}
|
||||
| T_OP_STRCAT '(' string comma string ')'
|
||||
{
|
||||
|
||||
9
test/asm/strlen.asm
Normal file
9
test/asm/strlen.asm
Normal file
@@ -0,0 +1,9 @@
|
||||
SECTION "sec", ROM0
|
||||
|
||||
xstrlen: MACRO
|
||||
PRINTV STRLEN(\1)
|
||||
PRINTT "\n"
|
||||
ENDM
|
||||
|
||||
xstrlen "ABC"
|
||||
xstrlen "カタカナ"
|
||||
2
test/asm/strlen.out
Normal file
2
test/asm/strlen.out
Normal file
@@ -0,0 +1,2 @@
|
||||
$3
|
||||
$4
|
||||
22
test/asm/strsub.asm
Normal file
22
test/asm/strsub.asm
Normal file
@@ -0,0 +1,22 @@
|
||||
SECTION "sec", ROM0
|
||||
|
||||
xstrsub: MACRO
|
||||
PRINTT STRSUB(\1, \2, \3)
|
||||
PRINTT "\n"
|
||||
ENDM
|
||||
|
||||
xstrsub "ABC", 1, 1
|
||||
xstrsub "ABC", 2, 1
|
||||
xstrsub "ABC", 3, 1
|
||||
xstrsub "ABC", 1, 2
|
||||
xstrsub "ABC", 2, 2
|
||||
xstrsub "ABC", 2, 32
|
||||
xstrsub "ABC", 2, 300
|
||||
xstrsub "ABC", 0, 300
|
||||
xstrsub "ABC", 4, 0
|
||||
xstrsub "ABC", 4, 1
|
||||
xstrsub "カタカナ", 1, 2
|
||||
xstrsub "カタカナ", 3, 2
|
||||
xstrsub "カタカナ", 3, 10
|
||||
xstrsub "g̈", 1, 1
|
||||
xstrsub "g̈", 1, 2
|
||||
31
test/asm/strsub.out
Normal file
31
test/asm/strsub.out
Normal file
@@ -0,0 +1,31 @@
|
||||
warning: strsub.asm(13) -> xstrsub(1):
|
||||
STRSUB: Length too big: 32
|
||||
warning: strsub.asm(14) -> xstrsub(1):
|
||||
STRSUB: Length too big: 300
|
||||
warning: strsub.asm(15) -> xstrsub(1):
|
||||
STRSUB: Position starts at 1
|
||||
warning: strsub.asm(15) -> xstrsub(1):
|
||||
STRSUB: Length too big: 300
|
||||
warning: strsub.asm(16) -> xstrsub(1):
|
||||
STRSUB: Position 4 is past the end of the string
|
||||
warning: strsub.asm(17) -> xstrsub(1):
|
||||
STRSUB: Position 4 is past the end of the string
|
||||
warning: strsub.asm(17) -> xstrsub(1):
|
||||
STRSUB: Length too big: 1
|
||||
warning: strsub.asm(20) -> xstrsub(1):
|
||||
STRSUB: Length too big: 10
|
||||
A
|
||||
B
|
||||
C
|
||||
AB
|
||||
BC
|
||||
BC
|
||||
BC
|
||||
ABC
|
||||
|
||||
|
||||
カタ
|
||||
カナ
|
||||
カナ
|
||||
g
|
||||
g̈
|
||||
Reference in New Issue
Block a user