mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-20 10:12:06 +00:00
Fix STRLEN and STRSUB on incomplete UTF-8 (#1633)
This commit is contained in:
@@ -72,7 +72,7 @@
|
|||||||
|
|
||||||
static uint32_t strToNum(std::vector<int32_t> const &s);
|
static uint32_t strToNum(std::vector<int32_t> const &s);
|
||||||
static void errorInvalidUTF8Byte(uint8_t byte, char const *functionName);
|
static void errorInvalidUTF8Byte(uint8_t byte, char const *functionName);
|
||||||
static size_t strlenUTF8(std::string const &str);
|
static size_t strlenUTF8(std::string const &str, bool printErrors);
|
||||||
static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len);
|
static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len);
|
||||||
static size_t charlenUTF8(std::string const &str);
|
static size_t charlenUTF8(std::string const &str);
|
||||||
static std::string charsubUTF8(std::string const &str, uint32_t pos);
|
static std::string charsubUTF8(std::string const &str, uint32_t pos);
|
||||||
@@ -1517,7 +1517,7 @@ relocexpr_no_str:
|
|||||||
$$.makeNumber(pos != std::string::npos ? pos + 1 : 0);
|
$$.makeNumber(pos != std::string::npos ? pos + 1 : 0);
|
||||||
}
|
}
|
||||||
| OP_STRLEN LPAREN string RPAREN {
|
| OP_STRLEN LPAREN string RPAREN {
|
||||||
$$.makeNumber(strlenUTF8($3));
|
$$.makeNumber(strlenUTF8($3, true));
|
||||||
}
|
}
|
||||||
| OP_CHARLEN LPAREN string RPAREN {
|
| OP_CHARLEN LPAREN string RPAREN {
|
||||||
$$.makeNumber(charlenUTF8($3));
|
$$.makeNumber(charlenUTF8($3));
|
||||||
@@ -1569,13 +1569,13 @@ string:
|
|||||||
$$ = std::move($1);
|
$$ = std::move($1);
|
||||||
}
|
}
|
||||||
| OP_STRSUB LPAREN string COMMA iconst COMMA uconst RPAREN {
|
| OP_STRSUB LPAREN string COMMA iconst COMMA uconst RPAREN {
|
||||||
size_t len = strlenUTF8($3);
|
size_t len = strlenUTF8($3, false);
|
||||||
uint32_t pos = adjustNegativePos($5, len, "STRSUB");
|
uint32_t pos = adjustNegativePos($5, len, "STRSUB");
|
||||||
|
|
||||||
$$ = strsubUTF8($3, pos, $7);
|
$$ = strsubUTF8($3, pos, $7);
|
||||||
}
|
}
|
||||||
| OP_STRSUB LPAREN string COMMA iconst RPAREN {
|
| OP_STRSUB LPAREN string COMMA iconst RPAREN {
|
||||||
size_t len = strlenUTF8($3);
|
size_t len = strlenUTF8($3, false);
|
||||||
uint32_t pos = adjustNegativePos($5, len, "STRSUB");
|
uint32_t pos = adjustNegativePos($5, len, "STRSUB");
|
||||||
|
|
||||||
$$ = strsubUTF8($3, pos, pos > len ? 0 : len + 1 - pos);
|
$$ = strsubUTF8($3, pos, pos > len ? 0 : len + 1 - pos);
|
||||||
@@ -2522,7 +2522,7 @@ static void errorInvalidUTF8Byte(uint8_t byte, char const *functionName) {
|
|||||||
error("%s: Invalid UTF-8 byte 0x%02hhX\n", functionName, byte);
|
error("%s: Invalid UTF-8 byte 0x%02hhX\n", functionName, byte);
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t strlenUTF8(std::string const &str) {
|
static size_t strlenUTF8(std::string const &str, bool printErrors) {
|
||||||
char const *ptr = str.c_str();
|
char const *ptr = str.c_str();
|
||||||
size_t len = 0;
|
size_t len = 0;
|
||||||
uint32_t state = 0;
|
uint32_t state = 0;
|
||||||
@@ -2532,7 +2532,9 @@ static size_t strlenUTF8(std::string const &str) {
|
|||||||
|
|
||||||
switch (decode(&state, &codepoint, byte)) {
|
switch (decode(&state, &codepoint, byte)) {
|
||||||
case 1:
|
case 1:
|
||||||
errorInvalidUTF8Byte(byte, "STRLEN");
|
if (printErrors) {
|
||||||
|
errorInvalidUTF8Byte(byte, "STRLEN");
|
||||||
|
}
|
||||||
state = 0;
|
state = 0;
|
||||||
// fallthrough
|
// fallthrough
|
||||||
case 0:
|
case 0:
|
||||||
@@ -2543,7 +2545,10 @@ static size_t strlenUTF8(std::string const &str) {
|
|||||||
|
|
||||||
// Check for partial code point.
|
// Check for partial code point.
|
||||||
if (state != 0) {
|
if (state != 0) {
|
||||||
error("STRLEN: Incomplete UTF-8 character\n");
|
if (printErrors) {
|
||||||
|
error("STRLEN: Incomplete UTF-8 character\n");
|
||||||
|
}
|
||||||
|
len++;
|
||||||
}
|
}
|
||||||
|
|
||||||
return len;
|
return len;
|
||||||
@@ -2595,13 +2600,14 @@ static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len
|
|||||||
index++;
|
index++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (curLen < len) {
|
|
||||||
warning(WARNING_BUILTIN_ARG, "STRSUB: Length too big: %" PRIu32 "\n", len);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for partial code point.
|
// Check for partial code point.
|
||||||
if (state != 0) {
|
if (state != 0) {
|
||||||
error("STRSUB: Incomplete UTF-8 character\n");
|
error("STRSUB: Incomplete UTF-8 character\n");
|
||||||
|
curLen++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (curLen < len) {
|
||||||
|
warning(WARNING_BUILTIN_ARG, "STRSUB: Length too big: %" PRIu32 "\n", len);
|
||||||
}
|
}
|
||||||
|
|
||||||
return std::string(ptr + startIndex, ptr + index);
|
return std::string(ptr + startIndex, ptr + index);
|
||||||
|
|||||||
@@ -16,11 +16,11 @@ DEF invalid EQUS "aäb漢,a
|
|||||||
DEF n = STRLEN("{invalid}")
|
DEF n = STRLEN("{invalid}")
|
||||||
DEF copy EQUS STRSUB("{invalid}", 1)
|
DEF copy EQUS STRSUB("{invalid}", 1)
|
||||||
|
|
||||||
println "\"{invalid}\" == \"{copy}\" ({d:n})"
|
println "\"{#s:invalid}\" == \"{#s:copy}\" ({d:n})"
|
||||||
|
|
||||||
DEF mid1 EQUS STRSUB("{invalid}", 5, 2)
|
DEF mid1 EQUS STRSUB("{invalid}", 5, 2)
|
||||||
DEF mid2 EQUS STRSUB("{invalid}", 9, 1)
|
DEF mid2 EQUS STRSUB("{invalid}", 9, 1)
|
||||||
println "\"{mid2}{mid1}\""
|
println "\"{#s:mid2}{#s:mid1}\""
|
||||||
|
|
||||||
; characters:
|
; characters:
|
||||||
; 1: U+0041 A
|
; 1: U+0041 A
|
||||||
@@ -38,4 +38,18 @@ println "\"{#s:invalid}\": {d:n} == {d:r}"
|
|||||||
|
|
||||||
REDEF mid1 EQUS CHARSUB("{invalid}", 4)
|
REDEF mid1 EQUS CHARSUB("{invalid}", 4)
|
||||||
REDEF mid2 EQUS CHARSUB("{invalid}", 7)
|
REDEF mid2 EQUS CHARSUB("{invalid}", 7)
|
||||||
println "\"{mid2}{mid1}\""
|
println "\"{#s:mid2}{#s:mid1}\""
|
||||||
|
|
||||||
|
; characters:
|
||||||
|
; 1: U+0061 a
|
||||||
|
; 2: U+0062 b
|
||||||
|
; 3: U+0063 c
|
||||||
|
; 4: incomplete U+6F22 kanji (0xE6 0xBC without 0xA2)
|
||||||
|
REDEF invalid EQUS "abc<62><63>"
|
||||||
|
|
||||||
|
DEF n = STRLEN("{invalid}")
|
||||||
|
DEF r = CHARLEN("{invalid}")
|
||||||
|
println "\"{#s:invalid}\": {d:n} == {d:r}"
|
||||||
|
|
||||||
|
DEF final EQUS STRSUB("{invalid}", 4, 1)
|
||||||
|
println "\"{#s:invalid}\" ends \"{#s:final}\""
|
||||||
|
|||||||
@@ -6,14 +6,6 @@ error: invalid-utf-8-strings.asm(16):
|
|||||||
STRLEN: Invalid UTF-8 byte 0xF0
|
STRLEN: Invalid UTF-8 byte 0xF0
|
||||||
error: invalid-utf-8-strings.asm(16):
|
error: invalid-utf-8-strings.asm(16):
|
||||||
STRLEN: Invalid UTF-8 byte 0xA2
|
STRLEN: Invalid UTF-8 byte 0xA2
|
||||||
error: invalid-utf-8-strings.asm(17):
|
|
||||||
STRLEN: Invalid UTF-8 byte 0xA3
|
|
||||||
error: invalid-utf-8-strings.asm(17):
|
|
||||||
STRLEN: Invalid UTF-8 byte 0xA4
|
|
||||||
error: invalid-utf-8-strings.asm(17):
|
|
||||||
STRLEN: Invalid UTF-8 byte 0xF0
|
|
||||||
error: invalid-utf-8-strings.asm(17):
|
|
||||||
STRLEN: Invalid UTF-8 byte 0xA2
|
|
||||||
error: invalid-utf-8-strings.asm(17):
|
error: invalid-utf-8-strings.asm(17):
|
||||||
STRSUB: Invalid UTF-8 byte 0xA3
|
STRSUB: Invalid UTF-8 byte 0xA3
|
||||||
error: invalid-utf-8-strings.asm(17):
|
error: invalid-utf-8-strings.asm(17):
|
||||||
@@ -22,22 +14,6 @@ error: invalid-utf-8-strings.asm(17):
|
|||||||
STRSUB: Invalid UTF-8 byte 0xF0
|
STRSUB: Invalid UTF-8 byte 0xF0
|
||||||
error: invalid-utf-8-strings.asm(17):
|
error: invalid-utf-8-strings.asm(17):
|
||||||
STRSUB: Invalid UTF-8 byte 0xA2
|
STRSUB: Invalid UTF-8 byte 0xA2
|
||||||
error: invalid-utf-8-strings.asm(21):
|
|
||||||
STRLEN: Invalid UTF-8 byte 0xA3
|
|
||||||
error: invalid-utf-8-strings.asm(21):
|
|
||||||
STRLEN: Invalid UTF-8 byte 0xA4
|
|
||||||
error: invalid-utf-8-strings.asm(21):
|
|
||||||
STRLEN: Invalid UTF-8 byte 0xF0
|
|
||||||
error: invalid-utf-8-strings.asm(21):
|
|
||||||
STRLEN: Invalid UTF-8 byte 0xA2
|
|
||||||
error: invalid-utf-8-strings.asm(22):
|
|
||||||
STRLEN: Invalid UTF-8 byte 0xA3
|
|
||||||
error: invalid-utf-8-strings.asm(22):
|
|
||||||
STRLEN: Invalid UTF-8 byte 0xA4
|
|
||||||
error: invalid-utf-8-strings.asm(22):
|
|
||||||
STRLEN: Invalid UTF-8 byte 0xF0
|
|
||||||
error: invalid-utf-8-strings.asm(22):
|
|
||||||
STRLEN: Invalid UTF-8 byte 0xA2
|
|
||||||
error: invalid-utf-8-strings.asm(22):
|
error: invalid-utf-8-strings.asm(22):
|
||||||
STRSUB: Invalid UTF-8 byte 0xA3
|
STRSUB: Invalid UTF-8 byte 0xA3
|
||||||
error: invalid-utf-8-strings.asm(22):
|
error: invalid-utf-8-strings.asm(22):
|
||||||
@@ -76,4 +52,8 @@ error: invalid-utf-8-strings.asm(40):
|
|||||||
Input string is not valid UTF-8
|
Input string is not valid UTF-8
|
||||||
error: invalid-utf-8-strings.asm(40):
|
error: invalid-utf-8-strings.asm(40):
|
||||||
Input string is not valid UTF-8
|
Input string is not valid UTF-8
|
||||||
error: Assembly aborted (39 errors)!
|
error: invalid-utf-8-strings.asm(50):
|
||||||
|
STRLEN: Incomplete UTF-8 character
|
||||||
|
error: invalid-utf-8-strings.asm(54):
|
||||||
|
STRSUB: Incomplete UTF-8 character
|
||||||
|
error: Assembly aborted (29 errors)!
|
||||||
|
|||||||
@@ -2,3 +2,5 @@
|
|||||||
"b,a"
|
"b,a"
|
||||||
"A <20><><EFBFBD> 漢": 7 == 7
|
"A <20><><EFBFBD> 漢": 7 == 7
|
||||||
"漢<>"
|
"漢<>"
|
||||||
|
"abc<62><63>": 4 == 4
|
||||||
|
"abc<62><63>" ends "<22><>"
|
||||||
|
|||||||
Reference in New Issue
Block a user