mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-20 10:12:06 +00:00
Fix CHARLEN and CHARSUB on invalid UTF-8 (#1630)
This commit is contained in:
@@ -227,24 +227,22 @@ size_t charmap_ConvertNext(std::string_view &input, std::vector<int32_t> *output
|
||||
} else if (inputIdx < input.length()) { // No match found, but there is some input left
|
||||
size_t codepointLen = 0;
|
||||
// This will write the codepoint's value to `output`, little-endian
|
||||
for (uint32_t state = 0, codepoint = 0;;) {
|
||||
for (uint32_t state = 0, codepoint = 0; inputIdx + codepointLen < input.length();) {
|
||||
if (decode(&state, &codepoint, input[inputIdx + codepointLen]) == 1) {
|
||||
codepointLen = 0;
|
||||
error("Input string is not valid UTF-8\n");
|
||||
codepointLen = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (output) {
|
||||
output->push_back(input[inputIdx + codepointLen]);
|
||||
}
|
||||
codepointLen++;
|
||||
|
||||
if (state == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (codepointLen == 0) {
|
||||
error("Input string is not valid UTF-8\n");
|
||||
if (output) {
|
||||
output->insert(
|
||||
output->end(), input.data() + inputIdx, input.data() + inputIdx + codepointLen
|
||||
);
|
||||
}
|
||||
|
||||
// Warn if this character is not mapped but any others are
|
||||
|
||||
@@ -29,9 +29,13 @@ println "\"{mid2}{mid1}\""
|
||||
; 4: invalid byte 0x81
|
||||
; 5: invalid byte 0xFF
|
||||
; 6: U+0020 space
|
||||
; 7: U+0042 B
|
||||
REDEF invalid EQUS "A <20><><EFBFBD> B"
|
||||
; 7: U+6F22 kanji (0xE6 0xBC 0xA2)
|
||||
REDEF invalid EQUS "A <20><><EFBFBD> 漢"
|
||||
|
||||
DEF n = strlen("{invalid}")
|
||||
DEF r = charlen("{invalid}")
|
||||
println "\"{#s:invalid}\": {d:n} != {d:r}"
|
||||
DEF n = STRLEN("{invalid}")
|
||||
DEF r = CHARLEN("{invalid}")
|
||||
println "\"{#s:invalid}\": {d:n} == {d:r}"
|
||||
|
||||
REDEF mid1 EQUS CHARSUB("{invalid}", 4)
|
||||
REDEF mid2 EQUS CHARSUB("{invalid}", 7)
|
||||
println "\"{mid2}{mid1}\""
|
||||
|
||||
@@ -50,4 +50,30 @@ error: invalid-utf-8-strings.asm(35):
|
||||
STRLEN: Invalid UTF-8 byte 0xFF
|
||||
error: invalid-utf-8-strings.asm(36):
|
||||
Input string is not valid UTF-8
|
||||
error: Assembly aborted (26 errors)!
|
||||
error: invalid-utf-8-strings.asm(36):
|
||||
Input string is not valid UTF-8
|
||||
error: invalid-utf-8-strings.asm(36):
|
||||
Input string is not valid UTF-8
|
||||
error: invalid-utf-8-strings.asm(39):
|
||||
Input string is not valid UTF-8
|
||||
error: invalid-utf-8-strings.asm(39):
|
||||
Input string is not valid UTF-8
|
||||
error: invalid-utf-8-strings.asm(39):
|
||||
Input string is not valid UTF-8
|
||||
error: invalid-utf-8-strings.asm(39):
|
||||
Input string is not valid UTF-8
|
||||
error: invalid-utf-8-strings.asm(39):
|
||||
Input string is not valid UTF-8
|
||||
error: invalid-utf-8-strings.asm(40):
|
||||
Input string is not valid UTF-8
|
||||
error: invalid-utf-8-strings.asm(40):
|
||||
Input string is not valid UTF-8
|
||||
error: invalid-utf-8-strings.asm(40):
|
||||
Input string is not valid UTF-8
|
||||
error: invalid-utf-8-strings.asm(40):
|
||||
Input string is not valid UTF-8
|
||||
error: invalid-utf-8-strings.asm(40):
|
||||
Input string is not valid UTF-8
|
||||
error: invalid-utf-8-strings.asm(40):
|
||||
Input string is not valid UTF-8
|
||||
error: Assembly aborted (39 errors)!
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
"aäb漢,a<><61>b<EFBFBD><62><EFBFBD>!" == "aäb漢,a<><61>b<EFBFBD><62><EFBFBD>!" (12)
|
||||
"b,a"
|
||||
"A <20><><EFBFBD> B": 7 != 2
|
||||
"A <20><><EFBFBD> 漢": 7 == 7
|
||||
"漢<>"
|
||||
|
||||
Reference in New Issue
Block a user