mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-20 18:22:07 +00:00
Fix string function behavior with NUL characters (#1746)
This commit is contained in:
@@ -2706,12 +2706,12 @@ static void errorInvalidUTF8Byte(uint8_t byte, char const *functionName) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static size_t strlenUTF8(std::string const &str, bool printErrors) {
|
static size_t strlenUTF8(std::string const &str, bool printErrors) {
|
||||||
char const *ptr = str.c_str();
|
|
||||||
size_t len = 0;
|
size_t len = 0;
|
||||||
uint32_t state = UTF8_ACCEPT;
|
uint32_t state = UTF8_ACCEPT;
|
||||||
|
uint32_t codepoint = 0;
|
||||||
|
|
||||||
for (uint32_t codepoint = 0; *ptr; ptr++) {
|
for (char c : str) {
|
||||||
uint8_t byte = *ptr;
|
uint8_t byte = static_cast<uint8_t>(c);
|
||||||
|
|
||||||
switch (decode(&state, &codepoint, byte)) {
|
switch (decode(&state, &codepoint, byte)) {
|
||||||
case UTF8_REJECT:
|
case UTF8_REJECT:
|
||||||
@@ -2738,17 +2738,17 @@ static size_t strlenUTF8(std::string const &str, bool printErrors) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static std::string strsliceUTF8(std::string const &str, uint32_t start, uint32_t stop) {
|
static std::string strsliceUTF8(std::string const &str, uint32_t start, uint32_t stop) {
|
||||||
char const *ptr = str.c_str();
|
size_t strLen = str.length();
|
||||||
size_t index = 0;
|
size_t index = 0;
|
||||||
uint32_t state = UTF8_ACCEPT;
|
uint32_t state = UTF8_ACCEPT;
|
||||||
uint32_t codepoint = 0;
|
uint32_t codepoint = 0;
|
||||||
uint32_t curIdx = 0;
|
uint32_t curIdx = 0;
|
||||||
|
|
||||||
// Advance to starting index in source string.
|
// Advance to starting index in source string.
|
||||||
while (ptr[index] && curIdx < start) {
|
while (index < strLen && curIdx < start) {
|
||||||
switch (decode(&state, &codepoint, ptr[index])) {
|
switch (decode(&state, &codepoint, str[index])) {
|
||||||
case UTF8_REJECT:
|
case UTF8_REJECT:
|
||||||
errorInvalidUTF8Byte(ptr[index], "STRSLICE");
|
errorInvalidUTF8Byte(str[index], "STRSLICE");
|
||||||
state = UTF8_ACCEPT;
|
state = UTF8_ACCEPT;
|
||||||
// fallthrough
|
// fallthrough
|
||||||
case UTF8_ACCEPT:
|
case UTF8_ACCEPT:
|
||||||
@@ -2760,7 +2760,7 @@ static std::string strsliceUTF8(std::string const &str, uint32_t start, uint32_t
|
|||||||
|
|
||||||
// An index 1 past the end of the string is allowed, but will trigger the
|
// An index 1 past the end of the string is allowed, but will trigger the
|
||||||
// "Length too big" warning below if the length is nonzero.
|
// "Length too big" warning below if the length is nonzero.
|
||||||
if (!ptr[index] && start > curIdx) {
|
if (index >= strLen && start > curIdx) {
|
||||||
warning(
|
warning(
|
||||||
WARNING_BUILTIN_ARG,
|
WARNING_BUILTIN_ARG,
|
||||||
"STRSLICE: Start index %" PRIu32 " is past the end of the string",
|
"STRSLICE: Start index %" PRIu32 " is past the end of the string",
|
||||||
@@ -2771,10 +2771,10 @@ static std::string strsliceUTF8(std::string const &str, uint32_t start, uint32_t
|
|||||||
size_t startIndex = index;
|
size_t startIndex = index;
|
||||||
|
|
||||||
// Advance to ending index in source string.
|
// Advance to ending index in source string.
|
||||||
while (ptr[index] && curIdx < stop) {
|
while (index < strLen && curIdx < stop) {
|
||||||
switch (decode(&state, &codepoint, ptr[index])) {
|
switch (decode(&state, &codepoint, str[index])) {
|
||||||
case UTF8_REJECT:
|
case UTF8_REJECT:
|
||||||
errorInvalidUTF8Byte(ptr[index], "STRSLICE");
|
errorInvalidUTF8Byte(str[index], "STRSLICE");
|
||||||
state = UTF8_ACCEPT;
|
state = UTF8_ACCEPT;
|
||||||
// fallthrough
|
// fallthrough
|
||||||
case UTF8_ACCEPT:
|
case UTF8_ACCEPT:
|
||||||
@@ -2798,21 +2798,21 @@ static std::string strsliceUTF8(std::string const &str, uint32_t start, uint32_t
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
return std::string(ptr + startIndex, ptr + index);
|
return str.substr(startIndex, index - startIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len) {
|
static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len) {
|
||||||
char const *ptr = str.c_str();
|
size_t strLen = str.length();
|
||||||
size_t index = 0;
|
size_t index = 0;
|
||||||
uint32_t state = UTF8_ACCEPT;
|
uint32_t state = UTF8_ACCEPT;
|
||||||
uint32_t codepoint = 0;
|
uint32_t codepoint = 0;
|
||||||
uint32_t curPos = 1;
|
uint32_t curPos = 1;
|
||||||
|
|
||||||
// Advance to starting position in source string.
|
// Advance to starting position in source string.
|
||||||
while (ptr[index] && curPos < pos) {
|
while (index < strLen && curPos < pos) {
|
||||||
switch (decode(&state, &codepoint, ptr[index])) {
|
switch (decode(&state, &codepoint, str[index])) {
|
||||||
case UTF8_REJECT:
|
case UTF8_REJECT:
|
||||||
errorInvalidUTF8Byte(ptr[index], "STRSUB");
|
errorInvalidUTF8Byte(str[index], "STRSUB");
|
||||||
state = UTF8_ACCEPT;
|
state = UTF8_ACCEPT;
|
||||||
// fallthrough
|
// fallthrough
|
||||||
case UTF8_ACCEPT:
|
case UTF8_ACCEPT:
|
||||||
@@ -2824,7 +2824,7 @@ static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len
|
|||||||
|
|
||||||
// A position 1 past the end of the string is allowed, but will trigger the
|
// A position 1 past the end of the string is allowed, but will trigger the
|
||||||
// "Length too big" warning below if the length is nonzero.
|
// "Length too big" warning below if the length is nonzero.
|
||||||
if (!ptr[index] && pos > curPos) {
|
if (index >= strLen && pos > curPos) {
|
||||||
warning(
|
warning(
|
||||||
WARNING_BUILTIN_ARG, "STRSUB: Position %" PRIu32 " is past the end of the string", pos
|
WARNING_BUILTIN_ARG, "STRSUB: Position %" PRIu32 " is past the end of the string", pos
|
||||||
);
|
);
|
||||||
@@ -2834,10 +2834,10 @@ static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len
|
|||||||
uint32_t curLen = 0;
|
uint32_t curLen = 0;
|
||||||
|
|
||||||
// Compute the result length in bytes.
|
// Compute the result length in bytes.
|
||||||
while (ptr[index] && curLen < len) {
|
while (index < strLen && curLen < len) {
|
||||||
switch (decode(&state, &codepoint, ptr[index])) {
|
switch (decode(&state, &codepoint, str[index])) {
|
||||||
case UTF8_REJECT:
|
case UTF8_REJECT:
|
||||||
errorInvalidUTF8Byte(ptr[index], "STRSUB");
|
errorInvalidUTF8Byte(str[index], "STRSUB");
|
||||||
state = UTF8_ACCEPT;
|
state = UTF8_ACCEPT;
|
||||||
// fallthrough
|
// fallthrough
|
||||||
case UTF8_ACCEPT:
|
case UTF8_ACCEPT:
|
||||||
@@ -2857,7 +2857,7 @@ static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len
|
|||||||
warning(WARNING_BUILTIN_ARG, "STRSUB: Length too big: %" PRIu32, len);
|
warning(WARNING_BUILTIN_ARG, "STRSUB: Length too big: %" PRIu32, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
return std::string(ptr + startIndex, ptr + index);
|
return str.substr(startIndex, index - startIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t charlenUTF8(std::string const &str) {
|
static size_t charlenUTF8(std::string const &str) {
|
||||||
|
|||||||
33
test/asm/null-char-functions.asm
Normal file
33
test/asm/null-char-functions.asm
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
def s equs "hello\0world"
|
||||||
|
|
||||||
|
println #s
|
||||||
|
|
||||||
|
MACRO assert_equal
|
||||||
|
assert !strcmp(\1, \2)
|
||||||
|
ENDM
|
||||||
|
|
||||||
|
assert strlen(#s) == 11
|
||||||
|
assert strfind(#s, "o\0w") == 4
|
||||||
|
assert strfind(#s, "orld") == 7
|
||||||
|
assert strrfind(#s, "o\0w") == 4
|
||||||
|
assert strrfind(#s, "o") == 7
|
||||||
|
|
||||||
|
assert_equal strcat(#s, "\0lol"), "hello\0world\0lol"
|
||||||
|
assert_equal #s ++ "\0lol", "hello\0world\0lol"
|
||||||
|
assert_equal strupr(#s), "HELLO\0WORLD"
|
||||||
|
assert_equal strlwr("HELLO\0WORLD"), #s
|
||||||
|
assert_equal strslice(#s, 4, 7), "o\0w"
|
||||||
|
assert_equal strslice(#s, 6), "world"
|
||||||
|
assert_equal strrpl(#s, "o", "XX"), "hellXX\0wXXrld"
|
||||||
|
assert_equal strrpl(#s, "\0", "0"), "hello0world"
|
||||||
|
assert_equal strfmt("%s", #s), #s
|
||||||
|
assert_equal strchar(#s, 5), "\0"
|
||||||
|
assert_equal strchar(#s, -1), "d"
|
||||||
|
|
||||||
|
assert strin(#s, "o\0w") == 5
|
||||||
|
assert strin(#s, "orld") == 8
|
||||||
|
assert strrin(#s, "o\0w") == 5
|
||||||
|
assert strrin(#s, "o") == 8
|
||||||
|
|
||||||
|
assert_equal strsub(#s, 5, 3), "o\0w"
|
||||||
|
assert_equal strsub(#s, 7), "world"
|
||||||
BIN
test/asm/null-char-functions.out
Normal file
BIN
test/asm/null-char-functions.out
Normal file
Binary file not shown.
Reference in New Issue
Block a user