diff --git a/src/asm/parser.y b/src/asm/parser.y index ec8a91dd..2efa0cdc 100644 --- a/src/asm/parser.y +++ b/src/asm/parser.y @@ -2706,12 +2706,12 @@ static void errorInvalidUTF8Byte(uint8_t byte, char const *functionName) { } static size_t strlenUTF8(std::string const &str, bool printErrors) { - char const *ptr = str.c_str(); size_t len = 0; uint32_t state = UTF8_ACCEPT; + uint32_t codepoint = 0; - for (uint32_t codepoint = 0; *ptr; ptr++) { - uint8_t byte = *ptr; + for (char c : str) { + uint8_t byte = static_cast<uint8_t>(c); switch (decode(&state, &codepoint, byte)) { case UTF8_REJECT: @@ -2738,17 +2738,17 @@ static size_t strlenUTF8(std::string const &str, bool printErrors) { } static std::string strsliceUTF8(std::string const &str, uint32_t start, uint32_t stop) { - char const *ptr = str.c_str(); + size_t strLen = str.length(); size_t index = 0; uint32_t state = UTF8_ACCEPT; uint32_t codepoint = 0; uint32_t curIdx = 0; // Advance to starting index in source string. - while (ptr[index] && curIdx < start) { - switch (decode(&state, &codepoint, ptr[index])) { + while (index < strLen && curIdx < start) { + switch (decode(&state, &codepoint, str[index])) { case UTF8_REJECT: - errorInvalidUTF8Byte(ptr[index], "STRSLICE"); + errorInvalidUTF8Byte(str[index], "STRSLICE"); state = UTF8_ACCEPT; // fallthrough case UTF8_ACCEPT: @@ -2760,7 +2760,7 @@ static std::string strsliceUTF8(std::string const &str, uint32_t start, uint32_t // An index 1 past the end of the string is allowed, but will trigger the // "Length too big" warning below if the length is nonzero. - if (!ptr[index] && start > curIdx) { + if (index >= strLen && start > curIdx) { warning( WARNING_BUILTIN_ARG, "STRSLICE: Start index %" PRIu32 " is past the end of the string", @@ -2771,10 +2771,10 @@ static std::string strsliceUTF8(std::string const &str, uint32_t start, uint32_t size_t startIndex = index; // Advance to ending index in source string. 
- while (ptr[index] && curIdx < stop) { - switch (decode(&state, &codepoint, ptr[index])) { + while (index < strLen && curIdx < stop) { + switch (decode(&state, &codepoint, str[index])) { case UTF8_REJECT: - errorInvalidUTF8Byte(ptr[index], "STRSLICE"); + errorInvalidUTF8Byte(str[index], "STRSLICE"); state = UTF8_ACCEPT; // fallthrough case UTF8_ACCEPT: @@ -2798,21 +2798,21 @@ static std::string strsliceUTF8(std::string const &str, uint32_t start, uint32_t ); } - return std::string(ptr + startIndex, ptr + index); + return str.substr(startIndex, index - startIndex); } static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len) { - char const *ptr = str.c_str(); + size_t strLen = str.length(); size_t index = 0; uint32_t state = UTF8_ACCEPT; uint32_t codepoint = 0; uint32_t curPos = 1; // Advance to starting position in source string. - while (ptr[index] && curPos < pos) { - switch (decode(&state, &codepoint, ptr[index])) { + while (index < strLen && curPos < pos) { + switch (decode(&state, &codepoint, str[index])) { case UTF8_REJECT: - errorInvalidUTF8Byte(ptr[index], "STRSUB"); + errorInvalidUTF8Byte(str[index], "STRSUB"); state = UTF8_ACCEPT; // fallthrough case UTF8_ACCEPT: @@ -2824,7 +2824,7 @@ static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len // A position 1 past the end of the string is allowed, but will trigger the // "Length too big" warning below if the length is nonzero. - if (!ptr[index] && pos > curPos) { + if (index >= strLen && pos > curPos) { warning( WARNING_BUILTIN_ARG, "STRSUB: Position %" PRIu32 " is past the end of the string", pos ); @@ -2834,10 +2834,10 @@ static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len uint32_t curLen = 0; // Compute the result length in bytes. 
- while (ptr[index] && curLen < len) { - switch (decode(&state, &codepoint, ptr[index])) { + while (index < strLen && curLen < len) { + switch (decode(&state, &codepoint, str[index])) { case UTF8_REJECT: - errorInvalidUTF8Byte(ptr[index], "STRSUB"); + errorInvalidUTF8Byte(str[index], "STRSUB"); state = UTF8_ACCEPT; // fallthrough case UTF8_ACCEPT: @@ -2857,7 +2857,7 @@ static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len warning(WARNING_BUILTIN_ARG, "STRSUB: Length too big: %" PRIu32, len); } - return std::string(ptr + startIndex, ptr + index); + return str.substr(startIndex, index - startIndex); } static size_t charlenUTF8(std::string const &str) { diff --git a/test/asm/null-char-functions.asm b/test/asm/null-char-functions.asm new file mode 100644 index 00000000..13777eb3 --- /dev/null +++ b/test/asm/null-char-functions.asm @@ -0,0 +1,33 @@ +def s equs "hello\0world" + +println #s + +MACRO assert_equal + assert !strcmp(\1, \2) +ENDM + +assert strlen(#s) == 11 +assert strfind(#s, "o\0w") == 4 +assert strfind(#s, "orld") == 7 +assert strrfind(#s, "o\0w") == 4 +assert strrfind(#s, "o") == 7 + +assert_equal strcat(#s, "\0lol"), "hello\0world\0lol" +assert_equal #s ++ "\0lol", "hello\0world\0lol" +assert_equal strupr(#s), "HELLO\0WORLD" +assert_equal strlwr("HELLO\0WORLD"), #s +assert_equal strslice(#s, 4, 7), "o\0w" +assert_equal strslice(#s, 6), "world" +assert_equal strrpl(#s, "o", "XX"), "hellXX\0wXXrld" +assert_equal strrpl(#s, "\0", "0"), "hello0world" +assert_equal strfmt("%s", #s), #s +assert_equal strchar(#s, 5), "\0" +assert_equal strchar(#s, -1), "d" + +assert strin(#s, "o\0w") == 5 +assert strin(#s, "orld") == 8 +assert strrin(#s, "o\0w") == 5 +assert strrin(#s, "o") == 8 + +assert_equal strsub(#s, 5, 3), "o\0w" +assert_equal strsub(#s, 7), "world" diff --git a/test/asm/null-char-functions.out b/test/asm/null-char-functions.out new file mode 100644 index 00000000..8e5da76b Binary files /dev/null and 
b/test/asm/null-char-functions.out differ