Fix CHARLEN and CHARSUB on invalid UTF-8 (#1630)

This commit is contained in:
Rangi
2025-01-28 02:01:18 -05:00
committed by GitHub
parent d54619a453
commit 44caffe04a
4 changed files with 45 additions and 16 deletions

View File

@@ -29,9 +29,13 @@ println "\"{mid2}{mid1}\""
; 4: invalid byte 0x81
; 5: invalid byte 0xFF
; 6: U+0020 space
; 7: U+0042 B
REDEF invalid EQUS "A <20><><EFBFBD> B"
; 7: U+6F22 kanji (0xE6 0xBC 0xA2)
REDEF invalid EQUS "A <20><><EFBFBD> 漢"
DEF n = strlen("{invalid}")
DEF r = charlen("{invalid}")
println "\"{#s:invalid}\": {d:n} != {d:r}"
DEF n = STRLEN("{invalid}")
DEF r = CHARLEN("{invalid}")
println "\"{#s:invalid}\": {d:n} == {d:r}"
REDEF mid1 EQUS CHARSUB("{invalid}", 4)
REDEF mid2 EQUS CHARSUB("{invalid}", 7)
println "\"{mid2}{mid1}\""

View File

@@ -50,4 +50,30 @@ error: invalid-utf-8-strings.asm(35):
STRLEN: Invalid UTF-8 byte 0xFF
error: invalid-utf-8-strings.asm(36):
Input string is not valid UTF-8
error: Assembly aborted (26 errors)!
error: invalid-utf-8-strings.asm(36):
Input string is not valid UTF-8
error: invalid-utf-8-strings.asm(36):
Input string is not valid UTF-8
error: invalid-utf-8-strings.asm(39):
Input string is not valid UTF-8
error: invalid-utf-8-strings.asm(39):
Input string is not valid UTF-8
error: invalid-utf-8-strings.asm(39):
Input string is not valid UTF-8
error: invalid-utf-8-strings.asm(39):
Input string is not valid UTF-8
error: invalid-utf-8-strings.asm(39):
Input string is not valid UTF-8
error: invalid-utf-8-strings.asm(40):
Input string is not valid UTF-8
error: invalid-utf-8-strings.asm(40):
Input string is not valid UTF-8
error: invalid-utf-8-strings.asm(40):
Input string is not valid UTF-8
error: invalid-utf-8-strings.asm(40):
Input string is not valid UTF-8
error: invalid-utf-8-strings.asm(40):
Input string is not valid UTF-8
error: invalid-utf-8-strings.asm(40):
Input string is not valid UTF-8
error: Assembly aborted (39 errors)!

View File

@@ -1,3 +1,4 @@
"aäb漢,a<><61>b<EFBFBD><62><EFBFBD>!" == "aäb漢,a<><61>b<EFBFBD><62><EFBFBD>!" (12)
"b,a"
"A <20><><EFBFBD> B": 7 != 2
"A <20><><EFBFBD> 漢": 7 == 7
"漢<>"