Fix STRLEN and STRSUB on incomplete UTF-8 (#1633)

This commit is contained in:
Rangi
2025-01-28 13:13:35 -05:00
committed by GitHub
parent 44caffe04a
commit 375adc6804
4 changed files with 41 additions and 39 deletions

View File

@@ -16,11 +16,11 @@ DEF invalid EQUS "aäb漢,a
DEF n = STRLEN("{invalid}")
DEF copy EQUS STRSUB("{invalid}", 1)
println "\"{invalid}\" == \"{copy}\" ({d:n})"
println "\"{#s:invalid}\" == \"{#s:copy}\" ({d:n})"
DEF mid1 EQUS STRSUB("{invalid}", 5, 2)
DEF mid2 EQUS STRSUB("{invalid}", 9, 1)
println "\"{mid2}{mid1}\""
println "\"{#s:mid2}{#s:mid1}\""
; characters:
; 1: U+0041 A
@@ -38,4 +38,18 @@ println "\"{#s:invalid}\": {d:n} == {d:r}"
REDEF mid1 EQUS CHARSUB("{invalid}", 4)
REDEF mid2 EQUS CHARSUB("{invalid}", 7)
println "\"{mid2}{mid1}\""
println "\"{#s:mid2}{#s:mid1}\""
; characters:
; 1: U+0061 a
; 2: U+0062 b
; 3: U+0063 c
; 4: incomplete U+6F22 kanji (0xE6 0xBC without 0xA2)
REDEF invalid EQUS "abc<62><63>"
DEF n = STRLEN("{invalid}") ; expected error log for this test includes "STRLEN: Incomplete UTF-8 character"
DEF r = CHARLEN("{invalid}") ; compared against n in the println below
println "\"{#s:invalid}\": {d:n} == {d:r}" ; expected output reports 4 == 4
DEF final EQUS STRSUB("{invalid}", 4, 1) ; expected error log for this test includes "STRSUB: Incomplete UTF-8 character"
println "\"{#s:invalid}\" ends \"{#s:final}\"" ; NOTE(review): comments are kept trailing because the expected errors are keyed to source line numbers

View File

@@ -6,14 +6,6 @@ error: invalid-utf-8-strings.asm(16):
STRLEN: Invalid UTF-8 byte 0xF0
error: invalid-utf-8-strings.asm(16):
STRLEN: Invalid UTF-8 byte 0xA2
error: invalid-utf-8-strings.asm(17):
STRLEN: Invalid UTF-8 byte 0xA3
error: invalid-utf-8-strings.asm(17):
STRLEN: Invalid UTF-8 byte 0xA4
error: invalid-utf-8-strings.asm(17):
STRLEN: Invalid UTF-8 byte 0xF0
error: invalid-utf-8-strings.asm(17):
STRLEN: Invalid UTF-8 byte 0xA2
error: invalid-utf-8-strings.asm(17):
STRSUB: Invalid UTF-8 byte 0xA3
error: invalid-utf-8-strings.asm(17):
@@ -22,22 +14,6 @@ error: invalid-utf-8-strings.asm(17):
STRSUB: Invalid UTF-8 byte 0xF0
error: invalid-utf-8-strings.asm(17):
STRSUB: Invalid UTF-8 byte 0xA2
error: invalid-utf-8-strings.asm(21):
STRLEN: Invalid UTF-8 byte 0xA3
error: invalid-utf-8-strings.asm(21):
STRLEN: Invalid UTF-8 byte 0xA4
error: invalid-utf-8-strings.asm(21):
STRLEN: Invalid UTF-8 byte 0xF0
error: invalid-utf-8-strings.asm(21):
STRLEN: Invalid UTF-8 byte 0xA2
error: invalid-utf-8-strings.asm(22):
STRLEN: Invalid UTF-8 byte 0xA3
error: invalid-utf-8-strings.asm(22):
STRLEN: Invalid UTF-8 byte 0xA4
error: invalid-utf-8-strings.asm(22):
STRLEN: Invalid UTF-8 byte 0xF0
error: invalid-utf-8-strings.asm(22):
STRLEN: Invalid UTF-8 byte 0xA2
error: invalid-utf-8-strings.asm(22):
STRSUB: Invalid UTF-8 byte 0xA3
error: invalid-utf-8-strings.asm(22):
@@ -76,4 +52,8 @@ error: invalid-utf-8-strings.asm(40):
Input string is not valid UTF-8
error: invalid-utf-8-strings.asm(40):
Input string is not valid UTF-8
error: Assembly aborted (39 errors)!
error: invalid-utf-8-strings.asm(50):
STRLEN: Incomplete UTF-8 character
error: invalid-utf-8-strings.asm(54):
STRSUB: Incomplete UTF-8 character
error: Assembly aborted (29 errors)!

View File

@@ -2,3 +2,5 @@
"b,a"
"A <20><><EFBFBD> 漢": 7 == 7
"漢<>"
"abc<62><63>": 4 == 4
"abc<62><63>" ends "<22><>"