diff --git a/include/extern/utf8decoder.h b/include/extern/utf8decoder.h index a54ede26..e80165d6 100644 --- a/include/extern/utf8decoder.h +++ b/include/extern/utf8decoder.h @@ -9,6 +9,6 @@ #ifndef EXTERN_UTF8DECODER_H #define EXTERN_UTF8DECODER_H -uint32_t decode(uint32_t *state, uint32_t *codep, uint32_t byte); +uint32_t decode(uint32_t *state, uint32_t *codep, uint8_t byte); #endif /* EXTERN_UTF8DECODER_H */ diff --git a/src/asm/parser.y b/src/asm/parser.y index b8c5a88e..183adcbb 100644 --- a/src/asm/parser.y +++ b/src/asm/parser.y @@ -61,7 +61,7 @@ static size_t strlenUTF8(const char *s) uint32_t codep = 0; while (*s) { - switch (decode(&state, &codep, (uint8_t)*s)) { + switch (decode(&state, &codep, *s)) { case 1: fatalerror("STRLEN: Invalid UTF-8 character\n"); break; @@ -95,7 +95,7 @@ static void strsubUTF8(char *dest, const char *src, uint32_t pos, uint32_t len) /* Advance to starting position in source string. */ while (src[srcIndex] && curPos < pos) { - switch (decode(&state, &codep, (uint8_t)src[srcIndex])) { + switch (decode(&state, &codep, src[srcIndex])) { case 1: fatalerror("STRSUB: Invalid UTF-8 character\n"); break; @@ -113,7 +113,7 @@ static void strsubUTF8(char *dest, const char *src, uint32_t pos, uint32_t len) /* Copy from source to destination. */ while (src[srcIndex] && destIndex < MAXSTRLEN && curLen < len) { - switch (decode(&state, &codep, (uint8_t)src[srcIndex])) { + switch (decode(&state, &codep, src[srcIndex])) { case 1: fatalerror("STRSUB: Invalid UTF-8 character\n"); break; diff --git a/src/asm/util.c b/src/asm/util.c index 9af73ec0..c8ef1c77 100644 --- a/src/asm/util.c +++ b/src/asm/util.c @@ -69,7 +69,7 @@ size_t readUTF8Char(uint8_t *dest, char const *src) size_t i = 0; for (;;) { - if (decode(&state, &codep, (uint8_t)src[i]) == 1) + if (decode(&state, &codep, src[i]) == 1) fatalerror("invalid UTF-8 character\n"); dest[i] = src[i]; diff --git a/src/extern/utf8decoder.c b/src/extern/utf8decoder.c index 56c44cf2..f15a1002 100644 --- a/src/extern/utf8decoder.c +++ b/src/extern/utf8decoder.c @@ -38,7 +38,7 @@ static const uint8_t utf8d[] = { 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s8 */ }; -uint32_t decode(uint32_t *state, uint32_t *codep, uint32_t byte) +uint32_t decode(uint32_t *state, uint32_t *codep, uint8_t byte) { uint32_t type = utf8d[byte]; diff --git a/test/asm/invalid-utf-8.asm b/test/asm/invalid-utf-8.asm new file mode 100644 index 00000000..fbc3745c --- /dev/null +++ b/test/asm/invalid-utf-8.asm @@ -0,0 +1,5 @@ +; This test tries to pass invalid UTF-8 through a macro argument +; to exercise the lexer's reportGarbageChar +m:MACRO \1 +ENDM + m ΟΣ diff --git a/test/asm/invalid-utf-8.err b/test/asm/invalid-utf-8.err new file mode 100644 index 00000000..e342cc92 --- /dev/null +++ b/test/asm/invalid-utf-8.err @@ -0,0 +1,5 @@ +ERROR: invalid-utf-8.asm(4) -> invalid-utf-8.asm::m(3): + Unknown character 0xCF +ERROR: invalid-utf-8.asm(4) -> invalid-utf-8.asm::m(3): + Unknown character 0xD3 +error: Assembly aborted (2 errors)! diff --git a/test/asm/invalid-utf-8.out b/test/asm/invalid-utf-8.out new file mode 100644 index 00000000..e69de29b