utf8decoder: Use byte-sized byte argument

This prevents a negative value from a signed char being passed by accident.
It also renders some casts in the code superfluous.
This commit is contained in:
Jakub Kądziołka
2020-10-12 01:06:32 +02:00
parent 2dd9015dc6
commit 6767d11c23
7 changed files with 16 additions and 6 deletions

View File

@@ -9,6 +9,6 @@
#ifndef EXTERN_UTF8DECODER_H
#define EXTERN_UTF8DECODER_H
uint32_t decode(uint32_t *state, uint32_t *codep, uint32_t byte);
uint32_t decode(uint32_t *state, uint32_t *codep, uint8_t byte);
#endif /* EXTERN_UTF8DECODER_H */

View File

@@ -61,7 +61,7 @@ static size_t strlenUTF8(const char *s)
uint32_t codep = 0;
while (*s) {
switch (decode(&state, &codep, (uint8_t)*s)) {
switch (decode(&state, &codep, *s)) {
case 1:
fatalerror("STRLEN: Invalid UTF-8 character\n");
break;
@@ -95,7 +95,7 @@ static void strsubUTF8(char *dest, const char *src, uint32_t pos, uint32_t len)
/* Advance to starting position in source string. */
while (src[srcIndex] && curPos < pos) {
switch (decode(&state, &codep, (uint8_t)src[srcIndex])) {
switch (decode(&state, &codep, src[srcIndex])) {
case 1:
fatalerror("STRSUB: Invalid UTF-8 character\n");
break;
@@ -113,7 +113,7 @@ static void strsubUTF8(char *dest, const char *src, uint32_t pos, uint32_t len)
/* Copy from source to destination. */
while (src[srcIndex] && destIndex < MAXSTRLEN && curLen < len) {
switch (decode(&state, &codep, (uint8_t)src[srcIndex])) {
switch (decode(&state, &codep, src[srcIndex])) {
case 1:
fatalerror("STRSUB: Invalid UTF-8 character\n");
break;

View File

@@ -69,7 +69,7 @@ size_t readUTF8Char(uint8_t *dest, char const *src)
size_t i = 0;
for (;;) {
if (decode(&state, &codep, (uint8_t)src[i]) == 1)
if (decode(&state, &codep, src[i]) == 1)
fatalerror("invalid UTF-8 character\n");
dest[i] = src[i];

View File

@@ -38,7 +38,7 @@ static const uint8_t utf8d[] = {
1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s8 */
};
uint32_t decode(uint32_t *state, uint32_t *codep, uint32_t byte)
uint32_t decode(uint32_t *state, uint32_t *codep, uint8_t byte)
{
uint32_t type = utf8d[byte];

View File

@@ -0,0 +1,5 @@
; This test tries to pass invalid UTF-8 through a macro argument
; to exercise the lexer's reportGarbageChar
m:MACRO \1
ENDM
m <EFBFBD><EFBFBD>

View File

@@ -0,0 +1,5 @@
ERROR: invalid-utf-8.asm(4) -> invalid-utf-8.asm::m(3):
Unknown character 0xCF
ERROR: invalid-utf-8.asm(4) -> invalid-utf-8.asm::m(3):
Unknown character 0xD3
error: Assembly aborted (2 errors)!

View File