diff --git a/Makefile b/Makefile index c21d4a6c..3d254fa5 100644 --- a/Makefile +++ b/Makefile @@ -52,6 +52,7 @@ rgbasm_obj := \ src/extern/reallocarray.o \ src/extern/strlcpy.o \ src/extern/strlcat.o \ + src/extern/utf8decoder.o \ src/extern/version.o diff --git a/include/extern/utf8decoder.h b/include/extern/utf8decoder.h new file mode 100644 index 00000000..dc032f06 --- /dev/null +++ b/include/extern/utf8decoder.h @@ -0,0 +1,6 @@ +#ifndef EXTERN_UTF8DECODER_H +#define EXTERN_UTF8DECODER_H + +uint32_t decode(uint32_t *state, uint32_t *codep, uint32_t byte); + +#endif /* EXTERN_UTF8DECODER_H */ diff --git a/src/asm/charmap.c b/src/asm/charmap.c index 1c0f4070..de43a5dd 100644 --- a/src/asm/charmap.c +++ b/src/asm/charmap.c @@ -1,71 +1,3 @@ -/* - * UTF-8 decoder copyright © 2008–2009 Björn Höhrmann - * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include - -static const uint8_t utf8d[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00..0f */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10..1f */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20..2f */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 30..3f */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40..4f */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 50..5f */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60..6f */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 70..7f */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 80..8f */ - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, /* 90..9f */ - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* a0..af */ - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* b0..bf */ - 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* c0..cf */ - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* d0..df */ - 0xa, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, /* e0..e7 */ - 0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3, /* e8..ef */ - 0xb, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, /* f0..f7 */ - 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, /* f8..ff */ - 0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, /* s0.. */ - 0x1, 0x1, 0x4, 0x6, 0x1, 0x1, 0x1, 0x1, /* ..s0 */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s1 */ - 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, /* s1 */ - 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, /* s3 */ - 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, /* s4 */ - 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, /* s5 */ - 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, /* s6 */ - 1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, /* s7 */ - 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s8 */ -}; - -uint32_t decode(uint32_t *state, uint32_t *codep, uint32_t byte) -{ - uint32_t type = utf8d[byte]; - - *codep = (*state != 0) ? - (byte & 0x3fu) | (*codep << 6) : - (0xff >> type) & (byte); - - *state = utf8d[256 + *state * 16 + type]; - return *state; -} - /* * Copyright © 2013 stag019 * @@ -82,6 +14,7 @@ uint32_t decode(uint32_t *state, uint32_t *codep, uint32_t byte) * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#include #include #include #include @@ -91,6 +24,8 @@ uint32_t decode(uint32_t *state, uint32_t *codep, uint32_t byte) #include "asm/main.h" #include "asm/output.h" +#include "extern/utf8decoder.h" + struct Charmap globalCharmap = {0}; int32_t readUTF8Char(char *dest, char *src) diff --git a/src/extern/utf8decoder.c b/src/extern/utf8decoder.c new file mode 100644 index 00000000..36cca79b --- /dev/null +++ b/src/extern/utf8decoder.c @@ -0,0 +1,67 @@ +/* + * UTF-8 decoder copyright © 2008–2009 Björn Höhrmann + * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include + +static const uint8_t utf8d[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00..0f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10..1f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20..2f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 30..3f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40..4f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 50..5f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60..6f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 70..7f */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 80..8f */ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, /* 90..9f */ + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* a0..af */ + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* b0..bf */ + 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* c0..cf */ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* d0..df */ + 0xa, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, /* e0..e7 */ + 0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3, /* e8..ef */ + 0xb, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, /* f0..f7 */ + 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, /* f8..ff */ + 0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, /* s0.. */ + 0x1, 0x1, 0x4, 0x6, 0x1, 0x1, 0x1, 0x1, /* ..s0 */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s1 */ + 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, /* s1 */ + 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, /* s3 */ + 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, /* s4 */ + 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, /* s5 */ + 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, /* s6 */ + 1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, /* s7 */ + 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s8 */ +}; + +uint32_t decode(uint32_t *state, uint32_t *codep, uint32_t byte) +{ + uint32_t type = utf8d[byte]; + + *codep = (*state != 0) ? + (byte & 0x3fu) | (*codep << 6) : + (0xff >> type) & (byte); + + *state = utf8d[256 + *state * 16 + type]; + return *state; +}