diff --git a/include/asm/charmap.hpp b/include/asm/charmap.hpp index 49815d7e..52db3d19 100644 --- a/include/asm/charmap.hpp +++ b/include/asm/charmap.hpp @@ -4,6 +4,7 @@ #define RGBDS_ASM_CHARMAP_H #include +#include #define DEFAULT_CHARMAP_NAME "main" @@ -13,7 +14,7 @@ void charmap_Push(void); void charmap_Pop(void); void charmap_Add(char *mapping, uint8_t value); bool charmap_HasChar(char const *input); -size_t charmap_Convert(char const *input, uint8_t *output); -size_t charmap_ConvertNext(char const **input, uint8_t **output); +void charmap_Convert(char const *input, std::vector &output); +size_t charmap_ConvertNext(char const **input, std::vector *output); #endif // RGBDS_ASM_CHARMAP_H diff --git a/include/util.hpp b/include/util.hpp index d0020d7c..60d2187d 100644 --- a/include/util.hpp +++ b/include/util.hpp @@ -5,12 +5,13 @@ #include #include +#include char const *printChar(int c); /* * @return The number of bytes read, or 0 if invalid data was found */ -size_t readUTF8Char(uint8_t *dest, char const *src); +size_t readUTF8Char(std::vector *dest, char const *src); #endif // RGBDS_UTIL_H diff --git a/src/asm/charmap.cpp b/src/asm/charmap.cpp index fd17f615..d43bf1bf 100644 --- a/src/asm/charmap.cpp +++ b/src/asm/charmap.cpp @@ -139,17 +139,13 @@ bool charmap_HasChar(char const *input) return charmap.nodes[nodeIdx].isTerminal; } -size_t charmap_Convert(char const *input, uint8_t *output) +void charmap_Convert(char const *input, std::vector &output) { - uint8_t *start = output; - while (charmap_ConvertNext(&input, &output)) ; - - return output - start; } -size_t charmap_ConvertNext(char const **input, uint8_t **output) +size_t charmap_ConvertNext(char const **input, std::vector *output) { // The goal is to match the longest mapping possible. // For that, advance through the trie with each character read. @@ -181,22 +177,20 @@ size_t charmap_ConvertNext(char const **input, uint8_t **output) if (matchIdx) { // A match was found, use it if (output) - *(*output)++ = charmap.nodes[matchIdx].value; + output->push_back(charmap.nodes[matchIdx].value); return 1; } else if (**input) { // No match found, but there is some input left int firstChar = **input; // This will write the codepoint's value to `output`, little-endian - size_t codepointLen = readUTF8Char(output ? *output : NULL, *input); + size_t codepointLen = readUTF8Char(output, *input); if (codepointLen == 0) error("Input string is not valid UTF-8\n"); // OK because UTF-8 has no NUL in multi-byte chars *input += codepointLen; - if (output) - *output += codepointLen; // Warn if this character is not mapped but any others are if (charmap.nodes.size() > 1) diff --git a/src/asm/parser.y b/src/asm/parser.y index ab549f4d..a76d3d11 100644 --- a/src/asm/parser.y +++ b/src/asm/parser.y @@ -46,8 +46,10 @@ static void lowerstring(char *dest, char const *src) *dest = '\0'; } -static uint32_t str2int2(uint8_t *s, uint32_t length) +static uint32_t str2int2(std::vector const &s) { + uint32_t length = s.size(); + if (length > 4) warning(WARNING_NUMERIC_STRING_1, "Treating string as a number ignores first %" PRIu32 " character%s\n", @@ -1343,11 +1345,10 @@ constlist_8bit_entry : reloc_8bit_no_str { sect_RelByte(&$1, 0); } | string { - uint8_t *output = (uint8_t *)malloc(strlen($1)); // Cannot be larger than that - size_t length = charmap_Convert($1, output); + std::vector output; - sect_AbsByteGroup(output, length); - free(output); + charmap_Convert($1, output); + sect_AbsByteGroup(output.data(), output.size()); } ; @@ -1359,11 +1360,10 @@ constlist_16bit_entry : reloc_16bit_no_str { sect_RelWord(&$1, 0); } | string { - uint8_t *output = (uint8_t *)malloc(strlen($1)); // Cannot be larger than that - size_t length = charmap_Convert($1, output); + std::vector output; - sect_AbsWordGroup(output, length); - free(output); + charmap_Convert($1, output); + sect_AbsWordGroup(output.data(), output.size()); } ; @@ -1375,12 +1375,10 @@ constlist_32bit_entry : relocexpr_no_str { sect_RelLong(&$1, 0); } | string { - // Charmaps cannot increase the length of a string - uint8_t *output = (uint8_t *)malloc(strlen($1)); - size_t length = charmap_Convert($1, output); + std::vector output; - sect_AbsLongGroup(output, length); - free(output); + charmap_Convert($1, output); + sect_AbsLongGroup(output.data(), output.size()); } ; @@ -1421,13 +1419,10 @@ reloc_16bit_no_str : relocexpr_no_str { relocexpr : relocexpr_no_str | string { - // Charmaps cannot increase the length of a string - uint8_t *output = (uint8_t *)malloc(strlen($1)); - uint32_t length = charmap_Convert($1, output); - uint32_t r = str2int2(output, length); + std::vector output; - free(output); - rpn_Number(&$$, r); + charmap_Convert($1, output); + rpn_Number(&$$, str2int2(output)); } ; diff --git a/src/util.cpp b/src/util.cpp index f7a192ac..6edfd47d 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include "util.hpp" @@ -50,7 +51,7 @@ char const *printChar(int c) return buf; } -size_t readUTF8Char(uint8_t *dest, char const *src) +size_t readUTF8Char(std::vector *dest, char const *src) { uint32_t state = 0; uint32_t codep; @@ -61,7 +62,7 @@ size_t readUTF8Char(uint8_t *dest, char const *src) return 0; if (dest) - dest[i] = src[i]; + dest->push_back(src[i]); i++; if (state == 0) diff --git a/test/asm/empty-strings.asm b/test/asm/empty-strings.asm new file mode 100644 index 00000000..c5cf44a8 --- /dev/null +++ b/test/asm/empty-strings.asm @@ -0,0 +1,6 @@ +SECTION "test", ROM0 +db "" +dw "" +dl "" +assert ("") == 0 +assert SIZEOF("test") == 0