Use std::vector for charmap output

This commit is contained in:
Rangi42
2024-02-26 13:07:51 -05:00
committed by Sylvie
parent 52ac98c294
commit a24df27cd8
6 changed files with 33 additions and 35 deletions

View File

@@ -4,6 +4,7 @@
#define RGBDS_ASM_CHARMAP_H
#include <stdint.h>
#include <vector>
#define DEFAULT_CHARMAP_NAME "main"
@@ -13,7 +14,7 @@ void charmap_Push(void);
void charmap_Pop(void);
void charmap_Add(char *mapping, uint8_t value);
bool charmap_HasChar(char const *input);
size_t charmap_Convert(char const *input, uint8_t *output);
size_t charmap_ConvertNext(char const **input, uint8_t **output);
void charmap_Convert(char const *input, std::vector<uint8_t> &output);
size_t charmap_ConvertNext(char const **input, std::vector<uint8_t> *output);
#endif // RGBDS_ASM_CHARMAP_H

View File

@@ -5,12 +5,13 @@
#include <stddef.h>
#include <stdint.h>
#include <vector>
char const *printChar(int c);
/*
* @return The number of bytes read, or 0 if invalid data was found
*/
size_t readUTF8Char(uint8_t *dest, char const *src);
size_t readUTF8Char(std::vector<uint8_t> *dest, char const *src);
#endif // RGBDS_UTIL_H

View File

@@ -139,17 +139,13 @@ bool charmap_HasChar(char const *input)
return charmap.nodes[nodeIdx].isTerminal;
}
size_t charmap_Convert(char const *input, uint8_t *output)
void charmap_Convert(char const *input, std::vector<uint8_t> &output)
{
uint8_t *start = output;
while (charmap_ConvertNext(&input, &output))
;
return output - start;
}
size_t charmap_ConvertNext(char const **input, uint8_t **output)
size_t charmap_ConvertNext(char const **input, std::vector<uint8_t> *output)
{
// The goal is to match the longest mapping possible.
// For that, advance through the trie with each character read.
@@ -181,22 +177,20 @@ size_t charmap_ConvertNext(char const **input, uint8_t **output)
if (matchIdx) { // A match was found, use it
if (output)
*(*output)++ = charmap.nodes[matchIdx].value;
output->push_back(charmap.nodes[matchIdx].value);
return 1;
} else if (**input) { // No match found, but there is some input left
int firstChar = **input;
// This will write the codepoint's value to `output`, little-endian
size_t codepointLen = readUTF8Char(output ? *output : NULL, *input);
size_t codepointLen = readUTF8Char(output, *input);
if (codepointLen == 0)
error("Input string is not valid UTF-8\n");
// OK because UTF-8 has no NUL in multi-byte chars
*input += codepointLen;
if (output)
*output += codepointLen;
// Warn if this character is not mapped but any others are
if (charmap.nodes.size() > 1)

View File

@@ -46,8 +46,10 @@ static void lowerstring(char *dest, char const *src)
*dest = '\0';
}
static uint32_t str2int2(uint8_t *s, uint32_t length)
static uint32_t str2int2(std::vector<uint8_t> const &s)
{
uint32_t length = s.size();
if (length > 4)
warning(WARNING_NUMERIC_STRING_1,
"Treating string as a number ignores first %" PRIu32 " character%s\n",
@@ -1343,11 +1345,10 @@ constlist_8bit_entry : reloc_8bit_no_str {
sect_RelByte(&$1, 0);
}
| string {
uint8_t *output = (uint8_t *)malloc(strlen($1)); // Cannot be larger than that
size_t length = charmap_Convert($1, output);
std::vector<uint8_t> output;
sect_AbsByteGroup(output, length);
free(output);
charmap_Convert($1, output);
sect_AbsByteGroup(output.data(), output.size());
}
;
@@ -1359,11 +1360,10 @@ constlist_16bit_entry : reloc_16bit_no_str {
sect_RelWord(&$1, 0);
}
| string {
uint8_t *output = (uint8_t *)malloc(strlen($1)); // Cannot be larger than that
size_t length = charmap_Convert($1, output);
std::vector<uint8_t> output;
sect_AbsWordGroup(output, length);
free(output);
charmap_Convert($1, output);
sect_AbsWordGroup(output.data(), output.size());
}
;
@@ -1375,12 +1375,10 @@ constlist_32bit_entry : relocexpr_no_str {
sect_RelLong(&$1, 0);
}
| string {
// Charmaps cannot increase the length of a string
uint8_t *output = (uint8_t *)malloc(strlen($1));
size_t length = charmap_Convert($1, output);
std::vector<uint8_t> output;
sect_AbsLongGroup(output, length);
free(output);
charmap_Convert($1, output);
sect_AbsLongGroup(output.data(), output.size());
}
;
@@ -1421,13 +1419,10 @@ reloc_16bit_no_str : relocexpr_no_str {
relocexpr : relocexpr_no_str
| string {
// Charmaps cannot increase the length of a string
uint8_t *output = (uint8_t *)malloc(strlen($1));
uint32_t length = charmap_Convert($1, output);
uint32_t r = str2int2(output, length);
std::vector<uint8_t> output;
free(output);
rpn_Number(&$$, r);
charmap_Convert($1, output);
rpn_Number(&$$, str2int2(output));
}
;

View File

@@ -3,6 +3,7 @@
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <vector>
#include "util.hpp"
@@ -50,7 +51,7 @@ char const *printChar(int c)
return buf;
}
size_t readUTF8Char(uint8_t *dest, char const *src)
size_t readUTF8Char(std::vector<uint8_t> *dest, char const *src)
{
uint32_t state = 0;
uint32_t codep;
@@ -61,7 +62,7 @@ size_t readUTF8Char(uint8_t *dest, char const *src)
return 0;
if (dest)
dest[i] = src[i];
dest->push_back(src[i]);
i++;
if (state == 0)

View File

@@ -0,0 +1,6 @@
; Regression test: empty strings used as data and as a numeric expression.
SECTION "test", ROM0
; Each data directive below is given an empty string as its only argument.
db ""
dw ""
dl ""
; An empty string used as a numeric expression is expected to evaluate to 0.
assert ("") == 0
; The section is expected to have size 0 after the directives above.
assert SIZEOF("test") == 0