mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-20 18:22:07 +00:00
Refactor readUTF8Char into charmap_ConvertNext
This commit is contained in:
@@ -3,13 +3,6 @@
|
|||||||
#ifndef RGBDS_UTIL_HPP
|
#ifndef RGBDS_UTIL_HPP
|
||||||
#define RGBDS_UTIL_HPP
|
#define RGBDS_UTIL_HPP
|
||||||
|
|
||||||
#include <stddef.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
char const *printChar(int c);
|
char const *printChar(int c);
|
||||||
|
|
||||||
// @return The number of bytes read, or 0 if invalid data was found
|
|
||||||
size_t readUTF8Char(std::vector<int32_t> *dest, char const *src);
|
|
||||||
|
|
||||||
#endif // RGBDS_UTIL_HPP
|
#endif // RGBDS_UTIL_HPP
|
||||||
|
|||||||
@@ -11,6 +11,7 @@
|
|||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
|
#include "extern/utf8decoder.hpp"
|
||||||
#include "helpers.hpp"
|
#include "helpers.hpp"
|
||||||
#include "util.hpp"
|
#include "util.hpp"
|
||||||
|
|
||||||
@@ -224,16 +225,30 @@ size_t charmap_ConvertNext(std::string_view &input, std::vector<int32_t> *output
|
|||||||
|
|
||||||
matchLen = value.size();
|
matchLen = value.size();
|
||||||
} else if (inputIdx < input.length()) { // No match found, but there is some input left
|
} else if (inputIdx < input.length()) { // No match found, but there is some input left
|
||||||
int firstChar = input[inputIdx];
|
size_t codepointLen = 0;
|
||||||
// This will write the codepoint's value to `output`, little-endian
|
// This will write the codepoint's value to `output`, little-endian
|
||||||
size_t codepointLen = readUTF8Char(output, input.data() + inputIdx);
|
for (uint32_t state = 0, codepoint = 0;;) {
|
||||||
|
if (decode(&state, &codepoint, input[inputIdx + codepointLen]) == 1) {
|
||||||
|
codepointLen = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (output) {
|
||||||
|
output->push_back(input[inputIdx + codepointLen]);
|
||||||
|
}
|
||||||
|
codepointLen++;
|
||||||
|
|
||||||
|
if (state == 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (codepointLen == 0) {
|
if (codepointLen == 0) {
|
||||||
error("Input string is not valid UTF-8\n");
|
error("Input string is not valid UTF-8\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Warn if this character is not mapped but any others are
|
// Warn if this character is not mapped but any others are
|
||||||
if (charmap.nodes.size() > 1) {
|
if (int firstChar = input[inputIdx]; charmap.nodes.size() > 1) {
|
||||||
warning(WARNING_UNMAPPED_CHAR_1, "Unmapped character %s\n", printChar(firstChar));
|
warning(WARNING_UNMAPPED_CHAR_1, "Unmapped character %s\n", printChar(firstChar));
|
||||||
} else if (charmap.name != DEFAULT_CHARMAP_NAME) {
|
} else if (charmap.name != DEFAULT_CHARMAP_NAME) {
|
||||||
warning(
|
warning(
|
||||||
|
|||||||
@@ -2527,10 +2527,10 @@ static size_t strlenUTF8(std::string const &str) {
|
|||||||
size_t len = 0;
|
size_t len = 0;
|
||||||
uint32_t state = 0;
|
uint32_t state = 0;
|
||||||
|
|
||||||
for (uint32_t codep = 0; *ptr; ptr++) {
|
for (uint32_t codepoint = 0; *ptr; ptr++) {
|
||||||
uint8_t byte = *ptr;
|
uint8_t byte = *ptr;
|
||||||
|
|
||||||
switch (decode(&state, &codep, byte)) {
|
switch (decode(&state, &codepoint, byte)) {
|
||||||
case 1:
|
case 1:
|
||||||
errorInvalidUTF8Byte(byte, "STRLEN");
|
errorInvalidUTF8Byte(byte, "STRLEN");
|
||||||
state = 0;
|
state = 0;
|
||||||
@@ -2553,12 +2553,12 @@ static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len
|
|||||||
char const *ptr = str.c_str();
|
char const *ptr = str.c_str();
|
||||||
size_t index = 0;
|
size_t index = 0;
|
||||||
uint32_t state = 0;
|
uint32_t state = 0;
|
||||||
uint32_t codep = 0;
|
uint32_t codepoint = 0;
|
||||||
uint32_t curPos = 1; // RGBASM strings are 1-indexed!
|
uint32_t curPos = 1; // RGBASM strings are 1-indexed!
|
||||||
|
|
||||||
// Advance to starting position in source string.
|
// Advance to starting position in source string.
|
||||||
while (ptr[index] && curPos < pos) {
|
while (ptr[index] && curPos < pos) {
|
||||||
switch (decode(&state, &codep, ptr[index])) {
|
switch (decode(&state, &codepoint, ptr[index])) {
|
||||||
case 1:
|
case 1:
|
||||||
errorInvalidUTF8Byte(ptr[index], "STRSUB");
|
errorInvalidUTF8Byte(ptr[index], "STRSUB");
|
||||||
state = 0;
|
state = 0;
|
||||||
@@ -2583,7 +2583,7 @@ static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len
|
|||||||
|
|
||||||
// Compute the result length in bytes.
|
// Compute the result length in bytes.
|
||||||
while (ptr[index] && curLen < len) {
|
while (ptr[index] && curLen < len) {
|
||||||
switch (decode(&state, &codep, ptr[index])) {
|
switch (decode(&state, &codepoint, ptr[index])) {
|
||||||
case 1:
|
case 1:
|
||||||
errorInvalidUTF8Byte(ptr[index], "STRSUB");
|
errorInvalidUTF8Byte(ptr[index], "STRSUB");
|
||||||
state = 0;
|
state = 0;
|
||||||
|
|||||||
23
src/util.cpp
23
src/util.cpp
@@ -5,9 +5,6 @@
|
|||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "extern/utf8decoder.hpp"
|
|
||||||
|
|
||||||
char const *printChar(int c) {
|
char const *printChar(int c) {
|
||||||
// "'A'" + '\0': 4 bytes
|
// "'A'" + '\0': 4 bytes
|
||||||
@@ -53,23 +50,3 @@ char const *printChar(int c) {
|
|||||||
buf[4] = '\0';
|
buf[4] = '\0';
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t readUTF8Char(std::vector<int32_t> *dest, char const *src) {
|
|
||||||
uint32_t state = 0, codepoint;
|
|
||||||
size_t i = 0;
|
|
||||||
|
|
||||||
for (;;) {
|
|
||||||
if (decode(&state, &codepoint, src[i]) == 1) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dest) {
|
|
||||||
dest->push_back(src[i]);
|
|
||||||
}
|
|
||||||
i++;
|
|
||||||
|
|
||||||
if (state == 0) {
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
Reference in New Issue
Block a user