mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-27 05:22:07 +00:00
Implement multi-value charmaps (#1429)
This commit is contained in:
@@ -8,6 +8,7 @@
|
||||
#include <string.h>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "helpers.hpp"
|
||||
#include "util.hpp"
|
||||
|
||||
#include "asm/warning.hpp"
|
||||
@@ -16,10 +17,11 @@
|
||||
// Essentially a tree, where each node stores a single character's worth of info:
|
||||
// whether there exists a mapping that ends at the current character,
|
||||
struct CharmapNode {
|
||||
bool isTerminal; // Whether there exists a mapping that ends here
|
||||
uint8_t value; // If the above is true, its corresponding value
|
||||
std::vector<int32_t> value; // The mapped value, if there exists a mapping that ends here
|
||||
// This MUST be indexes and not pointers, because pointers get invalidated by reallocation!
|
||||
size_t next[256]; // Indexes of where to go next, 0 = nowhere
|
||||
size_t next[256]; // Indexes of where to go next, 0 = nowhere
|
||||
|
||||
bool isTerminal() const { return !value.empty(); }
|
||||
};
|
||||
|
||||
struct Charmap {
|
||||
@@ -84,7 +86,7 @@ void charmap_Pop() {
|
||||
charmapStack.pop();
|
||||
}
|
||||
|
||||
void charmap_Add(std::string const &mapping, uint8_t value) {
|
||||
void charmap_Add(std::string const &mapping, std::vector<int32_t> &&value) {
|
||||
Charmap &charmap = *currentCharmap;
|
||||
size_t nodeIdx = 0;
|
||||
|
||||
@@ -106,11 +108,10 @@ void charmap_Add(std::string const &mapping, uint8_t value) {
|
||||
|
||||
CharmapNode &node = charmap.nodes[nodeIdx];
|
||||
|
||||
if (node.isTerminal)
|
||||
if (node.isTerminal())
|
||||
warning(WARNING_CHARMAP_REDEF, "Overriding charmap mapping\n");
|
||||
|
||||
node.isTerminal = true;
|
||||
node.value = value;
|
||||
std::swap(node.value, value);
|
||||
}
|
||||
|
||||
bool charmap_HasChar(std::string const &input) {
|
||||
@@ -124,17 +125,17 @@ bool charmap_HasChar(std::string const &input) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return charmap.nodes[nodeIdx].isTerminal;
|
||||
return charmap.nodes[nodeIdx].isTerminal();
|
||||
}
|
||||
|
||||
std::vector<uint8_t> charmap_Convert(std::string const &input) {
|
||||
std::vector<uint8_t> output;
|
||||
std::vector<int32_t> charmap_Convert(std::string const &input) {
|
||||
std::vector<int32_t> output;
|
||||
for (std::string_view inputView = input; charmap_ConvertNext(inputView, &output);)
|
||||
;
|
||||
return output;
|
||||
}
|
||||
|
||||
size_t charmap_ConvertNext(std::string_view &input, std::vector<uint8_t> *output) {
|
||||
size_t charmap_ConvertNext(std::string_view &input, std::vector<int32_t> *output) {
|
||||
// The goal is to match the longest mapping possible.
|
||||
// For that, advance through the trie with each character read.
|
||||
// If that would lead to a dead end, rewind characters until the last match, and output.
|
||||
@@ -152,7 +153,7 @@ size_t charmap_ConvertNext(std::string_view &input, std::vector<uint8_t> *output
|
||||
|
||||
inputIdx++; // Consume that char
|
||||
|
||||
if (charmap.nodes[nodeIdx].isTerminal) {
|
||||
if (charmap.nodes[nodeIdx].isTerminal()) {
|
||||
matchIdx = nodeIdx; // This node matches, register it
|
||||
rewindDistance = 0; // If no longer match is found, rewind here
|
||||
} else {
|
||||
@@ -166,11 +167,12 @@ size_t charmap_ConvertNext(std::string_view &input, std::vector<uint8_t> *output
|
||||
|
||||
size_t matchLen = 0;
|
||||
if (matchIdx) { // A match was found, use it
|
||||
std::vector<int32_t> const &value = charmap.nodes[matchIdx].value;
|
||||
|
||||
if (output)
|
||||
output->push_back(charmap.nodes[matchIdx].value);
|
||||
|
||||
matchLen = 1;
|
||||
output->insert(output->end(), RANGE(value));
|
||||
|
||||
matchLen = value.size();
|
||||
} else if (inputIdx < input.length()) { // No match found, but there is some input left
|
||||
int firstChar = input[inputIdx];
|
||||
// This will write the codepoint's value to `output`, little-endian
|
||||
|
||||
@@ -70,7 +70,7 @@
|
||||
|
||||
yy::parser::symbol_type yylex(); // Provided by lexer.cpp
|
||||
|
||||
static uint32_t str2int2(std::vector<uint8_t> const &s);
|
||||
static uint32_t str2int2(std::vector<int32_t> const &s);
|
||||
static void errorInvalidUTF8Byte(uint8_t byte, char const *functionName);
|
||||
static size_t strlenUTF8(std::string const &str);
|
||||
static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len);
|
||||
@@ -105,7 +105,6 @@
|
||||
%type <Expression> relocexpr_no_str
|
||||
%type <int32_t> const
|
||||
%type <int32_t> const_no_str
|
||||
%type <int32_t> const_8bit
|
||||
%type <int32_t> uconst
|
||||
%type <int32_t> rs_uconst
|
||||
%type <int32_t> shift_const
|
||||
@@ -264,6 +263,7 @@
|
||||
|
||||
%type <std::vector<Expression>> ds_args
|
||||
%type <std::vector<std::string>> purge_args
|
||||
%type <std::vector<int32_t>> charmap_args
|
||||
%type <ForArgs> for_args
|
||||
|
||||
%token Z80_ADC "adc" Z80_ADD "add" Z80_AND "and"
|
||||
@@ -1083,8 +1083,18 @@ incbin:
|
||||
;
|
||||
|
||||
charmap:
|
||||
POP_CHARMAP string COMMA const_8bit {
|
||||
charmap_Add($2, (uint8_t)$4);
|
||||
POP_CHARMAP string COMMA charmap_args trailing_comma {
|
||||
charmap_Add($2, std::move($4));
|
||||
}
|
||||
;
|
||||
|
||||
charmap_args:
|
||||
const {
|
||||
$$.push_back(std::move($1));
|
||||
}
|
||||
| charmap_args COMMA const {
|
||||
$$ = std::move($1);
|
||||
$$.push_back(std::move($3));
|
||||
}
|
||||
;
|
||||
|
||||
@@ -1170,7 +1180,7 @@ constlist_8bit_entry:
|
||||
sect_RelByte($1, 0);
|
||||
}
|
||||
| string {
|
||||
std::vector<uint8_t> output = charmap_Convert($1);
|
||||
std::vector<int32_t> output = charmap_Convert($1);
|
||||
sect_AbsByteString(output);
|
||||
}
|
||||
;
|
||||
@@ -1185,7 +1195,7 @@ constlist_16bit_entry:
|
||||
sect_RelWord($1, 0);
|
||||
}
|
||||
| string {
|
||||
std::vector<uint8_t> output = charmap_Convert($1);
|
||||
std::vector<int32_t> output = charmap_Convert($1);
|
||||
sect_AbsWordString(output);
|
||||
}
|
||||
;
|
||||
@@ -1200,7 +1210,7 @@ constlist_32bit_entry:
|
||||
sect_RelLong($1, 0);
|
||||
}
|
||||
| string {
|
||||
std::vector<uint8_t> output = charmap_Convert($1);
|
||||
std::vector<int32_t> output = charmap_Convert($1);
|
||||
sect_AbsLongString(output);
|
||||
}
|
||||
;
|
||||
@@ -1250,7 +1260,7 @@ relocexpr:
|
||||
$$ = std::move($1);
|
||||
}
|
||||
| string {
|
||||
std::vector<uint8_t> output = charmap_Convert($1);
|
||||
std::vector<int32_t> output = charmap_Convert($1);
|
||||
$$.makeNumber(str2int2(output));
|
||||
}
|
||||
;
|
||||
@@ -1465,12 +1475,6 @@ const_no_str:
|
||||
}
|
||||
;
|
||||
|
||||
const_8bit:
|
||||
reloc_8bit {
|
||||
$$ = $1.getConstVal();
|
||||
}
|
||||
;
|
||||
|
||||
opt_q_arg:
|
||||
%empty {
|
||||
$$ = fix_Precision();
|
||||
@@ -2374,26 +2378,37 @@ void yy::parser::error(std::string const &str) {
|
||||
::error("%s\n", str.c_str());
|
||||
}
|
||||
|
||||
static uint32_t str2int2(std::vector<uint8_t> const &s) {
|
||||
static uint32_t str2int2(std::vector<int32_t> const &s) {
|
||||
uint32_t length = s.size();
|
||||
|
||||
if (length == 1) {
|
||||
// The string is a single character with a single value,
|
||||
// which can be used directly as a number.
|
||||
return (uint32_t)s[0];
|
||||
}
|
||||
|
||||
for (int32_t v : s) {
|
||||
if (!checkNBit(v, 8, "All character units"))
|
||||
break;
|
||||
}
|
||||
|
||||
if (length > 4)
|
||||
warning(
|
||||
WARNING_NUMERIC_STRING_1,
|
||||
"Treating string as a number ignores first %" PRIu32 " character%s\n",
|
||||
"Treating string as a number ignores first %" PRIu32 " byte%s\n",
|
||||
length - 4,
|
||||
length == 5 ? "" : "s"
|
||||
);
|
||||
else if (length > 1)
|
||||
warning(
|
||||
WARNING_NUMERIC_STRING_2, "Treating %" PRIu32 "-character string as a number\n", length
|
||||
WARNING_NUMERIC_STRING_2, "Treating %" PRIu32 "-byte string as a number\n", length
|
||||
);
|
||||
|
||||
uint32_t r = 0;
|
||||
|
||||
for (uint32_t i = length < 4 ? 0 : length - 4; i < length; i++) {
|
||||
r <<= 8;
|
||||
r |= s[i];
|
||||
r |= static_cast<uint8_t>(s[i]);
|
||||
}
|
||||
|
||||
return r;
|
||||
|
||||
@@ -516,13 +516,22 @@ void Expression::makeCheckRST() {
|
||||
|
||||
// Checks that an RPN expression's value fits within N bits (signed or unsigned)
|
||||
void Expression::checkNBit(uint8_t n) const {
|
||||
if (isKnown())
|
||||
::checkNBit(value(), n, "Expression");
|
||||
}
|
||||
|
||||
bool checkNBit(int32_t v, uint8_t n, char const *name) {
|
||||
assume(n != 0); // That doesn't make sense
|
||||
assume(n < CHAR_BIT * sizeof(int)); // Otherwise `1 << n` is UB
|
||||
|
||||
if (isKnown()) {
|
||||
if (int32_t val = value(); val < -(1 << n) || val >= 1 << n)
|
||||
warning(WARNING_TRUNCATION_1, "Expression must be %u-bit\n", n);
|
||||
else if (val < -(1 << (n - 1)))
|
||||
warning(WARNING_TRUNCATION_2, "Expression must be %u-bit\n", n);
|
||||
if (v < -(1 << n) || v >= 1 << n) {
|
||||
warning(WARNING_TRUNCATION_1, "%s must be %u-bit\n", name, n);
|
||||
return false;
|
||||
}
|
||||
if (v < -(1 << (n - 1))) {
|
||||
warning(WARNING_TRUNCATION_2, "%s must be %u-bit\n", name, n);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -671,34 +671,44 @@ void sect_AbsByte(uint8_t b) {
|
||||
writebyte(b);
|
||||
}
|
||||
|
||||
void sect_AbsByteString(std::vector<uint8_t> const &s) {
|
||||
void sect_AbsByteString(std::vector<int32_t> const &s) {
|
||||
if (!checkcodesection())
|
||||
return;
|
||||
if (!reserveSpace(s.size()))
|
||||
return;
|
||||
|
||||
for (uint8_t v : s)
|
||||
writebyte(v);
|
||||
for (int32_t v : s) {
|
||||
if (!checkNBit(v, 8, "All character units"))
|
||||
break;
|
||||
}
|
||||
|
||||
for (int32_t v : s)
|
||||
writebyte(static_cast<uint8_t>(v));
|
||||
}
|
||||
|
||||
void sect_AbsWordString(std::vector<uint8_t> const &s) {
|
||||
void sect_AbsWordString(std::vector<int32_t> const &s) {
|
||||
if (!checkcodesection())
|
||||
return;
|
||||
if (!reserveSpace(s.size() * 2))
|
||||
return;
|
||||
|
||||
for (uint8_t v : s)
|
||||
writeword(v);
|
||||
for (int32_t v : s) {
|
||||
if (!checkNBit(v, 16, "All character units"))
|
||||
break;
|
||||
}
|
||||
|
||||
for (int32_t v : s)
|
||||
writeword(static_cast<uint16_t>(v));
|
||||
}
|
||||
|
||||
void sect_AbsLongString(std::vector<uint8_t> const &s) {
|
||||
void sect_AbsLongString(std::vector<int32_t> const &s) {
|
||||
if (!checkcodesection())
|
||||
return;
|
||||
if (!reserveSpace(s.size() * 4))
|
||||
return;
|
||||
|
||||
for (uint8_t v : s)
|
||||
writelong(v);
|
||||
for (int32_t v : s)
|
||||
writelong(static_cast<uint32_t>(v));
|
||||
}
|
||||
|
||||
// Skip this many bytes
|
||||
|
||||
Reference in New Issue
Block a user