Use a vector of RPN values (#1820)

This replaces the previous approach of byte-encoding the RPN values in an intermediate format that differed from the RPN buffer actually written to the object file.
This commit is contained in:
Rangi
2025-09-03 14:42:37 -04:00
committed by GitHub
parent ee1db0a582
commit 590d113e94
3 changed files with 74 additions and 168 deletions

View File

@@ -12,15 +12,18 @@
struct Symbol; struct Symbol;
struct RPNValue {
RPNCommand command;
std::variant<std::monostate, uint8_t, uint32_t, std::string> data;
};
struct Expression { struct Expression {
std::variant< std::variant<
int32_t, // If the expression's value is known, it's here int32_t, // If the expression's value is known, it's here
std::string // Why the expression is not known, if it isn't std::string // Why the expression is not known, if it isn't
> >
data = 0; data = 0;
bool isSymbol = false; // Whether the expression represents a symbol suitable for const diffing std::vector<RPNValue> rpn{}; // Values to be serialized into the RPN expression
std::vector<uint8_t> rpn{}; // Bytes serializing the RPN expression
uint32_t rpnPatchSize = 0; // Size the expression will take in the object file
bool isKnown() const { return std::holds_alternative<int32_t>(data); } bool isKnown() const { return std::holds_alternative<int32_t>(data); }
int32_t value() const { return std::get<int32_t>(data); } int32_t value() const { return std::get<int32_t>(data); }
@@ -45,11 +48,6 @@ struct Expression {
void makeCheckBitIndex(uint8_t mask); void makeCheckBitIndex(uint8_t mask);
void checkNBit(uint8_t n) const; void checkNBit(uint8_t n) const;
private:
void clear();
uint8_t *reserveSpace(uint32_t size);
uint8_t *reserveSpace(uint32_t size, uint32_t patchSize);
}; };
bool checkNBit(int32_t v, uint8_t n, char const *name); bool checkNBit(int32_t v, uint8_t n, char const *name);

View File

@@ -12,6 +12,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <string> #include <string>
#include <variant>
#include <vector> #include <vector>
#include "helpers.hpp" // assume, Defer #include "helpers.hpp" // assume, Defer
@@ -154,46 +155,37 @@ static void initPatch(Patch &patch, uint32_t type, Expression const &expr, uint3
return; return;
} }
// If the RPN expr's value is not known, its RPN patch buffer size is known // If the RPN expr's value is not known, serialize its RPN values
patch.rpn.resize(expr.rpnPatchSize); patch.rpn.clear();
patch.rpn.reserve(expr.rpn.size() * 2); // Rough estimate of the serialized size
for (size_t exprIdx = 0, patchIdx = 0; exprIdx < expr.rpn.size();) { for (RPNValue const &value : expr.rpn) {
// Every command starts with its own ID // Every command starts with its own ID
assume(patchIdx < patch.rpn.size()); patch.rpn.push_back(value.command);
uint8_t cmd = expr.rpn[exprIdx++];
patch.rpn[patchIdx++] = cmd;
switch (cmd) { switch (value.command) {
case RPN_CONST: case RPN_CONST: {
// The command ID is followed by a four-byte integer // The command ID is followed by a four-byte integer
assume(exprIdx + 3 < expr.rpn.size()); assume(std::holds_alternative<uint32_t>(value.data));
assume(patchIdx + 3 < patch.rpn.size()); uint32_t v = std::get<uint32_t>(value.data);
patch.rpn[patchIdx++] = expr.rpn[exprIdx++]; patch.rpn.push_back(v & 0xFF);
patch.rpn[patchIdx++] = expr.rpn[exprIdx++]; patch.rpn.push_back(v >> 8);
patch.rpn[patchIdx++] = expr.rpn[exprIdx++]; patch.rpn.push_back(v >> 16);
patch.rpn[patchIdx++] = expr.rpn[exprIdx++]; patch.rpn.push_back(v >> 24);
break; break;
}
case RPN_SYM: case RPN_SYM:
case RPN_BANK_SYM: { case RPN_BANK_SYM: {
// The command ID is followed by a four-byte symbol ID // The command ID is followed by a four-byte symbol ID
std::string symName; assume(std::holds_alternative<std::string>(value.data));
for (;;) {
assume(exprIdx < expr.rpn.size());
uint8_t c = expr.rpn[exprIdx++];
if (!c) {
break;
}
symName += c;
}
// The symbol name is always written expanded // The symbol name is always written expanded
Symbol *sym = sym_FindExactSymbol(symName); Symbol *sym = sym_FindExactSymbol(std::get<std::string>(value.data));
registerUnregisteredSymbol(*sym); // Ensure that `sym->ID` is set registerUnregisteredSymbol(*sym); // Ensure that `sym->ID` is set
assume(patchIdx + 3 < patch.rpn.size()); patch.rpn.push_back(sym->ID & 0xFF);
patch.rpn[patchIdx++] = sym->ID & 0xFF; patch.rpn.push_back(sym->ID >> 8);
patch.rpn[patchIdx++] = sym->ID >> 8; patch.rpn.push_back(sym->ID >> 16);
patch.rpn[patchIdx++] = sym->ID >> 16; patch.rpn.push_back(sym->ID >> 24);
patch.rpn[patchIdx++] = sym->ID >> 24;
break; break;
} }
@@ -201,15 +193,11 @@ static void initPatch(Patch &patch, uint32_t type, Expression const &expr, uint3
case RPN_SIZEOF_SECT: case RPN_SIZEOF_SECT:
case RPN_STARTOF_SECT: { case RPN_STARTOF_SECT: {
// The command ID is followed by a NUL-terminated section name string // The command ID is followed by a NUL-terminated section name string
for (;;) { assume(std::holds_alternative<std::string>(value.data));
assume(exprIdx < expr.rpn.size()); for (char c : std::get<std::string>(value.data)) {
assume(patchIdx < patch.rpn.size()); patch.rpn.push_back(c);
uint8_t b = expr.rpn[exprIdx++];
patch.rpn[patchIdx++] = b;
if (!b) {
break;
}
} }
patch.rpn.push_back('\0');
break; break;
} }
@@ -217,9 +205,13 @@ static void initPatch(Patch &patch, uint32_t type, Expression const &expr, uint3
case RPN_STARTOF_SECTTYPE: case RPN_STARTOF_SECTTYPE:
case RPN_BIT_INDEX: case RPN_BIT_INDEX:
// The command ID is followed by a byte value // The command ID is followed by a byte value
assume(exprIdx < expr.rpn.size()); assume(std::holds_alternative<uint8_t>(value.data));
assume(patchIdx < patch.rpn.size()); patch.rpn.push_back(std::get<uint8_t>(value.data));
patch.rpn[patchIdx++] = expr.rpn[exprIdx++]; break;
default:
// Other command IDs are not followed by anything
assume(std::holds_alternative<std::monostate>(value.data));
break; break;
} }
} }

View File

@@ -12,6 +12,7 @@
#include <string> #include <string>
#include <string_view> #include <string_view>
#include <utility> #include <utility>
#include <variant>
#include "helpers.hpp" // assume #include "helpers.hpp" // assume
#include "linkdefs.hpp" #include "linkdefs.hpp"
@@ -24,24 +25,6 @@
using namespace std::literals; using namespace std::literals;
void Expression::clear() {
data = 0;
isSymbol = false;
rpn.clear();
rpnPatchSize = 0;
}
uint8_t *Expression::reserveSpace(uint32_t size) {
return reserveSpace(size, size);
}
uint8_t *Expression::reserveSpace(uint32_t size, uint32_t patchSize) {
rpnPatchSize += patchSize;
size_t curSize = rpn.size();
rpn.resize(curSize + size);
return &rpn[curSize];
}
int32_t Expression::getConstVal() const { int32_t Expression::getConstVal() const {
if (!isKnown()) { if (!isKnown()) {
error("Expected constant expression: %s", std::get<std::string>(data).c_str()); error("Expected constant expression: %s", std::get<std::string>(data).c_str());
@@ -51,10 +34,10 @@ int32_t Expression::getConstVal() const {
} }
Symbol const *Expression::symbolOf() const { Symbol const *Expression::symbolOf() const {
if (!isSymbol) { if (rpn.size() != 1 || rpn[0].command != RPN_SYM) {
return nullptr; return nullptr;
} }
return sym_FindScopedSymbol(reinterpret_cast<char const *>(&rpn[1])); return sym_FindScopedSymbol(std::get<std::string>(rpn[0].data));
} }
bool Expression::isDiffConstant(Symbol const *sym) const { bool Expression::isDiffConstant(Symbol const *sym) const {
@@ -71,12 +54,12 @@ bool Expression::isDiffConstant(Symbol const *sym) const {
} }
void Expression::makeNumber(uint32_t value) { void Expression::makeNumber(uint32_t value) {
clear(); rpn.clear();
data = static_cast<int32_t>(value); data = static_cast<int32_t>(value);
} }
void Expression::makeSymbol(std::string const &symName) { void Expression::makeSymbol(std::string const &symName) {
clear(); rpn.clear();
if (Symbol *sym = sym_FindScopedSymbol(symName); sym_IsPC(sym) && !sect_GetSymbolSection()) { if (Symbol *sym = sym_FindScopedSymbol(symName); sym_IsPC(sym) && !sect_GetSymbolSection()) {
error("PC has no value outside of a section"); error("PC has no value outside of a section");
data = 0; data = 0;
@@ -84,28 +67,20 @@ void Expression::makeSymbol(std::string const &symName) {
error("`%s` is not a numeric symbol", symName.c_str()); error("`%s` is not a numeric symbol", symName.c_str());
data = 0; data = 0;
} else if (!sym || !sym->isConstant()) { } else if (!sym || !sym->isConstant()) {
isSymbol = true;
data = sym_IsPC(sym) ? "PC is not constant at assembly time" data = sym_IsPC(sym) ? "PC is not constant at assembly time"
: (sym && sym->isDefined() : (sym && sym->isDefined()
? "`"s + symName + "` is not constant at assembly time" ? "`"s + symName + "` is not constant at assembly time"
: "undefined symbol `"s + symName + "`") : "undefined symbol `"s + symName + "`")
+ (sym_IsPurgedScoped(symName) ? "; it was purged" : ""); + (sym_IsPurgedScoped(symName) ? "; it was purged" : "");
sym = sym_Ref(symName); sym = sym_Ref(symName);
rpn.push_back({.command = RPN_SYM, .data = sym->name});
size_t nameLen = sym->name.length() + 1; // Don't forget NUL!
// 1-byte opcode + 4-byte symbol ID
uint8_t *ptr = reserveSpace(nameLen + 1, 5);
*ptr++ = RPN_SYM;
memcpy(ptr, sym->name.c_str(), nameLen);
} else { } else {
data = static_cast<int32_t>(sym->getConstantValue()); data = static_cast<int32_t>(sym->getConstantValue());
} }
} }
void Expression::makeBankSymbol(std::string const &symName) { void Expression::makeBankSymbol(std::string const &symName) {
clear(); rpn.clear();
if (Symbol const *sym = sym_FindScopedSymbol(symName); sym_IsPC(sym)) { if (Symbol const *sym = sym_FindScopedSymbol(symName); sym_IsPC(sym)) {
// The @ symbol is treated differently. // The @ symbol is treated differently.
if (std::optional<uint32_t> outputBank = sect_GetOutputBank(); !outputBank) { if (std::optional<uint32_t> outputBank = sect_GetOutputBank(); !outputBank) {
@@ -113,19 +88,16 @@ void Expression::makeBankSymbol(std::string const &symName) {
data = 1; data = 1;
} else if (*outputBank == UINT32_MAX) { } else if (*outputBank == UINT32_MAX) {
data = "Current section's bank is not known"; data = "Current section's bank is not known";
rpn.push_back({.command = RPN_BANK_SELF, .data = std::monostate{}});
*reserveSpace(1) = RPN_BANK_SELF;
} else { } else {
data = static_cast<int32_t>(*outputBank); data = static_cast<int32_t>(*outputBank);
} }
return;
} else if (sym && !sym->isLabel()) { } else if (sym && !sym->isLabel()) {
error("`BANK` argument must be a label"); error("`BANK` argument must be a label");
data = 1; data = 1;
} else { } else {
sym = sym_Ref(symName); sym = sym_Ref(symName);
assume(sym); // If the symbol didn't exist, it should have been created assume(sym); // If the symbol didn't exist, it should have been created
if (sym->getSection() && sym->getSection()->bank != UINT32_MAX) { if (sym->getSection() && sym->getSection()->bank != UINT32_MAX) {
// Symbol's section is known and bank is fixed // Symbol's section is known and bank is fixed
data = static_cast<int32_t>(sym->getSection()->bank); data = static_cast<int32_t>(sym->getSection()->bank);
@@ -133,78 +105,51 @@ void Expression::makeBankSymbol(std::string const &symName) {
data = sym_IsPurgedScoped(symName) data = sym_IsPurgedScoped(symName)
? "`"s + symName + "`'s bank is not known; it was purged" ? "`"s + symName + "`'s bank is not known; it was purged"
: "`"s + symName + "`'s bank is not known"; : "`"s + symName + "`'s bank is not known";
rpn.push_back({.command = RPN_BANK_SYM, .data = sym->name});
size_t nameLen = sym->name.length() + 1; // Room for NUL!
// 1-byte opcode + 4-byte sect ID
uint8_t *ptr = reserveSpace(nameLen + 1, 5);
*ptr++ = RPN_BANK_SYM;
memcpy(ptr, sym->name.c_str(), nameLen);
} }
} }
} }
void Expression::makeBankSection(std::string const &sectName) { void Expression::makeBankSection(std::string const &sectName) {
clear(); rpn.clear();
if (Section *sect = sect_FindSectionByName(sectName); sect && sect->bank != UINT32_MAX) { if (Section *sect = sect_FindSectionByName(sectName); sect && sect->bank != UINT32_MAX) {
data = static_cast<int32_t>(sect->bank); data = static_cast<int32_t>(sect->bank);
} else { } else {
data = "Section \""s + sectName + "\"'s bank is not known"; data = "Section \""s + sectName + "\"'s bank is not known";
rpn.push_back({.command = RPN_BANK_SECT, .data = sectName});
size_t nameLen = sectName.length() + 1; // Room for NUL!
uint8_t *ptr = reserveSpace(nameLen + 1);
*ptr++ = RPN_BANK_SECT;
memcpy(ptr, sectName.data(), nameLen);
} }
} }
void Expression::makeSizeOfSection(std::string const &sectName) { void Expression::makeSizeOfSection(std::string const &sectName) {
clear(); rpn.clear();
if (Section *sect = sect_FindSectionByName(sectName); sect && sect->isSizeKnown()) { if (Section *sect = sect_FindSectionByName(sectName); sect && sect->isSizeKnown()) {
data = static_cast<int32_t>(sect->size); data = static_cast<int32_t>(sect->size);
} else { } else {
data = "Section \""s + sectName + "\"'s size is not known"; data = "Section \""s + sectName + "\"'s size is not known";
rpn.push_back({.command = RPN_SIZEOF_SECT, .data = sectName});
size_t nameLen = sectName.length() + 1; // Room for NUL!
uint8_t *ptr = reserveSpace(nameLen + 1);
*ptr++ = RPN_SIZEOF_SECT;
memcpy(ptr, sectName.data(), nameLen);
} }
} }
void Expression::makeStartOfSection(std::string const &sectName) { void Expression::makeStartOfSection(std::string const &sectName) {
clear(); rpn.clear();
if (Section *sect = sect_FindSectionByName(sectName); sect && sect->org != UINT32_MAX) { if (Section *sect = sect_FindSectionByName(sectName); sect && sect->org != UINT32_MAX) {
data = static_cast<int32_t>(sect->org); data = static_cast<int32_t>(sect->org);
} else { } else {
data = "Section \""s + sectName + "\"'s start is not known"; data = "Section \""s + sectName + "\"'s start is not known";
rpn.push_back({.command = RPN_STARTOF_SECT, .data = sectName});
size_t nameLen = sectName.length() + 1; // Room for NUL!
uint8_t *ptr = reserveSpace(nameLen + 1);
*ptr++ = RPN_STARTOF_SECT;
memcpy(ptr, sectName.data(), nameLen);
} }
} }
void Expression::makeSizeOfSectionType(SectionType type) { void Expression::makeSizeOfSectionType(SectionType type) {
clear(); rpn.clear();
data = "Section type's size is not known"; data = "Section type's size is not known";
rpn.push_back({.command = RPN_SIZEOF_SECTTYPE, .data = static_cast<uint8_t>(type)});
uint8_t *ptr = reserveSpace(2);
*ptr++ = RPN_SIZEOF_SECTTYPE;
*ptr = type;
} }
void Expression::makeStartOfSectionType(SectionType type) { void Expression::makeStartOfSectionType(SectionType type) {
clear(); rpn.clear();
data = "Section type's start is not known"; data = "Section type's start is not known";
rpn.push_back({.command = RPN_STARTOF_SECTTYPE, .data = static_cast<uint8_t>(type)});
uint8_t *ptr = reserveSpace(2);
*ptr++ = RPN_STARTOF_SECTTYPE;
*ptr = type;
} }
static bool tryConstZero(Expression const &lhs, Expression const &rhs) { static bool tryConstZero(Expression const &lhs, Expression const &rhs) {
@@ -302,7 +247,7 @@ static int32_t tryConstMask(Expression const &lhs, Expression const &rhs) {
} }
void Expression::makeUnaryOp(RPNCommand op, Expression &&src) { void Expression::makeUnaryOp(RPNCommand op, Expression &&src) {
clear(); rpn.clear();
// First, check if the expression is known // First, check if the expression is known
if (src.isKnown()) { if (src.isKnown()) {
// If the expression is known, just compute the value // If the expression is known, just compute the value
@@ -339,16 +284,15 @@ void Expression::makeUnaryOp(RPNCommand op, Expression &&src) {
} else if (int32_t constVal; op == RPN_LOW && (constVal = tryConstLow(src)) != -1) { } else if (int32_t constVal; op == RPN_LOW && (constVal = tryConstLow(src)) != -1) {
data = constVal; data = constVal;
} else { } else {
// If it's not known, just reuse its RPN buffer and append the operator // If it's not known, just reuse its RPN vector and append the operator
rpnPatchSize = src.rpnPatchSize;
std::swap(rpn, src.rpn);
data = std::move(src.data); data = std::move(src.data);
*reserveSpace(1) = op; std::swap(rpn, src.rpn);
rpn.push_back({.command = op, .data = std::monostate{}});
} }
} }
void Expression::makeBinaryOp(RPNCommand op, Expression &&src1, Expression const &src2) { void Expression::makeBinaryOp(RPNCommand op, Expression &&src1, Expression const &src2) {
clear(); rpn.clear();
// First, check if the expressions are known // First, check if the expressions are known
if (src1.isKnown() && src2.isKnown()) { if (src1.isKnown() && src2.isKnown()) {
// If both expressions are known, just compute the value // If both expressions are known, just compute the value
@@ -480,57 +424,32 @@ void Expression::makeBinaryOp(RPNCommand op, Expression &&src1, Expression const
// Convert the left-hand expression if it's constant // Convert the left-hand expression if it's constant
if (src1.isKnown()) { if (src1.isKnown()) {
uint32_t lval = src1.value(); uint32_t lval = src1.value();
uint8_t bytes[] = {
RPN_CONST,
static_cast<uint8_t>(lval),
static_cast<uint8_t>(lval >> 8),
static_cast<uint8_t>(lval >> 16),
static_cast<uint8_t>(lval >> 24),
};
rpn.clear();
rpnPatchSize = 0;
memcpy(reserveSpace(sizeof(bytes)), bytes, sizeof(bytes));
// Use the other expression's un-const reason // Use the other expression's un-const reason
data = std::move(src2.data); data = std::move(src2.data);
rpn.push_back({.command = RPN_CONST, .data = lval});
} else { } else {
// Otherwise just reuse its RPN buffer // Otherwise just reuse its RPN vector
rpnPatchSize = src1.rpnPatchSize;
std::swap(rpn, src1.rpn);
data = std::move(src1.data); data = std::move(src1.data);
std::swap(rpn, src1.rpn);
} }
// Now, merge the right expression into the left one // Now, merge the right expression into the left one
if (src2.isKnown()) { if (src2.isKnown()) {
// If the right expression is constant, append a shim instead // If the right expression is constant, append its value
uint32_t rval = src2.value(); uint32_t rval = src2.value();
uint8_t bytes[] = { rpn.push_back({.command = RPN_CONST, .data = rval});
RPN_CONST,
static_cast<uint8_t>(rval),
static_cast<uint8_t>(rval >> 8),
static_cast<uint8_t>(rval >> 16),
static_cast<uint8_t>(rval >> 24),
};
uint8_t *ptr = reserveSpace(sizeof(bytes) + 1, sizeof(bytes) + 1);
memcpy(ptr, bytes, sizeof(bytes));
ptr[sizeof(bytes)] = op;
} else { } else {
// Copy the right RPN and append the operator // Otherwise just extend with its RPN vector
uint32_t rightRpnSize = src2.rpn.size(); rpn.insert(rpn.end(), RANGE(src2.rpn));
uint8_t *ptr = reserveSpace(rightRpnSize + 1, src2.rpnPatchSize + 1);
if (rightRpnSize > 0) {
// If `rightRpnSize == 0`, then `memcpy(ptr, nullptr, rightRpnSize)` would be UB
memcpy(ptr, src2.rpn.data(), rightRpnSize);
}
ptr[rightRpnSize] = op;
} }
// Append the operator
rpn.push_back({.command = op, .data = std::monostate{}});
} }
} }
void Expression::makeCheckHRAM() { void Expression::makeCheckHRAM() {
isSymbol = false;
if (!isKnown()) { if (!isKnown()) {
*reserveSpace(1) = RPN_HRAM; rpn.push_back({.command = RPN_HRAM, .data = std::monostate{}});
} else if (int32_t val = value(); val >= 0xFF00 && val <= 0xFFFF) { } else if (int32_t val = value(); val >= 0xFF00 && val <= 0xFFFF) {
// That range is valid; only keep the lower byte // That range is valid; only keep the lower byte
data = val & 0xFF; data = val & 0xFF;
@@ -541,7 +460,7 @@ void Expression::makeCheckHRAM() {
void Expression::makeCheckRST() { void Expression::makeCheckRST() {
if (!isKnown()) { if (!isKnown()) {
*reserveSpace(1) = RPN_RST; rpn.push_back({.command = RPN_RST, .data = std::monostate{}});
} else if (int32_t val = value(); val & ~0x38) { } else if (int32_t val = value(); val & ~0x38) {
// A valid RST address must be masked with 0x38 // A valid RST address must be masked with 0x38
error("Invalid address $%" PRIx32 " for `RST`", val); error("Invalid address $%" PRIx32 " for `RST`", val);
@@ -550,11 +469,8 @@ void Expression::makeCheckRST() {
void Expression::makeCheckBitIndex(uint8_t mask) { void Expression::makeCheckBitIndex(uint8_t mask) {
assume((mask & 0xC0) != 0x00); // The high two bits must correspond to BIT, RES, or SET assume((mask & 0xC0) != 0x00); // The high two bits must correspond to BIT, RES, or SET
if (!isKnown()) { if (!isKnown()) {
uint8_t *ptr = reserveSpace(2); rpn.push_back({.command = RPN_BIT_INDEX, .data = mask});
*ptr++ = RPN_BIT_INDEX;
*ptr = mask;
} else if (int32_t val = value(); val & ~0x07) { } else if (int32_t val = value(); val & ~0x07) {
// A valid bit index must be masked with 0x07 // A valid bit index must be masked with 0x07
static char const *instructions[4] = {"instruction", "`BIT`", "`RES`", "`SET`"}; static char const *instructions[4] = {"instruction", "`BIT`", "`RES`", "`SET`"};