// SPDX-License-Identifier: MIT #include "asm/rpn.hpp" #include #include #include #include #include #include #include #include #include #include #include #include "helpers.hpp" // assume #include "linkdefs.hpp" #include "opmath.hpp" #include "asm/output.hpp" #include "asm/section.hpp" #include "asm/symbol.hpp" #include "asm/warning.hpp" using namespace std::literals; int32_t Expression::getConstVal() const { if (!isKnown()) { error("Expected constant expression: %s", std::get(data).c_str()); return 0; } return value(); } Symbol const *Expression::symbolOf() const { if (rpn.size() != 1 || rpn[0].command != RPN_SYM) { return nullptr; } return sym_FindScopedSymbol(std::get(rpn[0].data)); } bool Expression::isDiffConstant(Symbol const *sym) const { // Check if both expressions only refer to a single symbol Symbol const *sym1 = symbolOf(); if (!sym1 || !sym || sym1->type != SYM_LABEL || sym->type != SYM_LABEL) { return false; } Section const *sect1 = sym1->getSection(); Section const *sect2 = sym->getSection(); return sect1 && (sect1 == sect2); } void Expression::makeNumber(uint32_t value) { assume(rpn.empty()); data = static_cast(value); } void Expression::makeSymbol(InternedStr symName) { assume(rpn.empty()); if (Symbol *sym = sym_FindScopedSymbol(symName); sym_IsPC(sym) && !sect_GetSymbolSection()) { error("PC has no value outside of a section"); data = 0; } else if (sym && !sym->isNumeric() && !sym->isLabel()) { error("`%s` is not a numeric symbol", symName.c_str()); data = 0; } else if (!sym || !sym->isConstant()) { data = sym_IsPC(sym) ? "PC is not constant at assembly time" : (sym && sym->isDefined() ? "`"s + symName.str() + "` is not constant at assembly time" : "undefined symbol `"s + symName.str() + "`") + (sym_IsPurgedScoped(symName) ? "; it was purged" : ""); sym = sym_Ref(symName); rpn.emplace_back(RPN_SYM, sym->name); } else { data = static_cast(sym->getConstantValue()); } } void Expression::makeBankSymbol(InternedStr symName) { assume(rpn.empty()); if (Symbol const *sym = sym_FindScopedSymbol(symName); sym_IsPC(sym)) { // The @ symbol is treated differently. if (std::optional outputBank = sect_GetOutputBank(); !outputBank) { error("PC has no bank outside of a section"); data = 1; } else if (*outputBank == UINT32_MAX) { data = "Current section's bank is not known"; rpn.emplace_back(RPN_BANK_SELF); } else { data = static_cast(*outputBank); } } else if (sym && !sym->isLabel()) { error("`BANK` argument must be a label"); data = 1; } else { sym = sym_Ref(symName); assume(sym); // If the symbol didn't exist, it should have been created if (sym->getSection() && sym->getSection()->bank != UINT32_MAX) { // Symbol's section is known and bank is fixed data = static_cast(sym->getSection()->bank); } else { data = sym_IsPurgedScoped(symName) ? "`"s + symName.str() + "`'s bank is not known; it was purged" : "`"s + symName.str() + "`'s bank is not known"; rpn.emplace_back(RPN_BANK_SYM, sym->name); } } } void Expression::makeBankSection(std::string const §Name) { assume(rpn.empty()); if (Section *sect = sect_FindSectionByName(sectName); sect && sect->bank != UINT32_MAX) { data = static_cast(sect->bank); } else { data = "Section \""s + sectName + "\"'s bank is not known"; rpn.emplace_back(RPN_BANK_SECT, intern(sectName)); } } void Expression::makeSizeOfSection(std::string const §Name) { assume(rpn.empty()); if (Section *sect = sect_FindSectionByName(sectName); sect && sect->isSizeKnown()) { data = static_cast(sect->size); } else { data = "Section \""s + sectName + "\"'s size is not known"; rpn.emplace_back(RPN_SIZEOF_SECT, intern(sectName)); } } void Expression::makeStartOfSection(std::string const §Name) { assume(rpn.empty()); if (Section *sect = sect_FindSectionByName(sectName); sect && sect->org != UINT32_MAX) { data = static_cast(sect->org); } else { data = "Section \""s + sectName + "\"'s start is not known"; rpn.emplace_back(RPN_STARTOF_SECT, intern(sectName)); } } void Expression::makeSizeOfSectionType(SectionType type) { assume(rpn.empty()); data = "Section type's size is not known"; rpn.emplace_back(RPN_SIZEOF_SECTTYPE, static_cast(type)); } void Expression::makeStartOfSectionType(SectionType type) { assume(rpn.empty()); data = "Section type's start is not known"; rpn.emplace_back(RPN_STARTOF_SECTTYPE, static_cast(type)); } static bool tryConstZero(Expression const &lhs, Expression const &rhs) { Expression const &expr = lhs.isKnown() ? lhs : rhs; return expr.isKnown() && expr.value() == 0; } static bool tryConstNonzero(Expression const &lhs, Expression const &rhs) { Expression const &expr = lhs.isKnown() ? lhs : rhs; return expr.isKnown() && expr.value() != 0; } static bool tryConstLogNot(Expression const &expr) { Symbol const *sym = expr.symbolOf(); if (!sym || !sym->getSection() || !sym->isDefined()) { return false; } assume(sym->isNumeric()); Section const § = *sym->getSection(); int32_t unknownBits = (1 << 16) - (1 << sect.align); // `sym->getValue()` attempts to add the section's address, but that's `UINT32_MAX` // because the section is floating (otherwise we wouldn't be here) assume(sect.org == UINT32_MAX); int32_t symbolOfs = sym->getValue() + 1; int32_t knownBits = (symbolOfs + sect.alignOfs) & ~unknownBits; return knownBits != 0; } // Returns a constant LOW() from non-constant argument, or -1 if it cannot be computed. // This is possible if the argument is a symbol belonging to an `ALIGN[8]` section. static int32_t tryConstLow(Expression const &expr) { Symbol const *sym = expr.symbolOf(); if (!sym || !sym->getSection() || !sym->isDefined()) { return -1; } assume(sym->isNumeric()); // The low byte must not cover any unknown bits Section const § = *sym->getSection(); if (sect.align < 8) { return -1; } // `sym->getValue()` attempts to add the section's address, but that's `UINT32_MAX` // because the section is floating (otherwise we wouldn't be here) assume(sect.org == UINT32_MAX); int32_t symbolOfs = sym->getValue() + 1; return op_low(symbolOfs + sect.alignOfs); } // Returns a constant binary AND with one non-constant operand, or -1 if it cannot be computed. // This is possible if one operand is a symbol belonging to an `ALIGN[N]` section, and the other is // a constant that only keeps (some of) the lower N bits. static int32_t tryConstMask(Expression const &lhs, Expression const &rhs) { Symbol const *lhsSymbol = lhs.symbolOf(); Symbol const *rhsSymbol = lhsSymbol ? nullptr : rhs.symbolOf(); bool lhsIsSymbol = lhsSymbol && lhsSymbol->getSection(); bool rhsIsSymbol = rhsSymbol && rhsSymbol->getSection(); if (!lhsIsSymbol && !rhsIsSymbol) { return -1; } // If the lhs isn't a symbol, try again the other way around Symbol const &sym = lhsIsSymbol ? *lhsSymbol : *rhsSymbol; Expression const &expr = lhsIsSymbol ? rhs : lhs; // Opposite side of `sym` if (!sym.isDefined() || !expr.isKnown()) { return -1; } assume(sym.isNumeric()); // We can now safely use `expr.value()` int32_t mask = expr.value(); // The mask must not cover any unknown bits Section const § = *sym.getSection(); if (int32_t unknownBits = (1 << 16) - (1 << sect.align); (unknownBits & mask) != 0) { return -1; } // `sym.getValue()` attempts to add the section's address, but that's `UINT32_MAX` // because the section is floating (otherwise we wouldn't be here) assume(sect.org == UINT32_MAX); int32_t symbolOfs = sym.getValue() + 1; return (symbolOfs + sect.alignOfs) & mask; } void Expression::makeUnaryOp(RPNCommand op, Expression &&src) { assume(rpn.empty()); // First, check if the expression is known if (src.isKnown()) { // If the expressions is known, just compute the value switch (int32_t val = src.value(); op) { case RPN_NEG: data = op_neg(val); break; case RPN_NOT: data = ~val; break; case RPN_LOGNOT: data = !val; break; case RPN_HIGH: data = op_high(val); break; case RPN_LOW: data = op_low(val); break; case RPN_BITWIDTH: data = op_bitwidth(val); break; case RPN_TZCOUNT: data = op_tzcount(val); break; // LCOV_EXCL_START default: // `makeUnaryOp` should never be called with a non-unary operator! unreachable_(); } // LCOV_EXCL_STOP } else if (op == RPN_LOGNOT && tryConstLogNot(src)) { data = 0; } else if (int32_t constVal; op == RPN_LOW && (constVal = tryConstLow(src)) != -1) { data = constVal; } else { // If it's not known, just reuse its RPN vector and append the operator data = std::move(src.data); std::swap(rpn, src.rpn); rpn.emplace_back(op); } } void Expression::makeBinaryOp(RPNCommand op, Expression &&src1, Expression const &src2) { assume(rpn.empty()); // First, check if the expressions are known if (src1.isKnown() && src2.isKnown()) { // If both expressions are known, just compute the value int32_t lval = src1.value(), rval = src2.value(); uint32_t ulval = static_cast(lval), urval = static_cast(rval); switch (op) { case RPN_LOGOR: data = lval || rval; break; case RPN_LOGAND: data = lval && rval; break; case RPN_LOGEQ: data = lval == rval; break; case RPN_LOGGT: data = lval > rval; break; case RPN_LOGLT: data = lval < rval; break; case RPN_LOGGE: data = lval >= rval; break; case RPN_LOGLE: data = lval <= rval; break; case RPN_LOGNE: data = lval != rval; break; case RPN_ADD: data = static_cast(ulval + urval); break; case RPN_SUB: data = static_cast(ulval - urval); break; case RPN_XOR: data = lval ^ rval; break; case RPN_OR: data = lval | rval; break; case RPN_AND: data = lval & rval; break; case RPN_SHL: if (rval < 0) { warning(WARNING_SHIFT_AMOUNT, "Shifting left by negative amount %" PRId32, rval); } if (rval >= 32) { warning(WARNING_SHIFT_AMOUNT, "Shifting left by large amount %" PRId32, rval); } data = op_shift_left(lval, rval); break; case RPN_SHR: if (lval < 0) { warning(WARNING_SHIFT, "Shifting right negative value %" PRId32, lval); } if (rval < 0) { warning(WARNING_SHIFT_AMOUNT, "Shifting right by negative amount %" PRId32, rval); } if (rval >= 32) { warning(WARNING_SHIFT_AMOUNT, "Shifting right by large amount %" PRId32, rval); } data = op_shift_right(lval, rval); break; case RPN_USHR: if (rval < 0) { warning(WARNING_SHIFT_AMOUNT, "Shifting right by negative amount %" PRId32, rval); } if (rval >= 32) { warning(WARNING_SHIFT_AMOUNT, "Shifting right by large amount %" PRId32, rval); } data = op_shift_right_unsigned(lval, rval); break; case RPN_MUL: data = static_cast(ulval * urval); break; case RPN_DIV: if (rval == 0) { fatal("Division by zero"); } if (lval == INT32_MIN && rval == -1) { warning( WARNING_DIV, "Division of %" PRId32 " by -1 yields %" PRId32, INT32_MIN, INT32_MIN ); data = INT32_MIN; } else { data = op_divide(lval, rval); } break; case RPN_MOD: if (rval == 0) { fatal("Modulo by zero"); } if (lval == INT32_MIN && rval == -1) { data = 0; } else { data = op_modulo(lval, rval); } break; case RPN_EXP: if (rval < 0) { fatal("Exponentiation by negative power"); } data = op_exponent(lval, rval); break; // LCOV_EXCL_START default: // `makeBinaryOp` should never be called with a non-binary operator! unreachable_(); } // LCOV_EXCL_STOP } else if (op == RPN_SUB && src1.isDiffConstant(src2.symbolOf())) { data = src1.symbolOf()->getValue() - src2.symbolOf()->getValue(); } else if ((op == RPN_LOGAND || op == RPN_AND) && tryConstZero(src1, src2)) { data = 0; } else if (op == RPN_LOGOR && tryConstNonzero(src1, src2)) { data = 1; } else if (int32_t constVal; op == RPN_AND && (constVal = tryConstMask(src1, src2)) != -1) { data = constVal; } else { // If it's not known, start computing the RPN expression // Convert the left-hand expression if it's constant if (src1.isKnown()) { uint32_t lval = src1.value(); // Use the other expression's un-const reason data = std::move(src2.data); rpn.emplace_back(RPN_CONST, lval); } else { // Otherwise just reuse its RPN vector data = std::move(src1.data); std::swap(rpn, src1.rpn); } // Now, merge the right expression into the left one if (src2.isKnown()) { // If the right expression is constant, append its value uint32_t rval = src2.value(); rpn.emplace_back(RPN_CONST, rval); } else { // Otherwise just extend with its RPN vector rpn.insert(rpn.end(), RANGE(src2.rpn)); } // Append the operator rpn.emplace_back(op); } } void Expression::addCheckHRAM() { if (!isKnown()) { rpn.emplace_back(RPN_HRAM); } else if (int32_t val = value(); val >= 0xFF00 && val <= 0xFFFF) { // That range is valid; only keep the lower byte data = val & 0xFF; } else { error("Source address $%" PRIx32 " not between $FF00 to $FFFF", val); } } void Expression::addCheckRST() { if (!isKnown()) { rpn.emplace_back(RPN_RST); } else if (int32_t val = value(); val & ~0x38) { // A valid RST address must be masked with 0x38 error("Invalid address $%" PRIx32 " for `RST`", val); } } void Expression::addCheckBitIndex(uint8_t mask) { assume((mask & 0xC0) != 0x00); // The high two bits must correspond to BIT, RES, or SET if (!isKnown()) { rpn.emplace_back(RPN_BIT_INDEX, mask); } else if (int32_t val = value(); val & ~0x07) { // A valid bit index must be masked with 0x07 static char const *instructions[4] = {"instruction", "`BIT`", "`RES`", "`SET`"}; error("Invalid bit index %" PRId32 " for %s", val, instructions[mask >> 6]); } } // Checks that an RPN expression's value fits within N bits (signed or unsigned) void Expression::checkNBit(uint8_t n) const { if (isKnown()) { ::checkNBit(value(), n, nullptr); } } bool checkNBit(int32_t v, uint8_t n, char const *name) { assume(n != 0); // That doesn't make sense assume(n < CHAR_BIT * sizeof(int)); // Otherwise `1 << n` is UB if (v < -(1 << n) || v >= 1 << n) { warning( WARNING_TRUNCATION_1, "%s must be %u-bit%s", name ? name : "Expression", n, n == 8 && !name ? "; use `LOW()` to force 8-bit" : "" ); return false; } if (v < -(1 << (n - 1))) { warning( WARNING_TRUNCATION_2, "%s must be %u-bit%s", name ? name : "Expression", n, n == 8 && !name ? "; use `LOW()` to force 8-bit" : "" ); return false; } return true; } void Expression::encode(std::vector &buffer) const { assume(buffer.empty()); if (isKnown()) { // If the RPN expression's value is known, output a constant directly uint32_t val = value(); buffer.resize(5); buffer[0] = RPN_CONST; buffer[1] = val & 0xFF; buffer[2] = val >> 8; buffer[3] = val >> 16; buffer[4] = val >> 24; } else { // If the RPN expression's value is not known, serialize its RPN values buffer.reserve(rpn.size() * 2); // Rough estimate of the serialized size for (RPNValue const &val : rpn) { val.appendEncoded(buffer); } } } RPNValue::RPNValue(RPNCommand cmd) : command(cmd), data(std::monostate{}) { assume( cmd != RPN_SIZEOF_SECTTYPE && cmd != RPN_STARTOF_SECTTYPE && cmd != RPN_BIT_INDEX && cmd != RPN_CONST && cmd != RPN_SYM && cmd != RPN_BANK_SYM && cmd != RPN_BANK_SECT && cmd != RPN_SIZEOF_SECT && cmd != RPN_STARTOF_SECT ); } RPNValue::RPNValue(RPNCommand cmd, uint8_t val) : command(cmd), data(val) { assume(cmd == RPN_SIZEOF_SECTTYPE || cmd == RPN_STARTOF_SECTTYPE || cmd == RPN_BIT_INDEX); } RPNValue::RPNValue(RPNCommand cmd, uint32_t val) : command(cmd), data(val) { assume(cmd == RPN_CONST); } RPNValue::RPNValue(RPNCommand cmd, InternedStr name) : command(cmd), data(name) { assume( cmd == RPN_SYM || cmd == RPN_BANK_SYM || cmd == RPN_BANK_SECT || cmd == RPN_SIZEOF_SECT || cmd == RPN_STARTOF_SECT ); } void RPNValue::appendEncoded(std::vector &buffer) const { // Every command starts with its own ID buffer.push_back(command); switch (command) { case RPN_CONST: { // The command ID is followed by a four-byte integer assume(std::holds_alternative(data)); uint32_t val = std::get(data); buffer.push_back(val & 0xFF); buffer.push_back(val >> 8); buffer.push_back(val >> 16); buffer.push_back(val >> 24); break; } case RPN_SYM: case RPN_BANK_SYM: { // The command ID is followed by a four-byte symbol ID assume(std::holds_alternative(data)); // The symbol name is always written expanded Symbol *sym = sym_FindExactSymbol(std::get(data)); out_RegisterSymbol(*sym); // Ensure that `sym->ID` is set buffer.push_back(sym->ID & 0xFF); buffer.push_back(sym->ID >> 8); buffer.push_back(sym->ID >> 16); buffer.push_back(sym->ID >> 24); break; } case RPN_BANK_SECT: case RPN_SIZEOF_SECT: case RPN_STARTOF_SECT: { // The command ID is followed by a NUL-terminated section name string assume(std::holds_alternative(data)); std::string const &name = std::get(data).str(); buffer.reserve(buffer.size() + name.length() + 1); buffer.insert(buffer.end(), RANGE(name)); buffer.push_back('\0'); break; } case RPN_SIZEOF_SECTTYPE: case RPN_STARTOF_SECTTYPE: case RPN_BIT_INDEX: // The command ID is followed by a byte value assume(std::holds_alternative(data)); buffer.push_back(std::get(data)); break; default: // Other command IDs are not followed by anything assume(std::holds_alternative(data)); break; } }