// rgbds/src/asm/rpn.cpp

// SPDX-License-Identifier: MIT

#include "asm/rpn.hpp"

#include <inttypes.h>
#include <limits.h>
#include <optional>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <string_view>
#include <utility>
#include <variant>
#include <vector>

#include "helpers.hpp" // assume
#include "linkdefs.hpp"
#include "opmath.hpp"

#include "asm/output.hpp"
#include "asm/section.hpp"
#include "asm/symbol.hpp"
#include "asm/warning.hpp"

using namespace std::literals;

// Returns the expression's value when it is known.
// Otherwise, reports an error that includes the string held in `data`, and returns 0.
int32_t Expression::getConstVal() const {
	if (isKnown()) {
		return value();
	}

	error("Expected constant expression: %s", std::get<std::string>(data).c_str());
	return 0;
}

// Looks up the symbol named by this expression, provided that its RPN consists of exactly one
// `RPN_SYM` entry; returns `nullptr` for any other shape of expression.
Symbol const *Expression::symbolOf() const {
	if (rpn.size() == 1 && rpn[0].command == RPN_SYM) {
		return sym_FindScopedSymbol(std::get<InternedStr>(rpn[0].data));
	}
	return nullptr;
}

// Returns whether this expression is a lone label, `sym` is a label too, and both of them
// belong to the same (non-null) section.
bool Expression::isDiffConstant(Symbol const *sym) const {
	Symbol const *self = symbolOf();

	// Both sides must exist...
	if (!self || !sym) {
		return false;
	}
	// ...and both must be labels
	if (self->type != SYM_LABEL || sym->type != SYM_LABEL) {
		return false;
	}

	Section const *selfSect = self->getSection();
	Section const *otherSect = sym->getSection();
	return selfSect == otherSect && selfSect != nullptr;
}

// Turns this (still empty) expression into a known constant.
// The unsigned argument is stored converted to a signed 32-bit integer.
void Expression::makeNumber(uint32_t value) {
	assume(rpn.empty());
	int32_t signedValue = static_cast<int32_t>(value);
	data = signedValue;
}

// Turns this (still empty) expression into a reference to the symbol called `symName`.
// - PC outside of a section, or a symbol that is neither numeric nor a label: an error is
//   reported, and the expression becomes the constant 0.
// - A constant symbol: the expression becomes that symbol's constant value.
// - Anything else (including a symbol that was not found): the expression becomes unknown,
//   `data` holds the reason as a string, and a `RPN_SYM` entry is recorded.
void Expression::makeSymbol(InternedStr symName) {
	assume(rpn.empty());
	Symbol *sym = sym_FindScopedSymbol(symName);

	if (sym_IsPC(sym) && !sect_GetSymbolSection()) {
		error("PC has no value outside of a section");
		data = 0;
		return;
	}

	if (sym && !sym->isNumeric() && !sym->isLabel()) {
		error("`%s` is not a numeric symbol", symName.c_str());
		data = 0;
		return;
	}

	if (sym && sym->isConstant()) {
		data = static_cast<int32_t>(sym->getConstantValue());
		return;
	}

	// From here on, the value is not known: build the message explaining why
	if (sym_IsPC(sym)) {
		data = "PC is not constant at assembly time"s;
	} else {
		std::string reason;
		if (sym && sym->isDefined()) {
			reason = "`"s + symName.str() + "` is not constant at assembly time";
		} else {
			reason = "undefined symbol `"s + symName.str() + "`";
		}
		if (sym_IsPurgedScoped(symName)) {
			reason += "; it was purged";
		}
		data = std::move(reason);
	}

	sym = sym_Ref(symName);
	rpn.emplace_back(RPN_SYM, sym->name);
}

// Turns this (still empty) expression into `BANK(symName)`.
// - For PC, the current output bank is used: no bank at all is an error (the expression becomes
//   the constant 1), a bank of `UINT32_MAX` makes the expression unknown (`RPN_BANK_SELF`).
// - For a symbol that exists but is not a label, an error is reported and the expression becomes
//   the constant 1.
// - Otherwise the symbol is referenced; the expression is constant if the symbol's section has a
//   bank other than `UINT32_MAX`, and unknown (`RPN_BANK_SYM`) if not.
void Expression::makeBankSymbol(InternedStr symName) {
	assume(rpn.empty());
	Symbol const *sym = sym_FindScopedSymbol(symName);

	if (sym_IsPC(sym)) {
		// The @ symbol is treated differently.
		std::optional<uint32_t> outputBank = sect_GetOutputBank();
		if (!outputBank) {
			error("PC has no bank outside of a section");
			data = 1;
		} else if (*outputBank != UINT32_MAX) {
			data = static_cast<int32_t>(*outputBank);
		} else {
			data = "Current section's bank is not known";
			rpn.emplace_back(RPN_BANK_SELF);
		}
		return;
	}

	if (sym && !sym->isLabel()) {
		error("`BANK` argument must be a label");
		data = 1;
		return;
	}

	sym = sym_Ref(symName);
	assume(sym); // If the symbol didn't exist, it should have been created

	if (Section const *sect = sym->getSection(); sect && sect->bank != UINT32_MAX) {
		// Symbol's section is known and bank is fixed
		data = static_cast<int32_t>(sect->bank);
		return;
	}

	std::string reason = "`"s + symName.str() + "`'s bank is not known";
	if (sym_IsPurgedScoped(symName)) {
		reason += "; it was purged";
	}
	data = std::move(reason);
	rpn.emplace_back(RPN_BANK_SYM, sym->name);
}

// Turns this (still empty) expression into `BANK("sectName")`.
// The result is constant if the section is found and its bank is not `UINT32_MAX`; otherwise the
// expression is unknown and a `RPN_BANK_SECT` entry is recorded.
void Expression::makeBankSection(std::string const &sectName) {
	assume(rpn.empty());
	Section *sect = sect_FindSectionByName(sectName);

	if (!sect || sect->bank == UINT32_MAX) {
		data = "Section \""s + sectName + "\"'s bank is not known";
		rpn.emplace_back(RPN_BANK_SECT, intern(sectName));
		return;
	}

	data = static_cast<int32_t>(sect->bank);
}

// Turns this (still empty) expression into `SIZEOF("sectName")`.
// The result is constant if the section is found and reports its size as known; otherwise the
// expression is unknown and a `RPN_SIZEOF_SECT` entry is recorded.
void Expression::makeSizeOfSection(std::string const &sectName) {
	assume(rpn.empty());
	Section *sect = sect_FindSectionByName(sectName);

	if (!sect || !sect->isSizeKnown()) {
		data = "Section \""s + sectName + "\"'s size is not known";
		rpn.emplace_back(RPN_SIZEOF_SECT, intern(sectName));
		return;
	}

	data = static_cast<int32_t>(sect->size);
}

// Turns this (still empty) expression into `STARTOF("sectName")`.
// The result is constant if the section is found and its `org` is not `UINT32_MAX`; otherwise
// the expression is unknown and a `RPN_STARTOF_SECT` entry is recorded.
void Expression::makeStartOfSection(std::string const &sectName) {
	assume(rpn.empty());
	Section *sect = sect_FindSectionByName(sectName);

	if (!sect || sect->org == UINT32_MAX) {
		data = "Section \""s + sectName + "\"'s start is not known";
		rpn.emplace_back(RPN_STARTOF_SECT, intern(sectName));
		return;
	}

	data = static_cast<int32_t>(sect->org);
}

// Turns this (still empty) expression into the size of a section type.
// This is never resolved here: the expression is always left unknown, with a
// `RPN_SIZEOF_SECTTYPE` entry carrying the type as a byte.
void Expression::makeSizeOfSectionType(SectionType type) {
	assume(rpn.empty());
	uint8_t typeByte = static_cast<uint8_t>(type);
	data = "Section type's size is not known";
	rpn.emplace_back(RPN_SIZEOF_SECTTYPE, typeByte);
}

// Turns this (still empty) expression into the start address of a section type.
// This is never resolved here: the expression is always left unknown, with a
// `RPN_STARTOF_SECTTYPE` entry carrying the type as a byte.
void Expression::makeStartOfSectionType(SectionType type) {
	assume(rpn.empty());
	uint8_t typeByte = static_cast<uint8_t>(type);
	data = "Section type's start is not known";
	rpn.emplace_back(RPN_STARTOF_SECTTYPE, typeByte);
}

// Returns whether the known operand is zero. `lhs` is looked at if it is known, `rhs` otherwise;
// the result is `false` if neither is known.
static bool tryConstZero(Expression const &lhs, Expression const &rhs) {
	if (lhs.isKnown()) {
		return lhs.value() == 0;
	}
	return rhs.isKnown() && rhs.value() == 0;
}

// Returns whether the known operand is non-zero. `lhs` is looked at if it is known, `rhs`
// otherwise; the result is `false` if neither is known.
static bool tryConstNonzero(Expression const &lhs, Expression const &rhs) {
	if (lhs.isKnown()) {
		return lhs.value() != 0;
	}
	return rhs.isKnown() && rhs.value() != 0;
}

// Returns whether the bits of a non-constant symbol that are fixed by its section's alignment
// are already known to be non-zero (the caller then folds `!expr` to 0).
// Returns `false` if the expression is not a lone, defined symbol within a section.
static bool tryConstLogNot(Expression const &expr) {
	Symbol const *sym = expr.symbolOf();
	if (sym == nullptr) {
		return false;
	}
	if (!sym->getSection() || !sym->isDefined()) {
		return false;
	}

	assume(sym->isNumeric());

	Section const &sect = *sym->getSection();
	// `sym->getValue()` attempts to add the section's address, but that's `UINT32_MAX`
	// because the section is floating (otherwise we wouldn't be here)
	assume(sect.org == UINT32_MAX);

	// Only the bits below the section's alignment are known
	int32_t unknownBits = (1 << 16) - (1 << sect.align);
	int32_t symbolOfs = sym->getValue() + 1;

	return ((symbolOfs + sect.alignOfs) & ~unknownBits) != 0;
}

// Tries computing LOW() of a non-constant argument; yields -1 when that is not possible.
// It is possible when the argument is a lone, defined symbol whose section is aligned to at
// least 8 bits, since the low byte is then entirely determined.
static int32_t tryConstLow(Expression const &expr) {
	Symbol const *sym = expr.symbolOf();
	bool usable = sym && sym->getSection() && sym->isDefined();
	if (!usable) {
		return -1;
	}

	assume(sym->isNumeric());

	Section const &sect = *sym->getSection();
	// With less than 8 bits of alignment, part of the low byte is unknown
	if (sect.align < 8) {
		return -1;
	}

	// `sym->getValue()` attempts to add the section's address, but that's `UINT32_MAX`
	// because the section is floating (otherwise we wouldn't be here)
	assume(sect.org == UINT32_MAX);
	int32_t offsetInSect = sym->getValue() + 1;

	return op_low(offsetInSect + sect.alignOfs);
}

// Tries computing a binary AND where one operand is not constant; yields -1 when that is not
// possible. It is possible when one operand is a lone, defined symbol in an `ALIGN[N]` section,
// and the other operand is a constant that only keeps (some of) the lower N bits.
static int32_t tryConstMask(Expression const &lhs, Expression const &rhs) {
	// Prefer the lhs as the symbol; only if it isn't one, try the other way around
	Symbol const *sym = lhs.symbolOf();
	Expression const *maskExpr = &rhs; // Always the operand opposite of `sym`
	if (!sym) {
		sym = rhs.symbolOf();
		maskExpr = &lhs;
	}

	if (!sym || !sym->getSection()) {
		return -1;
	}
	if (!sym->isDefined() || !maskExpr->isKnown()) {
		return -1;
	}

	assume(sym->isNumeric());

	// `maskExpr` is known, so its value can be read safely
	int32_t mask = maskExpr->value();

	// Give up if the mask keeps any bit that the alignment does not determine
	Section const &sect = *sym->getSection();
	int32_t unknownBits = (1 << 16) - (1 << sect.align);
	if ((unknownBits & mask) != 0) {
		return -1;
	}

	// `sym->getValue()` attempts to add the section's address, but that's `UINT32_MAX`
	// because the section is floating (otherwise we wouldn't be here)
	assume(sect.org == UINT32_MAX);
	int32_t offsetInSect = sym->getValue() + 1;

	return (offsetInSect + sect.alignOfs) & mask;
}

// Builds the result of applying unary operator `op` to `src` into this (still empty) expression.
// A known operand is evaluated right away. For an unknown operand, `!` and `LOW()` may still be
// folded to a constant thanks to section alignment; failing that, the operand's RPN and reason
// are taken over and `op` is appended.
void Expression::makeUnaryOp(RPNCommand op, Expression &&src) {
	assume(rpn.empty());

	if (!src.isKnown()) {
		if (op == RPN_LOGNOT && tryConstLogNot(src)) {
			data = 0;
			return;
		}
		if (op == RPN_LOW) {
			if (int32_t lowByte = tryConstLow(src); lowByte != -1) {
				data = lowByte;
				return;
			}
		}

		// Nothing can be folded: reuse the operand's RPN vector and append the operator
		data = std::move(src.data);
		std::swap(rpn, src.rpn);
		rpn.emplace_back(op);
		return;
	}

	// The operand is known, so the result can simply be computed
	int32_t val = src.value();
	switch (op) {
	case RPN_NEG:
		data = op_neg(val);
		break;
	case RPN_NOT:
		data = ~val;
		break;
	case RPN_LOGNOT:
		data = !val;
		break;
	case RPN_HIGH:
		data = op_high(val);
		break;
	case RPN_LOW:
		data = op_low(val);
		break;
	case RPN_BITWIDTH:
		data = op_bitwidth(val);
		break;
	case RPN_TZCOUNT:
		data = op_tzcount(val);
		break;
	// LCOV_EXCL_START
	default:
		// `makeUnaryOp` should never be called with a non-unary operator!
		unreachable_();
	}
	// LCOV_EXCL_STOP
}

// Builds the result of applying binary operator `op` to `src1` and `src2` into this (still
// empty) expression.
// - If both operands are known, the value is computed right away. Questionable shifts and
//   `INT32_MIN / -1` emit warnings; division or modulo by zero, and negative exponents, call
//   `fatal`.
// - Otherwise, a few cases can still be folded to a constant: the difference of two labels in
//   the same section, AND with a known zero, logical OR with a known non-zero, and masking a
//   symbol whose section is sufficiently aligned.
// - Failing that, both operands' RPN are concatenated (known operands being emitted as
//   `RPN_CONST`), and `op` is appended.
// `src1` may be moved from (its RPN and reason are reused); `src2` is only read.
void Expression::makeBinaryOp(RPNCommand op, Expression &&src1, Expression const &src2) {
	assume(rpn.empty());
	// First, check if the expressions are known
	if (src1.isKnown() && src2.isKnown()) {
		// If both expressions are known, just compute the value
		int32_t lval = src1.value(), rval = src2.value();
		// Unsigned copies, so that wrapping arithmetic is well-defined
		uint32_t ulval = static_cast<uint32_t>(lval), urval = static_cast<uint32_t>(rval);

		switch (op) {
		case RPN_LOGOR:
			data = lval || rval;
			break;
		case RPN_LOGAND:
			data = lval && rval;
			break;
		case RPN_LOGEQ:
			data = lval == rval;
			break;
		case RPN_LOGGT:
			data = lval > rval;
			break;
		case RPN_LOGLT:
			data = lval < rval;
			break;
		case RPN_LOGGE:
			data = lval >= rval;
			break;
		case RPN_LOGLE:
			data = lval <= rval;
			break;
		case RPN_LOGNE:
			data = lval != rval;
			break;
		case RPN_ADD:
			data = static_cast<int32_t>(ulval + urval);
			break;
		case RPN_SUB:
			data = static_cast<int32_t>(ulval - urval);
			break;
		case RPN_XOR:
			data = lval ^ rval;
			break;
		case RPN_OR:
			data = lval | rval;
			break;
		case RPN_AND:
			data = lval & rval;
			break;
		case RPN_SHL:
			if (rval < 0) {
				warning(WARNING_SHIFT_AMOUNT, "Shifting left by negative amount %" PRId32, rval);
			}
			if (rval >= 32) {
				warning(WARNING_SHIFT_AMOUNT, "Shifting left by large amount %" PRId32, rval);
			}
			data = op_shift_left(lval, rval);
			break;
		case RPN_SHR:
			if (lval < 0) {
				warning(WARNING_SHIFT, "Shifting right negative value %" PRId32, lval);
			}
			if (rval < 0) {
				warning(WARNING_SHIFT_AMOUNT, "Shifting right by negative amount %" PRId32, rval);
			}
			if (rval >= 32) {
				warning(WARNING_SHIFT_AMOUNT, "Shifting right by large amount %" PRId32, rval);
			}
			data = op_shift_right(lval, rval);
			break;
		case RPN_USHR:
			if (rval < 0) {
				warning(WARNING_SHIFT_AMOUNT, "Shifting right by negative amount %" PRId32, rval);
			}
			if (rval >= 32) {
				warning(WARNING_SHIFT_AMOUNT, "Shifting right by large amount %" PRId32, rval);
			}
			data = op_shift_right_unsigned(lval, rval);
			break;
		case RPN_MUL:
			data = static_cast<int32_t>(ulval * urval);
			break;
		case RPN_DIV:
			if (rval == 0) {
				fatal("Division by zero");
			}
			// This quotient is not representable, so it is special-cased instead of computed
			if (lval == INT32_MIN && rval == -1) {
				warning(
				    WARNING_DIV,
				    "Division of %" PRId32 " by -1 yields %" PRId32,
				    INT32_MIN,
				    INT32_MIN
				);
				data = INT32_MIN;
			} else {
				data = op_divide(lval, rval);
			}
			break;
		case RPN_MOD:
			if (rval == 0) {
				fatal("Modulo by zero");
			}
			// Same special case as for division, but the remainder is simply 0
			if (lval == INT32_MIN && rval == -1) {
				data = 0;
			} else {
				data = op_modulo(lval, rval);
			}
			break;
		case RPN_EXP:
			if (rval < 0) {
				fatal("Exponentiation by negative power");
			}
			data = op_exponent(lval, rval);
			break;
		// LCOV_EXCL_START
		default:
			// `makeBinaryOp` should never be called with a non-binary operator!
			unreachable_();
		}
		// LCOV_EXCL_STOP
	} else if (op == RPN_SUB && src1.isDiffConstant(src2.symbolOf())) {
		// Both operands are labels in the same section, so their difference is constant
		data = src1.symbolOf()->getValue() - src2.symbolOf()->getValue();
	} else if ((op == RPN_LOGAND || op == RPN_AND) && tryConstZero(src1, src2)) {
		data = 0;
	} else if (op == RPN_LOGOR && tryConstNonzero(src1, src2)) {
		data = 1;
	} else if (int32_t constVal; op == RPN_AND && (constVal = tryConstMask(src1, src2)) != -1) {
		data = constVal;
	} else {
		// If it's not known, start computing the RPN expression

		// Convert the left-hand expression if it's constant
		if (src1.isKnown()) {
			uint32_t lval = src1.value();
			// Use the other expression's un-const reason.
			// `src2` is a reference to const, so this can only ever be a copy; spelling it as a
			// plain copy (instead of a `std::move` that silently degrades to one) makes that clear.
			data = src2.data;
			rpn.emplace_back(RPN_CONST, lval);
		} else {
			// Otherwise just reuse its RPN vector
			data = std::move(src1.data);
			std::swap(rpn, src1.rpn);
		}

		// Now, merge the right expression into the left one
		if (src2.isKnown()) {
			// If the right expression is constant, append its value
			uint32_t rval = src2.value();
			rpn.emplace_back(RPN_CONST, rval);
		} else {
			// Otherwise just extend with its RPN vector
			rpn.insert(rpn.end(), RANGE(src2.rpn));
		}
		// Append the operator
		rpn.emplace_back(op);
	}
}

void Expression::addCheckHRAM() {
	if (!isKnown()) {
		rpn.emplace_back(RPN_HRAM);
	} else if (int32_t val = value(); val >= 0xFF00 && val <= 0xFFFF) {
		// That range is valid; only keep the lower byte
		data = val & 0xFF;
	} else {
		error("Source address $%" PRIx32 " not between $FF00 to $FFFF", val);
	}
}

void Expression::addCheckRST() {
	if (!isKnown()) {
		rpn.emplace_back(RPN_RST);
	} else if (int32_t val = value(); val & ~0x38) {
		// A valid RST address must be masked with 0x38
		error("Invalid address $%" PRIx32 " for `RST`", val);
	}
}

void Expression::addCheckBitIndex(uint8_t mask) {
	assume((mask & 0xC0) != 0x00); // The high two bits must correspond to BIT, RES, or SET
	if (!isKnown()) {
		rpn.emplace_back(RPN_BIT_INDEX, mask);
	} else if (int32_t val = value(); val & ~0x07) {
		// A valid bit index must be masked with 0x07
		static char const *instructions[4] = {"instruction", "`BIT`", "`RES`", "`SET`"};
		error("Invalid bit index %" PRId32 " for %s", val, instructions[mask >> 6]);
	}
}

// If this expression's value is known, checks that it fits within N bits (signed or unsigned)
// by delegating to the free function `checkNBit`; nothing is checked for unknown expressions.
void Expression::checkNBit(uint8_t n) const {
	if (!isKnown()) {
		return;
	}
	::checkNBit(value(), n, nullptr);
}

// Checks that `v` fits within `n` bits, and returns whether it does.
// - Values below `-(1 << n)`, or greater than or equal to `1 << n`, trigger a
//   `WARNING_TRUNCATION_1` warning.
// - Remaining values below `-(1 << (n - 1))` trigger a `WARNING_TRUNCATION_2` warning.
// `name` is what the warning calls the value; if null, "Expression" is used, and 8-bit checks
// additionally suggest using `LOW()`.
bool checkNBit(int32_t v, uint8_t n, char const *name) {
	assume(n != 0);                     // That doesn't make sense
	assume(n < CHAR_BIT * sizeof(int)); // Otherwise `1 << n` is UB

	// Both warnings share the same wording
	char const *subject = name ? name : "Expression";
	char const *hint = n == 8 && !name ? "; use `LOW()` to force 8-bit" : "";

	bool outOfRange = v < -(1 << n) || v >= 1 << n;
	if (outOfRange) {
		warning(WARNING_TRUNCATION_1, "%s must be %u-bit%s", subject, n, hint);
		return false;
	}

	bool belowSignedRange = v < -(1 << (n - 1));
	if (belowSignedRange) {
		warning(WARNING_TRUNCATION_2, "%s must be %u-bit%s", subject, n, hint);
		return false;
	}

	return true;
}

void Expression::encode(std::vector<uint8_t> &buffer) const {
	assume(buffer.empty());

	if (isKnown()) {
		// If the RPN expression's value is known, output a constant directly
		uint32_t val = value();
		buffer.resize(5);
		buffer[0] = RPN_CONST;
		buffer[1] = val & 0xFF;
		buffer[2] = val >> 8;
		buffer[3] = val >> 16;
		buffer[4] = val >> 24;
	} else {
		// If the RPN expression's value is not known, serialize its RPN values
		buffer.reserve(rpn.size() * 2); // Rough estimate of the serialized size
		for (RPNValue const &val : rpn) {
			val.appendEncoded(buffer);
		}
	}
}

// Constructs a RPN entry that carries no payload.
// Commands that require a payload (a byte, a 32-bit constant, or a name) must use one of the
// other constructors instead.
RPNValue::RPNValue(RPNCommand cmd) : command(cmd), data(std::monostate{}) {
	assume(
	    !(cmd == RPN_SIZEOF_SECTTYPE || cmd == RPN_STARTOF_SECTTYPE || cmd == RPN_BIT_INDEX
	      || cmd == RPN_CONST || cmd == RPN_SYM || cmd == RPN_BANK_SYM || cmd == RPN_BANK_SECT
	      || cmd == RPN_SIZEOF_SECT || cmd == RPN_STARTOF_SECT)
	);
}

// Constructs a RPN entry whose payload is a single byte.
// Only the section-type queries and the bit index check take such a payload.
RPNValue::RPNValue(RPNCommand cmd, uint8_t val) : command(cmd), data(val) {
	assume(cmd == RPN_BIT_INDEX || cmd == RPN_SIZEOF_SECTTYPE || cmd == RPN_STARTOF_SECTTYPE);
}

// Constructs a RPN entry whose payload is a 32-bit integer.
// `RPN_CONST` is the only command taking such a payload.
RPNValue::RPNValue(RPNCommand cmd, uint32_t val) : command(cmd), data(val) {
	assume(!(cmd != RPN_CONST));
}

// Constructs a RPN entry whose payload is a name.
// Only the symbol commands and the named-section queries take such a payload.
RPNValue::RPNValue(RPNCommand cmd, InternedStr name) : command(cmd), data(name) {
	assume(
	    cmd == RPN_BANK_SECT || cmd == RPN_SIZEOF_SECT || cmd == RPN_STARTOF_SECT
	    || cmd == RPN_SYM || cmd == RPN_BANK_SYM
	);
}

// Appends the serialized form of this RPN entry to `buffer`: the command's ID, followed by a
// payload that depends on the command (a four-byte constant or symbol ID written least
// significant byte first, a NUL-terminated section name, a single byte, or nothing at all).
void RPNValue::appendEncoded(std::vector<uint8_t> &buffer) const {
	// Writes a 32-bit value, least significant byte first
	auto appendLong = [&buffer](uint32_t word) {
		for (int shift = 0; shift < 32; shift += 8) {
			buffer.push_back(static_cast<uint8_t>(word >> shift));
		}
	};

	// Every command starts with its own ID
	buffer.push_back(command);

	switch (command) {
	case RPN_CONST:
		// The command ID is followed by a four-byte integer
		assume(std::holds_alternative<uint32_t>(data));
		appendLong(std::get<uint32_t>(data));
		break;

	case RPN_SYM:
	case RPN_BANK_SYM: {
		// The command ID is followed by a four-byte symbol ID
		assume(std::holds_alternative<InternedStr>(data));
		// The symbol name is always written expanded
		Symbol *sym = sym_FindExactSymbol(std::get<InternedStr>(data));
		out_RegisterSymbol(*sym); // Ensure that `sym->ID` is set
		appendLong(sym->ID);
		break;
	}

	case RPN_BANK_SECT:
	case RPN_SIZEOF_SECT:
	case RPN_STARTOF_SECT: {
		// The command ID is followed by a NUL-terminated section name string
		assume(std::holds_alternative<InternedStr>(data));
		std::string const &sectName = std::get<InternedStr>(data).str();
		buffer.reserve(buffer.size() + sectName.length() + 1);
		buffer.insert(buffer.end(), RANGE(sectName));
		buffer.push_back('\0');
		break;
	}

	case RPN_SIZEOF_SECTTYPE:
	case RPN_STARTOF_SECTTYPE:
	case RPN_BIT_INDEX: {
		// The command ID is followed by a byte value
		assume(std::holds_alternative<uint8_t>(data));
		uint8_t payload = std::get<uint8_t>(data);
		buffer.push_back(payload);
		break;
	}

	default:
		// Other command IDs are not followed by anything
		assume(std::holds_alternative<std::monostate>(data));
		break;
	}
}