Split RGBASM parser action functions into their own file

This commit is contained in:
Rangi42
2025-07-26 15:35:48 -04:00
parent 3bc8b1ff7c
commit f3cbfcecf4
7 changed files with 559 additions and 523 deletions

View File

@@ -55,6 +55,7 @@ common_obj := \
rgbasm_obj := \ rgbasm_obj := \
${common_obj} \ ${common_obj} \
src/asm/actions.o \
src/asm/charmap.o \ src/asm/charmap.o \
src/asm/fixpoint.o \ src/asm/fixpoint.o \
src/asm/format.o \ src/asm/format.o \

42
include/asm/actions.hpp Normal file
View File

@@ -0,0 +1,42 @@
// SPDX-License-Identifier: MIT
// Declarations of the helper functions called from the RGBASM parser's semantic actions.
#ifndef RGBDS_ASM_ACTIONS_HPP
#define RGBDS_ASM_ACTIONS_HPP
#include <optional>
#include <stdint.h>
#include <string>
#include <string_view>
#include <variant>
#include <vector>
#include "asm/output.hpp" // AssertionType
#include "asm/rpn.hpp" // RPNCommand
// Reads at most `maxLen` bytes of the file `name` for `READFILE`.
// `std::nullopt` tells the caller to `YYACCEPT`; an empty string is returned on other failures.
std::optional<std::string> act_ReadFile(std::string const &name, uint32_t maxLen);
// Converts a string's character units to a number: a single unit is used as-is,
// anything else is deprecated and packs the low 8 bits of (at most) the last four units.
uint32_t act_StringToNum(std::vector<int32_t> const &str);
// Counts the UTF-8 characters in `str`; invalid bytes and a trailing partial character count as
// one each, and are reported as errors only if `printErrors` is set.
size_t act_StringLen(std::string const &str, bool printErrors);
// `STRSLICE`: extracts the UTF-8 characters at 0-based indexes `start` (inclusive) to `stop`
// (exclusive).
std::string act_StringSlice(std::string const &str, uint32_t start, uint32_t stop);
// `STRSUB`: extracts `len` UTF-8 characters starting at the 1-based position `pos`.
std::string act_StringSub(std::string const &str, uint32_t pos, uint32_t len);
// Counts how many characters `charmap_ConvertNext` can consume from `str`.
size_t act_CharLen(std::string const &str);
// `STRCHAR`: extracts the character at 0-based index `idx`, as delimited by `charmap_ConvertNext`.
std::string act_StringChar(std::string const &str, uint32_t idx);
// `CHARSUB`: extracts the character at 1-based position `pos`, as delimited by
// `charmap_ConvertNext`.
std::string act_CharSub(std::string const &str, uint32_t pos);
// Compares two strings by the values `charmap_ConvertNext` produces for them;
// returns -1, 0, or 1.
int32_t act_CharCmp(std::string_view str1, std::string_view str2);
// Makes a negative 0-based index count from the end of a `len`-long string, clamping to 0
// (with a warning mentioning `functionName`) if it is still negative.
uint32_t act_AdjustNegativeIndex(int32_t idx, size_t len, char const *functionName);
// Makes a negative 1-based position count from the end of a `len`-long string, clamping to 1
// (with a warning mentioning `functionName`) if it is still below 1.
uint32_t act_AdjustNegativePos(int32_t pos, size_t len, char const *functionName);
// `STRRPL`: replaces every occurrence of `old` in `str` with `rep`; an empty `old` only warns.
std::string act_StringReplace(std::string_view str, std::string const &old, std::string const &rep);
// `STRFMT`: expands the `%` format specs in `spec` using `args`, in order.
std::string act_StringFormat(
std::string const &spec, std::vector<std::variant<uint32_t, std::string>> const &args
);
// Applies `op` to the symbol `symName` and `constValue`, and stores the result back as a variable.
void act_CompoundAssignment(std::string const &symName, RPNCommand op, int32_t constValue);
// Reports a failed assertion with the severity given by `type`.
void act_FailAssert(AssertionType type);
// Same as `act_FailAssert`, but includes `message` in the diagnostic.
void act_FailAssertMsg(AssertionType type, std::string const &message);
#endif // RGBDS_ASM_ACTIONS_HPP

View File

@@ -84,9 +84,9 @@ void sect_EndUnion();
void sect_CheckUnionClosed(); void sect_CheckUnionClosed();
void sect_ConstByte(uint8_t byte); void sect_ConstByte(uint8_t byte);
void sect_ByteString(std::vector<int32_t> const &string); void sect_ByteString(std::vector<int32_t> const &str);
void sect_WordString(std::vector<int32_t> const &string); void sect_WordString(std::vector<int32_t> const &str);
void sect_LongString(std::vector<int32_t> const &string); void sect_LongString(std::vector<int32_t> const &str);
void sect_Skip(uint32_t skip, bool ds); void sect_Skip(uint32_t skip, bool ds);
void sect_RelByte(Expression const &expr, uint32_t pcShift); void sect_RelByte(Expression const &expr, uint32_t pcShift);
void sect_RelBytes(uint32_t n, std::vector<Expression> const &exprs); void sect_RelBytes(uint32_t n, std::vector<Expression> const &exprs);

View File

@@ -34,6 +34,7 @@ BISON_TARGET(LINKER_SCRIPT_PARSER "link/script.y"
set(rgbasm_src set(rgbasm_src
"${BISON_ASM_PARSER_OUTPUT_SOURCE}" "${BISON_ASM_PARSER_OUTPUT_SOURCE}"
"asm/actions.cpp"
"asm/charmap.cpp" "asm/charmap.cpp"
"asm/fixpoint.cpp" "asm/fixpoint.cpp"
"asm/format.cpp" "asm/format.cpp"

467
src/asm/actions.cpp Normal file
View File

@@ -0,0 +1,467 @@
#include "asm/actions.hpp"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "extern/utf8decoder.hpp"
#include "helpers.hpp"
#include "asm/charmap.hpp"
#include "asm/format.hpp"
#include "asm/fstack.hpp"
#include "asm/symbol.hpp"
#include "asm/warning.hpp"
// Reads the contents of the file `name` (located through `fstk_FindFile`) for `READFILE`.
// At most `maxLen` bytes are read.
//
// Returns `std::nullopt` if the file could not be opened and `fstk_FileError` returned true,
// which tells the caller to `YYACCEPT`; an empty string if any other error occurred (it has
// already been reported); and the bytes that were read otherwise.
std::optional<std::string> act_ReadFile(std::string const &name, uint32_t maxLen) {
	FILE *file = nullptr;
	if (std::optional<std::string> fullPath = fstk_FindFile(name); fullPath) {
		file = fopen(fullPath->c_str(), "rb");
	}
	if (!file) {
		if (fstk_FileError(name, "READFILE")) {
			// If `fstk_FileError` returned true due to `-MG`, we should abort due to a
			// missing file, so return `std::nullopt`, which tells the caller to `YYACCEPT`
			return std::nullopt;
		}
		return "";
	}
	Defer closeFile{[&] { fclose(file); }};

	std::string contents;

	if (fseek(file, 0, SEEK_END) == 0) {
		// The file is seekable, so its size is known ahead of time:
		// if it is shorter than the max length, just read as many bytes as there are
		size_t readSize = maxLen;
		if (long fileSize = ftell(file);
		    fileSize >= 0 && static_cast<size_t>(fileSize) < readSize) {
			readSize = fileSize;
		}
		fseek(file, 0, SEEK_SET);

		contents.resize(readSize);
		if (fread(&contents[0], 1, readSize, file) < readSize || ferror(file)) {
			error("Error reading READFILE file '%s': %s", name.c_str(), strerror(errno));
			return "";
		}
		return contents;
	}

	// `errno` is still the one set by the failed `fseek` above
	if (errno != ESPIPE) {
		error("Error determining size of READFILE file '%s': %s", name.c_str(), strerror(errno));
	}

	// The size is unknown (e.g. the file is a pipe), so read in fixed-size chunks until either
	// `maxLen` bytes have been read or the input ends. This avoids allocating `maxLen` bytes up
	// front (4 GiB when no limit was given), and does not mistake a plain EOF for a read error.
	char chunk[4096];
	size_t remaining = maxLen;
	while (remaining > 0) {
		size_t wanted = remaining < sizeof(chunk) ? remaining : sizeof(chunk);
		size_t got = fread(chunk, 1, wanted, file);
		contents.append(chunk, got);
		remaining -= got;
		if (got < wanted) {
			break; // EOF or error; `ferror` below tells them apart
		}
	}
	if (ferror(file)) {
		error("Error reading READFILE file '%s': %s", name.c_str(), strerror(errno));
		return "";
	}
	return contents;
}
// Converts a string's character units into a single number.
// A lone unit is returned unchanged. Anything else is deprecated: a warning is emitted, the units
// are checked to fit in 8 bits, and the low bytes of (at most) the last four units are packed
// together, the last unit ending up in the least significant byte.
uint32_t act_StringToNum(std::vector<int32_t> const &str) {
	uint32_t const unitCount = str.size();

	if (unitCount == 1) {
		// A single character with a single value can be used directly as a number.
		return static_cast<uint32_t>(str.front());
	}

	warning(WARNING_OBSOLETE, "Treating multi-unit strings as numbers is deprecated");

	// Stop checking at the first unit that does not fit, so it is only reported once
	for (auto it = str.begin(); it != str.end(); ++it) {
		if (!checkNBit(*it, 8, "All character units")) {
			break;
		}
	}

	// Only the last four units (or fewer, if the string is shorter) contribute to the result
	uint32_t const firstUsed = unitCount > 4 ? unitCount - 4 : 0;
	uint32_t packed = 0;
	for (uint32_t i = firstUsed; i != unitCount; ++i) {
		packed = (packed << 8) | static_cast<uint8_t>(str[i]);
	}
	return packed;
}
// Reports `byte` (printed as two hex digits) as an invalid UTF-8 byte,
// prefixing the error message with the name of the built-in function, `functionName`.
static void errorInvalidUTF8Byte(uint8_t byte, char const *functionName) {
error("%s: Invalid UTF-8 byte 0x%02hhX", functionName, byte);
}
// Counts the UTF-8 characters in `str`.
// Each invalid byte counts as one character, as does a partial character at the end of the
// string; these are only reported as errors if `printErrors` is true.
size_t act_StringLen(std::string const &str, bool printErrors) {
	uint32_t state = UTF8_ACCEPT;
	uint32_t codepoint = 0;
	size_t count = 0;

	for (char ch : str) {
		uint8_t const byte = static_cast<uint8_t>(ch);
		auto const result = decode(&state, &codepoint, byte);

		if (result == UTF8_REJECT) {
			if (printErrors) {
				errorInvalidUTF8Byte(byte, "STRLEN");
			}
			// Restart decoding after the bad byte, which still counts as a character
			state = UTF8_ACCEPT;
			++count;
		} else if (result == UTF8_ACCEPT) {
			++count;
		}
		// Any other result means we are in the middle of a multi-byte character
	}

	// A character left unfinished at the end of the string counts as well.
	if (state != UTF8_ACCEPT) {
		if (printErrors) {
			error("STRLEN: Incomplete UTF-8 character");
		}
		++count;
	}

	return count;
}
// Implements `STRSLICE`: returns the UTF-8 characters of `str` from index `start` (inclusive) to
// index `stop` (exclusive), both counted from 0.
// Invalid bytes are reported as errors and count as one character each; indexes past the end of
// the string produce warnings, and the result is truncated to what the string contains.
std::string act_StringSlice(std::string const &str, uint32_t start, uint32_t stop) {
	size_t const byteCount = str.length();
	size_t bytePos = 0;
	uint32_t state = UTF8_ACCEPT;
	uint32_t codepoint = 0;
	uint32_t charIdx = 0;

	// Consumes bytes until `charIdx` reaches `target`, or the string runs out.
	auto advanceTo = [&](uint32_t target) {
		for (; bytePos < byteCount && charIdx < target; ++bytePos) {
			auto const result = decode(&state, &codepoint, str[bytePos]);
			if (result == UTF8_REJECT) {
				errorInvalidUTF8Byte(str[bytePos], "STRSLICE");
				state = UTF8_ACCEPT;
				++charIdx;
			} else if (result == UTF8_ACCEPT) {
				++charIdx;
			}
		}
	};

	// Find the byte at which the slice starts.
	advanceTo(start);

	// A start index equal to the string's length is allowed (it yields an empty slice);
	// anything beyond that is worth a warning.
	if (bytePos >= byteCount && start > charIdx) {
		warning(
		    WARNING_BUILTIN_ARG,
		    "STRSLICE: Start index %" PRIu32 " is past the end of the string",
		    start
		);
	}

	size_t const firstByte = bytePos;

	// Find the byte at which the slice stops.
	advanceTo(stop);

	// The string may end in the middle of a character.
	if (state != UTF8_ACCEPT) {
		error("STRSLICE: Incomplete UTF-8 character");
		++charIdx;
	}

	if (charIdx < stop) {
		warning(
		    WARNING_BUILTIN_ARG,
		    "STRSLICE: Stop index %" PRIu32 " is past the end of the string",
		    stop
		);
	}

	return str.substr(firstByte, bytePos - firstByte);
}
// Implements `STRSUB`: returns `len` UTF-8 characters of `str`, starting at position `pos`
// (counted from 1).
// Invalid bytes are reported as errors and count as one character each; a position or length
// reaching past the end of the string produces a warning, and the result is truncated.
std::string act_StringSub(std::string const &str, uint32_t pos, uint32_t len) {
	size_t const byteCount = str.length();
	size_t bytePos = 0;
	uint32_t state = UTF8_ACCEPT;
	uint32_t codepoint = 0;

	// Consumes bytes, counting complete characters in `counter`, until it reaches `limit`
	// or the string runs out.
	auto advance = [&](uint32_t &counter, uint32_t limit) {
		for (; bytePos < byteCount && counter < limit; ++bytePos) {
			auto const result = decode(&state, &codepoint, str[bytePos]);
			if (result == UTF8_REJECT) {
				errorInvalidUTF8Byte(str[bytePos], "STRSUB");
				state = UTF8_ACCEPT;
				++counter;
			} else if (result == UTF8_ACCEPT) {
				++counter;
			}
		}
	};

	// Find the byte at which the substring starts.
	uint32_t charPos = 1;
	advance(charPos, pos);

	// A position 1 past the end of the string is allowed, but will trigger the
	// "Length too big" warning below if the length is nonzero.
	if (bytePos >= byteCount && pos > charPos) {
		warning(
		    WARNING_BUILTIN_ARG, "STRSUB: Position %" PRIu32 " is past the end of the string", pos
		);
	}

	size_t const firstByte = bytePos;

	// Find how many bytes the requested characters span.
	uint32_t charCount = 0;
	advance(charCount, len);

	// The string may end in the middle of a character.
	if (state != UTF8_ACCEPT) {
		error("STRSUB: Incomplete UTF-8 character");
		++charCount;
	}

	if (charCount < len) {
		warning(WARNING_BUILTIN_ARG, "STRSUB: Length too big: %" PRIu32, len);
	}

	return str.substr(firstByte, bytePos - firstByte);
}
// Counts how many characters `charmap_ConvertNext` can consume from `str`
// before it reports that nothing is left.
size_t act_CharLen(std::string const &str) {
	std::string_view remaining = str;
	size_t count = 0;
	while (charmap_ConvertNext(remaining, nullptr)) {
		++count;
	}
	return count;
}
// Implements `STRCHAR`: returns the character of `str` at index `idx` (counted from 0), where
// characters are delimited by `charmap_ConvertNext`.
// If there is no such character, a warning is emitted and the result is empty.
std::string act_StringChar(std::string const &str, uint32_t idx) {
	std::string_view remaining = str;

	// Skip the characters before the requested one, giving up early if the string runs out.
	for (uint32_t skipped = 0; skipped < idx; ++skipped) {
		if (!charmap_ConvertNext(remaining, nullptr)) {
			break;
		}
	}

	// Whatever the next conversion consumes is the requested character.
	std::string_view const fromChar = remaining;
	if (!charmap_ConvertNext(remaining, nullptr)) {
		warning(
		    WARNING_BUILTIN_ARG, "STRCHAR: Index %" PRIu32 " is past the end of the string", idx
		);
	}

	size_t const consumed = fromChar.length() - remaining.length();
	return std::string(fromChar.substr(0, consumed));
}
// Implements `CHARSUB`: returns the character of `str` at position `pos` (counted from 1), where
// characters are delimited by `charmap_ConvertNext`.
// If there is no such character, a warning is emitted and the result is empty.
std::string act_CharSub(std::string const &str, uint32_t pos) {
	std::string_view remaining = str;

	// Skip the characters before the requested one, giving up early if the string runs out.
	for (uint32_t current = 1; current < pos; ++current) {
		if (!charmap_ConvertNext(remaining, nullptr)) {
			break;
		}
	}

	// Whatever the next conversion consumes is the requested character.
	std::string_view const fromChar = remaining;
	if (!charmap_ConvertNext(remaining, nullptr)) {
		warning(
		    WARNING_BUILTIN_ARG, "CHARSUB: Position %" PRIu32 " is past the end of the string", pos
		);
	}

	size_t const consumed = fromChar.length() - remaining.length();
	return std::string(fromChar.substr(0, consumed));
}
// Compares two strings value by value, using the values that `charmap_ConvertNext` produces for
// each of their characters.
// Returns -1 if `str1` sorts first, 1 if `str2` does, and 0 if both yield the same values;
// a string that runs out of values first sorts before the other.
int32_t act_CharCmp(std::string_view str1, std::string_view str2) {
	std::vector<int32_t> values1, values2;
	size_t next1 = 0, next2 = 0;

	for (;;) {
		// Convert one more character whenever a side's pending values are used up
		if (next1 >= values1.size()) {
			next1 = 0;
			values1.clear();
			charmap_ConvertNext(str1, &values1);
		}
		if (next2 >= values2.size()) {
			next2 = 0;
			values2.clear();
			charmap_ConvertNext(str2, &values2);
		}

		bool const done1 = values1.empty();
		bool const done2 = values2.empty();
		if (done1 && done2) {
			return 0;
		}
		if (done1) {
			return -1;
		}
		if (done2) {
			return 1;
		}

		int32_t const lhs = values1[next1++];
		int32_t const rhs = values2[next2++];
		if (lhs < rhs) {
			return -1;
		}
		if (lhs > rhs) {
			return 1;
		}
	}
}
// String functions adjust negative index arguments the same way,
// such that index -1 is the last character of a string of length `len`.
// An index that is still negative after that is clamped to 0, with a warning that names
// `functionName`.
uint32_t act_AdjustNegativeIndex(int32_t idx, size_t len, char const *functionName) {
	if (idx >= 0) {
		return static_cast<uint32_t>(idx);
	}

	// Negative indexes count backwards from the end of the string
	idx += len;
	if (idx >= 0) {
		return static_cast<uint32_t>(idx);
	}

	warning(WARNING_BUILTIN_ARG, "%s: Index starts at 0", functionName);
	return 0;
}
// STRSUB and CHARSUB adjust negative position arguments the same way,
// such that position -1 is the last character of a string of length `len`.
// A position that ends up below 1 (including a plain 0) is clamped to 1, with a warning that
// names `functionName`.
uint32_t act_AdjustNegativePos(int32_t pos, size_t len, char const *functionName) {
	// Negative positions count backwards from the end of the string
	if (pos < 0) {
		pos += len + 1;
	}

	if (pos >= 1) {
		return static_cast<uint32_t>(pos);
	}

	warning(WARNING_BUILTIN_ARG, "%s: Position starts at 1", functionName);
	return 1;
}
// Implements `STRRPL`: returns a copy of `str` in which every non-overlapping occurrence of `old`
// (searched left to right) is replaced with `rep`.
// Replacing an empty string is not possible: it triggers a warning, and `str` is returned as-is.
std::string
    act_StringReplace(std::string_view str, std::string const &old, std::string const &rep) {
	if (old.empty()) {
		warning(WARNING_EMPTY_STRRPL, "STRRPL: Cannot replace an empty string");
		return std::string(str);
	}

	std::string result;
	size_t cursor = 0; // Start of the part of `str` that has not been copied yet

	for (;;) {
		size_t const match = str.find(old, cursor);
		if (match == std::string_view::npos) {
			// No more occurrences: copy the rest verbatim
			result.append(str.substr(cursor));
			return result;
		}
		result.append(str.substr(cursor, match - cursor));
		result.append(rep);
		cursor = match + old.size();
	}
}
// Implements `STRFMT`: returns `spec` with each `%` format spec replaced by the corresponding
// element of `args` (taken in order), and each `%%` replaced by a single `%`.
// Invalid specs, as well as too few or too many arguments, are reported as errors; a spec that
// cannot be expanded is replaced by a `%`.
std::string act_StringFormat(
    std::string const &spec, std::vector<std::variant<uint32_t, std::string>> const &args
) {
	std::string str;
	size_t argIndex = 0;
	size_t const specLen = spec.length();

	// The explicit bound matters: if a format spec is cut short by the end of the string, `i`
	// already equals `specLen` when the loop body ends, so the `++i` below would otherwise make
	// `spec[i]` read past the string's terminator.
	for (size_t i = 0; i < specLen && spec[i] != '\0'; ++i) {
		int c = spec[i];

		if (c != '%') {
			str += c;
			continue;
		}

		// `i < specLen` held above, so this reads at most the terminator
		c = spec[++i];

		if (c == '%') {
			str += c;
			continue;
		}

		FormatSpec fmt{};

		// Feed characters to the format spec until it is complete, or the string ends
		while (c != '\0') {
			fmt.useCharacter(c);
			if (fmt.isFinished()) {
				break;
			}
			c = spec[++i];
		}

		if (fmt.isEmpty()) {
			error("STRFMT: Illegal '%%' at end of format string");
			str += '%';
			break;
		}

		if (!fmt.isValid()) {
			error("STRFMT: Invalid format spec for argument %zu", argIndex + 1);
			str += '%';
		} else if (argIndex >= args.size()) {
			// Will warn after formatting is done.
			str += '%';
		} else if (std::holds_alternative<uint32_t>(args[argIndex])) {
			fmt.appendNumber(str, std::get<uint32_t>(args[argIndex]));
		} else {
			fmt.appendString(str, std::get<std::string>(args[argIndex]));
		}

		++argIndex;
	}

	if (argIndex < args.size()) {
		error("STRFMT: %zu unformatted argument(s)", args.size() - argIndex);
	} else if (argIndex > args.size()) {
		error(
		    "STRFMT: Not enough arguments for format spec, got: %zu, need: %zu",
		    args.size(),
		    argIndex
		);
	}

	return str;
}
void act_CompoundAssignment(std::string const &symName, RPNCommand op, int32_t constValue) {
Expression oldExpr, constExpr, newExpr;
int32_t newValue;
oldExpr.makeSymbol(symName);
constExpr.makeNumber(constValue);
newExpr.makeBinaryOp(op, std::move(oldExpr), constExpr);
newValue = newExpr.getConstVal();
sym_AddVar(symName, newValue);
}
// Reports a failed assertion that has no custom message, using the diagnostic function that
// matches the assertion's `type`.
void act_FailAssert(AssertionType type) {
switch (type) {
case ASSERT_FATAL:
// No `break` here: presumably `fatal` does not return (TODO confirm);
// if it did, control would continue into the `ASSERT_ERROR` case.
fatal("Assertion failed");
case ASSERT_ERROR:
error("Assertion failed");
break;
case ASSERT_WARN:
warning(WARNING_ASSERT, "Assertion failed");
break;
}
}
// Reports a failed assertion along with its custom `message`, using the diagnostic function that
// matches the assertion's `type`.
void act_FailAssertMsg(AssertionType type, std::string const &message) {
switch (type) {
case ASSERT_FATAL:
// No `break` here: presumably `fatal` does not return (TODO confirm);
// if it did, control would continue into the `ASSERT_ERROR` case.
fatal("Assertion failed: %s", message.c_str());
case ASSERT_ERROR:
error("Assertion failed: %s", message.c_str());
break;
case ASSERT_WARN:
warning(WARNING_ASSERT, "Assertion failed: %s", message.c_str());
break;
}
}

View File

@@ -47,9 +47,9 @@
#include "extern/utf8decoder.hpp" #include "extern/utf8decoder.hpp"
#include "helpers.hpp" #include "helpers.hpp"
#include "asm/actions.hpp"
#include "asm/charmap.hpp" #include "asm/charmap.hpp"
#include "asm/fixpoint.hpp" #include "asm/fixpoint.hpp"
#include "asm/format.hpp"
#include "asm/fstack.hpp" #include "asm/fstack.hpp"
#include "asm/main.hpp" #include "asm/main.hpp"
#include "asm/opt.hpp" #include "asm/opt.hpp"
@@ -62,26 +62,6 @@
yy::parser::symbol_type yylex(); // Provided by lexer.cpp yy::parser::symbol_type yylex(); // Provided by lexer.cpp
static std::optional<std::string> readFile(std::string const &name, uint32_t maxLen);
static uint32_t strToNum(std::vector<int32_t> const &s);
static void errorInvalidUTF8Byte(uint8_t byte, char const *functionName);
static size_t strlenUTF8(std::string const &str, bool printErrors);
static std::string strsliceUTF8(std::string const &str, uint32_t start, uint32_t stop);
static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len);
static size_t charlenUTF8(std::string const &str);
static std::string strcharUTF8(std::string const &str, uint32_t idx);
static std::string charsubUTF8(std::string const &str, uint32_t pos);
static int32_t charcmp(std::string_view str1, std::string_view str2);
static uint32_t adjustNegativeIndex(int32_t idx, size_t len, char const *functionName);
static uint32_t adjustNegativePos(int32_t pos, size_t len, char const *functionName);
static std::string strrpl(std::string_view str, std::string const &old, std::string const &rep);
static std::string strfmt(
std::string const &spec, std::vector<std::variant<uint32_t, std::string>> const &args
);
static void compoundAssignment(std::string const &symName, RPNCommand op, int32_t constValue);
static void failAssert(AssertionType type);
static void failAssertMsg(AssertionType type, std::string const &message);
template <typename N, typename S> template <typename N, typename S>
static auto handleSymbolByType(std::string const &symName, N numCallback, S strCallback) { static auto handleSymbolByType(std::string const &symName, N numCallback, S strCallback) {
if (Symbol *sym = sym_FindScopedSymbol(symName); sym && sym->type == SYM_EQUS) { if (Symbol *sym = sym_FindScopedSymbol(symName); sym && sym->type == SYM_EQUS) {
@@ -819,24 +799,24 @@ assert:
if (!$3.isKnown()) { if (!$3.isKnown()) {
out_CreateAssert($2, $3, "", sect_GetOutputOffset()); out_CreateAssert($2, $3, "", sect_GetOutputOffset());
} else if ($3.value() == 0) { } else if ($3.value() == 0) {
failAssert($2); act_FailAssert($2);
} }
} }
| POP_ASSERT assert_type relocexpr COMMA string { | POP_ASSERT assert_type relocexpr COMMA string {
if (!$3.isKnown()) { if (!$3.isKnown()) {
out_CreateAssert($2, $3, $5, sect_GetOutputOffset()); out_CreateAssert($2, $3, $5, sect_GetOutputOffset());
} else if ($3.value() == 0) { } else if ($3.value() == 0) {
failAssertMsg($2, $5); act_FailAssertMsg($2, $5);
} }
} }
| POP_STATIC_ASSERT assert_type iconst { | POP_STATIC_ASSERT assert_type iconst {
if ($3 == 0) { if ($3 == 0) {
failAssert($2); act_FailAssert($2);
} }
} }
| POP_STATIC_ASSERT assert_type iconst COMMA string { | POP_STATIC_ASSERT assert_type iconst COMMA string {
if ($3 == 0) { if ($3 == 0) {
failAssertMsg($2, $5); act_FailAssertMsg($2, $5);
} }
} }
; ;
@@ -1051,11 +1031,11 @@ def_set:
} }
| def_id compound_eq iconst { | def_id compound_eq iconst {
$$ = std::move($1); $$ = std::move($1);
compoundAssignment($$, $2, $3); act_CompoundAssignment($$, $2, $3);
} }
| redef_id compound_eq iconst { | redef_id compound_eq iconst {
$$ = std::move($1); $$ = std::move($1);
compoundAssignment($$, $2, $3); act_CompoundAssignment($$, $2, $3);
} }
; ;
@@ -1384,7 +1364,7 @@ relocexpr:
} }
| string_literal { | string_literal {
std::vector<int32_t> output = charmap_Convert($1); std::vector<int32_t> output = charmap_Convert($1);
$$.makeNumber(strToNum(output)); $$.makeNumber(act_StringToNum(output));
} }
| scoped_sym { | scoped_sym {
$$ = handleSymbolByType( $$ = handleSymbolByType(
@@ -1393,7 +1373,7 @@ relocexpr:
[](std::string const &str) { [](std::string const &str) {
std::vector<int32_t> output = charmap_Convert(str); std::vector<int32_t> output = charmap_Convert(str);
Expression expr; Expression expr;
expr.makeNumber(strToNum(output)); expr.makeNumber(act_StringToNum(output));
return expr; return expr;
} }
); );
@@ -1596,19 +1576,19 @@ relocexpr_no_str:
$$.makeNumber(pos != std::string::npos ? pos + 1 : 0); $$.makeNumber(pos != std::string::npos ? pos + 1 : 0);
} }
| OP_STRLEN LPAREN string RPAREN { | OP_STRLEN LPAREN string RPAREN {
$$.makeNumber(strlenUTF8($3, true)); $$.makeNumber(act_StringLen($3, true));
} }
| OP_BYTELEN LPAREN string RPAREN { | OP_BYTELEN LPAREN string RPAREN {
$$.makeNumber($3.length()); $$.makeNumber($3.length());
} }
| OP_CHARLEN LPAREN string RPAREN { | OP_CHARLEN LPAREN string RPAREN {
$$.makeNumber(charlenUTF8($3)); $$.makeNumber(act_CharLen($3));
} }
| OP_INCHARMAP LPAREN string RPAREN { | OP_INCHARMAP LPAREN string RPAREN {
$$.makeNumber(charmap_HasChar($3)); $$.makeNumber(charmap_HasChar($3));
} }
| OP_CHARCMP LPAREN string COMMA string RPAREN { | OP_CHARCMP LPAREN string COMMA string RPAREN {
$$.makeNumber(charcmp($3, $5)); $$.makeNumber(act_CharCmp($3, $5));
} }
| OP_CHARSIZE LPAREN string RPAREN { | OP_CHARSIZE LPAREN string RPAREN {
size_t charSize = charmap_CharSize($3); size_t charSize = charmap_CharSize($3);
@@ -1619,7 +1599,7 @@ relocexpr_no_str:
} }
| OP_CHARVAL LPAREN string COMMA iconst RPAREN { | OP_CHARVAL LPAREN string COMMA iconst RPAREN {
if (size_t len = charmap_CharSize($3); len != 0) { if (size_t len = charmap_CharSize($3); len != 0) {
uint32_t idx = adjustNegativeIndex($5, len, "CHARVAL"); uint32_t idx = act_AdjustNegativeIndex($5, len, "CHARVAL");
if (std::optional<int32_t> val = charmap_CharValue($3, idx); val.has_value()) { if (std::optional<int32_t> val = charmap_CharValue($3, idx); val.has_value()) {
$$.makeNumber(*val); $$.makeNumber(*val);
} else { } else {
@@ -1637,7 +1617,7 @@ relocexpr_no_str:
} }
| OP_STRBYTE LPAREN string COMMA iconst RPAREN { | OP_STRBYTE LPAREN string COMMA iconst RPAREN {
size_t len = $3.length(); size_t len = $3.length();
uint32_t idx = adjustNegativeIndex($5, len, "STRBYTE"); uint32_t idx = act_AdjustNegativeIndex($5, len, "STRBYTE");
if (idx < len) { if (idx < len) {
$$.makeNumber(static_cast<uint8_t>($3[idx])); $$.makeNumber(static_cast<uint8_t>($3[idx]));
} else { } else {
@@ -1691,49 +1671,49 @@ string_literal:
$$.append($3); $$.append($3);
} }
| OP_READFILE LPAREN string RPAREN { | OP_READFILE LPAREN string RPAREN {
if (std::optional<std::string> contents = readFile($3, UINT32_MAX); contents) { if (std::optional<std::string> contents = act_ReadFile($3, UINT32_MAX); contents) {
$$ = std::move(*contents); $$ = std::move(*contents);
} else { } else {
YYACCEPT; YYACCEPT;
} }
} }
| OP_READFILE LPAREN string COMMA uconst RPAREN { | OP_READFILE LPAREN string COMMA uconst RPAREN {
if (std::optional<std::string> contents = readFile($3, $5); contents) { if (std::optional<std::string> contents = act_ReadFile($3, $5); contents) {
$$ = std::move(*contents); $$ = std::move(*contents);
} else { } else {
YYACCEPT; YYACCEPT;
} }
} }
| OP_STRSLICE LPAREN string COMMA iconst COMMA iconst RPAREN { | OP_STRSLICE LPAREN string COMMA iconst COMMA iconst RPAREN {
size_t len = strlenUTF8($3, false); size_t len = act_StringLen($3, false);
uint32_t start = adjustNegativeIndex($5, len, "STRSLICE"); uint32_t start = act_AdjustNegativeIndex($5, len, "STRSLICE");
uint32_t stop = adjustNegativeIndex($7, len, "STRSLICE"); uint32_t stop = act_AdjustNegativeIndex($7, len, "STRSLICE");
$$ = strsliceUTF8($3, start, stop); $$ = act_StringSlice($3, start, stop);
} }
| OP_STRSLICE LPAREN string COMMA iconst RPAREN { | OP_STRSLICE LPAREN string COMMA iconst RPAREN {
size_t len = strlenUTF8($3, false); size_t len = act_StringLen($3, false);
uint32_t start = adjustNegativeIndex($5, len, "STRSLICE"); uint32_t start = act_AdjustNegativeIndex($5, len, "STRSLICE");
$$ = strsliceUTF8($3, start, len); $$ = act_StringSlice($3, start, len);
} }
| OP_STRSUB LPAREN string COMMA iconst COMMA uconst RPAREN { | OP_STRSUB LPAREN string COMMA iconst COMMA uconst RPAREN {
size_t len = strlenUTF8($3, false); size_t len = act_StringLen($3, false);
uint32_t pos = adjustNegativePos($5, len, "STRSUB"); uint32_t pos = act_AdjustNegativePos($5, len, "STRSUB");
$$ = strsubUTF8($3, pos, $7); $$ = act_StringSub($3, pos, $7);
} }
| OP_STRSUB LPAREN string COMMA iconst RPAREN { | OP_STRSUB LPAREN string COMMA iconst RPAREN {
size_t len = strlenUTF8($3, false); size_t len = act_StringLen($3, false);
uint32_t pos = adjustNegativePos($5, len, "STRSUB"); uint32_t pos = act_AdjustNegativePos($5, len, "STRSUB");
$$ = strsubUTF8($3, pos, pos > len ? 0 : len + 1 - pos); $$ = act_StringSub($3, pos, pos > len ? 0 : len + 1 - pos);
} }
| OP_STRCHAR LPAREN string COMMA iconst RPAREN { | OP_STRCHAR LPAREN string COMMA iconst RPAREN {
size_t len = charlenUTF8($3); size_t len = act_CharLen($3);
uint32_t idx = adjustNegativeIndex($5, len, "STRCHAR"); uint32_t idx = act_AdjustNegativeIndex($5, len, "STRCHAR");
$$ = strcharUTF8($3, idx); $$ = act_StringChar($3, idx);
} }
| OP_CHARSUB LPAREN string COMMA iconst RPAREN { | OP_CHARSUB LPAREN string COMMA iconst RPAREN {
size_t len = charlenUTF8($3); size_t len = act_CharLen($3);
uint32_t pos = adjustNegativePos($5, len, "CHARSUB"); uint32_t pos = act_AdjustNegativePos($5, len, "CHARSUB");
$$ = charsubUTF8($3, pos); $$ = act_CharSub($3, pos);
} }
| OP_REVCHAR LPAREN charmap_args RPAREN { | OP_REVCHAR LPAREN charmap_args RPAREN {
bool unique; bool unique;
@@ -1759,10 +1739,10 @@ string_literal:
std::transform(RANGE($$), $$.begin(), [](char c) { return tolower(c); }); std::transform(RANGE($$), $$.begin(), [](char c) { return tolower(c); });
} }
| OP_STRRPL LPAREN string COMMA string COMMA string RPAREN { | OP_STRRPL LPAREN string COMMA string COMMA string RPAREN {
$$ = strrpl($3, $5, $7); $$ = act_StringReplace($3, $5, $7);
} }
| OP_STRFMT LPAREN strfmt_args RPAREN { | OP_STRFMT LPAREN strfmt_args RPAREN {
$$ = strfmt($3.format, $3.args); $$ = act_StringFormat($3.format, $3.args);
} }
| POP_SECTION LPAREN scoped_sym RPAREN { | POP_SECTION LPAREN scoped_sym RPAREN {
Symbol *sym = sym_FindScopedValidSymbol($3); Symbol *sym = sym_FindScopedValidSymbol($3);
@@ -2744,458 +2724,3 @@ hl_ind_dec:
void yy::parser::error(std::string const &str) { void yy::parser::error(std::string const &str) {
::error("%s", str.c_str()); ::error("%s", str.c_str());
} }
// Reads the contents of the file `name` (located through `fstk_FindFile`) for `READFILE`.
// At most `maxLen` bytes are read.
//
// Returns `std::nullopt` if the file could not be opened and `fstk_FileError` returned true,
// which tells the caller to `YYACCEPT`; an empty string if any other error occurred (it has
// already been reported); and the bytes that were read otherwise.
static std::optional<std::string> readFile(std::string const &name, uint32_t maxLen) {
	FILE *file = nullptr;
	if (std::optional<std::string> fullPath = fstk_FindFile(name); fullPath) {
		file = fopen(fullPath->c_str(), "rb");
	}
	if (!file) {
		if (fstk_FileError(name, "READFILE")) {
			// If `fstk_FileError` returned true due to `-MG`, we should abort due to a
			// missing file, so return `std::nullopt`, which tells the caller to `YYACCEPT`
			return std::nullopt;
		}
		return "";
	}
	Defer closeFile{[&] { fclose(file); }};

	std::string contents;

	if (fseek(file, 0, SEEK_END) == 0) {
		// The file is seekable, so its size is known ahead of time:
		// if it is shorter than the max length, just read as many bytes as there are
		size_t readSize = maxLen;
		if (long fileSize = ftell(file);
		    fileSize >= 0 && static_cast<size_t>(fileSize) < readSize) {
			readSize = fileSize;
		}
		fseek(file, 0, SEEK_SET);

		contents.resize(readSize);
		if (fread(&contents[0], 1, readSize, file) < readSize || ferror(file)) {
			error("Error reading READFILE file '%s': %s", name.c_str(), strerror(errno));
			return "";
		}
		return contents;
	}

	// `errno` is still the one set by the failed `fseek` above
	if (errno != ESPIPE) {
		error("Error determining size of READFILE file '%s': %s", name.c_str(), strerror(errno));
	}

	// The size is unknown (e.g. the file is a pipe), so read in fixed-size chunks until either
	// `maxLen` bytes have been read or the input ends. This avoids allocating `maxLen` bytes up
	// front (4 GiB when no limit was given), and does not mistake a plain EOF for a read error.
	char chunk[4096];
	size_t remaining = maxLen;
	while (remaining > 0) {
		size_t wanted = remaining < sizeof(chunk) ? remaining : sizeof(chunk);
		size_t got = fread(chunk, 1, wanted, file);
		contents.append(chunk, got);
		remaining -= got;
		if (got < wanted) {
			break; // EOF or error; `ferror` below tells them apart
		}
	}
	if (ferror(file)) {
		error("Error reading READFILE file '%s': %s", name.c_str(), strerror(errno));
		return "";
	}
	return contents;
}
// Converts a string's character units into a single number.
// A lone unit is returned unchanged. Anything else is deprecated: a warning is emitted, the units
// are checked to fit in 8 bits, and the low bytes of (at most) the last four units are packed
// together, the last unit ending up in the least significant byte.
static uint32_t strToNum(std::vector<int32_t> const &s) {
	uint32_t const unitCount = s.size();

	if (unitCount == 1) {
		// A single character with a single value can be used directly as a number.
		return static_cast<uint32_t>(s.front());
	}

	warning(WARNING_OBSOLETE, "Treating multi-unit strings as numbers is deprecated");

	// Stop checking at the first unit that does not fit, so it is only reported once
	for (auto it = s.begin(); it != s.end(); ++it) {
		if (!checkNBit(*it, 8, "All character units")) {
			break;
		}
	}

	// Only the last four units (or fewer, if the string is shorter) contribute to the result
	uint32_t const firstUsed = unitCount > 4 ? unitCount - 4 : 0;
	uint32_t packed = 0;
	for (uint32_t i = firstUsed; i != unitCount; ++i) {
		packed = (packed << 8) | static_cast<uint8_t>(s[i]);
	}
	return packed;
}
// Reports `byte` (printed as two hex digits) as an invalid UTF-8 byte,
// prefixing the error message with the name of the built-in function, `functionName`.
static void errorInvalidUTF8Byte(uint8_t byte, char const *functionName) {
error("%s: Invalid UTF-8 byte 0x%02hhX", functionName, byte);
}
// Counts the UTF-8 characters in `str`.
// Each invalid byte counts as one character, as does a partial character at the end of the
// string; these are only reported as errors if `printErrors` is true.
static size_t strlenUTF8(std::string const &str, bool printErrors) {
	uint32_t state = UTF8_ACCEPT;
	uint32_t codepoint = 0;
	size_t count = 0;

	for (char ch : str) {
		uint8_t const byte = static_cast<uint8_t>(ch);
		auto const result = decode(&state, &codepoint, byte);

		if (result == UTF8_REJECT) {
			if (printErrors) {
				errorInvalidUTF8Byte(byte, "STRLEN");
			}
			// Restart decoding after the bad byte, which still counts as a character
			state = UTF8_ACCEPT;
			++count;
		} else if (result == UTF8_ACCEPT) {
			++count;
		}
		// Any other result means we are in the middle of a multi-byte character
	}

	// A character left unfinished at the end of the string counts as well.
	if (state != UTF8_ACCEPT) {
		if (printErrors) {
			error("STRLEN: Incomplete UTF-8 character");
		}
		++count;
	}

	return count;
}
// Implements `STRSLICE`: returns the UTF-8 characters of `str` from index `start` (inclusive) to
// index `stop` (exclusive), both counted from 0.
// Invalid bytes are reported as errors and count as one character each; indexes past the end of
// the string produce warnings, and the result is truncated to what the string contains.
static std::string strsliceUTF8(std::string const &str, uint32_t start, uint32_t stop) {
	size_t const byteCount = str.length();
	size_t bytePos = 0;
	uint32_t state = UTF8_ACCEPT;
	uint32_t codepoint = 0;
	uint32_t charIdx = 0;

	// Consumes bytes until `charIdx` reaches `target`, or the string runs out.
	auto advanceTo = [&](uint32_t target) {
		for (; bytePos < byteCount && charIdx < target; ++bytePos) {
			auto const result = decode(&state, &codepoint, str[bytePos]);
			if (result == UTF8_REJECT) {
				errorInvalidUTF8Byte(str[bytePos], "STRSLICE");
				state = UTF8_ACCEPT;
				++charIdx;
			} else if (result == UTF8_ACCEPT) {
				++charIdx;
			}
		}
	};

	// Find the byte at which the slice starts.
	advanceTo(start);

	// A start index equal to the string's length is allowed (it yields an empty slice);
	// anything beyond that is worth a warning.
	if (bytePos >= byteCount && start > charIdx) {
		warning(
		    WARNING_BUILTIN_ARG,
		    "STRSLICE: Start index %" PRIu32 " is past the end of the string",
		    start
		);
	}

	size_t const firstByte = bytePos;

	// Find the byte at which the slice stops.
	advanceTo(stop);

	// The string may end in the middle of a character.
	if (state != UTF8_ACCEPT) {
		error("STRSLICE: Incomplete UTF-8 character");
		++charIdx;
	}

	if (charIdx < stop) {
		warning(
		    WARNING_BUILTIN_ARG,
		    "STRSLICE: Stop index %" PRIu32 " is past the end of the string",
		    stop
		);
	}

	return str.substr(firstByte, bytePos - firstByte);
}
// Implements `STRSUB`: returns `len` UTF-8 characters of `str`, starting at position `pos`
// (counted from 1).
// Invalid bytes are reported as errors and count as one character each; a position or length
// reaching past the end of the string produces a warning, and the result is truncated.
static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len) {
	size_t const byteCount = str.length();
	size_t bytePos = 0;
	uint32_t state = UTF8_ACCEPT;
	uint32_t codepoint = 0;

	// Consumes bytes, counting complete characters in `counter`, until it reaches `limit`
	// or the string runs out.
	auto advance = [&](uint32_t &counter, uint32_t limit) {
		for (; bytePos < byteCount && counter < limit; ++bytePos) {
			auto const result = decode(&state, &codepoint, str[bytePos]);
			if (result == UTF8_REJECT) {
				errorInvalidUTF8Byte(str[bytePos], "STRSUB");
				state = UTF8_ACCEPT;
				++counter;
			} else if (result == UTF8_ACCEPT) {
				++counter;
			}
		}
	};

	// Find the byte at which the substring starts.
	uint32_t charPos = 1;
	advance(charPos, pos);

	// A position 1 past the end of the string is allowed, but will trigger the
	// "Length too big" warning below if the length is nonzero.
	if (bytePos >= byteCount && pos > charPos) {
		warning(
		    WARNING_BUILTIN_ARG, "STRSUB: Position %" PRIu32 " is past the end of the string", pos
		);
	}

	size_t const firstByte = bytePos;

	// Find how many bytes the requested characters span.
	uint32_t charCount = 0;
	advance(charCount, len);

	// The string may end in the middle of a character.
	if (state != UTF8_ACCEPT) {
		error("STRSUB: Incomplete UTF-8 character");
		++charCount;
	}

	if (charCount < len) {
		warning(WARNING_BUILTIN_ARG, "STRSUB: Length too big: %" PRIu32, len);
	}

	return str.substr(firstByte, bytePos - firstByte);
}
// Counts how many characters `charmap_ConvertNext` can consume from `str`
// before it reports that nothing is left.
static size_t charlenUTF8(std::string const &str) {
	std::string_view remaining = str;
	size_t count = 0;
	while (charmap_ConvertNext(remaining, nullptr)) {
		++count;
	}
	return count;
}
// Implements `STRCHAR`: returns the character of `str` at index `idx` (counted from 0), where
// characters are delimited by `charmap_ConvertNext`.
// If there is no such character, a warning is emitted and the result is empty.
static std::string strcharUTF8(std::string const &str, uint32_t idx) {
	std::string_view remaining = str;

	// Skip the characters before the requested one, giving up early if the string runs out.
	for (uint32_t skipped = 0; skipped < idx; ++skipped) {
		if (!charmap_ConvertNext(remaining, nullptr)) {
			break;
		}
	}

	// Whatever the next conversion consumes is the requested character.
	std::string_view const fromChar = remaining;
	if (!charmap_ConvertNext(remaining, nullptr)) {
		warning(
		    WARNING_BUILTIN_ARG, "STRCHAR: Index %" PRIu32 " is past the end of the string", idx
		);
	}

	size_t const consumed = fromChar.length() - remaining.length();
	return std::string(fromChar.substr(0, consumed));
}
// Implements `CHARSUB`: returns the character of `str` at position `pos` (counted from 1), where
// characters are delimited by `charmap_ConvertNext`.
// If there is no such character, a warning is emitted and the result is empty.
static std::string charsubUTF8(std::string const &str, uint32_t pos) {
	std::string_view remaining = str;

	// Skip the characters before the requested one, giving up early if the string runs out.
	for (uint32_t current = 1; current < pos; ++current) {
		if (!charmap_ConvertNext(remaining, nullptr)) {
			break;
		}
	}

	// Whatever the next conversion consumes is the requested character.
	std::string_view const fromChar = remaining;
	if (!charmap_ConvertNext(remaining, nullptr)) {
		warning(
		    WARNING_BUILTIN_ARG, "CHARSUB: Position %" PRIu32 " is past the end of the string", pos
		);
	}

	size_t const consumed = fromChar.length() - remaining.length();
	return std::string(fromChar.substr(0, consumed));
}
// Compares two strings value by value, using the values that `charmap_ConvertNext` produces for
// each of their characters.
// Returns -1 if `str1` sorts first, 1 if `str2` does, and 0 if both yield the same values;
// a string that runs out of values first sorts before the other.
static int32_t charcmp(std::string_view str1, std::string_view str2) {
	std::vector<int32_t> values1, values2;
	size_t next1 = 0, next2 = 0;

	for (;;) {
		// Convert one more character whenever a side's pending values are used up
		if (next1 >= values1.size()) {
			next1 = 0;
			values1.clear();
			charmap_ConvertNext(str1, &values1);
		}
		if (next2 >= values2.size()) {
			next2 = 0;
			values2.clear();
			charmap_ConvertNext(str2, &values2);
		}

		bool const done1 = values1.empty();
		bool const done2 = values2.empty();
		if (done1 && done2) {
			return 0;
		}
		if (done1) {
			return -1;
		}
		if (done2) {
			return 1;
		}

		int32_t const lhs = values1[next1++];
		int32_t const rhs = values2[next2++];
		if (lhs < rhs) {
			return -1;
		}
		if (lhs > rhs) {
			return 1;
		}
	}
}
// String functions adjust negative index arguments the same way,
// such that index -1 is the last character of a string of length `len`.
// An index that is still negative after that is clamped to 0, with a warning that names
// `functionName`.
static uint32_t adjustNegativeIndex(int32_t idx, size_t len, char const *functionName) {
	if (idx >= 0) {
		return static_cast<uint32_t>(idx);
	}

	// Negative indexes count backwards from the end of the string
	idx += len;
	if (idx >= 0) {
		return static_cast<uint32_t>(idx);
	}

	warning(WARNING_BUILTIN_ARG, "%s: Index starts at 0", functionName);
	return 0;
}
// STRSUB and CHARSUB adjust negative position arguments the same way,
// such that position -1 is the last character of a string of length `len`.
// A position that ends up below 1 (including a plain 0) is clamped to 1, with a warning that
// names `functionName`.
static uint32_t adjustNegativePos(int32_t pos, size_t len, char const *functionName) {
	// Negative positions count backwards from the end of the string
	if (pos < 0) {
		pos += len + 1;
	}

	if (pos >= 1) {
		return static_cast<uint32_t>(pos);
	}

	warning(WARNING_BUILTIN_ARG, "%s: Position starts at 1", functionName);
	return 1;
}
// Implements `STRRPL`: returns a copy of `str` in which every non-overlapping occurrence of `old`
// (searched left to right) is replaced with `rep`.
// Replacing an empty string is not possible: it triggers a warning, and `str` is returned as-is.
static std::string strrpl(std::string_view str, std::string const &old, std::string const &rep) {
	if (old.empty()) {
		warning(WARNING_EMPTY_STRRPL, "STRRPL: Cannot replace an empty string");
		return std::string(str);
	}

	std::string result;
	size_t cursor = 0; // Start of the part of `str` that has not been copied yet

	for (;;) {
		size_t const match = str.find(old, cursor);
		if (match == std::string_view::npos) {
			// No more occurrences: copy the rest verbatim
			result.append(str.substr(cursor));
			return result;
		}
		result.append(str.substr(cursor, match - cursor));
		result.append(rep);
		cursor = match + old.size();
	}
}
// Implements `STRFMT`: returns `spec` with each `%` format spec replaced by the corresponding
// element of `args` (taken in order), and each `%%` replaced by a single `%`.
// Invalid specs, as well as too few or too many arguments, are reported as errors; a spec that
// cannot be expanded is replaced by a `%`.
static std::string
    strfmt(std::string const &spec, std::vector<std::variant<uint32_t, std::string>> const &args) {
	std::string str;
	size_t argIndex = 0;
	size_t const specLen = spec.length();

	// The explicit bound matters: if a format spec is cut short by the end of the string, `i`
	// already equals `specLen` when the loop body ends, so the `++i` below would otherwise make
	// `spec[i]` read past the string's terminator.
	for (size_t i = 0; i < specLen && spec[i] != '\0'; ++i) {
		int c = spec[i];

		if (c != '%') {
			str += c;
			continue;
		}

		// `i < specLen` held above, so this reads at most the terminator
		c = spec[++i];

		if (c == '%') {
			str += c;
			continue;
		}

		FormatSpec fmt{};

		// Feed characters to the format spec until it is complete, or the string ends
		while (c != '\0') {
			fmt.useCharacter(c);
			if (fmt.isFinished()) {
				break;
			}
			c = spec[++i];
		}

		if (fmt.isEmpty()) {
			error("STRFMT: Illegal '%%' at end of format string");
			str += '%';
			break;
		}

		if (!fmt.isValid()) {
			error("STRFMT: Invalid format spec for argument %zu", argIndex + 1);
			str += '%';
		} else if (argIndex >= args.size()) {
			// Will warn after formatting is done.
			str += '%';
		} else if (std::holds_alternative<uint32_t>(args[argIndex])) {
			fmt.appendNumber(str, std::get<uint32_t>(args[argIndex]));
		} else {
			fmt.appendString(str, std::get<std::string>(args[argIndex]));
		}

		++argIndex;
	}

	if (argIndex < args.size()) {
		error("STRFMT: %zu unformatted argument(s)", args.size() - argIndex);
	} else if (argIndex > args.size()) {
		error(
		    "STRFMT: Not enough arguments for format spec, got: %zu, need: %zu",
		    args.size(),
		    argIndex
		);
	}

	return str;
}
static void compoundAssignment(std::string const &symName, RPNCommand op, int32_t constValue) {
Expression oldExpr, constExpr, newExpr;
int32_t newValue;
oldExpr.makeSymbol(symName);
constExpr.makeNumber(constValue);
newExpr.makeBinaryOp(op, std::move(oldExpr), constExpr);
newValue = newExpr.getConstVal();
sym_AddVar(symName, newValue);
}
// Reports a failed assertion that has no custom message, using the diagnostic function that
// matches the assertion's `type`.
static void failAssert(AssertionType type) {
switch (type) {
case ASSERT_FATAL:
// No `break` here: presumably `fatal` does not return (TODO confirm);
// if it did, control would continue into the `ASSERT_ERROR` case.
fatal("Assertion failed");
case ASSERT_ERROR:
error("Assertion failed");
break;
case ASSERT_WARN:
warning(WARNING_ASSERT, "Assertion failed");
break;
}
}
// Reports a failed assertion along with its custom `message`, using the diagnostic function that
// matches the assertion's `type`.
static void failAssertMsg(AssertionType type, std::string const &message) {
switch (type) {
case ASSERT_FATAL:
// No `break` here: presumably `fatal` does not return (TODO confirm);
// if it did, control would continue into the `ASSERT_ERROR` case.
fatal("Assertion failed: %s", message.c_str());
case ASSERT_ERROR:
error("Assertion failed: %s", message.c_str());
break;
case ASSERT_WARN:
warning(WARNING_ASSERT, "Assertion failed: %s", message.c_str());
break;
}
}

View File

@@ -723,44 +723,44 @@ void sect_ConstByte(uint8_t byte) {
writeByte(byte); writeByte(byte);
} }
void sect_ByteString(std::vector<int32_t> const &string) { void sect_ByteString(std::vector<int32_t> const &str) {
if (!requireCodeSection()) { if (!requireCodeSection()) {
return; return;
} }
for (int32_t unit : string) { for (int32_t unit : str) {
if (!checkNBit(unit, 8, "All character units")) { if (!checkNBit(unit, 8, "All character units")) {
break; break;
} }
} }
for (int32_t unit : string) { for (int32_t unit : str) {
writeByte(static_cast<uint8_t>(unit)); writeByte(static_cast<uint8_t>(unit));
} }
} }
void sect_WordString(std::vector<int32_t> const &string) { void sect_WordString(std::vector<int32_t> const &str) {
if (!requireCodeSection()) { if (!requireCodeSection()) {
return; return;
} }
for (int32_t unit : string) { for (int32_t unit : str) {
if (!checkNBit(unit, 16, "All character units")) { if (!checkNBit(unit, 16, "All character units")) {
break; break;
} }
} }
for (int32_t unit : string) { for (int32_t unit : str) {
writeWord(static_cast<uint16_t>(unit)); writeWord(static_cast<uint16_t>(unit));
} }
} }
void sect_LongString(std::vector<int32_t> const &string) { void sect_LongString(std::vector<int32_t> const &str) {
if (!requireCodeSection()) { if (!requireCodeSection()) {
return; return;
} }
for (int32_t unit : string) { for (int32_t unit : str) {
writeLong(static_cast<uint32_t>(unit)); writeLong(static_cast<uint32_t>(unit));
} }
} }