diff --git a/Makefile b/Makefile index 909b2e92..81f8f95d 100644 --- a/Makefile +++ b/Makefile @@ -55,6 +55,7 @@ common_obj := \ rgbasm_obj := \ ${common_obj} \ + src/asm/actions.o \ src/asm/charmap.o \ src/asm/fixpoint.o \ src/asm/format.o \ diff --git a/include/asm/actions.hpp b/include/asm/actions.hpp new file mode 100644 index 00000000..a429f319 --- /dev/null +++ b/include/asm/actions.hpp @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: MIT + +#ifndef RGBDS_ASM_ACTIONS_HPP +#define RGBDS_ASM_ACTIONS_HPP + +#include +#include +#include +#include +#include +#include + +#include "asm/output.hpp" // AssertionType +#include "asm/rpn.hpp" // RPNCommand + +std::optional act_ReadFile(std::string const &name, uint32_t maxLen); + +uint32_t act_StringToNum(std::vector const &str); + +size_t act_StringLen(std::string const &str, bool printErrors); +std::string act_StringSlice(std::string const &str, uint32_t start, uint32_t stop); +std::string act_StringSub(std::string const &str, uint32_t pos, uint32_t len); + +size_t act_CharLen(std::string const &str); +std::string act_StringChar(std::string const &str, uint32_t idx); +std::string act_CharSub(std::string const &str, uint32_t pos); +int32_t act_CharCmp(std::string_view str1, std::string_view str2); + +uint32_t act_AdjustNegativeIndex(int32_t idx, size_t len, char const *functionName); +uint32_t act_AdjustNegativePos(int32_t pos, size_t len, char const *functionName); + +std::string act_StringReplace(std::string_view str, std::string const &old, std::string const &rep); +std::string act_StringFormat( + std::string const &spec, std::vector> const &args +); + +void act_CompoundAssignment(std::string const &symName, RPNCommand op, int32_t constValue); + +void act_FailAssert(AssertionType type); +void act_FailAssertMsg(AssertionType type, std::string const &message); + +#endif // RGBDS_ASM_ACTIONS_HPP diff --git a/include/asm/section.hpp b/include/asm/section.hpp index 11d9503d..9f3b61d5 100644 --- a/include/asm/section.hpp +++ b/include/asm/section.hpp @@ -84,9 +84,9 @@ void sect_EndUnion(); void sect_CheckUnionClosed(); void sect_ConstByte(uint8_t byte); -void sect_ByteString(std::vector const &string); -void sect_WordString(std::vector const &string); -void sect_LongString(std::vector const &string); +void sect_ByteString(std::vector const &str); +void sect_WordString(std::vector const &str); +void sect_LongString(std::vector const &str); void sect_Skip(uint32_t skip, bool ds); void sect_RelByte(Expression const &expr, uint32_t pcShift); void sect_RelBytes(uint32_t n, std::vector const &exprs); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 93ae1779..945aa7a1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -34,6 +34,7 @@ BISON_TARGET(LINKER_SCRIPT_PARSER "link/script.y" set(rgbasm_src "${BISON_ASM_PARSER_OUTPUT_SOURCE}" + "asm/actions.cpp" "asm/charmap.cpp" "asm/fixpoint.cpp" "asm/format.cpp" diff --git a/src/asm/actions.cpp b/src/asm/actions.cpp new file mode 100644 index 00000000..dd026109 --- /dev/null +++ b/src/asm/actions.cpp @@ -0,0 +1,467 @@ +#include "asm/actions.hpp" + +#include +#include +#include + +#include "extern/utf8decoder.hpp" +#include "helpers.hpp" + +#include "asm/charmap.hpp" +#include "asm/format.hpp" +#include "asm/fstack.hpp" +#include "asm/symbol.hpp" +#include "asm/warning.hpp" + +std::optional act_ReadFile(std::string const &name, uint32_t maxLen) { + FILE *file = nullptr; + if (std::optional fullPath = fstk_FindFile(name); fullPath) { + file = fopen(fullPath->c_str(), "rb"); + } + if (!file) { + if (fstk_FileError(name, "READFILE")) { + // If `fstk_FileError` returned true due to `-MG`, we should abort due to a + // missing file, so return `std::nullopt`, which tells the caller to `YYACCEPT` + return std::nullopt; + } + return ""; + } + Defer closeFile{[&] { fclose(file); }}; + + size_t readSize = maxLen; + if (fseek(file, 0, SEEK_END) == 0) { + // If the file is seekable and shorter than the max length, + // just read as many bytes as there are + if (long fileSize = ftell(file); static_cast(fileSize) < readSize) { + readSize = fileSize; + } + fseek(file, 0, SEEK_SET); + } else if (errno != ESPIPE) { + error("Error determining size of READFILE file '%s': %s", name.c_str(), strerror(errno)); + } + + std::string contents; + contents.resize(readSize); + + if (fread(&contents[0], 1, readSize, file) < readSize || ferror(file)) { + error("Error reading READFILE file '%s': %s", name.c_str(), strerror(errno)); + return ""; + } + + return contents; +} + +uint32_t act_StringToNum(std::vector const &str) { + uint32_t length = str.size(); + + if (length == 1) { + // The string is a single character with a single value, + // which can be used directly as a number. + return static_cast(str[0]); + } + + warning(WARNING_OBSOLETE, "Treating multi-unit strings as numbers is deprecated"); + + for (int32_t v : str) { + if (!checkNBit(v, 8, "All character units")) { + break; + } + } + + uint32_t r = 0; + + for (uint32_t i = length < 4 ? 0 : length - 4; i < length; ++i) { + r <<= 8; + r |= static_cast(str[i]); + } + + return r; +} + +static void errorInvalidUTF8Byte(uint8_t byte, char const *functionName) { + error("%s: Invalid UTF-8 byte 0x%02hhX", functionName, byte); +} + +size_t act_StringLen(std::string const &str, bool printErrors) { + size_t len = 0; + uint32_t state = UTF8_ACCEPT; + uint32_t codepoint = 0; + + for (char c : str) { + uint8_t byte = static_cast(c); + + switch (decode(&state, &codepoint, byte)) { + case UTF8_REJECT: + if (printErrors) { + errorInvalidUTF8Byte(byte, "STRLEN"); + } + state = UTF8_ACCEPT; + // fallthrough + case UTF8_ACCEPT: + ++len; + break; + } + } + + // Check for partial code point. + if (state != UTF8_ACCEPT) { + if (printErrors) { + error("STRLEN: Incomplete UTF-8 character"); + } + ++len; + } + + return len; +} + +std::string act_StringSlice(std::string const &str, uint32_t start, uint32_t stop) { + size_t strLen = str.length(); + size_t index = 0; + uint32_t state = UTF8_ACCEPT; + uint32_t codepoint = 0; + uint32_t curIdx = 0; + + // Advance to starting index in source string. + while (index < strLen && curIdx < start) { + switch (decode(&state, &codepoint, str[index])) { + case UTF8_REJECT: + errorInvalidUTF8Byte(str[index], "STRSLICE"); + state = UTF8_ACCEPT; + // fallthrough + case UTF8_ACCEPT: + ++curIdx; + break; + } + ++index; + } + + // An index 1 past the end of the string is allowed, but will trigger the + // "Length too big" warning below if the length is nonzero. + if (index >= strLen && start > curIdx) { + warning( + WARNING_BUILTIN_ARG, + "STRSLICE: Start index %" PRIu32 " is past the end of the string", + start + ); + } + + size_t startIndex = index; + + // Advance to ending index in source string. + while (index < strLen && curIdx < stop) { + switch (decode(&state, &codepoint, str[index])) { + case UTF8_REJECT: + errorInvalidUTF8Byte(str[index], "STRSLICE"); + state = UTF8_ACCEPT; + // fallthrough + case UTF8_ACCEPT: + ++curIdx; + break; + } + ++index; + } + + // Check for partial code point. + if (state != UTF8_ACCEPT) { + error("STRSLICE: Incomplete UTF-8 character"); + ++curIdx; + } + + if (curIdx < stop) { + warning( + WARNING_BUILTIN_ARG, + "STRSLICE: Stop index %" PRIu32 " is past the end of the string", + stop + ); + } + + return str.substr(startIndex, index - startIndex); +} + +std::string act_StringSub(std::string const &str, uint32_t pos, uint32_t len) { + size_t strLen = str.length(); + size_t index = 0; + uint32_t state = UTF8_ACCEPT; + uint32_t codepoint = 0; + uint32_t curPos = 1; + + // Advance to starting position in source string. + while (index < strLen && curPos < pos) { + switch (decode(&state, &codepoint, str[index])) { + case UTF8_REJECT: + errorInvalidUTF8Byte(str[index], "STRSUB"); + state = UTF8_ACCEPT; + // fallthrough + case UTF8_ACCEPT: + ++curPos; + break; + } + ++index; + } + + // A position 1 past the end of the string is allowed, but will trigger the + // "Length too big" warning below if the length is nonzero. + if (index >= strLen && pos > curPos) { + warning( + WARNING_BUILTIN_ARG, "STRSUB: Position %" PRIu32 " is past the end of the string", pos + ); + } + + size_t startIndex = index; + uint32_t curLen = 0; + + // Compute the result length in bytes. + while (index < strLen && curLen < len) { + switch (decode(&state, &codepoint, str[index])) { + case UTF8_REJECT: + errorInvalidUTF8Byte(str[index], "STRSUB"); + state = UTF8_ACCEPT; + // fallthrough + case UTF8_ACCEPT: + ++curLen; + break; + } + ++index; + } + + // Check for partial code point. + if (state != UTF8_ACCEPT) { + error("STRSUB: Incomplete UTF-8 character"); + ++curLen; + } + + if (curLen < len) { + warning(WARNING_BUILTIN_ARG, "STRSUB: Length too big: %" PRIu32, len); + } + + return str.substr(startIndex, index - startIndex); +} + +size_t act_CharLen(std::string const &str) { + std::string_view view = str; + size_t len; + + for (len = 0; charmap_ConvertNext(view, nullptr); ++len) {} + + return len; +} + +std::string act_StringChar(std::string const &str, uint32_t idx) { + std::string_view view = str; + size_t charLen = 1; + + // Advance to starting index in source string. + for (uint32_t curIdx = 0; charLen && curIdx < idx; ++curIdx) { + charLen = charmap_ConvertNext(view, nullptr); + } + + std::string_view start = view; + + if (!charmap_ConvertNext(view, nullptr)) { + warning( + WARNING_BUILTIN_ARG, "STRCHAR: Index %" PRIu32 " is past the end of the string", idx + ); + } + + start = start.substr(0, start.length() - view.length()); + return std::string(start); +} + +std::string act_CharSub(std::string const &str, uint32_t pos) { + std::string_view view = str; + size_t charLen = 1; + + // Advance to starting position in source string. + for (uint32_t curPos = 1; charLen && curPos < pos; ++curPos) { + charLen = charmap_ConvertNext(view, nullptr); + } + + std::string_view start = view; + + if (!charmap_ConvertNext(view, nullptr)) { + warning( + WARNING_BUILTIN_ARG, "CHARSUB: Position %" PRIu32 " is past the end of the string", pos + ); + } + + start = start.substr(0, start.length() - view.length()); + return std::string(start); +} + +int32_t act_CharCmp(std::string_view str1, std::string_view str2) { + std::vector seq1, seq2; + size_t idx1 = 0, idx2 = 0; + for (;;) { + if (idx1 >= seq1.size()) { + idx1 = 0; + seq1.clear(); + charmap_ConvertNext(str1, &seq1); + } + if (idx2 >= seq2.size()) { + idx2 = 0; + seq2.clear(); + charmap_ConvertNext(str2, &seq2); + } + if (seq1.empty() != seq2.empty()) { + return seq1.empty() ? -1 : 1; + } else if (seq1.empty()) { + return 0; + } else { + int32_t value1 = seq1[idx1++], value2 = seq2[idx2++]; + if (value1 != value2) { + return (value1 > value2) - (value1 < value2); + } + } + } +} + +uint32_t act_AdjustNegativeIndex(int32_t idx, size_t len, char const *functionName) { + // String functions adjust negative index arguments the same way, + // such that position -1 is the last character of a string. + if (idx < 0) { + idx += len; + } + if (idx < 0) { + warning(WARNING_BUILTIN_ARG, "%s: Index starts at 0", functionName); + idx = 0; + } + return static_cast(idx); +} + +uint32_t act_AdjustNegativePos(int32_t pos, size_t len, char const *functionName) { + // STRSUB and CHARSUB adjust negative position arguments the same way, + // such that position -1 is the last character of a string. + if (pos < 0) { + pos += len + 1; + } + if (pos < 1) { + warning(WARNING_BUILTIN_ARG, "%s: Position starts at 1", functionName); + pos = 1; + } + return static_cast(pos); +} + +std::string + act_StringReplace(std::string_view str, std::string const &old, std::string const &rep) { + if (old.empty()) { + warning(WARNING_EMPTY_STRRPL, "STRRPL: Cannot replace an empty string"); + return std::string(str); + } + + std::string rpl; + + while (!str.empty()) { + auto pos = str.find(old); + if (pos == str.npos) { + rpl.append(str); + break; + } + rpl.append(str, 0, pos); + rpl.append(rep); + str.remove_prefix(pos + old.size()); + } + + return rpl; +} + +std::string act_StringFormat( + std::string const &spec, std::vector> const &args +) { + std::string str; + size_t argIndex = 0; + + for (size_t i = 0; spec[i] != '\0'; ++i) { + int c = spec[i]; + + if (c != '%') { + str += c; + continue; + } + + c = spec[++i]; + + if (c == '%') { + str += c; + continue; + } + + FormatSpec fmt{}; + + while (c != '\0') { + fmt.useCharacter(c); + if (fmt.isFinished()) { + break; + } + c = spec[++i]; + } + + if (fmt.isEmpty()) { + error("STRFMT: Illegal '%%' at end of format string"); + str += '%'; + break; + } + + if (!fmt.isValid()) { + error("STRFMT: Invalid format spec for argument %zu", argIndex + 1); + str += '%'; + } else if (argIndex >= args.size()) { + // Will warn after formatting is done. + str += '%'; + } else if (std::holds_alternative(args[argIndex])) { + fmt.appendNumber(str, std::get(args[argIndex])); + } else { + fmt.appendString(str, std::get(args[argIndex])); + } + + ++argIndex; + } + + if (argIndex < args.size()) { + error("STRFMT: %zu unformatted argument(s)", args.size() - argIndex); + } else if (argIndex > args.size()) { + error( + "STRFMT: Not enough arguments for format spec, got: %zu, need: %zu", + args.size(), + argIndex + ); + } + + return str; +} + +void act_CompoundAssignment(std::string const &symName, RPNCommand op, int32_t constValue) { + Expression oldExpr, constExpr, newExpr; + int32_t newValue; + + oldExpr.makeSymbol(symName); + constExpr.makeNumber(constValue); + newExpr.makeBinaryOp(op, std::move(oldExpr), constExpr); + newValue = newExpr.getConstVal(); + sym_AddVar(symName, newValue); +} + +void act_FailAssert(AssertionType type) { + switch (type) { + case ASSERT_FATAL: + fatal("Assertion failed"); + case ASSERT_ERROR: + error("Assertion failed"); + break; + case ASSERT_WARN: + warning(WARNING_ASSERT, "Assertion failed"); + break; + } +} + +void act_FailAssertMsg(AssertionType type, std::string const &message) { + switch (type) { + case ASSERT_FATAL: + fatal("Assertion failed: %s", message.c_str()); + case ASSERT_ERROR: + error("Assertion failed: %s", message.c_str()); + break; + case ASSERT_WARN: + warning(WARNING_ASSERT, "Assertion failed: %s", message.c_str()); + break; + } +} diff --git a/src/asm/parser.y b/src/asm/parser.y index 4416a38e..602c1af2 100644 --- a/src/asm/parser.y +++ b/src/asm/parser.y @@ -47,9 +47,9 @@ #include "extern/utf8decoder.hpp" #include "helpers.hpp" + #include "asm/actions.hpp" #include "asm/charmap.hpp" #include "asm/fixpoint.hpp" - #include "asm/format.hpp" #include "asm/fstack.hpp" #include "asm/main.hpp" #include "asm/opt.hpp" @@ -62,26 +62,6 @@ yy::parser::symbol_type yylex(); // Provided by lexer.cpp - static std::optional readFile(std::string const &name, uint32_t maxLen); - static uint32_t strToNum(std::vector const &s); - static void errorInvalidUTF8Byte(uint8_t byte, char const *functionName); - static size_t strlenUTF8(std::string const &str, bool printErrors); - static std::string strsliceUTF8(std::string const &str, uint32_t start, uint32_t stop); - static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len); - static size_t charlenUTF8(std::string const &str); - static std::string strcharUTF8(std::string const &str, uint32_t idx); - static std::string charsubUTF8(std::string const &str, uint32_t pos); - static int32_t charcmp(std::string_view str1, std::string_view str2); - static uint32_t adjustNegativeIndex(int32_t idx, size_t len, char const *functionName); - static uint32_t adjustNegativePos(int32_t pos, size_t len, char const *functionName); - static std::string strrpl(std::string_view str, std::string const &old, std::string const &rep); - static std::string strfmt( - std::string const &spec, std::vector> const &args - ); - static void compoundAssignment(std::string const &symName, RPNCommand op, int32_t constValue); - static void failAssert(AssertionType type); - static void failAssertMsg(AssertionType type, std::string const &message); - template static auto handleSymbolByType(std::string const &symName, N numCallback, S strCallback) { if (Symbol *sym = sym_FindScopedSymbol(symName); sym && sym->type == SYM_EQUS) { @@ -819,24 +799,24 @@ assert: if (!$3.isKnown()) { out_CreateAssert($2, $3, "", sect_GetOutputOffset()); } else if ($3.value() == 0) { - failAssert($2); + act_FailAssert($2); } } | POP_ASSERT assert_type relocexpr COMMA string { if (!$3.isKnown()) { out_CreateAssert($2, $3, $5, sect_GetOutputOffset()); } else if ($3.value() == 0) { - failAssertMsg($2, $5); + act_FailAssertMsg($2, $5); } } | POP_STATIC_ASSERT assert_type iconst { if ($3 == 0) { - failAssert($2); + act_FailAssert($2); } } | POP_STATIC_ASSERT assert_type iconst COMMA string { if ($3 == 0) { - failAssertMsg($2, $5); + act_FailAssertMsg($2, $5); } } ; @@ -1051,11 +1031,11 @@ def_set: } | def_id compound_eq iconst { $$ = std::move($1); - compoundAssignment($$, $2, $3); + act_CompoundAssignment($$, $2, $3); } | redef_id compound_eq iconst { $$ = std::move($1); - compoundAssignment($$, $2, $3); + act_CompoundAssignment($$, $2, $3); } ; @@ -1384,7 +1364,7 @@ relocexpr: } | string_literal { std::vector output = charmap_Convert($1); - $$.makeNumber(strToNum(output)); + $$.makeNumber(act_StringToNum(output)); } | scoped_sym { $$ = handleSymbolByType( @@ -1393,7 +1373,7 @@ relocexpr: [](std::string const &str) { std::vector output = charmap_Convert(str); Expression expr; - expr.makeNumber(strToNum(output)); + expr.makeNumber(act_StringToNum(output)); return expr; } ); @@ -1596,19 +1576,19 @@ relocexpr_no_str: $$.makeNumber(pos != std::string::npos ? pos + 1 : 0); } | OP_STRLEN LPAREN string RPAREN { - $$.makeNumber(strlenUTF8($3, true)); + $$.makeNumber(act_StringLen($3, true)); } | OP_BYTELEN LPAREN string RPAREN { $$.makeNumber($3.length()); } | OP_CHARLEN LPAREN string RPAREN { - $$.makeNumber(charlenUTF8($3)); + $$.makeNumber(act_CharLen($3)); } | OP_INCHARMAP LPAREN string RPAREN { $$.makeNumber(charmap_HasChar($3)); } | OP_CHARCMP LPAREN string COMMA string RPAREN { - $$.makeNumber(charcmp($3, $5)); + $$.makeNumber(act_CharCmp($3, $5)); } | OP_CHARSIZE LPAREN string RPAREN { size_t charSize = charmap_CharSize($3); @@ -1619,7 +1599,7 @@ relocexpr_no_str: } | OP_CHARVAL LPAREN string COMMA iconst RPAREN { if (size_t len = charmap_CharSize($3); len != 0) { - uint32_t idx = adjustNegativeIndex($5, len, "CHARVAL"); + uint32_t idx = act_AdjustNegativeIndex($5, len, "CHARVAL"); if (std::optional val = charmap_CharValue($3, idx); val.has_value()) { $$.makeNumber(*val); } else { @@ -1637,7 +1617,7 @@ relocexpr_no_str: } | OP_STRBYTE LPAREN string COMMA iconst RPAREN { size_t len = $3.length(); - uint32_t idx = adjustNegativeIndex($5, len, "STRBYTE"); + uint32_t idx = act_AdjustNegativeIndex($5, len, "STRBYTE"); if (idx < len) { $$.makeNumber(static_cast($3[idx])); } else { @@ -1691,49 +1671,49 @@ string_literal: $$.append($3); } | OP_READFILE LPAREN string RPAREN { - if (std::optional contents = readFile($3, UINT32_MAX); contents) { + if (std::optional contents = act_ReadFile($3, UINT32_MAX); contents) { $$ = std::move(*contents); } else { YYACCEPT; } } | OP_READFILE LPAREN string COMMA uconst RPAREN { - if (std::optional contents = readFile($3, $5); contents) { + if (std::optional contents = act_ReadFile($3, $5); contents) { $$ = std::move(*contents); } else { YYACCEPT; } } | OP_STRSLICE LPAREN string COMMA iconst COMMA iconst RPAREN { - size_t len = strlenUTF8($3, false); - uint32_t start = adjustNegativeIndex($5, len, "STRSLICE"); - uint32_t stop = adjustNegativeIndex($7, len, "STRSLICE"); - $$ = strsliceUTF8($3, start, stop); + size_t len = act_StringLen($3, false); + uint32_t start = act_AdjustNegativeIndex($5, len, "STRSLICE"); + uint32_t stop = act_AdjustNegativeIndex($7, len, "STRSLICE"); + $$ = act_StringSlice($3, start, stop); } | OP_STRSLICE LPAREN string COMMA iconst RPAREN { - size_t len = strlenUTF8($3, false); - uint32_t start = adjustNegativeIndex($5, len, "STRSLICE"); - $$ = strsliceUTF8($3, start, len); + size_t len = act_StringLen($3, false); + uint32_t start = act_AdjustNegativeIndex($5, len, "STRSLICE"); + $$ = act_StringSlice($3, start, len); } | OP_STRSUB LPAREN string COMMA iconst COMMA uconst RPAREN { - size_t len = strlenUTF8($3, false); - uint32_t pos = adjustNegativePos($5, len, "STRSUB"); - $$ = strsubUTF8($3, pos, $7); + size_t len = act_StringLen($3, false); + uint32_t pos = act_AdjustNegativePos($5, len, "STRSUB"); + $$ = act_StringSub($3, pos, $7); } | OP_STRSUB LPAREN string COMMA iconst RPAREN { - size_t len = strlenUTF8($3, false); - uint32_t pos = adjustNegativePos($5, len, "STRSUB"); - $$ = strsubUTF8($3, pos, pos > len ? 0 : len + 1 - pos); + size_t len = act_StringLen($3, false); + uint32_t pos = act_AdjustNegativePos($5, len, "STRSUB"); + $$ = act_StringSub($3, pos, pos > len ? 0 : len + 1 - pos); } | OP_STRCHAR LPAREN string COMMA iconst RPAREN { - size_t len = charlenUTF8($3); - uint32_t idx = adjustNegativeIndex($5, len, "STRCHAR"); - $$ = strcharUTF8($3, idx); + size_t len = act_CharLen($3); + uint32_t idx = act_AdjustNegativeIndex($5, len, "STRCHAR"); + $$ = act_StringChar($3, idx); } | OP_CHARSUB LPAREN string COMMA iconst RPAREN { - size_t len = charlenUTF8($3); - uint32_t pos = adjustNegativePos($5, len, "CHARSUB"); - $$ = charsubUTF8($3, pos); + size_t len = act_CharLen($3); + uint32_t pos = act_AdjustNegativePos($5, len, "CHARSUB"); + $$ = act_CharSub($3, pos); } | OP_REVCHAR LPAREN charmap_args RPAREN { bool unique; @@ -1759,10 +1739,10 @@ string_literal: std::transform(RANGE($$), $$.begin(), [](char c) { return tolower(c); }); } | OP_STRRPL LPAREN string COMMA string COMMA string RPAREN { - $$ = strrpl($3, $5, $7); + $$ = act_StringReplace($3, $5, $7); } | OP_STRFMT LPAREN strfmt_args RPAREN { - $$ = strfmt($3.format, $3.args); + $$ = act_StringFormat($3.format, $3.args); } | POP_SECTION LPAREN scoped_sym RPAREN { Symbol *sym = sym_FindScopedValidSymbol($3); @@ -2744,458 +2724,3 @@ hl_ind_dec: void yy::parser::error(std::string const &str) { ::error("%s", str.c_str()); } - -static std::optional readFile(std::string const &name, uint32_t maxLen) { - FILE *file = nullptr; - if (std::optional fullPath = fstk_FindFile(name); fullPath) { - file = fopen(fullPath->c_str(), "rb"); - } - if (!file) { - if (fstk_FileError(name, "READFILE")) { - // If `fstk_FileError` returned true due to `-MG`, we should abort due to a - // missing file, so return `std::nullopt`, which tells the caller to `YYACCEPT` - return std::nullopt; - } - return ""; - } - Defer closeFile{[&] { fclose(file); }}; - - size_t readSize = maxLen; - if (fseek(file, 0, SEEK_END) == 0) { - // If the file is seekable and shorter than the max length, - // just read as many bytes as there are - if (long fileSize = ftell(file); static_cast(fileSize) < readSize) { - readSize = fileSize; - } - fseek(file, 0, SEEK_SET); - } else if (errno != ESPIPE) { - error("Error determining size of READFILE file '%s': %s", name.c_str(), strerror(errno)); - } - - std::string contents; - contents.resize(readSize); - - if (fread(&contents[0], 1, readSize, file) < readSize || ferror(file)) { - error("Error reading READFILE file '%s': %s", name.c_str(), strerror(errno)); - return ""; - } - - return contents; -} - -static uint32_t strToNum(std::vector const &s) { - uint32_t length = s.size(); - - if (length == 1) { - // The string is a single character with a single value, - // which can be used directly as a number. - return static_cast(s[0]); - } - - warning(WARNING_OBSOLETE, "Treating multi-unit strings as numbers is deprecated"); - - for (int32_t v : s) { - if (!checkNBit(v, 8, "All character units")) { - break; - } - } - - uint32_t r = 0; - - for (uint32_t i = length < 4 ? 0 : length - 4; i < length; ++i) { - r <<= 8; - r |= static_cast(s[i]); - } - - return r; -} - -static void errorInvalidUTF8Byte(uint8_t byte, char const *functionName) { - error("%s: Invalid UTF-8 byte 0x%02hhX", functionName, byte); -} - -static size_t strlenUTF8(std::string const &str, bool printErrors) { - size_t len = 0; - uint32_t state = UTF8_ACCEPT; - uint32_t codepoint = 0; - - for (char c : str) { - uint8_t byte = static_cast(c); - - switch (decode(&state, &codepoint, byte)) { - case UTF8_REJECT: - if (printErrors) { - errorInvalidUTF8Byte(byte, "STRLEN"); - } - state = UTF8_ACCEPT; - // fallthrough - case UTF8_ACCEPT: - ++len; - break; - } - } - - // Check for partial code point. - if (state != UTF8_ACCEPT) { - if (printErrors) { - error("STRLEN: Incomplete UTF-8 character"); - } - ++len; - } - - return len; -} - -static std::string strsliceUTF8(std::string const &str, uint32_t start, uint32_t stop) { - size_t strLen = str.length(); - size_t index = 0; - uint32_t state = UTF8_ACCEPT; - uint32_t codepoint = 0; - uint32_t curIdx = 0; - - // Advance to starting index in source string. - while (index < strLen && curIdx < start) { - switch (decode(&state, &codepoint, str[index])) { - case UTF8_REJECT: - errorInvalidUTF8Byte(str[index], "STRSLICE"); - state = UTF8_ACCEPT; - // fallthrough - case UTF8_ACCEPT: - ++curIdx; - break; - } - ++index; - } - - // An index 1 past the end of the string is allowed, but will trigger the - // "Length too big" warning below if the length is nonzero. - if (index >= strLen && start > curIdx) { - warning( - WARNING_BUILTIN_ARG, - "STRSLICE: Start index %" PRIu32 " is past the end of the string", - start - ); - } - - size_t startIndex = index; - - // Advance to ending index in source string. - while (index < strLen && curIdx < stop) { - switch (decode(&state, &codepoint, str[index])) { - case UTF8_REJECT: - errorInvalidUTF8Byte(str[index], "STRSLICE"); - state = UTF8_ACCEPT; - // fallthrough - case UTF8_ACCEPT: - ++curIdx; - break; - } - ++index; - } - - // Check for partial code point. - if (state != UTF8_ACCEPT) { - error("STRSLICE: Incomplete UTF-8 character"); - ++curIdx; - } - - if (curIdx < stop) { - warning( - WARNING_BUILTIN_ARG, - "STRSLICE: Stop index %" PRIu32 " is past the end of the string", - stop - ); - } - - return str.substr(startIndex, index - startIndex); -} - -static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len) { - size_t strLen = str.length(); - size_t index = 0; - uint32_t state = UTF8_ACCEPT; - uint32_t codepoint = 0; - uint32_t curPos = 1; - - // Advance to starting position in source string. - while (index < strLen && curPos < pos) { - switch (decode(&state, &codepoint, str[index])) { - case UTF8_REJECT: - errorInvalidUTF8Byte(str[index], "STRSUB"); - state = UTF8_ACCEPT; - // fallthrough - case UTF8_ACCEPT: - ++curPos; - break; - } - ++index; - } - - // A position 1 past the end of the string is allowed, but will trigger the - // "Length too big" warning below if the length is nonzero. - if (index >= strLen && pos > curPos) { - warning( - WARNING_BUILTIN_ARG, "STRSUB: Position %" PRIu32 " is past the end of the string", pos - ); - } - - size_t startIndex = index; - uint32_t curLen = 0; - - // Compute the result length in bytes. - while (index < strLen && curLen < len) { - switch (decode(&state, &codepoint, str[index])) { - case UTF8_REJECT: - errorInvalidUTF8Byte(str[index], "STRSUB"); - state = UTF8_ACCEPT; - // fallthrough - case UTF8_ACCEPT: - ++curLen; - break; - } - ++index; - } - - // Check for partial code point. - if (state != UTF8_ACCEPT) { - error("STRSUB: Incomplete UTF-8 character"); - ++curLen; - } - - if (curLen < len) { - warning(WARNING_BUILTIN_ARG, "STRSUB: Length too big: %" PRIu32, len); - } - - return str.substr(startIndex, index - startIndex); -} - -static size_t charlenUTF8(std::string const &str) { - std::string_view view = str; - size_t len; - - for (len = 0; charmap_ConvertNext(view, nullptr); ++len) {} - - return len; -} - -static std::string strcharUTF8(std::string const &str, uint32_t idx) { - std::string_view view = str; - size_t charLen = 1; - - // Advance to starting index in source string. - for (uint32_t curIdx = 0; charLen && curIdx < idx; ++curIdx) { - charLen = charmap_ConvertNext(view, nullptr); - } - - std::string_view start = view; - - if (!charmap_ConvertNext(view, nullptr)) { - warning( - WARNING_BUILTIN_ARG, - "STRCHAR: Index %" PRIu32 " is past the end of the string", - idx - ); - } - - start = start.substr(0, start.length() - view.length()); - return std::string(start); -} - -static std::string charsubUTF8(std::string const &str, uint32_t pos) { - std::string_view view = str; - size_t charLen = 1; - - // Advance to starting position in source string. - for (uint32_t curPos = 1; charLen && curPos < pos; ++curPos) { - charLen = charmap_ConvertNext(view, nullptr); - } - - std::string_view start = view; - - if (!charmap_ConvertNext(view, nullptr)) { - warning( - WARNING_BUILTIN_ARG, - "CHARSUB: Position %" PRIu32 " is past the end of the string", - pos - ); - } - - start = start.substr(0, start.length() - view.length()); - return std::string(start); -} - -static int32_t charcmp(std::string_view str1, std::string_view str2) { - std::vector seq1, seq2; - size_t idx1 = 0, idx2 = 0; - for (;;) { - if (idx1 >= seq1.size()) { - idx1 = 0; - seq1.clear(); - charmap_ConvertNext(str1, &seq1); - } - if (idx2 >= seq2.size()) { - idx2 = 0; - seq2.clear(); - charmap_ConvertNext(str2, &seq2); - } - if (seq1.empty() != seq2.empty()) { - return seq1.empty() ? -1 : 1; - } else if (seq1.empty()) { - return 0; - } else { - int32_t value1 = seq1[idx1++], value2 = seq2[idx2++]; - if (value1 != value2) { - return (value1 > value2) - (value1 < value2); - } - } - } -} - -static uint32_t adjustNegativeIndex(int32_t idx, size_t len, char const *functionName) { - // String functions adjust negative index arguments the same way, - // such that position -1 is the last character of a string. - if (idx < 0) { - idx += len; - } - if (idx < 0) { - warning(WARNING_BUILTIN_ARG, "%s: Index starts at 0", functionName); - idx = 0; - } - return static_cast(idx); -} - -static uint32_t adjustNegativePos(int32_t pos, size_t len, char const *functionName) { - // STRSUB and CHARSUB adjust negative position arguments the same way, - // such that position -1 is the last character of a string. - if (pos < 0) { - pos += len + 1; - } - if (pos < 1) { - warning(WARNING_BUILTIN_ARG, "%s: Position starts at 1", functionName); - pos = 1; - } - return static_cast(pos); -} - -static std::string strrpl(std::string_view str, std::string const &old, std::string const &rep) { - if (old.empty()) { - warning(WARNING_EMPTY_STRRPL, "STRRPL: Cannot replace an empty string"); - return std::string(str); - } - - std::string rpl; - - while (!str.empty()) { - auto pos = str.find(old); - if (pos == str.npos) { - rpl.append(str); - break; - } - rpl.append(str, 0, pos); - rpl.append(rep); - str.remove_prefix(pos + old.size()); - } - - return rpl; -} - -static std::string - strfmt(std::string const &spec, std::vector> const &args) { - std::string str; - size_t argIndex = 0; - - for (size_t i = 0; spec[i] != '\0'; ++i) { - int c = spec[i]; - - if (c != '%') { - str += c; - continue; - } - - c = spec[++i]; - - if (c == '%') { - str += c; - continue; - } - - FormatSpec fmt{}; - - while (c != '\0') { - fmt.useCharacter(c); - if (fmt.isFinished()) { - break; - } - c = spec[++i]; - } - - if (fmt.isEmpty()) { - error("STRFMT: Illegal '%%' at end of format string"); - str += '%'; - break; - } - - if (!fmt.isValid()) { - error("STRFMT: Invalid format spec for argument %zu", argIndex + 1); - str += '%'; - } else if (argIndex >= args.size()) { - // Will warn after formatting is done. - str += '%'; - } else if (std::holds_alternative(args[argIndex])) { - fmt.appendNumber(str, std::get(args[argIndex])); - } else { - fmt.appendString(str, std::get(args[argIndex])); - } - - ++argIndex; - } - - if (argIndex < args.size()) { - error("STRFMT: %zu unformatted argument(s)", args.size() - argIndex); - } else if (argIndex > args.size()) { - error( - "STRFMT: Not enough arguments for format spec, got: %zu, need: %zu", - args.size(), - argIndex - ); - } - - return str; -} - -static void compoundAssignment(std::string const &symName, RPNCommand op, int32_t constValue) { - Expression oldExpr, constExpr, newExpr; - int32_t newValue; - - oldExpr.makeSymbol(symName); - constExpr.makeNumber(constValue); - newExpr.makeBinaryOp(op, std::move(oldExpr), constExpr); - newValue = newExpr.getConstVal(); - sym_AddVar(symName, newValue); -} - -static void failAssert(AssertionType type) { - switch (type) { - case ASSERT_FATAL: - fatal("Assertion failed"); - case ASSERT_ERROR: - error("Assertion failed"); - break; - case ASSERT_WARN: - warning(WARNING_ASSERT, "Assertion failed"); - break; - } -} - -static void failAssertMsg(AssertionType type, std::string const &message) { - switch (type) { - case ASSERT_FATAL: - fatal("Assertion failed: %s", message.c_str()); - case ASSERT_ERROR: - error("Assertion failed: %s", message.c_str()); - break; - case ASSERT_WARN: - warning(WARNING_ASSERT, "Assertion failed: %s", message.c_str()); - break; - } -} diff --git a/src/asm/section.cpp b/src/asm/section.cpp index dc7c815a..841f5f8d 100644 --- a/src/asm/section.cpp +++ b/src/asm/section.cpp @@ -723,44 +723,44 @@ void sect_ConstByte(uint8_t byte) { writeByte(byte); } -void sect_ByteString(std::vector const &string) { +void sect_ByteString(std::vector const &str) { if (!requireCodeSection()) { return; } - for (int32_t unit : string) { + for (int32_t unit : str) { if (!checkNBit(unit, 8, "All character units")) { break; } } - for (int32_t unit : string) { + for (int32_t unit : str) { writeByte(static_cast(unit)); } } -void sect_WordString(std::vector const &string) { +void sect_WordString(std::vector const &str) { if (!requireCodeSection()) { return; } - for (int32_t unit : string) { + for (int32_t unit : str) { if (!checkNBit(unit, 16, "All character units")) { break; } } - for (int32_t unit : string) { + for (int32_t unit : str) { writeWord(static_cast(unit)); } } -void sect_LongString(std::vector const &string) { +void sect_LongString(std::vector const &str) { if (!requireCodeSection()) { return; } - for (int32_t unit : string) { + for (int32_t unit : str) { writeLong(static_cast(unit)); } }