From b6d77fbb9e720b48e424bfa4286b6a8a690c9cbe Mon Sep 17 00:00:00 2001 From: Rangi <35663410+Rangi42@users.noreply.github.com> Date: Mon, 14 Jul 2025 21:46:35 -0400 Subject: [PATCH] Implement `BYTELEN` and `STRBYTE` (#1744) --- man/rgbasm.5 | 2 ++ src/asm/lexer.cpp | 2 ++ src/asm/parser.y | 19 +++++++++++++++++++ test/asm/bytelen-strbyte.asm | 36 ++++++++++++++++++++++++++++++++++++ test/asm/bytelen-strbyte.err | 4 ++++ 5 files changed, 63 insertions(+) create mode 100644 test/asm/bytelen-strbyte.asm create mode 100644 test/asm/bytelen-strbyte.err diff --git a/man/rgbasm.5 b/man/rgbasm.5 index 967bb33e..cff8204b 100644 --- a/man/rgbasm.5 +++ b/man/rgbasm.5 @@ -604,6 +604,8 @@ The following functions operate on string expressions, but return integers. .It Fn STRCMP str1 str2 Ta Compares Ar str1 No and Ar str2 No according to ASCII ordering of their characters. Returns -1 if Ar str1 No is lower than Ar str2 Ns , 1 if Ar str1 No is greater than Ar str2 Ns , or 0 if they match. .It Fn STRFIND str sub Ta Returns the first index of Ar sub No in Ar str Ns , or -1 if it's not present. .It Fn STRRFIND str sub Ta Returns the last index of Ar sub No in Ar str Ns , or -1 if it's not present. +.It Fn BYTELEN str Ta Returns the number of bytes in Ar str . Pq Non-ASCII characters can be multiple bytes. +.It Fn STRBYTE str idx Ta Returns the byte value at Ar idx No in Ar str . .It Fn INCHARMAP str Ta Returns 1 if Ar str No has an entry in the current charmap, or 0 otherwise. .It Fn CHARLEN str Ta Returns the number of charmap entries in Ar str No with the current charmap. .It Fn CHARCMP str1 str2 Ta Compares Ar str1 No and Ar str2 No according to their charmap entry values with the current charmap. Returns -1 if Ar str1 No is lower than Ar str2 Ns , 1 if Ar str1 No is greater than Ar str2 Ns , or 0 if they match. 
diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index 56e4fc27..67d30d5a 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -238,6 +238,8 @@ static std::unordered_map ke {"BITWIDTH", T_(OP_BITWIDTH) }, {"TZCOUNT", T_(OP_TZCOUNT) }, + {"BYTELEN", T_(OP_BYTELEN) }, + {"STRBYTE", T_(OP_STRBYTE) }, {"STRCAT", T_(OP_STRCAT) }, {"STRCHAR", T_(OP_STRCHAR) }, {"STRCMP", T_(OP_STRCMP) }, diff --git a/src/asm/parser.y b/src/asm/parser.y index 2efa0cdc..0220acb3 100644 --- a/src/asm/parser.y +++ b/src/asm/parser.y @@ -284,6 +284,7 @@ %token OP_ATAN2 "ATAN2" %token OP_BANK "BANK" %token OP_BITWIDTH "BITWIDTH" +%token OP_BYTELEN "BYTELEN" %token OP_CEIL "CEIL" %token OP_CHARCMP "CHARCMP" %token OP_CHARLEN "CHARLEN" @@ -307,6 +308,7 @@ %token OP_SIN "SIN" %token OP_SIZEOF "SIZEOF" %token OP_STARTOF "STARTOF" +%token OP_STRBYTE "STRBYTE" %token OP_STRCAT "STRCAT" %token OP_STRCHAR "STRCHAR" %token OP_STRCMP "STRCMP" @@ -1592,6 +1594,9 @@ relocexpr_no_str: | OP_STRLEN LPAREN string RPAREN { $$.makeNumber(strlenUTF8($3, true)); } + | OP_BYTELEN LPAREN string RPAREN { + $$.makeNumber($3.length()); + } | OP_CHARLEN LPAREN string RPAREN { $$.makeNumber(charlenUTF8($3)); } @@ -1626,6 +1631,20 @@ relocexpr_no_str: $$.makeNumber(0); } } + | OP_STRBYTE LPAREN string COMMA iconst RPAREN { + size_t len = $3.length(); + uint32_t idx = adjustNegativeIndex($5, len, "STRBYTE"); + if (idx < len) { + $$.makeNumber(static_cast<uint8_t>($3[idx])); + } else { + warning( + WARNING_BUILTIN_ARG, + "STRBYTE: Index %" PRIu32 " is past the end of the string", + idx + ); + $$.makeNumber(0); + } + } | LPAREN relocexpr RPAREN { $$ = std::move($2); } diff --git a/test/asm/bytelen-strbyte.asm b/test/asm/bytelen-strbyte.asm new file mode 100644 index 00000000..71957dc7 --- /dev/null +++ b/test/asm/bytelen-strbyte.asm @@ -0,0 +1,36 @@ +assert bytelen("") == 0 +assert bytelen("ABC") == 3 +assert strbyte("ABC", 0) == $41 +assert strbyte("ABC", -1) == $43 + +charmap "ABC", 42 +assert bytelen("ABC") == 3 + +; 
characters: +; 1: U+72AC kanji (0xE7 0x8A 0xAC) +; 2: U+1F499 emoji (0xF0 0x9F 0x92 0x99) +; 3: U+0021 +def utf8 equs "犬💙!" +assert bytelen(#utf8) == 8 +assert strbyte(#utf8, 0) == $e7 +assert strbyte(#utf8, 4) == $9f +assert strbyte(#utf8, -2) == $99 +assert strbyte(#utf8, -1) == $21 + +; characters: +; 1: U+0041 A +; 2: U+0020 space +; 3: invalid byte 0xFE +; 4: invalid byte 0x81 +; 5: invalid byte 0xFF +; 6: U+0020 space +; 7: U+6F22 kanji (0xE6 0xBC 0xA2) +def invalid EQUS "A 漢" +assert bytelen(#invalid) == 9 +assert strbyte(#invalid, 0) == $41 +assert strbyte(#invalid, 4) == $ff +assert strbyte(#invalid, 8) == $a2 + +; out of bounds +assert strbyte("abc", -10) == $61 ; -10 clamped to 0 +assert strbyte("abc", 10) == 0 diff --git a/test/asm/bytelen-strbyte.err b/test/asm/bytelen-strbyte.err new file mode 100644 index 00000000..09835bfa --- /dev/null +++ b/test/asm/bytelen-strbyte.err @@ -0,0 +1,4 @@ +warning: bytelen-strbyte.asm(35): [-Wbuiltin-args] + STRBYTE: Index starts at 0 +warning: bytelen-strbyte.asm(36): [-Wbuiltin-args] + STRBYTE: Index 10 is past the end of the string