Implement BYTELEN and STRBYTE (#1744)

This commit is contained in:
Rangi
2025-07-14 21:46:35 -04:00
committed by GitHub
parent 8a19c5c30a
commit b6d77fbb9e
5 changed files with 63 additions and 0 deletions

View File

@@ -604,6 +604,8 @@ The following functions operate on string expressions, but return integers.
.It Fn STRCMP str1 str2 Ta Compares Ar str1 No and Ar str2 No according to ASCII ordering of their characters. Returns -1 if Ar str1 No is lower than Ar str2 Ns , 1 if Ar str1 No is greater than Ar str2 Ns , or 0 if they match.
.It Fn STRFIND str sub Ta Returns the first index of Ar sub No in Ar str Ns , or -1 if it's not present.
.It Fn STRRFIND str sub Ta Returns the last index of Ar sub No in Ar str Ns , or -1 if it's not present.
.It Fn BYTELEN str Ta Returns the number of bytes in Ar str . Pq Non-ASCII characters can be multiple bytes.
.It Fn STRBYTE str idx Ta Returns the byte value at Ar idx No in Ar str .
.It Fn INCHARMAP str Ta Returns 1 if Ar str No has an entry in the current charmap, or 0 otherwise.
.It Fn CHARLEN str Ta Returns the number of charmap entries in Ar str No with the current charmap.
.It Fn CHARCMP str1 str2 Ta Compares Ar str1 No and Ar str2 No according to their charmap entry values with the current charmap. Returns -1 if Ar str1 No is lower than Ar str2 Ns , 1 if Ar str1 No is greater than Ar str2 Ns , or 0 if they match.

View File

@@ -238,6 +238,8 @@ static std::unordered_map<std::string, int, CaseInsensitive, CaseInsensitive> ke
{"BITWIDTH", T_(OP_BITWIDTH) }, {"BITWIDTH", T_(OP_BITWIDTH) },
{"TZCOUNT", T_(OP_TZCOUNT) }, {"TZCOUNT", T_(OP_TZCOUNT) },
{"BYTELEN", T_(OP_BYTELEN) },
{"STRBYTE", T_(OP_STRBYTE) },
{"STRCAT", T_(OP_STRCAT) }, {"STRCAT", T_(OP_STRCAT) },
{"STRCHAR", T_(OP_STRCHAR) }, {"STRCHAR", T_(OP_STRCHAR) },
{"STRCMP", T_(OP_STRCMP) }, {"STRCMP", T_(OP_STRCMP) },

View File

@@ -284,6 +284,7 @@
%token OP_ATAN2 "ATAN2" %token OP_ATAN2 "ATAN2"
%token OP_BANK "BANK" %token OP_BANK "BANK"
%token OP_BITWIDTH "BITWIDTH" %token OP_BITWIDTH "BITWIDTH"
%token OP_BYTELEN "BYTELEN"
%token OP_CEIL "CEIL" %token OP_CEIL "CEIL"
%token OP_CHARCMP "CHARCMP" %token OP_CHARCMP "CHARCMP"
%token OP_CHARLEN "CHARLEN" %token OP_CHARLEN "CHARLEN"
@@ -307,6 +308,7 @@
%token OP_SIN "SIN" %token OP_SIN "SIN"
%token OP_SIZEOF "SIZEOF" %token OP_SIZEOF "SIZEOF"
%token OP_STARTOF "STARTOF" %token OP_STARTOF "STARTOF"
%token OP_STRBYTE "STRBYTE"
%token OP_STRCAT "STRCAT" %token OP_STRCAT "STRCAT"
%token OP_STRCHAR "STRCHAR" %token OP_STRCHAR "STRCHAR"
%token OP_STRCMP "STRCMP" %token OP_STRCMP "STRCMP"
@@ -1592,6 +1594,9 @@ relocexpr_no_str:
| OP_STRLEN LPAREN string RPAREN { | OP_STRLEN LPAREN string RPAREN {
$$.makeNumber(strlenUTF8($3, true)); $$.makeNumber(strlenUTF8($3, true));
} }
| OP_BYTELEN LPAREN string RPAREN {
    // BYTELEN(str): evaluates to the length of the string operand as
    // reported by its length() member, i.e. a count of bytes.
    size_t const byteCount = $3.length();
    $$.makeNumber(byteCount);
}
| OP_CHARLEN LPAREN string RPAREN { | OP_CHARLEN LPAREN string RPAREN {
$$.makeNumber(charlenUTF8($3)); $$.makeNumber(charlenUTF8($3));
} }
@@ -1626,6 +1631,20 @@ relocexpr_no_str:
$$.makeNumber(0); $$.makeNumber(0);
} }
} }
| OP_STRBYTE LPAREN string COMMA iconst RPAREN {
    // STRBYTE(str, idx): evaluates to the byte of `str` located at `idx`.
    // The raw index is first normalized by adjustNegativeIndex(), which is
    // given the byte length and the function name for its own diagnostics.
    size_t byteCount = $3.length();
    uint32_t const pos = adjustNegativeIndex($5, byteCount, "STRBYTE");
    if (pos >= byteCount) {
        // Past the end of the string: warn, and evaluate to 0.
        warning(
            WARNING_BUILTIN_ARG,
            "STRBYTE: Index %" PRIu32 " is past the end of the string",
            pos
        );
        $$.makeNumber(0);
    } else {
        // Convert through uint8_t so the result is always in 0..255,
        // whatever the signedness of the string's element type.
        $$.makeNumber(static_cast<uint8_t>($3[pos]));
    }
}
| LPAREN relocexpr RPAREN { | LPAREN relocexpr RPAREN {
$$ = std::move($2); $$ = std::move($2);
} }

View File

@@ -0,0 +1,36 @@
assert bytelen("") == 0 ; an empty string has no bytes
assert bytelen("ABC") == 3
assert strbyte("ABC", 0) == $41 ; index 0 is the first byte
assert strbyte("ABC", -1) == $43 ; -1 selects the last byte
charmap "ABC", 42
assert bytelen("ABC") == 3 ; same result once "ABC" has a charmap entry
; characters:
; 1: U+72AC kanji (0xE7 0x8A 0xAC)
; 2: U+1F499 emoji (0xF0 0x9F 0x92 0x99)
; 3: U+0021
def utf8 equs "犬💙!"
assert bytelen(#utf8) == 8 ; 3 + 4 + 1 bytes
assert strbyte(#utf8, 0) == $e7 ; first byte of the kanji
assert strbyte(#utf8, 4) == $9f ; second byte of the emoji
assert strbyte(#utf8, -2) == $99 ; last byte of the emoji
assert strbyte(#utf8, -1) == $21 ; the trailing "!"
; characters:
; 1: U+0041 A
; 2: U+0020 space
; 3: invalid byte 0xFE
; 4: invalid byte 0x81
; 5: invalid byte 0xFF
; 6: U+0020 space
; 7: U+6F22 kanji (0xE6 0xBC 0xA2)
def invalid EQUS "A <20><><EFBFBD> 漢"
assert bytelen(#invalid) == 9 ; 1 + 1 + 1 + 1 + 1 + 1 + 3 bytes
assert strbyte(#invalid, 0) == $41
assert strbyte(#invalid, 4) == $ff ; the invalid byte is returned as-is
assert strbyte(#invalid, 8) == $a2 ; last byte of the kanji
; out of bounds
assert strbyte("abc", -10) == $61 ; -10 clamped to 0
assert strbyte("abc", 10) == 0 ; past the end evaluates to 0

View File

@@ -0,0 +1,4 @@
warning: bytelen-strbyte.asm(35): [-Wbuiltin-args]
STRBYTE: Index starts at 0
warning: bytelen-strbyte.asm(36): [-Wbuiltin-args]
STRBYTE: Index 10 is past the end of the string