Make the len parameter optional in STRSUB(str, pos, len)

If the length is not specified, the substring continues to the end of the string.
This commit is contained in:
Rangi
2021-04-19 12:00:42 -04:00
committed by Eldred Habert
parent b1e6c73197
commit dc5b7802c8
5 changed files with 67 additions and 36 deletions

View File

@@ -105,25 +105,18 @@ static size_t strlenUTF8(char const *s)
return len;
}
static void strsubUTF8(char *dest, size_t destLen, char const *src, int32_t pos, uint32_t len)
static void strsubUTF8(char *dest, size_t destLen, char const *src, uint32_t pos, uint32_t len)
{
size_t srcIndex = 0;
size_t destIndex = 0;
uint32_t state = 0;
uint32_t codep = 0;
uint32_t curLen = 0;
if (pos < 1) {
pos += strlenUTF8(src);
if (pos < 1) {
warning(WARNING_BUILTIN_ARG, "STRSUB: Position starts at 1\n");
pos = 1;
}
}
uint32_t curPos = 1;
/* Advance to starting position in source string. */
for (uint32_t curPos = 1; src[srcIndex] && curPos < pos; srcIndex++) {
switch (decode(&state, &codep, src[srcIndex])) {
while (src[srcIndex] && curPos < pos) {
switch (decode(&state, &codep, src[srcIndex++])) {
case 1:
fatalerror("STRSUB: Invalid UTF-8 character\n");
break;
@@ -133,9 +126,13 @@ static void strsubUTF8(char *dest, size_t destLen, char const *src, int32_t pos,
}
}
if (!src[srcIndex] && len)
/*
* A position 1 past the end of the string is allowed, but will trigger the
* "Length too big" warning below if the length is nonzero.
*/
if (!src[srcIndex] && pos > curPos)
warning(WARNING_BUILTIN_ARG,
"STRSUB: Position %" PRId32 " is past the end of the string\n", pos);
"STRSUB: Position %" PRIu32 " is past the end of the string\n", pos);
/* Copy from source to destination. */
while (src[srcIndex] && destIndex < destLen - 1 && curLen < len) {
@@ -151,7 +148,7 @@ static void strsubUTF8(char *dest, size_t destLen, char const *src, int32_t pos,
}
if (curLen < len)
warning(WARNING_BUILTIN_ARG, "STRSUB: Length too big: %lu\n", (unsigned long)len);
warning(WARNING_BUILTIN_ARG, "STRSUB: Length too big: %" PRIu32 "\n", len);
/* Check for partial code point. */
if (state != 0)
@@ -170,18 +167,10 @@ static size_t charlenUTF8(char const *s)
return len;
}
static void charsubUTF8(char *dest, char const *src, int32_t pos)
static void charsubUTF8(char *dest, char const *src, uint32_t pos)
{
size_t charLen = 1;
if (pos < 1) {
pos += charlenUTF8(src);
if (pos < 1) {
warning(WARNING_BUILTIN_ARG, "CHARSUB: Position starts at 1\n");
pos = 1;
}
}
/* Advance to starting position in source string. */
for (uint32_t curPos = 1; charLen && curPos < pos; curPos++)
charLen = charmap_ConvertNext(&src, NULL);
@@ -190,7 +179,7 @@ static void charsubUTF8(char *dest, char const *src, int32_t pos)
if (!charmap_ConvertNext(&src, NULL))
warning(WARNING_BUILTIN_ARG,
"CHARSUB: Position %" PRId32 " is past the end of the string\n", pos);
"CHARSUB: Position %" PRIu32 " is past the end of the string\n", pos);
/* Copy from source to destination. */
memcpy(dest, start, src - start);
@@ -198,6 +187,22 @@ static void charsubUTF8(char *dest, char const *src, int32_t pos)
dest[src - start] = '\0';
}
/*
 * Converts a user-supplied, possibly non-positive `pos` argument into a
 * positive 1-based position.
 *
 * STRSUB and CHARSUB adjust non-positive `pos` arguments the same way: `len`
 * is added to them, such that position 0 is the last character of a string,
 * -1 is the one before it, and so on.
 *
 * @param pos The position as given; values below 1 count from the end
 * @param len The length that is added to a non-positive `pos`
 * @param functionName Name printed as the prefix of the warning message
 * @return The adjusted position, always at least 1. If the position is still
 *         below 1 after adding `len`, a warning is emitted and 1 is returned.
 */
static uint32_t adjustNegativePos(int32_t pos, size_t len, char const *functionName)
{
	/* Positions that are already positive are used as-is. */
	if (pos >= 1)
		return (uint32_t)pos;

	/*
	 * Do the addition in a wide signed type. `pos += len` would convert `pos`
	 * to the unsigned `size_t` and then narrow the sum back to `int32_t`;
	 * that narrowing is implementation-defined whenever the sum is negative.
	 */
	int64_t adjusted = (int64_t)pos + (int64_t)len;

	if (adjusted < 1) {
		warning(WARNING_BUILTIN_ARG, "%s: Position starts at 1\n", functionName);
		return 1;
	}

	return (uint32_t)adjusted;
}
static void strrpl(char *dest, size_t destLen, char const *src, char const *old, char const *new)
{
size_t oldLen = strlen(old);
@@ -1531,10 +1536,22 @@ const_no_str : relocexpr_no_str {
string : T_STRING
| T_OP_STRSUB T_LPAREN string T_COMMA const T_COMMA uconst T_RPAREN {
strsubUTF8($$, sizeof($$), $3, $5, $7);
size_t len = strlenUTF8($3);
uint32_t pos = adjustNegativePos($5, len, "STRSUB");
strsubUTF8($$, sizeof($$), $3, pos, $7);
}
| T_OP_STRSUB T_LPAREN string T_COMMA const T_RPAREN {
size_t len = strlenUTF8($3);
uint32_t pos = adjustNegativePos($5, len, "STRSUB");
strsubUTF8($$, sizeof($$), $3, pos, pos > len ? 0 : len + 1 - pos);
}
| T_OP_CHARSUB T_LPAREN string T_COMMA const T_RPAREN {
charsubUTF8($$, $3, $5);
size_t len = charlenUTF8($3);
uint32_t pos = adjustNegativePos($5, len, "CHARSUB");
charsubUTF8($$, $3, pos);
}
| T_OP_STRCAT T_LPAREN T_RPAREN {
$$[0] = '\0';

View File

@@ -394,7 +394,7 @@ Most of them return a string, however some of these functions actually return an
.It Fn STRCMP str1 str2 Ta Returns -1 if Ar str1 No is alphabetically lower than Ar str2 No , zero if they match, 1 if Ar str1 No is greater than Ar str2 .
.It Fn STRIN str1 str2 Ta Returns the first position of Ar str2 No in Ar str1 No or zero if it's not present Pq first character is position 1 .
.It Fn STRRIN str1 str2 Ta Returns the last position of Ar str2 No in Ar str1 No or zero if it's not present Pq first character is position 1 .
.It Fn STRSUB str pos len Ta Returns a substring from Ar str No starting at Ar pos No (first character is position 1) and Ar len No characters long. Zero or negative Ar pos No counts from the end, as if Qo STRLEN(str) Qc were added to it.
.It Fn STRSUB str pos len Ta Returns a substring from Ar str No starting at Ar pos No (first character is position 1) and Ar len No characters long. Zero or negative Ar pos No counts from the end, as if Qo STRLEN(str) Qc were added to it. If Ar len No is not specified, the substring continues to the end of Ar str .
.It Fn STRUPR str Ta Returns Ar str No with all letters in uppercase.
.It Fn STRLWR str Ta Returns Ar str No with all letters in lowercase.
.It Fn STRRPL str old new Ta Returns Ar str No with each non-overlapping occurrence of the substring Ar old No replaced with Ar new .

View File

@@ -1,7 +1,7 @@
SECTION "sec", ROM0
xstrsub: MACRO
PRINTLN STRSUB(\1, \2, \3)
PRINTLN STRSUB(\#)
ENDM
xstrsub "ABC", 1, 1
@@ -10,12 +10,17 @@ ENDM
xstrsub "ABC", -2, 1
xstrsub "ABC", -1, 1
xstrsub "ABC", 0, 1
xstrsub "ABC", 2
xstrsub "ABC", -1
xstrsub "ABC", 5
xstrsub "ABC", -5
xstrsub "ABC", 1, 2
xstrsub "ABC", 2, 2
xstrsub "ABC", 2, 32
xstrsub "ABC", 2, 300
xstrsub "ABC", -3, 300
xstrsub "ABC", 4, 0
xstrsub "ABC", 5, 0
xstrsub "ABC", 4, 1
xstrsub "カタカナ", 1, 2
xstrsub "カタカナ", 3, 2

View File

@@ -1,14 +1,18 @@
warning: strsub.asm(15) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
STRSUB: Length too big: 32
STRSUB: Position 5 is past the end of the string
warning: strsub.asm(16) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
STRSUB: Length too big: 300
warning: strsub.asm(17) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
STRSUB: Position starts at 1
warning: strsub.asm(17) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
warning: strsub.asm(19) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
STRSUB: Length too big: 32
warning: strsub.asm(20) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
STRSUB: Length too big: 300
warning: strsub.asm(19) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
STRSUB: Position 4 is past the end of the string
warning: strsub.asm(19) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
warning: strsub.asm(21) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
STRSUB: Position starts at 1
warning: strsub.asm(21) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
STRSUB: Length too big: 300
warning: strsub.asm(23) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
STRSUB: Position 5 is past the end of the string
warning: strsub.asm(24) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
STRSUB: Length too big: 1
warning: strsub.asm(22) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
warning: strsub.asm(27) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
STRSUB: Length too big: 10

View File

@@ -4,6 +4,10 @@ C
A
B
C
BC
BC
ABC
AB
BC
BC
@@ -11,6 +15,7 @@ BC
ABC
カタ
カナ
カナ