Implement #"raw strings" (#1122)

Fixes #1121
This commit is contained in:
Rangi
2023-11-07 17:48:23 -05:00
committed by GitHub
parent 9fc088dcb0
commit 8eeb40cca8
5 changed files with 120 additions and 6 deletions

View File

@@ -414,12 +414,21 @@ There are a number of escape sequences you can use within a string:
.Pp .Pp
Multi-line strings are contained in triple quotes Multi-line strings are contained in triple quotes
.Pq Ql \&"\&"\&"for instance\&"\&"\&" . .Pq Ql \&"\&"\&"for instance\&"\&"\&" .
Escape sequences work the same way in multi-line strings; however, literal newline Escape sequences work the same way in multi-line strings; however, literal newline characters will be included as-is, without needing to escape them with
characters will be included as-is, without needing to escape them with
.Ql \er .Ql \er
or or
.Ql \en . .Ql \en .
.Pp .Pp
Raw strings are prefixed by a hash
.Sq # .
Inside them, backslashes and braces are treated like regular characters, so they will not be expanded as macro arguments, interpolated symbols, or escape sequences.
For example, the raw string
.Ql #\&"\et\e1{s}\e\&"
is equivalent to the regular string
.Ql \&"\e\et\e\e1\e{s}\e\e\&" .
(Note that this prevents raw strings from including the double quote character.)
Raw strings may also be contained in triple quotes to make them multi-line, so they can include literal newline or quote characters (although still not three quotes in a row).
.Pp
The following functions operate on string expressions. The following functions operate on string expressions.
Most of them return a string, however some of these functions actually return an integer and can be used as part of an integer expression! Most of them return a string, however some of these functions actually return an integer and can be used as part of an integer expression!
.Bl -column "STRSUB(str, pos, len)" .Bl -column "STRSUB(str, pos, len)"

View File

@@ -1459,7 +1459,7 @@ static size_t appendEscapedSubstring(char const *str, size_t i)
return i; return i;
} }
static void readString(void) static void readString(bool raw)
{ {
lexerState->disableMacroArgs = true; lexerState->disableMacroArgs = true;
lexerState->disableInterpolation = true; lexerState->disableInterpolation = true;
@@ -1517,6 +1517,8 @@ static void readString(void)
goto finish; goto finish;
case '\\': // Character escape or macro arg case '\\': // Character escape or macro arg
if (raw)
break;
c = peek(); c = peek();
switch (c) { switch (c) {
case '\\': case '\\':
@@ -1581,6 +1583,8 @@ static void readString(void)
break; break;
case '{': // Symbol interpolation case '{': // Symbol interpolation
if (raw)
break;
// We'll be exiting the string scope, so re-enable expansions // We'll be exiting the string scope, so re-enable expansions
// (Not interpolations, since they're handled by the function itself...) // (Not interpolations, since they're handled by the function itself...)
lexerState->disableMacroArgs = false; lexerState->disableMacroArgs = false;
@@ -1609,7 +1613,7 @@ finish:
lexerState->disableInterpolation = false; lexerState->disableInterpolation = false;
} }
static size_t appendStringLiteral(size_t i) static size_t appendStringLiteral(size_t i, bool raw)
{ {
lexerState->disableMacroArgs = true; lexerState->disableMacroArgs = true;
lexerState->disableInterpolation = true; lexerState->disableInterpolation = true;
@@ -1670,6 +1674,8 @@ static size_t appendStringLiteral(size_t i)
goto finish; goto finish;
case '\\': // Character escape or macro arg case '\\': // Character escape or macro arg
if (raw)
break;
c = peek(); c = peek();
switch (c) { switch (c) {
// Character escape // Character escape
@@ -1725,6 +1731,8 @@ static size_t appendStringLiteral(size_t i)
break; break;
case '{': // Symbol interpolation case '{': // Symbol interpolation
if (raw)
break;
// We'll be exiting the string scope, so re-enable expansions // We'll be exiting the string scope, so re-enable expansions
// (Not interpolations, since they're handled by the function itself...) // (Not interpolations, since they're handled by the function itself...)
lexerState->disableMacroArgs = false; lexerState->disableMacroArgs = false;
@@ -1979,7 +1987,7 @@ static int yylex_NORMAL(void)
// Handle strings // Handle strings
case '"': case '"':
readString(); readString(false);
return T_STRING; return T_STRING;
// Handle newlines and EOF // Handle newlines and EOF
@@ -2001,6 +2009,16 @@ static int yylex_NORMAL(void)
readLineContinuation(); readLineContinuation();
break; break;
// Handle raw strings... or fall through if '#' is not followed by '"'
case '#':
if (peek() == '"') {
shiftChar();
readString(true);
return T_STRING;
}
// fallthrough
// Handle identifiers... or report garbage characters // Handle identifiers... or report garbage characters
default: default:
@@ -2079,7 +2097,16 @@ static int yylex_RAW(void)
switch (c) { switch (c) {
case '"': // String literals inside macro args case '"': // String literals inside macro args
shiftChar(); shiftChar();
i = appendStringLiteral(i); i = appendStringLiteral(i, false);
break;
case '#': // Raw string literals inside macro args
append_yylval_string(c);
shiftChar();
if (peek() == '"') {
shiftChar();
i = appendStringLiteral(i, true);
}
break; break;
case ';': // Comments inside macro args case ';': // Comments inside macro args

76
test/asm/raw-strings.asm Normal file
View File

@@ -0,0 +1,76 @@
; q holds a single double-quote character; it is interpolated in one of the assertions below
DEF q EQUS "\""
; Each assertion compares a raw string literal (prefixed with a hash) against a
; regular string literal that spells out the same characters with explicit escapes;
; strcmp returning zero means the two are identical
assert !strcmp( \
#"\t\1{s}\", \
"\\t\\1\{s}\\" )
assert !strcmp( \
#"\a,\b,\1,\2", \
"\\a,\\b,\\1,\\2" )
assert !strcmp( \
#"""new
line""", \
"new\nline" )
assert !strcmp( \
#"""new\nline""", \
"""new\\nline""" )
assert !strcmp( \
#"/\w+(\+\w+)?@[a-z]+\.[a-z]{2,3}/i", \
"/\\w+(\\+\\w+)?@[a-z]+\\.[a-z]\{2,3}/i" )
assert !strcmp( \
#{q}{q}{q}rs", \
{q}\{q}\{q}rs" )
assert !strcmp( \
#"", \
"" )
assert !strcmp( \
#"""""", \
"""""" )
; test: takes two string literal arguments, stores the first in the string symbol
; raw and the second in the string symbol plain, then asserts that the two symbols
; interpolate to identical text
MACRO test
REDEF raw EQUS \1
REDEF plain EQUS \2
assert !strcmp("{raw}", "{plain}")
ENDM
; Test lexing string literals within macro args: each invocation passes a raw
; string literal followed by the regular string literal it is expected to match
test \
#"\t\1{s}\", \
"\\t\\1\{s}\\"
test \
#"\a,\b,\1,\2", \
"\\a,\\b,\\1,\\2"
test \
#"""new,
line""", \
"new,\nline"
test \
#"""new,\nline""", \
"""new,\\nline"""
test \
#"/\w+(\+\w+)?@[a-z]+\.[a-z]{2,3}/i", \
"/\\w+(\\+\\w+)?@[a-z]+\\.[a-z]\{2,3}/i"
test \
#{q}{q}{q}rs", \
{q}\{q}\{q}rs"
test \
#"", \
""
test \
#"""""", \
""""""
; echo: prints all of the arguments it was invoked with on a single line
; (the expected output shows them separated by commas)
MACRO echo
println "\#"
ENDM
; s is the symbol interpolated by the string arguments below
DEF s EQUS "foo"
; Only the arguments marked raw! place the hash directly before the opening quote.
; The others separate the two with a space, the letters raw, or a block comment,
; and the expected output shows s interpolated to foo in all of those.
echo \
# "{s}", \
#"{s}", \ ; raw!
#raw"{s}", \
#/*comment*/"{s}"
; Same four cases as above, using triple-quoted strings
echo \
# """{s}""", \
#"""{s}""", \ ; raw!
#raw"""{s}""", \
#/*comment*/"""{s}"""

0
test/asm/raw-strings.err Normal file
View File

2
test/asm/raw-strings.out Normal file
View File

@@ -0,0 +1,2 @@
# "foo",#"{s}",#raw"foo",#"foo"
# """foo""",#"""{s}""",#raw"""foo""",#"""foo"""