diff --git a/man/rgbasm.5 b/man/rgbasm.5 index 24ee103e..e466a4f2 100644 --- a/man/rgbasm.5 +++ b/man/rgbasm.5 @@ -414,12 +414,21 @@ There are a number of escape sequences you can use within a string: .Pp Multi-line strings are contained in triple quotes .Pq Ql \&"\&"\&"for instance\&"\&"\&" . -Escape sequences work the same way in multi-line strings; however, literal newline -characters will be included as-is, without needing to escape them with +Escape sequences work the same way in multi-line strings; however, literal newline characters will be included as-is, without needing to escape them with .Ql \er or .Ql \en . .Pp +Raw strings are prefixed by a hash +.Sq # . +Inside them, backslashes and braces are treated like regular characters, so they will not be expanded as macro arguments, interpolated symbols, or escape sequences. +For example, the raw string +.Ql #"\et\e1{s}\e" +is equivalent to the regular string +.Ql \&"\e\et\e\e1\e{s}\e\e" . +(Note that this prevents raw strings from including the double quote character.) +Raw strings also may be contained in triple quotes for them to be multi-line, so they can include literal newline or quote characters (although still not three quotes in a row). +.Pp The following functions operate on string expressions. Most of them return a string, however some of these functions actually return an integer and can be used as part of an integer expression! 
.Bl -column "STRSUB(str, pos, len)" diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index d4ad4010..5342afc4 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -1459,7 +1459,7 @@ static size_t appendEscapedSubstring(char const *str, size_t i) return i; } -static void readString(void) +static void readString(bool raw) { lexerState->disableMacroArgs = true; lexerState->disableInterpolation = true; @@ -1517,6 +1517,8 @@ static void readString(void) goto finish; case '\\': // Character escape or macro arg + if (raw) + break; c = peek(); switch (c) { case '\\': @@ -1581,6 +1583,8 @@ static void readString(void) break; case '{': // Symbol interpolation + if (raw) + break; // We'll be exiting the string scope, so re-enable expansions // (Not interpolations, since they're handled by the function itself...) lexerState->disableMacroArgs = false; @@ -1609,7 +1613,7 @@ finish: lexerState->disableInterpolation = false; } -static size_t appendStringLiteral(size_t i) +static size_t appendStringLiteral(size_t i, bool raw) { lexerState->disableMacroArgs = true; lexerState->disableInterpolation = true; @@ -1670,6 +1674,8 @@ static size_t appendStringLiteral(size_t i) goto finish; case '\\': // Character escape or macro arg + if (raw) + break; c = peek(); switch (c) { // Character escape @@ -1725,6 +1731,8 @@ static size_t appendStringLiteral(size_t i) break; case '{': // Symbol interpolation + if (raw) + break; // We'll be exiting the string scope, so re-enable expansions // (Not interpolations, since they're handled by the function itself...) lexerState->disableMacroArgs = false; @@ -1979,7 +1987,7 @@ static int yylex_NORMAL(void) // Handle strings case '"': - readString(); + readString(false); return T_STRING; // Handle newlines and EOF @@ -2001,6 +2009,16 @@ static int yylex_NORMAL(void) readLineContinuation(); break; + // Handle raw strings... 
or fall through if '#' is not followed by '"' + + case '#': + if (peek() == '"') { + shiftChar(); + readString(true); + return T_STRING; + } + // fallthrough + // Handle identifiers... or report garbage characters default: @@ -2079,7 +2097,16 @@ static int yylex_RAW(void) switch (c) { case '"': // String literals inside macro args shiftChar(); - i = appendStringLiteral(i); + i = appendStringLiteral(i, false); + break; + + case '#': // Raw string literals inside macro args + append_yylval_string(c); + shiftChar(); + if (peek() == '"') { + shiftChar(); + i = appendStringLiteral(i, true); + } break; case ';': // Comments inside macro args diff --git a/test/asm/raw-strings.asm b/test/asm/raw-strings.asm new file mode 100644 index 00000000..2ee6df72 --- /dev/null +++ b/test/asm/raw-strings.asm @@ -0,0 +1,76 @@ +DEF q EQUS "\"" + + assert !strcmp( \ + #"\t\1{s}\", \ + "\\t\\1\{s}\\" ) + assert !strcmp( \ + #"\a,\b,\1,\2", \ + "\\a,\\b,\\1,\\2" ) + assert !strcmp( \ + #"""new +line""", \ + "new\nline" ) + assert !strcmp( \ + #"""new\nline""", \ + """new\\nline""" ) + assert !strcmp( \ + #"/\w+(\+\w+)?@[a-z]+\.[a-z]{2,3}/i", \ + "/\\w+(\\+\\w+)?@[a-z]+\\.[a-z]\{2,3}/i" ) + assert !strcmp( \ + #{q}{q}{q}rs", \ + {q}\{q}\{q}rs" ) + assert !strcmp( \ + #"", \ + "" ) + assert !strcmp( \ + #"""""", \ + """""" ) + +MACRO test + REDEF raw EQUS \1 + REDEF plain EQUS \2 + assert !strcmp("{raw}", "{plain}") +ENDM + + ; test lexing string literals within macro args + test \ + #"\t\1{s}\", \ + "\\t\\1\{s}\\" + test \ + #"\a,\b,\1,\2", \ + "\\a,\\b,\\1,\\2" + test \ + #"""new, +line""", \ + "new,\nline" + test \ + #"""new,\nline""", \ + """new,\\nline""" + test \ + #"/\w+(\+\w+)?@[a-z]+\.[a-z]{2,3}/i", \ + "/\\w+(\\+\\w+)?@[a-z]+\\.[a-z]\{2,3}/i" + test \ + #{q}{q}{q}rs", \ + {q}\{q}\{q}rs" + test \ + #"", \ + "" + test \ + #"""""", \ + """""" + +MACRO echo + println "\#" +ENDM + +DEF s EQUS "foo" + echo \ + # "{s}", \ + #"{s}", \ ; raw! 
+ #raw"{s}", \ + #/*comment*/"{s}" + echo \ + # """{s}""", \ + #"""{s}""", \ ; raw! + #raw"""{s}""", \ + #/*comment*/"""{s}""" diff --git a/test/asm/raw-strings.err b/test/asm/raw-strings.err new file mode 100644 index 00000000..e69de29b diff --git a/test/asm/raw-strings.out b/test/asm/raw-strings.out new file mode 100644 index 00000000..bfda1431 --- /dev/null +++ b/test/asm/raw-strings.out @@ -0,0 +1,2 @@ +# "foo",#"{s}",#raw"foo",#"foo" +# """foo""",#"""{s}""",#raw"""foo""",#"""foo"""