Handle string literals within macro arguments (#685)

Fixes #683 and #691

The lexer's raw mode for reading macro args already attempted
to handle semicolons inside string literals, versus outside ones
which start comments. This change reuses the same function for
reading string literals in normal and raw modes, also handling:

- Commas in strings versus between macro args
- Character escapes
- {Interpolations} and \1-\9 args inside vs. outside strings
- Multi-line string literals

Macro args now support the same character escapes as string literals ('\\', '\"', '\{', '\}', '\n', '\r', '\t'), in addition to '\,'.

A consistent model for expanding macro args and interpolations,
within macro args, string literals, and normal context:

- "{S}" should always equal the contents of S
- "\1" should always act like quoting the value of \1
This commit is contained in:
Rangi
2021-02-16 19:44:25 -08:00
committed by GitHub
parent 8c0275480c
commit d049ffc0f0
17 changed files with 360 additions and 79 deletions

View File

@@ -845,7 +845,6 @@ restart:
} }
} else if (c == '{' && !lexerState->disableInterpolation) { } else if (c == '{' && !lexerState->disableInterpolation) {
/* If character is an open brace, do symbol interpolation */ /* If character is an open brace, do symbol interpolation */
lexerState->macroArgScanDistance++;
shiftChars(1); shiftChars(1);
char const *ptr = readInterpolation(); char const *ptr = readInterpolation();
@@ -1247,7 +1246,7 @@ static void readGfxConstant(void)
yylval.nConstValue = bp1 << 8 | bp0; yylval.nConstValue = bp1 << 8 | bp0;
} }
/* Function to read identifiers & keywords */ /* Functions to read identifiers & keywords */
static bool startsIdentifier(int c) static bool startsIdentifier(int c)
{ {
@@ -1373,51 +1372,39 @@ static char const *readInterpolation(void)
return NULL; return NULL;
} }
static int appendMacroArg(char const *str, int i) #define append_yylval_tzString(c) do { \
if (i < sizeof(yylval.tzString)) \
yylval.tzString[i++] = (c); \
} while (0)
static size_t appendEscapedSubstring(char const *str, size_t i)
{ {
while (*str && i < sizeof(yylval.tzString)) { /* Copy one extra to flag overflow */
while (*str) {
int c = *str++; int c = *str++;
if (c != '\\') { /* Escape characters that need escaping */
yylval.tzString[i++] = c;
continue;
}
c = *str++;
switch (c) { switch (c) {
case '\\': /* Return that character unchanged */ case '\\':
case '"': case '"':
case '{': case '{':
case '}': append_yylval_tzString('\\');
break; break;
case 'n': case '\n':
c = '\n'; append_yylval_tzString('\\');
c = 'n';
break; break;
case 'r': case '\r':
c = '\r'; append_yylval_tzString('\\');
c = 'r';
break; break;
case 't': case '\t':
c = '\t'; append_yylval_tzString('\\');
break; c = 't';
case '\0': /* Can't really print that one */
error("Illegal character escape at end of macro arg\n");
yylval.tzString[i++] = '\\';
break;
/*
* Line continuations and macro args were already
* handled while reading the macro args, so '\@',
* '\#', and '\0'-'\9' should not occur here.
*/
default:
error("Illegal character escape '%s'\n", print(c));
c = '\\';
break; break;
} }
yylval.tzString[i++] = c;
append_yylval_tzString(c);
} }
return i; return i;
@@ -1479,10 +1466,11 @@ static void readString(void)
case '\\': // Character escape or macro arg case '\\': // Character escape or macro arg
c = peek(0); c = peek(0);
switch (c) { switch (c) {
case '\\': // Return that character unchanged case '\\':
case '"': case '"':
case '{': case '{':
case '}': case '}':
// Return that character unchanged
shiftChars(1); shiftChars(1);
break; break;
case 'n': case 'n':
@@ -1521,16 +1509,18 @@ static void readString(void)
shiftChars(1); shiftChars(1);
char const *str = readMacroArg(c); char const *str = readMacroArg(c);
i = appendMacroArg(str, i); while (*str)
append_yylval_tzString(*str++);
continue; // Do not copy an additional character continue; // Do not copy an additional character
case EOF: // Can't really print that one case EOF: // Can't really print that one
error("Illegal character escape at end of input\n"); error("Illegal character escape at end of input\n");
c = '\\'; c = '\\';
break; break;
default: default:
error("Illegal character escape '%s'\n", print(c)); error("Illegal character escape '%s'\n", print(c));
c = '\\'; shiftChars(1);
break; break;
} }
break; break;
@@ -1542,16 +1532,15 @@ static void readString(void)
char const *ptr = readInterpolation(); char const *ptr = readInterpolation();
if (ptr) if (ptr)
while (*ptr && i < sizeof(yylval.tzString)) while (*ptr)
yylval.tzString[i++] = *ptr++; append_yylval_tzString(*ptr++);
lexerState->disableMacroArgs = true; lexerState->disableMacroArgs = true;
continue; // Do not copy an additional character continue; // Do not copy an additional character
// Regular characters will just get copied // Regular characters will just get copied
} }
if (i < sizeof(yylval.tzString)) // Copy one extra to flag overflow append_yylval_tzString(c);
yylval.tzString[i++] = c;
} }
finish: finish:
@@ -1566,6 +1555,155 @@ finish:
lexerState->disableInterpolation = false; lexerState->disableInterpolation = false;
} }
/*
 * Appends a string literal, in its quoted and escaped source form, to
 * yylval.tzString, starting at index `i`.
 *
 * The caller has already consumed the opening '"'. Both single-line ("...")
 * and multi-line ("""...""") literals are recognized. The surrounding quotes
 * are written to the buffer, and recognized character escapes keep their
 * backslash, so the buffer holds the literal as it could be written in source
 * rather than its decoded value. Macro args and {interpolations} found inside
 * the literal are expanded, and their expansion is appended through
 * appendEscapedSubstring().
 *
 * All writes go through append_yylval_tzString, which updates the local `i`
 * and stops writing once `i` reaches sizeof(yylval.tzString).
 *
 * Returns the index of the terminating '\0' written to yylval.tzString.
 *
 * Side effects: sets lexerState->disableMacroArgs and
 * lexerState->disableInterpolation while reading, and leaves both `false` on
 * return, whatever their values were on entry.
 */
static size_t appendStringLiteral(size_t i)
{
dbgPrint("Reading string\n");
// Expansions inside the literal are handled explicitly by the loop below
lexerState->disableMacroArgs = true;
lexerState->disableInterpolation = true;
bool multiline = false;
// We reach this function after reading one '"' character, but we also support triple quotes
append_yylval_tzString('"');
if (peek(0) == '"') {
append_yylval_tzString('"');
shiftChars(1);
if (peek(0) == '"') {
// """ begins a multi-line string
append_yylval_tzString('"');
shiftChars(1);
multiline = true;
} else {
// "" is an empty string, skip the loop
goto finish;
}
}
for (;;) {
int c = peek(0);
// '\r', '\n' or EOF ends a single-line string early
// (the offending character is only peeked, not consumed, before leaving the loop)
if (c == EOF || (!multiline && (c == '\r' || c == '\n'))) {
error("Unterminated string\n");
break;
}
// We'll be staying in the string, so we can safely consume the char
shiftChars(1);
// Handle '\r' or '\n' (in multiline strings only, already handled above otherwise)
if (c == '\r' || c == '\n') {
/* Handle CRLF before nextLine() since shiftChars updates colNo */
if (c == '\r' && peek(0) == '\n')
shiftChars(1);
nextLine();
// Any line ending is stored as a single '\n'
c = '\n';
}
switch (c) {
case '"':
if (multiline) {
// Only """ ends a multi-line string
// (a lone '"' breaks out of the switch and is copied like a regular character)
if (peek(0) != '"' || peek(1) != '"')
break;
append_yylval_tzString('"');
append_yylval_tzString('"');
shiftChars(2);
}
// Closing quote shared by both string forms
append_yylval_tzString('"');
goto finish;
case '\\': // Character escape or macro arg
// The backslash was consumed above; `c` is now the character that follows it
c = peek(0);
switch (c) {
// Character escape
case '\\':
case '"':
case '{':
case '}':
case 'n':
case 'r':
case 't':
// Keep the escape as written: the backslash is appended here,
// and the escaped character is appended after the switch
append_yylval_tzString('\\');
shiftChars(1);
break;
// Line continuation
case ' ':
case '\r':
case '\n':
// Nothing is appended for a line continuation
readLineContinuation();
continue;
// Macro arg
case '@':
case '#':
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
shiftChars(1);
char const *str = readMacroArg(c);
// The arg's value is passed through appendEscapedSubstring() before being stored
i = appendEscapedSubstring(str, i);
continue; // Do not copy an additional character
case EOF: // Can't really print that one
error("Illegal character escape at end of input\n");
// A lone backslash is appended after the switch
c = '\\';
break;
case ',': /* `\,` inside a macro arg string literal */
warning(WARNING_OBSOLETE,
"`\\,` is deprecated inside strings\n");
// Only the ',' is appended after the switch; the backslash is dropped
shiftChars(1);
break;
default:
error("Illegal character escape '%s'\n", print(c));
// The offending character is still appended, without its backslash
shiftChars(1);
break;
}
break;
case '{': // Symbol interpolation
// We'll be exiting the string scope, so re-enable expansions
// (Not interpolations, since they're handled by the function itself...)
lexerState->disableMacroArgs = false;
char const *ptr = readInterpolation();
// A NULL result appends nothing
if (ptr)
i = appendEscapedSubstring(ptr, i);
lexerState->disableMacroArgs = true;
continue; // Do not copy an additional character
// Regular characters will just get copied
}
append_yylval_tzString(c);
}
finish:
// `i` equal to the buffer size means the last slot was filled:
// step back one so the terminator fits, and warn
if (i == sizeof(yylval.tzString)) {
i--;
warning(WARNING_LONG_STR, "String constant too long\n");
}
yylval.tzString[i] = '\0';
dbgPrint("Read string \"%s\"\n", yylval.tzString);
lexerState->disableMacroArgs = false;
lexerState->disableInterpolation = false;
return i;
}
/* Function to report one character's worth of garbage bytes */ /* Function to report one character's worth of garbage bytes */
static char const *reportGarbageChar(unsigned char firstByte) static char const *reportGarbageChar(unsigned char firstByte)
@@ -1835,6 +1973,7 @@ static int yylex_NORMAL(void)
case EOF: case EOF:
error("Illegal character escape at end of input\n"); error("Illegal character escape at end of input\n");
break; break;
default: default:
shiftChars(1); shiftChars(1);
error("Illegal character escape '%s'\n", print(c)); error("Illegal character escape '%s'\n", print(c));
@@ -1886,9 +2025,8 @@ static int yylex_RAW(void)
dbgPrint("Lexing in raw mode, line=%" PRIu32 ", col=%" PRIu32 "\n", dbgPrint("Lexing in raw mode, line=%" PRIu32 ", col=%" PRIu32 "\n",
lexer_GetLineNo(), lexer_GetColNo()); lexer_GetLineNo(), lexer_GetColNo());
/* This is essentially a modified `readString` */ /* This is essentially a modified `appendStringLiteral` */
size_t i = 0; size_t i = 0;
bool insideString = false;
/* Trim left of string... */ /* Trim left of string... */
while (isWhitespace(peek(0))) while (isWhitespace(peek(0)))
@@ -1898,18 +2036,16 @@ static int yylex_RAW(void)
int c = peek(0); int c = peek(0);
switch (c) { switch (c) {
case '"': case '"': /* String literals inside macro args */
insideString = !insideString; shiftChars(1);
/* Other than that, just process quotes normally */ i = appendStringLiteral(i);
break; break;
case ';': /* Comments inside macro args */ case ';': /* Comments inside macro args */
if (insideString)
break;
discardComment(); discardComment();
c = peek(0); c = peek(0);
/* fallthrough */ /* fallthrough */
case ',': case ',': /* End of macro arg */
case '\r': case '\r':
case '\n': case '\n':
case EOF: case EOF:
@@ -1939,16 +2075,30 @@ static int yylex_RAW(void)
return T_STRING; return T_STRING;
case '\\': /* Character escape */ case '\\': /* Character escape */
c = peek(1); shiftChars(1); /* Shift the backslash */
c = peek(0);
switch (c) { switch (c) {
case ',': case ',': /* Escape `\,` only inside a macro arg */
shiftChars(1); case '\\': /* Escapes shared with string literals */
case '"':
case '{':
case '}':
break;
case 'n':
c = '\n';
break;
case 'r':
c = '\r';
break;
case 't':
c = '\t';
break; break;
case ' ': case ' ':
case '\r': case '\r':
case '\n': case '\n':
shiftChars(1); /* Shift the backslash */
readLineContinuation(); readLineContinuation();
continue; continue;
@@ -1956,20 +2106,28 @@ static int yylex_RAW(void)
error("Illegal character escape at end of input\n"); error("Illegal character escape at end of input\n");
c = '\\'; c = '\\';
break; break;
default: /* Pass the rest as-is */
c = '\\';
break;
}
break;
/* Regular characters will just get copied */ /*
* Macro args were already handled by peek, so '\@',
* '\#', and '\0'-'\9' should not occur here.
*/
default:
error("Illegal character escape '%s'\n", print(c));
break;
} }
if (i < sizeof(yylval.tzString)) /* Copy one extra to flag overflow */ /* fallthrough */
yylval.tzString[i++] = c;
default: /* Regular characters will just get copied */
append_yylval_tzString(c);
shiftChars(1); shiftChars(1);
break;
}
} }
} }
#undef append_yylval_tzString
/* /*
* This function uses the fact that `if`, etc. constructs are only valid when * This function uses the fact that `if`, etc. constructs are only valid when
* there's nothing before them on their lines. This enables filtering * there's nothing before them on their lines. This enables filtering

View File

@@ -235,7 +235,6 @@ There are a number of escape sequences you can use within a string:
.It Sy String Ta Sy Meaning .It Sy String Ta Sy Meaning
.It Ql \[rs]\[rs] Ta Produces a backslash .It Ql \[rs]\[rs] Ta Produces a backslash
.It Ql \[rs]" Ta Produces a double quote without terminating .It Ql \[rs]" Ta Produces a double quote without terminating
.It Ql \[rs], Ta Comma
.It Ql \[rs]{ Ta Curly bracket left .It Ql \[rs]{ Ta Curly bracket left
.It Ql \[rs]} Ta Curly bracket right .It Ql \[rs]} Ta Curly bracket right
.It Ql \[rs]n Ta Newline ($0A) .It Ql \[rs]n Ta Newline ($0A)
@@ -1088,6 +1087,10 @@ definition
ENDM ENDM
.Ed .Ed
.El .El
.Pp
Macro arguments support all the escape sequences of strings, as well as
.Ql \[rs],
to escape commas, since those otherwise separate arguments.
.Ss Exporting and importing symbols .Ss Exporting and importing symbols
Importing and exporting of symbols is a feature that is very useful when your project spans many source files and, for example, you need to jump to a routine defined in another file. Importing and exporting of symbols is a feature that is very useful when your project spans many source files and, for example, you need to jump to a routine defined in another file.
.Pp .Pp
@@ -1462,16 +1465,13 @@ PrintMacro: MACRO
ENDM ENDM
PrintMacro STRCAT("Hello "\[rs], \[rs] PrintMacro STRCAT("Hello "\[rs], \[rs]
"world\[rs]\[rs]n") "world\[rs]n")
.Ed .Ed
.Pp .Pp
The comma needs to be escaped to avoid it being treated as separating the macro's arguments. The comma needs to be escaped to avoid it being treated as separating the macro's arguments.
The backslash The backslash in
.Sq \[rs] .Ql \[rs]n
.Pq from Sq \[rs]n does not need to be escaped because string literals also work as usual inside macro arguments.
also needs to be escaped because of the way
.Nm
processes macro arguments.
.Pp .Pp
In reality, up to 256 arguments can be passed to a macro, but you can only use the first 9 like this. In reality, up to 256 arguments can be passed to a macro, but you can only use the first 9 like this.
If you want to use the rest, you need to use the If you want to use the rest, you need to use the

View File

@@ -1,9 +1,12 @@
print1: MACRO print1: MACRO
if _NARG == 2
assert !STRCMP("\1", \2)
endc
PRINTLN "\1" PRINTLN "\1"
ENDM ENDM
print1 John "Danger" Smith print1 John "Danger" Smith
print1 \\A\nB print1 \\\\A\\nB\n, "\\\\A\\nB\n"
print1 C\ print1 C\
D D
print1 E\!F ; illegal character escape print1 E\!F ; illegal character escape
@@ -15,3 +18,10 @@ ENDM
s EQUS "hello" s EQUS "hello"
iprint s iprint s
symprint: MACRO
PRINTLN {\1}
ENDM
hello EQUS "\"goodbye\""
symprint s

View File

@@ -1,3 +1,3 @@
ERROR: macro-arg-in-string.asm(9) -> macro-arg-in-string.asm::print1(2): ERROR: macro-arg-in-string.asm(12):
Illegal character escape '!' Illegal character escape '!'
error: Assembly aborted (1 errors)! error: Assembly aborted (1 errors)!

View File

@@ -1,6 +1,7 @@
John "Danger" Smith John "Danger" Smith
\A \\A\nB
B
CD CD
E\F E!F
hello hello
goodbye

View File

@@ -21,7 +21,8 @@ printarg: MACRO
ENDM ENDM
printarg " printarg "
printarg """ printarg """multi-line
string argument"""
EMPTY1 EQUS "" EMPTY1 EQUS ""
EMPTY2 EQUS "\ ; comment EMPTY2 EQUS "\ ; comment

View File

@@ -1,2 +1,5 @@
warning: multi-line-strings.asm(34): [-Wuser] ERROR: multi-line-strings.asm(23):
Unterminated string
warning: multi-line-strings.asm(35): [-Wuser]
check the line number check the line number
error: Assembly aborted (1 errors)!

View File

@@ -8,6 +8,8 @@ The multi-line string can contain:
! !
arg <"> arg <">
arg (") arg (")
arg <"""> arg <"""multi-line
arg (""") string argument""">
arg ("""multi-line
string argument""")
() ()

14
test/asm/quine.asm Normal file
View File

@@ -0,0 +1,14 @@
R:MACRO
REPT _NARG
PRINT STRSUB("\n\"\\ ENRST1ABCDFGHIMOPU_n#()+,:>",\1+1,1)
SHIFT
ENDR
ENDM
N:MACRO
R \#
REPT _NARG
PRINT"\1",STRSUB("\n,",(_NARG>1)+1,1)
SHIFT
ENDR
ENDM
N 6,29,18,10,12,6,19,0,6,4,20,8,3,22,5,10,6,15,0,20,6,17,5,8,3,7,8,6,7,21,11,25,1,2,23,2,1,2,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,1,28,2,9,27,9,28,9,26,0,7,16,17,14,8,0,4,5,13,6,0,4,5,13,18,0,5,29,18,10,12,6,19,0,3,6,3,2,24,0,6,4,20,8,3,22,5,10,6,15,0,20,6,17,5,8,1,2,9,1,28,7,8,6,7,21,11,25,1,2,23,28,1,28,25,22,5,10,6,15,30,9,26,27,9,28,9,26,0,7,16,17,14,8,0,4,5,13,6,0,4,5,13,18,0,3,5,3

0
test/asm/quine.err Normal file
View File

14
test/asm/quine.out Normal file
View File

@@ -0,0 +1,14 @@
R:MACRO
REPT _NARG
PRINT STRSUB("\n\"\\ ENRST1ABCDFGHIMOPU_n#()+,:>",\1+1,1)
SHIFT
ENDR
ENDM
N:MACRO
R \#
REPT _NARG
PRINT"\1",STRSUB("\n,",(_NARG>1)+1,1)
SHIFT
ENDR
ENDM
N 6,29,18,10,12,6,19,0,6,4,20,8,3,22,5,10,6,15,0,20,6,17,5,8,3,7,8,6,7,21,11,25,1,2,23,2,1,2,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,1,28,2,9,27,9,28,9,26,0,7,16,17,14,8,0,4,5,13,6,0,4,5,13,18,0,5,29,18,10,12,6,19,0,3,6,3,2,24,0,6,4,20,8,3,22,5,10,6,15,0,20,6,17,5,8,1,2,9,1,28,7,8,6,7,21,11,25,1,2,23,28,1,28,25,22,5,10,6,15,30,9,26,27,9,28,9,26,0,7,16,17,14,8,0,4,5,13,6,0,4,5,13,18,0,3,5,3

4
test/asm/quine2.asm Normal file
View File

@@ -0,0 +1,4 @@
q: macro
println \1,"\1"
endm
q "q: macro\n\tprintln \\1,\"\\1\"\nendm\n\tq "

0
test/asm/quine2.err Normal file
View File

4
test/asm/quine2.out Normal file
View File

@@ -0,0 +1,4 @@
q: macro
println \1,"\1"
endm
q "q: macro\n\tprintln \\1,\"\\1\"\nendm\n\tq "

View File

@@ -0,0 +1,38 @@
printargs: MACRO
rept _NARG
println \1
shift
endr
ENDM
printlit: MACRO
rept _NARG
println "\1"
shift
endr
ENDM
NUM EQU 42
STR EQUS "str\"ing"
printargs NUM
printargs "{d:NUM}"
printargs "{STR}", 16 ; comment 1
printargs "\"literal \\\"\\\\\\\"\""
printargs "literal \"\\\"", \ ; comment 2
"""multi-"line"
""string"" arg"""
printargs MUL(2.0\, 3.0)
printargs "unclosed
printlit NUM
printlit "{d:NUM}"
printlit "{STR}", 16 ; comment 3
printlit "\"literal \\\"\\\\\\\"\""
printlit "literal \"\\\"", \ ; comment 4
"""multi-"line"
""string"" arg"""
printlit MUL(2.0\, 3.0)
printlit this\n is\, \{not\} a\\n syntax\" error
printlit "unclosed
printlit """EOF

View File

@@ -0,0 +1,9 @@
ERROR: raw-macro-args.asm(26):
Unterminated string
ERROR: raw-macro-args.asm(26) -> raw-macro-args.asm::printargs(2) -> raw-macro-args.asm::printargs::REPT~1(3):
Unterminated string
ERROR: raw-macro-args.asm(37):
Unterminated string
ERROR: raw-macro-args.asm(38):
Unterminated string
error: Assembly aborted (4 errors)!

View File

@@ -0,0 +1,23 @@
$2A
42
str"ing
$10
"literal \"\\\""
literal "\"
multi-"line"
""string"" arg
$60000
unclosed
NUM
"42"
"str\"ing"
16
"\"literal \\\"\\\\\\\"\""
"literal \"\\\""
"""multi-"line"
""string"" arg"""
MUL(2.0, 3.0)
this
is, {not} a\n syntax" error
"unclosed
"""EOF