Reject unescaped newlines in strings.

This commit is contained in:
Paul Eggert
2004-05-03 07:42:52 +00:00
parent e23d0dd71e
commit 4febdd9667
2 changed files with 77 additions and 45 deletions

8
NEWS
View File

@@ -3,11 +3,19 @@ Bison News
Changes in version 1.875d: Changes in version 1.875d:
* Unescaped newlines are no longer allowed in character constants or
string literals. They were never portable, and GCC 3.4.0 has
dropped support for them. Better diagnostics are now generated if
you forget a closing quote.
* NUL bytes are no longer allowed in Bison string literals, unfortunately. * NUL bytes are no longer allowed in Bison string literals, unfortunately.
* %token numbers can now be hexadecimal integers, e.g., `%token FOO 0x12d'. * %token numbers can now be hexadecimal integers, e.g., `%token FOO 0x12d'.
This is a GNU extension. This is a GNU extension.
* A new directive "%expect-rr N" specifies the expected number of
reduce/reduce conflicts in GLR parsers.
* Experimental %destructor support has been added to lalr1.cc. * Experimental %destructor support has been added to lalr1.cc.
Changes in version 1.875c, 2003-08-25: Changes in version 1.875c, 2003-08-25:

View File

@@ -104,6 +104,7 @@ static void handle_syncline (char *args);
static unsigned long int scan_integer (char const *p, int base, location loc); static unsigned long int scan_integer (char const *p, int base, location loc);
static int convert_ucn_to_byte (char const *hex_text); static int convert_ucn_to_byte (char const *hex_text);
static void unexpected_eof (boundary, char const *); static void unexpected_eof (boundary, char const *);
static void unexpected_newline (boundary, char const *);
%} %}
%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT %x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
@@ -157,10 +158,9 @@ splice (\\[ \f\t\v]*\n)*
<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE> <INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
{ {
[ \f\n\t\v] ; /* Comments and white space. */
"," warn_at (*loc, _("stray `,' treated as white space")); "," warn_at (*loc, _("stray `,' treated as white space"));
[ \f\n\t\v] |
/* Comments. */
"//".* ; "//".* ;
"/*" { "/*" {
token_start = loc->start; token_start = loc->start;
@@ -352,10 +352,10 @@ splice (\\[ \f\t\v]*\n)*
} }
/*----------------------------------------------------------------. /*------------------------------------------------.
| Scanning a C string, including its escapes. The initial `"' is | | Scanning a Bison string, including its escapes. |
| already eaten. | | The initial quote is already eaten. |
`----------------------------------------------------------------*/ `------------------------------------------------*/
<SC_ESCAPED_STRING> <SC_ESCAPED_STRING>
{ {
@@ -368,16 +368,14 @@ splice (\\[ \f\t\v]*\n)*
BEGIN INITIAL; BEGIN INITIAL;
return STRING; return STRING;
} }
\n unexpected_newline (token_start, "\""); BEGIN INITIAL;
\0 complain_at (*loc, _("invalid null character")); <<EOF>> unexpected_eof (token_start, "\""); BEGIN INITIAL;
.|\n STRING_GROW;
<<EOF>> unexpected_eof (token_start, "\""); BEGIN INITIAL;
} }
/*---------------------------------------------------------------. /*----------------------------------------------------------.
| Scanning a C character, decoding its escapes. The initial "'" | | Scanning a Bison character literal, decoding its escapes. |
| is already eaten. | | The initial quote is already eaten. |
`---------------------------------------------------------------*/ `----------------------------------------------------------*/
<SC_ESCAPED_CHARACTER> <SC_ESCAPED_CHARACTER>
{ {
@@ -395,10 +393,13 @@ splice (\\[ \f\t\v]*\n)*
BEGIN INITIAL; BEGIN INITIAL;
return ID; return ID;
} }
\n unexpected_newline (token_start, "'"); BEGIN INITIAL;
<<EOF>> unexpected_eof (token_start, "'"); BEGIN INITIAL;
}
<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
{
\0 complain_at (*loc, _("invalid null character")); \0 complain_at (*loc, _("invalid null character"));
.|\n STRING_GROW;
<<EOF>> unexpected_eof (token_start, "'"); BEGIN INITIAL;
} }
@@ -456,33 +457,27 @@ splice (\\[ \f\t\v]*\n)*
} }
} }
/*--------------------------------------------.
| Scanning user-code characters and strings. |
`--------------------------------------------*/
/*----------------------------------------------------------. <SC_CHARACTER,SC_STRING>
| Scanning a C character without decoding its escapes. The | {
| initial "'" is already eaten. | {splice}|\\{splice}[^\n$@\[\]] STRING_GROW;
`----------------------------------------------------------*/ }
<SC_CHARACTER> <SC_CHARACTER>
{ {
"'" STRING_GROW; BEGIN context_state; "'" STRING_GROW; BEGIN context_state;
\\{splice}[^$@\[\]] STRING_GROW; \n unexpected_newline (token_start, "'"); BEGIN context_state;
<<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state; <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
} }
/*----------------------------------------------------------------.
| Scanning a C string, without decoding its escapes. The initial |
| `"' is already eaten. |
`----------------------------------------------------------------*/
<SC_STRING> <SC_STRING>
{ {
"\"" STRING_GROW; BEGIN context_state; "\"" STRING_GROW; BEGIN context_state;
\\{splice}[^$@\[\]] STRING_GROW; \n unexpected_newline (token_start, "\""); BEGIN context_state;
<<EOF>> { <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
unexpected_eof (token_start, "\"");
BEGIN context_state;
}
} }
@@ -641,10 +636,9 @@ splice (\\[ \f\t\v]*\n)*
} }
/*----------------------------------------------------------------. /*-----------------------------------------.
| By default, grow the string obstack with the input, escaping M4 | | Escape M4 quoting characters in C code. |
| quoting characters. | `-----------------------------------------*/
`----------------------------------------------------------------*/
<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE> <SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
{ {
@@ -652,10 +646,16 @@ splice (\\[ \f\t\v]*\n)*
\@ obstack_sgrow (&obstack_for_string, "@@"); \@ obstack_sgrow (&obstack_for_string, "@@");
\[ obstack_sgrow (&obstack_for_string, "@{"); \[ obstack_sgrow (&obstack_for_string, "@{");
\] obstack_sgrow (&obstack_for_string, "@}"); \] obstack_sgrow (&obstack_for_string, "@}");
.|\n STRING_GROW;
} }
/*-----------------------------------------------------.
| By default, grow the string obstack with the input. |
`-----------------------------------------------------*/
<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
%% %%
/* Keeps track of the maximum number of semantic values to the left of /* Keeps track of the maximum number of semantic values to the left of
@@ -1009,6 +1009,22 @@ handle_syncline (char *args)
} }
/*----------------------------------------------------------------.
| For a token or comment starting at START, report message MSGID, |
| which should say that an end marker was found before |
| the expected TOKEN_END. |
`----------------------------------------------------------------*/
static void
unexpected_end (boundary start, char const *msgid, char const *token_end)
{
/* The reported location runs from START to the current scanner cursor. */
location loc;
loc.start = start;
loc.end = scanner_cursor;
/* MSGID is passed through _() at this point and used as the format
   string; TOKEN_END is its single argument. */
complain_at (loc, _(msgid), token_end);
}
/*------------------------------------------------------------------------. /*------------------------------------------------------------------------.
| Report an unexpected EOF in a token or comment starting at START. | | Report an unexpected EOF in a token or comment starting at START. |
| An end of file was encountered and the expected TOKEN_END was missing. | | An end of file was encountered and the expected TOKEN_END was missing. |
@@ -1017,10 +1033,18 @@ handle_syncline (char *args)
static void static void
unexpected_eof (boundary start, char const *token_end) unexpected_eof (boundary start, char const *token_end)
{ {
location loc; unexpected_end (start, N_("missing `%s' at end of file"), token_end);
loc.start = start; }
loc.end = scanner_cursor;
complain_at (loc, _("missing `%s' at end of file"), token_end);
/*----------------------------------------.
| Likewise, but for unexpected newlines. |
`----------------------------------------*/
static void
unexpected_newline (boundary start, char const *token_end)
{
/* Delegate to unexpected_end with the end-of-line message; the message
   is only marked with N_() here, since unexpected_end applies _() to
   the MSGID it receives. */
unexpected_end (start, N_("missing `%s' at end of line"), token_end);
} }