Reject unescaped newlines in strings.

2026-06-13 19:22:12 +00:00 · 2004-05-03 07:42:52 +00:00
parent e23d0dd71e
commit 4febdd9667
2 changed files with 77 additions and 45 deletions
@@ -3,11 +3,19 @@ Bison News
 Changes in version 1.875d:
 * Unescaped newlines are no longer allowed in character constants or
  string literals.  They were never portable, and GCC 3.4.0 has
  dropped support for them.  Better diagnostics are now generated if
  you forget a closing quote.
 * NUL bytes are no longer allowed in Bison string literals, unfortunately.
 * %token numbers can now be hexadecimal integers, e.g., `%token FOO 0x12d'.
  This is a GNU extension.
 * A new directive "%expect-rr N" specifies the expected number of
  reduce/reduce conflicts in GLR parsers.
 * Experimental %destructor support has been added to lalr1.cc.
 Changes in version 1.875c, 2003-08-25:
@@ -104,6 +104,7 @@ static void handle_syncline (char *args);
 static unsigned long int scan_integer (char const *p, int base, location loc);
 static int convert_ucn_to_byte (char const *hex_text);
 static void unexpected_eof (boundary, char const *);
 static void unexpected_newline (boundary, char const *);
 %}
 %x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
@@ -157,10 +158,9 @@ splice	 (\\[ \f\t\v]*\n)*
 <INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
 {
-  [ \f\n\t\v]  ;
+  /* Comments and white space.  */
  ","	       warn_at (*loc, _("stray `,' treated as white space"));
-
+  [ \f\n\t\v]  |
  /* Comments. */
  "//".*       ;
  "/*" {
    token_start = loc->start;
@@ -352,10 +352,10 @@ splice	 (\\[ \f\t\v]*\n)*
 }
-  /*----------------------------------------------------------------.
+  /*------------------------------------------------.
-  | Scanning a C string, including its escapes.  The initial `"' is |
+  | Scanning a Bison string, including its escapes. |
-  | already eaten.                                                  |
+  | The initial quote is already eaten.             |
-  `----------------------------------------------------------------*/
+  `------------------------------------------------*/
 <SC_ESCAPED_STRING>
 {
@@ -368,16 +368,14 @@ splice	 (\\[ \f\t\v]*\n)*
    BEGIN INITIAL;
    return STRING;
  }
-
+  \n		unexpected_newline (token_start, "\"");	BEGIN INITIAL;
-  \0	    complain_at (*loc, _("invalid null character"));
+  <<EOF>>	unexpected_eof (token_start, "\"");	BEGIN INITIAL;
  .|\n	    STRING_GROW;
  <<EOF>>   unexpected_eof (token_start, "\""); BEGIN INITIAL;
 }
-  /*---------------------------------------------------------------.
+  /*----------------------------------------------------------.
-  | Scanning a C character, decoding its escapes.  The initial "'" |
+  | Scanning a Bison character literal, decoding its escapes. |
-  | is already eaten.                                              |
+  | The initial quote is already eaten.			      |
-  `---------------------------------------------------------------*/
+  `----------------------------------------------------------*/
 <SC_ESCAPED_CHARACTER>
 {
@@ -395,10 +393,13 @@ splice	 (\\[ \f\t\v]*\n)*
    BEGIN INITIAL;
    return ID;
  }
  \n		unexpected_newline (token_start, "'");	BEGIN INITIAL;
  <<EOF>>	unexpected_eof (token_start, "'");	BEGIN INITIAL;
 }
 <SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
 {
  \0	    complain_at (*loc, _("invalid null character"));
  .|\n	    STRING_GROW;
  <<EOF>>   unexpected_eof (token_start, "'"); BEGIN INITIAL;
 }
@@ -456,33 +457,27 @@ splice	 (\\[ \f\t\v]*\n)*
  }
 }
  /*--------------------------------------------.
  | Scanning user-code characters and strings.  |
  `--------------------------------------------*/
-  /*----------------------------------------------------------.
+<SC_CHARACTER,SC_STRING>
-  | Scanning a C character without decoding its escapes.  The |
+{
-  | initial "'" is already eaten.                             |
+  {splice}|\\{splice}[^\n$@\[\]]	STRING_GROW;
-  `----------------------------------------------------------*/
+}
 <SC_CHARACTER>
 {
-  "'"			STRING_GROW; BEGIN context_state;
+  "'"		STRING_GROW; BEGIN context_state;
-  \\{splice}[^$@\[\]]	STRING_GROW;
+  \n		unexpected_newline (token_start, "'"); BEGIN context_state;
-  <<EOF>>		unexpected_eof (token_start, "'"); BEGIN context_state;
+  <<EOF>>	unexpected_eof (token_start, "'"); BEGIN context_state;
 }
  /*----------------------------------------------------------------.
  | Scanning a C string, without decoding its escapes.  The initial |
  | `"' is already eaten.                                           |
  `----------------------------------------------------------------*/
 <SC_STRING>
 {
-  "\""			STRING_GROW; BEGIN context_state;
+  "\""		STRING_GROW; BEGIN context_state;
-  \\{splice}[^$@\[\]]	STRING_GROW;
+  \n		unexpected_newline (token_start, "\""); BEGIN context_state;
-  <<EOF>> {
+  <<EOF>>	unexpected_eof (token_start, "\""); BEGIN context_state;
    unexpected_eof (token_start, "\"");
    BEGIN context_state;
  }
 }
@@ -641,10 +636,9 @@ splice	 (\\[ \f\t\v]*\n)*
 }
-  /*----------------------------------------------------------------.
+  /*-----------------------------------------.
-  | By default, grow the string obstack with the input, escaping M4 |
+  | Escape M4 quoting characters in C code.  |
-  | quoting characters.						    |
+  `-----------------------------------------*/
  `----------------------------------------------------------------*/
 <SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
 {
@@ -652,10 +646,16 @@ splice	 (\\[ \f\t\v]*\n)*
  \@	obstack_sgrow (&obstack_for_string, "@@");
  \[	obstack_sgrow (&obstack_for_string, "@{");
  \]	obstack_sgrow (&obstack_for_string, "@}");
  .|\n  STRING_GROW;
 }
  /*-----------------------------------------------------.
  | By default, grow the string obstack with the input.  |
  `-----------------------------------------------------*/
 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>.	|
 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n	STRING_GROW;
 %%
 /* Keeps track of the maximum number of semantic values to the left of
@@ -1009,6 +1009,22 @@ handle_syncline (char *args)
 }
 /*----------------------------------------------------------------.
 | For a token or comment starting at START, report message MSGID, |
 | which should say that an end marker was found before		  |
 | the expected TOKEN_END.					  |
 `----------------------------------------------------------------*/
 static void
 unexpected_end (boundary start, char const *msgid, char const *token_end)
 {
  /* The reported span runs from START to scanner_cursor.  */
  location span;
  span.end = scanner_cursor;
  span.start = start;
  /* MSGID is run through _() here; the callers mark it with N_().  */
  complain_at (span, _(msgid), token_end);
 }
 /*------------------------------------------------------------------------.
 | Report an unexpected EOF in a token or comment starting at START.       |
 | An end of file was encountered and the expected TOKEN_END was missing.  |
@@ -1017,10 +1033,18 @@ handle_syncline (char *args)
 static void
 unexpected_eof (boundary start, char const *token_end)
 {
-  location loc;
+  unexpected_end (start, N_("missing `%s' at end of file"), token_end);
-  loc.start = start;
+}
-  loc.end = scanner_cursor;
+
-  complain_at (loc, _("missing `%s' at end of file"), token_end);
+
 /*----------------------------------------.
 | Likewise, but for unexpected newlines.  |
 `----------------------------------------*/
 static void
 unexpected_newline (boundary start, char const *token_end)
 {
  /* unexpected_end passes this message through _() before reporting it.  */
  char const *msgid = N_("missing `%s' at end of line");
  unexpected_end (start, msgid, token_end);
 }