merge branch 'maint'

* upstream/maint: maint: post-release administrivia version 3.5.3 news: update for 3.5.3 yacc.c: make sure we properly propagated the user's number for error diagnostics: don't crash because of repeated definitions of error style: initialize some struct members diagnostics: beware of zero-width characters diagnostics: be sure to close the styling when lines are too short muscles: fix incorrect decoding of $ code: be robust to reference with invalid tags build: fix typo doc: update recommandation for libtextstyle style: comment changes examples: use consistently the GFDL header for readmes style: remove useless declarations typo: succesful -> successful README: point to tests/bison, and document --trace gnulib: update maint: post-release administrivia
2026-04-24 18:52:21 +00:00 · 2020-03-08 09:52:13 +01:00
parent ecd922024e 15ea35019f
commit 951da960e6
13 changed files with 188 additions and 39 deletions
@@ -1 +1 @@
-3.5.1
+3.5.3
@@ -110,6 +110,16 @@ GNU Bison NEWS
  tracking, internationalized custom error messages, lookahead-correction,
  rich debug traces, etc.

+* Noteworthy changes in release 3.5.3 (2020-03-08) [stable]
+
+** Bug fixes
+
+  Error messages could quote lines containing zero-width characters (such as
+  \005) with incorrect styling.  Similar issues with unexpectedly short lines
+  (e.g., when the file was changed between parsing and diagnosing) are fixed.
+
+  Several unlikely crashes found by fuzzing have been fixed.
+
 * Noteworthy changes in release 3.5.2 (2020-02-13) [stable]

 ** Bug fixes
@@ -4,8 +4,9 @@ it is today without the invaluable help of these people:
 Aaro Koskinen             aaro.koskinen@iki.fi
 Аскар Сафин               safinaskar@mail.ru
 Adam Sampson              ats@offog.org
+Ahcheong Lee              dkcjd2000@gmail.com
 Airy Andre                Airy.Andre@edf.fr
-Akim Demaille             akim@lrde.epita.fr
+Akim Demaille             akim@gnu.org
 Albert Chin-A-Young       china@thewrittenword.com
 Alexander Belopolsky      alexb@rentec.com
 Alexandre Duret-Lutz      adl@lrde.epita.fr
@@ -305,13 +305,8 @@ It would be a very nice source of inspiration for the other languages.
 Valentin Tolmer is working on this.

 ** YYERRCODE
-Defined to 256, but not used, not documented.  Probably the token
-number for the error token, which POSIX wants to be 256, but which
-Bison might renumber if the user used number 256.  Keep fix and doc?
-Throw away?
-
-Also, why don't we output the token name of the error token in the
-output?  It is explicitly skipped:
+Why don't we output the token name of the error token in the output?  It is
+explicitly skipped:

      /* Skip error token and tokens without identifier.  */
      if (sym != errtoken && id)
@@ -739,7 +739,7 @@ static const ]b4_int_type_for([b4_toknum])[ yytoknum[] =
 /* Error symbol internal number. */
 #define YYTERROR        1
 /* Error token external number. */
-#define YYERRCODE       256
+#define YYERRCODE       ]b4_symbol(1, user_number)[

 ]b4_locations_if([[
 ]b4_yylloc_default_define[
@@ -175,6 +175,8 @@ location_print (location loc, FILE *out)
    }
  else
    {
+      aver (loc.start.file);
+      aver (loc.end.file);
      int end_col = 0 != loc.end.column ? loc.end.column - 1 : 0;
      res += fprintf (out, "%s",
                      quotearg_n_style (3, escape_quoting_style, loc.start.file));
@@ -317,7 +319,7 @@ caret_getc_internal (mbchar_t *res)

 /* Move CARET_INFO (which has a valid FILE) to the line number LINE.
   Compute and cache that line's length in CARET_INFO.LINE_LEN.
-   Return whether successful.*/
+   Return whether successful.  */
 static bool
 caret_set_line (int line)
 {
@@ -423,12 +425,14 @@ location_caret (location loc, const char *style, FILE *out)
      {
        /* The last column to highlight.  Only the first line of
           multiline locations is quoted, in which case the ending
-           column is the end of line.  Single point locations (with
-           equal boundaries) denote the character that they
-           follow.  */
-        int col_end
+           column is the end of line.
+
+           We used to work with byte offsets, and that was much
+           easier.  However, we went back to using (visual) columns to
+           support truncating of long lines.  */
+        const int col_end
          = loc.start.line == loc.end.line
-          ? loc.end.column + (loc.start.column == loc.end.column)
+          ? loc.end.column
          : caret_info.line_len;
        /* Quote the file (at most the first line in the case of
           multiline locations).  */
@@ -438,24 +442,28 @@ location_caret (location loc, const char *style, FILE *out)
             expected (maybe the file was changed since the scanner
             ran), we might reach the end before we actually saw the
             opening column.  */
-          bool opened = false;
+          enum { before, inside, after } state = before;
          while (!mb_iseof (c) && !mb_iseq (c, '\n'))
            {
-              if (caret_info.pos.column == loc.start.column)
+              // We might have already opened (and even closed!) the
+              // style and yet have the equality of the columns if we
+              // just saw zero-width characters.
+              if (state == before
+                  && caret_info.pos.column == loc.start.column)
                {
                  begin_use_class (style, out);
-                  opened = true;
+                  state = inside;
                }
              if (skip < caret_info.pos.column)
                mb_putc (c, out);
              boundary_compute (&caret_info.pos, mb_ptr (c), mb_len (c));
              caret_getc (c);
-              if (opened
+              if (state == inside
                  && (caret_info.pos.column == col_end
                      || width < caret_info.pos.column - skip))
                {
                  end_use_class (style, out);
-                  opened = false;
+                  state = after;
                }
              if (width < caret_info.pos.column - skip)
                {
@@ -463,6 +471,12 @@ location_caret (location loc, const char *style, FILE *out)
                  break;
                }
            }
+          if (state == inside)
+            {
+              // The line is shorter than expected.
+              end_use_class (style, out);
+              state = after;
+            }
          putc ('\n', out);
        }

@@ -42,16 +42,14 @@ typedef struct

  /* If positive, the column (starting at 1) just after the boundary.
     This is neither a byte count, nor a character count; it is a
-     column count.  If this is INT_MAX, the column number has
+     (visual) column count.  If this is INT_MAX, the column number has
     overflowed.

-     Meaningless and not displayed if nonpositive.
-  */
+     Meaningless and not displayed if nonpositive.  */
  int column;

-  /* If nonnegative, the byte number (starting at 0) in the current line.
-     Never displayed, used when printing error messages with colors to
-     know where colors start and end.  */
+  /* If nonnegative, the byte number (starting at 0) in the current
+     line.  Not displayed (unless --trace=location).  */
  int byte;

 } boundary;
@@ -292,7 +292,6 @@ muscle_location_grow (char const *key, location loc)

 #define COMMON_DECODE(Value)                                    \
  case '$':                                                     \
-    ++(Value); aver (*(Value) == '[');                          \
    ++(Value); aver (*(Value) == ']');                          \
    ++(Value); aver (*(Value) == '[');                          \
    obstack_sgrow (&muscle_obstack, "$");                       \
@@ -81,7 +81,7 @@ static bool untyped_var_seen;
   historically almost any character is allowed in a tag.  We disallow
   NUL and newline, as this simplifies our implementation.  We allow
   "->" as a means to dereference a pointer.  */
-tag      ([^\0\n>]|->)+
+tag      ([^\0\n>]|->)*[^-]

 /* Zero or more instances of backslash-newline.  Following GCC, allow
   white space between the backslash and the newline.  */
@@ -77,10 +77,12 @@ sym_content_new (symbol *s)
  res->symbol = s;

  res->type_name = NULL;
+  res->type_loc = empty_loc;
  for (int i = 0; i < CODE_PROPS_SIZE; ++i)
    code_props_none_init (&res->props[i]);

  res->number = NUMBER_UNDEFINED;
+  res->prec_loc = empty_loc;
  res->prec = 0;
  res->assoc = undef_assoc;
  res->user_token_number = USER_NUMBER_UNDEFINED;
@@ -539,9 +541,12 @@ symbol_class_set (symbol *sym, symbol_class class, location loc, bool declaring)
                           _("previous declaration"));
            }
          else
+            {
+              sym->location = loc;
              s->status = declared;
            }
        }
+    }
 }


@@ -209,8 +209,8 @@ typedef size_t uintptr_t;

 /* Output Str both quoted for M4 (i.e., embed in [[...]]), and escaped
   for our postprocessing (i.e., escape M4 special characters).  If
-   Str is empty (or NULL), output "[]" instead of "[[]]" as it make M4
-   programming easier (m4_ifval can be used).
+   Str is empty (or NULL), output "[]" instead of "[[]]" as it makes
+   M4 programming easier (m4_ifval can be used).

   For instance "[foo]" -> "[[@{foo@}]]", "$$" -> "[[$][$][]]".  */

@@ -37,15 +37,15 @@ AT_BISON_OPTION_PUSHDEFS

 AT_DATA_GRAMMAR([[input.y]], [$2])

+AT_DATA([experr], [$4])
+
 # For some reason, literal ^M characters in the input are removed and
 # don't end up in `input.y`.  So use the two-character sequence ^M to
 # represent it, and let Perl insert real CR characters.
-if grep '\^M' input.y >/dev/null; then
-  AT_PERL_REQUIRE([-pi -e 's{\^M}{\r}gx' input.y])
+if $EGREP ['\^M|\\[0-9][0-9][0-9]'] input.y experr >/dev/null; then
+  AT_PERL_REQUIRE([-pi -e 's{\^M}{\r}g;s{\\(\d{3}|.)}{$v = $[]1; $v =~ /\A\d+\z/ ? chr($v) : $v}ge' input.y experr])
 fi

-AT_DATA([experr], [$4])
-
 AT_CHECK([LC_ALL="$locale" $5 bison -fcaret --color=debug -Wall input.y], [$3], [], [experr])

 # When no style, same messages, but without style.
@@ -152,6 +152,65 @@ input.y: <warning>warning:</warning> fix-its can be applied.  Rerun with option
 ]])


+
+## ------------------------------------- ##
+## Line is too short, and then you die.  ##
+## ------------------------------------- ##
+
+# We trust the "#line", since that's what allows us to quote the
+# actual source from which the grammar file was generated.  But #line
+# can also be wrong, and point to a line which is shorter than the bad
+# one.  In which case we can easily forget to close the styling.
+#
+# Be sure to have #line point to a line long enough to open the
+# styling, but not enough to close it.
+
+AT_TEST([[Line is too short, and then you die]],
+[[// Beware that there are 9 lines inserted before (including this one).
+#line 12
+%token foo 123
+%token foo 123123
+%token foo 123
+%%
+exp:
+]],
+[1],
+[[input.y:13.8-10: <warning>warning:</warning> symbol foo redeclared [<warning>-Wother</warning>]
+   13 | %token <warning>foo</warning> 123
+      |        <warning>^~~</warning>
+input.y:12.8-10: <note>note:</note> previous declaration
+   12 | %token <note>foo</note> 123123
+      |        <note>^~~</note>
+input.y:13.12-17: <error>error:</error> redefining user token number of foo
+   13 | %token foo <error>123</error>
+      |            <error>^~~~~~</error>
+input.y:14.8-10: <warning>warning:</warning> symbol foo redeclared [<warning>-Wother</warning>]
+   14 | %%
+      |        <warning>^~~</warning>
+input.y:12.8-10: <note>note:</note> previous declaration
+   12 | %token <note>foo</note> 123123
+      |        <note>^~~</note>
+]])
+
+
+## ----------------------- ##
+## Zero-width characters.  ##
+## ----------------------- ##
+
+# We used to open the styling twice for characters that have zero
+# width on display (e.g., \005).
+
+AT_TEST([[Zero-width characters]],
+[[%%
+exp: an\005error.
+]],
+[1],
+[[input.y:10.8: <error>error:</error> invalid character: '\\005'
+   10 | exp: an<error>\005</error>error.
+      |        <error>^</error>
+]])
+
+
 ## -------------------------------------- ##
 ## Tabulations and multibyte characters.  ##
 ## -------------------------------------- ##
@@ -262,7 +321,7 @@ input.y:10.1-27: <error>error:</error> %define variable 'error2' is not used
 ## ----------------- ##

 # Carriage-return used to count as a newline in the scanner, and not
-# in diagnostics.  Resulting in all sort of nice bugs.
+# in diagnostics.  Resulting in all kinds of nice bugs.

 AT_TEST([[Carriage return]],
 [[^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M
@@ -298,6 +298,52 @@ input.y:8.14: error: unexpected integer literal
 AT_CLEANUP


+## ---------------------------- ##
+## Redefining the error token.  ##
+## ---------------------------- ##
+
+AT_SETUP([Redefining the error token])
+
+# We used to crash when trying to display the original definition of
+# "error", which is a builtin without any location.
+
+AT_DATA([input.y],
+[[%token error 123
+%token error 124
+%%
+exp:
+]])
+
+AT_BISON_CHECK([-fcaret input.y], [1], [],
+[[input.y:2.8-12: warning: symbol error redeclared [-Wother]
+    2 | %token error 124
+      |        ^~~~~
+input.y:1.8-12: note: previous declaration
+    1 | %token error 123
+      |        ^~~~~
+input.y:2.14-16: error: redefining user token number of error
+    2 | %token error 124
+      |              ^~~
+]])
+
+# While at it, make sure we properly used the user's number for
+# "error".
+AT_DATA([input.y],
+[[%token error 123
+%%
+exp:
+]])
+
+AT_BISON_CHECK([input.y])
+
+AT_CHECK([$EGREP -E '123|256' input.tab.c], [],
+[[       0,   123,   257
+#define YYERRCODE       123
+]])
+
+AT_CLEANUP
+
+
 ## ------------------ ##
 ## Dangling aliases.  ##
 ## ------------------ ##
@@ -2069,6 +2115,25 @@ input.y:1.1-34: note: accepted value: 'consistent'
 input.y:1.1-34: note: accepted value: 'accepting'
 ]])

+# Check escapes.
+AT_DATA([[input.y]],
+[[%define lr.default-reduction {[$@]}
+%%
+start: %empty;
+]])
+AT_BISON_CHECK([[-fcaret input.y]], [[1]], [[]],
+[[input.y:1.1-35: warning: %define variable 'lr.default-reduction' requires keyword values [-Wdeprecated]
+    1 | %define lr.default-reduction {[$@]}
+      | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+input.y:1.1-35: error: invalid value for %define variable 'lr.default-reduction': '[$@]'
+    1 | %define lr.default-reduction {[$@]}
+      | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+input.y:1.1-35: note: accepted value: 'most'
+input.y:1.1-35: note: accepted value: 'consistent'
+input.y:1.1-35: note: accepted value: 'accepting'
+]])
+
+
 # Back-end.
 AT_DATA([[input.y]],
 [[%define api.push-pull neither
@@ -2548,7 +2613,9 @@ AT_DATA_GRAMMAR([[input.y]],
 %printer        { $%; @%; } <*> exp TOK;
 %{ $ @ %} // Should not warn.
 %%
-exp: TOK        { $%; @%; $$ = $1; };
+exp: TOK        { $%; @%; $$ = $1; }
+   | 'a'        { $<->1; $$ = 1; }
+   | 'b'        { $<foo->bar>$; }
 %%
 $ @ // Should not warn.
 ]])
@@ -2562,6 +2629,7 @@ input.y:13.19: warning: stray '$' [-Wother]
 input.y:13.23: warning: stray '@' [-Wother]
 input.y:16.19: warning: stray '$' [-Wother]
 input.y:16.23: warning: stray '@' [-Wother]
+input.y:17.19: warning: stray '$' [-Wother]
 ]])

 AT_BISON_OPTION_POPDEFS
@@ -1 +1 @@
 .5.1
 .5.3