merge branch 'maint'

* upstream/maint:
  maint: post-release administrivia
  version 3.5.3
  news: update for 3.5.3
  yacc.c: make sure we properly propagated the user's number for error
  diagnostics: don't crash because of repeated definitions of error
  style: initialize some struct members
  diagnostics: beware of zero-width characters
  diagnostics: be sure to close the styling when lines are too short
  muscles: fix incorrect decoding of $
  code: be robust to reference with invalid tags
  build: fix typo
  doc: update recommandation for libtextstyle
  style: comment changes
  examples: use consistently the GFDL header for readmes
  style: remove useless declarations
  typo: succesful -> successful
  README: point to tests/bison, and document --trace
  gnulib: update
  maint: post-release administrivia
This commit is contained in:
Akim Demaille
2020-03-08 09:52:13 +01:00
13 changed files with 188 additions and 39 deletions

View File

@@ -1 +1 @@
3.5.1
3.5.3

10
NEWS
View File

@@ -110,6 +110,16 @@ GNU Bison NEWS
tracking, internationalized custom error messages, lookahead-correction,
rich debug traces, etc.
* Noteworthy changes in release 3.5.3 (2020-03-08) [stable]
** Bug fixes
Error messages could quote lines containing zero-width characters (such as
\005) with incorrect styling. Fixes for similar issues with unexpectedly
short lines (e.g., the file was changed between parsing and diagnosing).
Several unlikely crashes found by fuzzing have been fixed.
* Noteworthy changes in release 3.5.2 (2020-02-13) [stable]
** Bug fixes

3
THANKS
View File

@@ -4,8 +4,9 @@ it is today without the invaluable help of these people:
Aaro Koskinen aaro.koskinen@iki.fi
Аскар Сафин safinaskar@mail.ru
Adam Sampson ats@offog.org
Ahcheong Lee dkcjd2000@gmail.com
Airy Andre Airy.Andre@edf.fr
Akim Demaille akim@lrde.epita.fr
Akim Demaille akim@gnu.org
Albert Chin-A-Young china@thewrittenword.com
Alexander Belopolsky alexb@rentec.com
Alexandre Duret-Lutz adl@lrde.epita.fr

9
TODO
View File

@@ -305,13 +305,8 @@ It would be a very nice source of inspiration for the other languages.
Valentin Tolmer is working on this.
** YYERRCODE
Defined to 256, but not used, not documented. Probably the token
number for the error token, which POSIX wants to be 256, but which
Bison might renumber if the user used number 256. Keep fix and doc?
Throw away?
Also, why don't we output the token name of the error token in the
output? It is explicitly skipped:
Why don't we output the token name of the error token in the output? It is
explicitly skipped:
/* Skip error token and tokens without identifier. */
if (sym != errtoken && id)

View File

@@ -739,7 +739,7 @@ static const ]b4_int_type_for([b4_toknum])[ yytoknum[] =
/* Error symbol internal number. */
#define YYTERROR 1
/* Error token external number. */
#define YYERRCODE 256
#define YYERRCODE ]b4_symbol(1, user_number)[
]b4_locations_if([[
]b4_yylloc_default_define[

View File

@@ -175,6 +175,8 @@ location_print (location loc, FILE *out)
}
else
{
aver (loc.start.file);
aver (loc.end.file);
int end_col = 0 != loc.end.column ? loc.end.column - 1 : 0;
res += fprintf (out, "%s",
quotearg_n_style (3, escape_quoting_style, loc.start.file));
@@ -317,7 +319,7 @@ caret_getc_internal (mbchar_t *res)
/* Move CARET_INFO (which has a valid FILE) to the line number LINE.
Compute and cache that line's length in CARET_INFO.LINE_LEN.
Return whether successful.*/
Return whether successful. */
static bool
caret_set_line (int line)
{
@@ -423,12 +425,14 @@ location_caret (location loc, const char *style, FILE *out)
{
/* The last column to highlight. Only the first line of
multiline locations is quoted, in which case the ending
column is the end of line. Single point locations (with
equal boundaries) denote the character that they
follow. */
int col_end
column is the end of line.
We used to work with byte offsets, and that was much
easier. However, we went back to using (visual) columns to
support truncating of long lines. */
const int col_end
= loc.start.line == loc.end.line
? loc.end.column + (loc.start.column == loc.end.column)
? loc.end.column
: caret_info.line_len;
/* Quote the file (at most the first line in the case of
multiline locations). */
@@ -438,24 +442,28 @@ location_caret (location loc, const char *style, FILE *out)
expected (maybe the file was changed since the scanner
ran), we might reach the end before we actually saw the
opening column. */
bool opened = false;
enum { before, inside, after } state = before;
while (!mb_iseof (c) && !mb_iseq (c, '\n'))
{
if (caret_info.pos.column == loc.start.column)
// We might have already opened (and even closed!) the
// style and yet have the equality of the columns if we
// just saw zero-width characters.
if (state == before
&& caret_info.pos.column == loc.start.column)
{
begin_use_class (style, out);
opened = true;
state = inside;
}
if (skip < caret_info.pos.column)
mb_putc (c, out);
boundary_compute (&caret_info.pos, mb_ptr (c), mb_len (c));
caret_getc (c);
if (opened
if (state == inside
&& (caret_info.pos.column == col_end
|| width < caret_info.pos.column - skip))
{
end_use_class (style, out);
opened = false;
state = after;
}
if (width < caret_info.pos.column - skip)
{
@@ -463,6 +471,12 @@ location_caret (location loc, const char *style, FILE *out)
break;
}
}
if (state == inside)
{
// The line is shorter than expected.
end_use_class (style, out);
state = after;
}
putc ('\n', out);
}

View File

@@ -42,16 +42,14 @@ typedef struct
/* If positive, the column (starting at 1) just after the boundary.
This is neither a byte count, nor a character count; it is a
column count. If this is INT_MAX, the column number has
(visual) column count. If this is INT_MAX, the column number has
overflowed.
Meaningless and not displayed if nonpositive.
*/
Meaningless and not displayed if nonpositive. */
int column;
/* If nonnegative, the byte number (starting at 0) in the current line.
Never displayed, used when printing error messages with colors to
know where colors start and end. */
/* If nonnegative, the byte number (starting at 0) in the current
line. Not displayed (unless --trace=location). */
int byte;
} boundary;

View File

@@ -292,7 +292,6 @@ muscle_location_grow (char const *key, location loc)
#define COMMON_DECODE(Value) \
case '$': \
++(Value); aver (*(Value) == '['); \
++(Value); aver (*(Value) == ']'); \
++(Value); aver (*(Value) == '['); \
obstack_sgrow (&muscle_obstack, "$"); \

View File

@@ -81,7 +81,7 @@ static bool untyped_var_seen;
historically almost any character is allowed in a tag. We disallow
NUL and newline, as this simplifies our implementation. We allow
"->" as a means to dereference a pointer. */
tag ([^\0\n>]|->)+
tag ([^\0\n>]|->)*[^-]
/* Zero or more instances of backslash-newline. Following GCC, allow
white space between the backslash and the newline. */

View File

@@ -77,10 +77,12 @@ sym_content_new (symbol *s)
res->symbol = s;
res->type_name = NULL;
res->type_loc = empty_loc;
for (int i = 0; i < CODE_PROPS_SIZE; ++i)
code_props_none_init (&res->props[i]);
res->number = NUMBER_UNDEFINED;
res->prec_loc = empty_loc;
res->prec = 0;
res->assoc = undef_assoc;
res->user_token_number = USER_NUMBER_UNDEFINED;
@@ -539,9 +541,12 @@ symbol_class_set (symbol *sym, symbol_class class, location loc, bool declaring)
_("previous declaration"));
}
else
{
sym->location = loc;
s->status = declared;
}
}
}
}

View File

@@ -209,8 +209,8 @@ typedef size_t uintptr_t;
/* Output Str both quoted for M4 (i.e., embed in [[...]]), and escaped
for our postprocessing (i.e., escape M4 special characters). If
Str is empty (or NULL), output "[]" instead of "[[]]" as it make M4
programming easier (m4_ifval can be used).
Str is empty (or NULL), output "[]" instead of "[[]]" as it makes
M4 programming easier (m4_ifval can be used).
For instance "[foo]" -> "[[@{foo@}]]", "$$" -> "[[$][$][]]". */

View File

@@ -37,15 +37,15 @@ AT_BISON_OPTION_PUSHDEFS
AT_DATA_GRAMMAR([[input.y]], [$2])
AT_DATA([experr], [$4])
# For some reason, literal ^M in the input are removed and don't end
# in `input.y`. So use the two-character ^M to represent it, and let
# Perl insert real CR characters.
if grep '\^M' input.y >/dev/null; then
AT_PERL_REQUIRE([-pi -e 's{\^M}{\r}gx' input.y])
if $EGREP ['\^M|\\[0-9][0-9][0-9]'] input.y experr >/dev/null; then
AT_PERL_REQUIRE([-pi -e 's{\^M}{\r}g;s{\\(\d{3}|.)}{$v = $[]1; $v =~ /\A\d+\z/ ? chr($v) : $v}ge' input.y experr])
fi
AT_DATA([experr], [$4])
AT_CHECK([LC_ALL="$locale" $5 bison -fcaret --color=debug -Wall input.y], [$3], [], [experr])
# When no style, same messages, but without style.
@@ -152,6 +152,65 @@ input.y: <warning>warning:</warning> fix-its can be applied. Rerun with option
]])
## ------------------------------------- ##
## Line is too short, and then you die. ##
## ------------------------------------- ##
# We trust the "#line", since that's what allows us to quote the
# actual source from which the grammar file was generated. But #line
# can also be wrong, and point to a line which is shorter than the bad
# one. In which case we can easily forget to close the styling.
#
# Be sure to have #line point to a line long enough to open the
# styling, but not enough to close it.
AT_TEST([[Line is too short, and then you die]],
[[// Beware that there are 9 lines inserted before (including this one).
#line 12
%token foo 123
%token foo 123123
%token foo 123
%%
exp:
]],
[1],
[[input.y:13.8-10: <warning>warning:</warning> symbol foo redeclared [<warning>-Wother</warning>]
13 | %token <warning>foo</warning> 123
| <warning>^~~</warning>
input.y:12.8-10: <note>note:</note> previous declaration
12 | %token <note>foo</note> 123123
| <note>^~~</note>
input.y:13.12-17: <error>error:</error> redefining user token number of foo
13 | %token foo <error>123</error>
| <error>^~~~~~</error>
input.y:14.8-10: <warning>warning:</warning> symbol foo redeclared [<warning>-Wother</warning>]
14 | %%
| <warning>^~~</warning>
input.y:12.8-10: <note>note:</note> previous declaration
12 | %token <note>foo</note> 123123
| <note>^~~</note>
]])
## ----------------------- ##
## Zero-width characters. ##
## ----------------------- ##
# We used to open twice the styling for characters that have a
# zero-width on display (e.g., \005).
AT_TEST([[Zero-width characters]],
[[%%
exp: an\005error.
]],
[1],
[[input.y:10.8: <error>error:</error> invalid character: '\\005'
10 | exp: an<error>\005</error>error.
| <error>^</error>
]])
## -------------------------------------- ##
## Tabulations and multibyte characters. ##
## -------------------------------------- ##
@@ -262,7 +321,7 @@ input.y:10.1-27: <error>error:</error> %define variable 'error2' is not used
## ----------------- ##
# Carriage-return used to count as a newline in the scanner, and not
# in diagnostics. Resulting in all sort of nice bugs.
# in diagnostics. Resulting in all kinds of nice bugs.
AT_TEST([[Carriage return]],
[[^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M

View File

@@ -298,6 +298,52 @@ input.y:8.14: error: unexpected integer literal
AT_CLEANUP
## ---------------------------- ##
## Redefining the error token. ##
## ---------------------------- ##
AT_SETUP([Redefining the error token])
# We used to crash when trying to display the original definition of
# "error", which is a builtin without any location.
AT_DATA([input.y],
[[%token error 123
%token error 124
%%
exp:
]])
AT_BISON_CHECK([-fcaret input.y], [1], [],
[[input.y:2.8-12: warning: symbol error redeclared [-Wother]
2 | %token error 124
| ^~~~~
input.y:1.8-12: note: previous declaration
1 | %token error 123
| ^~~~~
input.y:2.14-16: error: redefining user token number of error
2 | %token error 124
| ^~~
]])
# While at it, make sure we properly used the user's number for
# "error".
AT_DATA([input.y],
[[%token error 123
%%
exp:
]])
AT_BISON_CHECK([input.y])
AT_CHECK([$EGREP -E '123|256' input.tab.c], [],
[[ 0, 123, 257
#define YYERRCODE 123
]])
AT_CLEANUP
## ------------------ ##
## Dangling aliases. ##
## ------------------ ##
@@ -2069,6 +2115,25 @@ input.y:1.1-34: note: accepted value: 'consistent'
input.y:1.1-34: note: accepted value: 'accepting'
]])
# Check escapes.
AT_DATA([[input.y]],
[[%define lr.default-reduction {[$@]}
%%
start: %empty;
]])
AT_BISON_CHECK([[-fcaret input.y]], [[1]], [[]],
[[input.y:1.1-35: warning: %define variable 'lr.default-reduction' requires keyword values [-Wdeprecated]
1 | %define lr.default-reduction {[$@]}
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
input.y:1.1-35: error: invalid value for %define variable 'lr.default-reduction': '[$@]'
1 | %define lr.default-reduction {[$@]}
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
input.y:1.1-35: note: accepted value: 'most'
input.y:1.1-35: note: accepted value: 'consistent'
input.y:1.1-35: note: accepted value: 'accepting'
]])
# Back-end.
AT_DATA([[input.y]],
[[%define api.push-pull neither
@@ -2548,7 +2613,9 @@ AT_DATA_GRAMMAR([[input.y]],
%printer { $%; @%; } <*> exp TOK;
%{ $ @ %} // Should not warn.
%%
exp: TOK { $%; @%; $$ = $1; };
exp: TOK { $%; @%; $$ = $1; }
| 'a' { $<->1; $$ = 1; }
| 'b' { $<foo->bar>$; }
%%
$ @ // Should not warn.
]])
@@ -2562,6 +2629,7 @@ input.y:13.19: warning: stray '$' [-Wother]
input.y:13.23: warning: stray '@' [-Wother]
input.y:16.19: warning: stray '$' [-Wother]
input.y:16.23: warning: stray '@' [-Wother]
input.y:17.19: warning: stray '$' [-Wother]
]])
AT_BISON_OPTION_POPDEFS