diff --git a/.prev-version b/.prev-version
index d5c0c991..444877d4 100644
--- a/.prev-version
+++ b/.prev-version
@@ -1 +1 @@
-3.5.1
+3.5.3
diff --git a/NEWS b/NEWS
index e9157594..7ee66c07 100644
--- a/NEWS
+++ b/NEWS
@@ -110,6 +110,16 @@ GNU Bison NEWS
tracking, internationalized custom error messages, lookahead-correction,
rich debug traces, etc.
+* Noteworthy changes in release 3.5.3 (2020-03-08) [stable]
+
+** Bug fixes
+
+ Error messages could quote lines containing zero-width characters (such as
+ \005) with incorrect styling. Similar issues with unexpectedly short lines
+ (e.g., when the file was changed between parsing and diagnosing) are fixed.
+
+ Several unlikely crashes found by fuzzing have been fixed.
+
* Noteworthy changes in release 3.5.2 (2020-02-13) [stable]
** Bug fixes
diff --git a/THANKS b/THANKS
index db54776a..d8ef2c0c 100644
--- a/THANKS
+++ b/THANKS
@@ -4,8 +4,9 @@ it is today without the invaluable help of these people:
Aaro Koskinen aaro.koskinen@iki.fi
Аскар Сафин safinaskar@mail.ru
Adam Sampson ats@offog.org
+Ahcheong Lee dkcjd2000@gmail.com
Airy Andre Airy.Andre@edf.fr
-Akim Demaille akim@lrde.epita.fr
+Akim Demaille akim@gnu.org
Albert Chin-A-Young china@thewrittenword.com
Alexander Belopolsky alexb@rentec.com
Alexandre Duret-Lutz adl@lrde.epita.fr
diff --git a/TODO b/TODO
index 65f9148d..0f42eb1c 100644
--- a/TODO
+++ b/TODO
@@ -305,13 +305,8 @@ It would be a very nice source of inspiration for the other languages.
Valentin Tolmer is working on this.
** YYERRCODE
-Defined to 256, but not used, not documented. Probably the token
-number for the error token, which POSIX wants to be 256, but which
-Bison might renumber if the user used number 256. Keep fix and doc?
-Throw away?
-
-Also, why don't we output the token name of the error token in the
-output? It is explicitly skipped:
+Why don't we output the token name of the error token in the output? It is
+explicitly skipped:
/* Skip error token and tokens without identifier. */
if (sym != errtoken && id)
diff --git a/data/skeletons/yacc.c b/data/skeletons/yacc.c
index 7ef0b218..7f92a200 100644
--- a/data/skeletons/yacc.c
+++ b/data/skeletons/yacc.c
@@ -739,7 +739,7 @@ static const ]b4_int_type_for([b4_toknum])[ yytoknum[] =
/* Error symbol internal number. */
#define YYTERROR 1
/* Error token external number. */
-#define YYERRCODE 256
+#define YYERRCODE ]b4_symbol(1, user_number)[
]b4_locations_if([[
]b4_yylloc_default_define[
diff --git a/src/location.c b/src/location.c
index 4bf3b123..5718144f 100644
--- a/src/location.c
+++ b/src/location.c
@@ -175,6 +175,8 @@ location_print (location loc, FILE *out)
}
else
{
+ aver (loc.start.file);
+ aver (loc.end.file);
int end_col = 0 != loc.end.column ? loc.end.column - 1 : 0;
res += fprintf (out, "%s",
quotearg_n_style (3, escape_quoting_style, loc.start.file));
@@ -317,7 +319,7 @@ caret_getc_internal (mbchar_t *res)
/* Move CARET_INFO (which has a valid FILE) to the line number LINE.
Compute and cache that line's length in CARET_INFO.LINE_LEN.
- Return whether successful.*/
+ Return whether successful. */
static bool
caret_set_line (int line)
{
@@ -423,12 +425,14 @@ location_caret (location loc, const char *style, FILE *out)
{
/* The last column to highlight. Only the first line of
multiline locations are quoted, in which case the ending
- column is the end of line. Single point locations (with
- equal boundaries) denote the character that they
- follow. */
- int col_end
+ column is the end of line.
+
+ We used to work with byte offsets, and that was much
+ easier. However, we went back to using (visual) columns to
+ support truncating of long lines. */
+ const int col_end
= loc.start.line == loc.end.line
- ? loc.end.column + (loc.start.column == loc.end.column)
+ ? loc.end.column
: caret_info.line_len;
/* Quote the file (at most the first line in the case of
multiline locations). */
@@ -438,24 +442,28 @@ location_caret (location loc, const char *style, FILE *out)
expected (maybe the file was changed since the scanner
ran), we might reach the end before we actually saw the
opening column. */
- bool opened = false;
+ enum { before, inside, after } state = before;
while (!mb_iseof (c) && !mb_iseq (c, '\n'))
{
- if (caret_info.pos.column == loc.start.column)
+ // We might have already opened (and even closed!) the
+ // style and yet have the equality of the columns if we
+ // just saw zero-width characters.
+ if (state == before
+ && caret_info.pos.column == loc.start.column)
{
begin_use_class (style, out);
- opened = true;
+ state = inside;
}
if (skip < caret_info.pos.column)
mb_putc (c, out);
boundary_compute (&caret_info.pos, mb_ptr (c), mb_len (c));
caret_getc (c);
- if (opened
+ if (state == inside
&& (caret_info.pos.column == col_end
|| width < caret_info.pos.column - skip))
{
end_use_class (style, out);
- opened = false;
+ state = after;
}
if (width < caret_info.pos.column - skip)
{
@@ -463,6 +471,12 @@ location_caret (location loc, const char *style, FILE *out)
break;
}
}
+ if (state == inside)
+ {
+ // The line is shorter than expected.
+ end_use_class (style, out);
+ state = after;
+ }
putc ('\n', out);
}
diff --git a/src/location.h b/src/location.h
index dcb594fb..d351cb0e 100644
--- a/src/location.h
+++ b/src/location.h
@@ -42,16 +42,14 @@ typedef struct
/* If positive, the column (starting at 1) just after the boundary.
This is neither a byte count, nor a character count; it is a
- column count. If this is INT_MAX, the column number has
+ (visual) column count. If this is INT_MAX, the column number has
overflowed.
- Meaningless and not displayed if nonpositive.
- */
+ Meaningless and not displayed if nonpositive. */
int column;
- /* If nonnegative, the byte number (starting at 0) in the current line.
- Never displayed, used when printing error messages with colors to
- know where colors start and end. */
+ /* If nonnegative, the byte number (starting at 0) in the current
+ line. Not displayed (unless --trace=location). */
int byte;
} boundary;
diff --git a/src/muscle-tab.c b/src/muscle-tab.c
index b5dc0f69..21443f4f 100644
--- a/src/muscle-tab.c
+++ b/src/muscle-tab.c
@@ -292,7 +292,6 @@ muscle_location_grow (char const *key, location loc)
#define COMMON_DECODE(Value) \
case '$': \
- ++(Value); aver (*(Value) == '['); \
++(Value); aver (*(Value) == ']'); \
++(Value); aver (*(Value) == '['); \
obstack_sgrow (&muscle_obstack, "$"); \
diff --git a/src/scan-code.l b/src/scan-code.l
index 20242857..db6d0764 100644
--- a/src/scan-code.l
+++ b/src/scan-code.l
@@ -81,7 +81,7 @@ static bool untyped_var_seen;
historically almost any character is allowed in a tag. We disallow
NUL and newline, as this simplifies our implementation. We allow
"->" as a means to dereference a pointer. */
-tag ([^\0\n>]|->)+
+tag ([^\0\n>]|->)*[^-]
/* Zero or more instances of backslash-newline. Following GCC, allow
white space between the backslash and the newline. */
diff --git a/src/symtab.c b/src/symtab.c
index b6f0a952..a446476c 100644
--- a/src/symtab.c
+++ b/src/symtab.c
@@ -77,10 +77,12 @@ sym_content_new (symbol *s)
res->symbol = s;
res->type_name = NULL;
+ res->type_loc = empty_loc;
for (int i = 0; i < CODE_PROPS_SIZE; ++i)
code_props_none_init (&res->props[i]);
res->number = NUMBER_UNDEFINED;
+ res->prec_loc = empty_loc;
res->prec = 0;
res->assoc = undef_assoc;
res->user_token_number = USER_NUMBER_UNDEFINED;
@@ -539,7 +541,10 @@ symbol_class_set (symbol *sym, symbol_class class, location loc, bool declaring)
_("previous declaration"));
}
else
- s->status = declared;
+ {
+ sym->location = loc;
+ s->status = declared;
+ }
}
}
}
diff --git a/src/system.h b/src/system.h
index b0ffb23a..0210f6c6 100644
--- a/src/system.h
+++ b/src/system.h
@@ -209,10 +209,10 @@ typedef size_t uintptr_t;
/* Output Str both quoted for M4 (i.e., embed in [[...]]), and escaped
for our postprocessing (i.e., escape M4 special characters). If
- Str is empty (or NULL), output "[]" instead of "[[]]" as it make M4
- programming easier (m4_ifval can be used).
+ Str is empty (or NULL), output "[]" instead of "[[]]" as it makes
+ M4 programming easier (m4_ifval can be used).
- For instance "[foo]" -> "[[@{foo@}]]", "$$" -> "[[$][$][]]". */
+ For instance "[foo]" -> "[[@{foo@}]]", "$$" -> "[[$][$][]]". */
# define obstack_quote(Obs, Str) \
do { \
diff --git a/tests/diagnostics.at b/tests/diagnostics.at
index 63d0133b..0724f88a 100644
--- a/tests/diagnostics.at
+++ b/tests/diagnostics.at
@@ -37,15 +37,15 @@ AT_BISON_OPTION_PUSHDEFS
AT_DATA_GRAMMAR([[input.y]], [$2])
+AT_DATA([experr], [$4])
+
# For some reason, literal ^M in the input are removed and don't end
# in `input.y`. So use the two-character ^M represent it, and let
# Perl insert real CR characters.
-if grep '\^M' input.y >/dev/null; then
- AT_PERL_REQUIRE([-pi -e 's{\^M}{\r}gx' input.y])
+if $EGREP ['\^M|\\[0-9][0-9][0-9]'] input.y experr >/dev/null; then
+ AT_PERL_REQUIRE([-pi -e 's{\^M}{\r}g;s{\\(\d{3}|.)}{$v = $[]1; $v =~ /\A\d+\z/ ? chr($v) : $v}ge' input.y experr])
fi
-AT_DATA([experr], [$4])
-
AT_CHECK([LC_ALL="$locale" $5 bison -fcaret --color=debug -Wall input.y], [$3], [], [experr])
# When no style, same messages, but without style.
@@ -152,6 +152,65 @@ input.y: warning: fix-its can be applied. Rerun with option
]])
+
+## ------------------------------------- ##
+## Line is too short, and then you die. ##
+## ------------------------------------- ##
+
+# We trust the "#line", since that's what allows us to quote the
+# actual source from which the grammar file was generated. But #line
+# can also be wrong, and point to a line which is shorter than the bad
+# one. In which case we can easily forget to close the styling.
+#
+# Be sure to have #line point to a line long enough to open the
+# styling, but not enough to close it.
+
+AT_TEST([[Line is too short, and then you die]],
+[[// Beware that there are 9 lines inserted before (including this one).
+#line 12
+%token foo 123
+%token foo 123123
+%token foo 123
+%%
+exp:
+]],
+[1],
+[[input.y:13.8-10: warning: symbol foo redeclared [-Wother]
+ 13 | %token foo 123
+ | ^~~
+input.y:12.8-10: note: previous declaration
+ 12 | %token foo 123123
+ | ^~~
+input.y:13.12-17: error: redefining user token number of foo
+ 13 | %token foo 123
+ | ^~~~~~
+input.y:14.8-10: warning: symbol foo redeclared [-Wother]
+ 14 | %%
+ | ^~~
+input.y:12.8-10: note: previous declaration
+ 12 | %token foo 123123
+ | ^~~
+]])
+
+
+## ----------------------- ##
+## Zero-width characters. ##
+## ----------------------- ##
+
+# We used to open the styling twice for characters that have zero
+# width on display (e.g., \005).
+
+AT_TEST([[Zero-width characters]],
+[[%%
+exp: an\005error.
+]],
+[1],
+[[input.y:10.8: error: invalid character: '\\005'
+ 10 | exp: an\005error.
+ | ^
+]])
+
+
## -------------------------------------- ##
## Tabulations and multibyte characters. ##
## -------------------------------------- ##
@@ -262,7 +321,7 @@ input.y:10.1-27: error: %define variable 'error2' is not used
## ----------------- ##
# Carriage-return used to count as a newline in the scanner, and not
-# in diagnostics. Resulting in all sort of nice bugs.
+# in diagnostics. Resulting in all kinds of nice bugs.
AT_TEST([[Carriage return]],
[[^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M
diff --git a/tests/input.at b/tests/input.at
index e7971267..4f196d7b 100644
--- a/tests/input.at
+++ b/tests/input.at
@@ -298,6 +298,52 @@ input.y:8.14: error: unexpected integer literal
AT_CLEANUP
+## ---------------------------- ##
+## Redefining the error token. ##
+## ---------------------------- ##
+
+AT_SETUP([Redefining the error token])
+
+# We used to crash when trying to display the original definition of
+# "error", which is a builtin without any location.
+
+AT_DATA([input.y],
+[[%token error 123
+%token error 124
+%%
+exp:
+]])
+
+AT_BISON_CHECK([-fcaret input.y], [1], [],
+[[input.y:2.8-12: warning: symbol error redeclared [-Wother]
+ 2 | %token error 124
+ | ^~~~~
+input.y:1.8-12: note: previous declaration
+ 1 | %token error 123
+ | ^~~~~
+input.y:2.14-16: error: redefining user token number of error
+ 2 | %token error 124
+ | ^~~
+]])
+
+# While at it, make sure we properly used the user's number for
+# "error".
+AT_DATA([input.y],
+[[%token error 123
+%%
+exp:
+]])
+
+AT_BISON_CHECK([input.y])
+
+AT_CHECK([$EGREP -E '123|256' input.tab.c], [],
+[[ 0, 123, 257
+#define YYERRCODE 123
+]])
+
+AT_CLEANUP
+
+
## ------------------ ##
## Dangling aliases. ##
## ------------------ ##
@@ -2069,6 +2115,25 @@ input.y:1.1-34: note: accepted value: 'consistent'
input.y:1.1-34: note: accepted value: 'accepting'
]])
+# Check escapes.
+AT_DATA([[input.y]],
+[[%define lr.default-reduction {[$@]}
+%%
+start: %empty;
+]])
+AT_BISON_CHECK([[-fcaret input.y]], [[1]], [[]],
+[[input.y:1.1-35: warning: %define variable 'lr.default-reduction' requires keyword values [-Wdeprecated]
+ 1 | %define lr.default-reduction {[$@]}
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+input.y:1.1-35: error: invalid value for %define variable 'lr.default-reduction': '[$@]'
+ 1 | %define lr.default-reduction {[$@]}
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+input.y:1.1-35: note: accepted value: 'most'
+input.y:1.1-35: note: accepted value: 'consistent'
+input.y:1.1-35: note: accepted value: 'accepting'
+]])
+
+
# Back-end.
AT_DATA([[input.y]],
[[%define api.push-pull neither
@@ -2548,7 +2613,9 @@ AT_DATA_GRAMMAR([[input.y]],
%printer { $%; @%; } <*> exp TOK;
%{ $ @ %} // Should not warn.
%%
-exp: TOK { $%; @%; $$ = $1; };
+exp: TOK { $%; @%; $$ = $1; }
+ | 'a' { $<->1; $$ = 1; }
+ | 'b' { $bar>$; }
%%
$ @ // Should not warn.
]])
@@ -2562,6 +2629,7 @@ input.y:13.19: warning: stray '$' [-Wother]
input.y:13.23: warning: stray '@' [-Wother]
input.y:16.19: warning: stray '$' [-Wother]
input.y:16.23: warning: stray '@' [-Wother]
+input.y:17.19: warning: stray '$' [-Wother]
]])
AT_BISON_OPTION_POPDEFS