diff --git a/.prev-version b/.prev-version index d5c0c991..444877d4 100644 --- a/.prev-version +++ b/.prev-version @@ -1 +1 @@ -3.5.1 +3.5.3 diff --git a/NEWS b/NEWS index e9157594..7ee66c07 100644 --- a/NEWS +++ b/NEWS @@ -110,6 +110,16 @@ GNU Bison NEWS tracking, internationalized custom error messages, lookahead-correction, rich debug traces, etc. +* Noteworthy changes in release 3.5.3 (2020-03-08) [stable] + +** Bug fixes + + Error messages could quote lines containing zero-width characters (such as + \005) with incorrect styling. Fixes for similar issues with unexpectedly + short lines (e.g., the file was changed between parsing and diagnosing). + + Several unlikely crashes found by fuzzing have been fixed. + * Noteworthy changes in release 3.5.2 (2020-02-13) [stable] ** Bug fixes diff --git a/THANKS b/THANKS index db54776a..d8ef2c0c 100644 --- a/THANKS +++ b/THANKS @@ -4,8 +4,9 @@ it is today without the invaluable help of these people: Aaro Koskinen aaro.koskinen@iki.fi Аскар Сафин safinaskar@mail.ru Adam Sampson ats@offog.org +Ahcheong Lee dkcjd2000@gmail.com Airy Andre Airy.Andre@edf.fr -Akim Demaille akim@lrde.epita.fr +Akim Demaille akim@gnu.org Albert Chin-A-Young china@thewrittenword.com Alexander Belopolsky alexb@rentec.com Alexandre Duret-Lutz adl@lrde.epita.fr diff --git a/TODO b/TODO index 65f9148d..0f42eb1c 100644 --- a/TODO +++ b/TODO @@ -305,13 +305,8 @@ It would be a very nice source of inspiration for the other languages. Valentin Tolmer is working on this. ** YYERRCODE -Defined to 256, but not used, not documented. Probably the token -number for the error token, which POSIX wants to be 256, but which -Bison might renumber if the user used number 256. Keep fix and doc? -Throw away? - -Also, why don't we output the token name of the error token in the -output? It is explicitly skipped: +Why don't we output the token name of the error token in the output? It is +explicitly skipped: /* Skip error token and tokens without identifier. 
*/ if (sym != errtoken && id) diff --git a/data/skeletons/yacc.c b/data/skeletons/yacc.c index 7ef0b218..7f92a200 100644 --- a/data/skeletons/yacc.c +++ b/data/skeletons/yacc.c @@ -739,7 +739,7 @@ static const ]b4_int_type_for([b4_toknum])[ yytoknum[] = /* Error symbol internal number. */ #define YYTERROR 1 /* Error token external number. */ -#define YYERRCODE 256 +#define YYERRCODE ]b4_symbol(1, user_number)[ ]b4_locations_if([[ ]b4_yylloc_default_define[ diff --git a/src/location.c b/src/location.c index 4bf3b123..5718144f 100644 --- a/src/location.c +++ b/src/location.c @@ -175,6 +175,8 @@ location_print (location loc, FILE *out) } else { + aver (loc.start.file); + aver (loc.end.file); int end_col = 0 != loc.end.column ? loc.end.column - 1 : 0; res += fprintf (out, "%s", quotearg_n_style (3, escape_quoting_style, loc.start.file)); @@ -317,7 +319,7 @@ caret_getc_internal (mbchar_t *res) /* Move CARET_INFO (which has a valid FILE) to the line number LINE. Compute and cache that line's length in CARET_INFO.LINE_LEN. - Return whether successful.*/ + Return whether successful. */ static bool caret_set_line (int line) { @@ -423,12 +425,14 @@ location_caret (location loc, const char *style, FILE *out) { /* The last column to highlight. Only the first line of multiline locations are quoted, in which case the ending - column is the end of line. Single point locations (with - equal boundaries) denote the character that they - follow. */ - int col_end + column is the end of line. + + We used to work with byte offsets, and that was much + easier. However, we went back to using (visual) columns to + support truncating of long lines. */ + const int col_end = loc.start.line == loc.end.line - ? loc.end.column + (loc.start.column == loc.end.column) + ? loc.end.column : caret_info.line_len; /* Quote the file (at most the first line in the case of multiline locations). 
*/ @@ -438,24 +442,28 @@ location_caret (location loc, const char *style, FILE *out) expected (maybe the file was changed since the scanner ran), we might reach the end before we actually saw the opening column. */ - bool opened = false; + enum { before, inside, after } state = before; while (!mb_iseof (c) && !mb_iseq (c, '\n')) { - if (caret_info.pos.column == loc.start.column) + // We might have already opened (and even closed!) the + // style and yet have the equality of the columns if we + // just saw zero-width characters. + if (state == before + && caret_info.pos.column == loc.start.column) { begin_use_class (style, out); - opened = true; + state = inside; } if (skip < caret_info.pos.column) mb_putc (c, out); boundary_compute (&caret_info.pos, mb_ptr (c), mb_len (c)); caret_getc (c); - if (opened + if (state == inside && (caret_info.pos.column == col_end || width < caret_info.pos.column - skip)) { end_use_class (style, out); - opened = false; + state = after; } if (width < caret_info.pos.column - skip) { @@ -463,6 +471,12 @@ location_caret (location loc, const char *style, FILE *out) break; } } + if (state == inside) + { + // The line is shorter than expected. + end_use_class (style, out); + state = after; + } putc ('\n', out); } diff --git a/src/location.h b/src/location.h index dcb594fb..d351cb0e 100644 --- a/src/location.h +++ b/src/location.h @@ -42,16 +42,14 @@ typedef struct /* If positive, the column (starting at 1) just after the boundary. This is neither a byte count, nor a character count; it is a - column count. If this is INT_MAX, the column number has + (visual) column count. If this is INT_MAX, the column number has overflowed. - Meaningless and not displayed if nonpositive. - */ + Meaningless and not displayed if nonpositive. */ int column; - /* If nonnegative, the byte number (starting at 0) in the current line. - Never displayed, used when printing error messages with colors to - know where colors start and end. 
*/ + /* If nonnegative, the byte number (starting at 0) in the current + line. Not displayed (unless --trace=location). */ int byte; } boundary; diff --git a/src/muscle-tab.c b/src/muscle-tab.c index b5dc0f69..21443f4f 100644 --- a/src/muscle-tab.c +++ b/src/muscle-tab.c @@ -292,7 +292,6 @@ muscle_location_grow (char const *key, location loc) #define COMMON_DECODE(Value) \ case '$': \ - ++(Value); aver (*(Value) == '['); \ ++(Value); aver (*(Value) == ']'); \ ++(Value); aver (*(Value) == '['); \ obstack_sgrow (&muscle_obstack, "$"); \ diff --git a/src/scan-code.l b/src/scan-code.l index 20242857..db6d0764 100644 --- a/src/scan-code.l +++ b/src/scan-code.l @@ -81,7 +81,7 @@ static bool untyped_var_seen; historically almost any character is allowed in a tag. We disallow NUL and newline, as this simplifies our implementation. We allow "->" as a means to dereference a pointer. */ -tag ([^\0\n>]|->)+ +tag ([^\0\n>]|->)*[^-] /* Zero or more instances of backslash-newline. Following GCC, allow white space between the backslash and the newline. 
*/ diff --git a/src/symtab.c b/src/symtab.c index b6f0a952..a446476c 100644 --- a/src/symtab.c +++ b/src/symtab.c @@ -77,10 +77,12 @@ sym_content_new (symbol *s) res->symbol = s; res->type_name = NULL; + res->type_loc = empty_loc; for (int i = 0; i < CODE_PROPS_SIZE; ++i) code_props_none_init (&res->props[i]); res->number = NUMBER_UNDEFINED; + res->prec_loc = empty_loc; res->prec = 0; res->assoc = undef_assoc; res->user_token_number = USER_NUMBER_UNDEFINED; @@ -539,7 +541,10 @@ symbol_class_set (symbol *sym, symbol_class class, location loc, bool declaring) _("previous declaration")); } else - s->status = declared; + { + sym->location = loc; + s->status = declared; + } } } } diff --git a/src/system.h b/src/system.h index b0ffb23a..0210f6c6 100644 --- a/src/system.h +++ b/src/system.h @@ -209,10 +209,10 @@ typedef size_t uintptr_t; /* Output Str both quoted for M4 (i.e., embed in [[...]]), and escaped for our postprocessing (i.e., escape M4 special characters). If - Str is empty (or NULL), output "[]" instead of "[[]]" as it make M4 - programming easier (m4_ifval can be used). + Str is empty (or NULL), output "[]" instead of "[[]]" as it makes + M4 programming easier (m4_ifval can be used). - For instance "[foo]" -> "[[@{foo@}]]", "$$" -> "[[$][$][]]". */ + For instance "[foo]" -> "[[@{foo@}]]", "$$" -> "[[$][$][]]". */ # define obstack_quote(Obs, Str) \ do { \ diff --git a/tests/diagnostics.at b/tests/diagnostics.at index 63d0133b..0724f88a 100644 --- a/tests/diagnostics.at +++ b/tests/diagnostics.at @@ -37,15 +37,15 @@ AT_BISON_OPTION_PUSHDEFS AT_DATA_GRAMMAR([[input.y]], [$2]) +AT_DATA([experr], [$4]) + # For some reason, literal ^M in the input are removed and don't end # in `input.y`. So use the two-character ^M represent it, and let # Perl insert real CR characters. 
-if grep '\^M' input.y >/dev/null; then - AT_PERL_REQUIRE([-pi -e 's{\^M}{\r}gx' input.y]) +if $EGREP ['\^M|\\[0-9][0-9][0-9]'] input.y experr >/dev/null; then + AT_PERL_REQUIRE([-pi -e 's{\^M}{\r}g;s{\\(\d{3}|.)}{$v = $[]1; $v =~ /\A\d+\z/ ? chr($v) : $v}ge' input.y experr]) fi -AT_DATA([experr], [$4]) - AT_CHECK([LC_ALL="$locale" $5 bison -fcaret --color=debug -Wall input.y], [$3], [], [experr]) # When no style, same messages, but without style. @@ -152,6 +152,65 @@ input.y: warning: fix-its can be applied. Rerun with option ]]) + +## ------------------------------------- ## +## Line is too short, and then you die. ## +## ------------------------------------- ## + +# We trust the "#line", since that's what allows us to quote the +# actual source from which the grammar file was generated. But #line +# can also be wrong, and point to a line which is shorter than the bad +# one. In which case we can easily forget to close the styling. +# +# Be sure to have #line point to a line long enough to open the +# styling, but not enough to close it. + +AT_TEST([[Line is too short, and then you die]], +[[// Beware that there are 9 lines inserted before (including this one). +#line 12 +%token foo 123 +%token foo 123123 +%token foo 123 +%% +exp: +]], +[1], +[[input.y:13.8-10: warning: symbol foo redeclared [-Wother] + 13 | %token foo 123 + | ^~~ +input.y:12.8-10: note: previous declaration + 12 | %token foo 123123 + | ^~~ +input.y:13.12-17: error: redefining user token number of foo + 13 | %token foo 123 + | ^~~~~~ +input.y:14.8-10: warning: symbol foo redeclared [-Wother] + 14 | %% + | ^~~ +input.y:12.8-10: note: previous declaration + 12 | %token foo 123123 + | ^~~ +]]) + + +## ----------------------- ## +## Zero-width characters. ## +## ----------------------- ## + +# We used to open twice the styling for characters that have a +# zero-width on display (e.g., \005). + +AT_TEST([[Zero-width characters]], +[[%% +exp: an\005error. 
+]], +[1], +[[input.y:10.8: error: invalid character: '\\005' + 10 | exp: an\005error. + | ^ +]]) + + ## -------------------------------------- ## ## Tabulations and multibyte characters. ## ## -------------------------------------- ## @@ -262,7 +321,7 @@ input.y:10.1-27: error: %define variable 'error2' is not used ## ----------------- ## # Carriage-return used to count as a newline in the scanner, and not -# in diagnostics. Resulting in all sort of nice bugs. +# in diagnostics. Resulting in all kinds of nice bugs. AT_TEST([[Carriage return]], [[^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M diff --git a/tests/input.at b/tests/input.at index e7971267..4f196d7b 100644 --- a/tests/input.at +++ b/tests/input.at @@ -298,6 +298,52 @@ input.y:8.14: error: unexpected integer literal AT_CLEANUP +## ---------------------------- ## +## Redefining the error token. ## +## ---------------------------- ## + +AT_SETUP([Redefining the error token]) + +# We used to crash when trying to display the original definition of +# "error", which is a builtin without any location. + +AT_DATA([input.y], +[[%token error 123 +%token error 124 +%% +exp: +]]) + +AT_BISON_CHECK([-fcaret input.y], [1], [], +[[input.y:2.8-12: warning: symbol error redeclared [-Wother] + 2 | %token error 124 + | ^~~~~ +input.y:1.8-12: note: previous declaration + 1 | %token error 123 + | ^~~~~ +input.y:2.14-16: error: redefining user token number of error + 2 | %token error 124 + | ^~~ +]]) + +# While at it, make sure we properly used the user's number for +# "error". +AT_DATA([input.y], +[[%token error 123 +%% +exp: +]]) + +AT_BISON_CHECK([input.y]) + +AT_CHECK([$EGREP -E '123|256' input.tab.c], [], +[[ 0, 123, 257 +#define YYERRCODE 123 +]]) + +AT_CLEANUP + + ## ------------------ ## ## Dangling aliases. ## ## ------------------ ## @@ -2069,6 +2115,25 @@ input.y:1.1-34: note: accepted value: 'consistent' input.y:1.1-34: note: accepted value: 'accepting' ]]) +# Check escapes. 
+AT_DATA([[input.y]], +[[%define lr.default-reduction {[$@]} +%% +start: %empty; +]]) +AT_BISON_CHECK([[-fcaret input.y]], [[1]], [[]], +[[input.y:1.1-35: warning: %define variable 'lr.default-reduction' requires keyword values [-Wdeprecated] + 1 | %define lr.default-reduction {[$@]} + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +input.y:1.1-35: error: invalid value for %define variable 'lr.default-reduction': '[$@]' + 1 | %define lr.default-reduction {[$@]} + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +input.y:1.1-35: note: accepted value: 'most' +input.y:1.1-35: note: accepted value: 'consistent' +input.y:1.1-35: note: accepted value: 'accepting' +]]) + + # Back-end. AT_DATA([[input.y]], [[%define api.push-pull neither @@ -2548,7 +2613,9 @@ AT_DATA_GRAMMAR([[input.y]], %printer { $%; @%; } <*> exp TOK; %{ $ @ %} // Should not warn. %% -exp: TOK { $%; @%; $$ = $1; }; +exp: TOK { $%; @%; $$ = $1; } + | 'a' { $<->1; $$ = 1; } + | 'b' { $bar>$; } %% $ @ // Should not warn. ]]) @@ -2562,6 +2629,7 @@ input.y:13.19: warning: stray '$' [-Wother] input.y:13.23: warning: stray '@' [-Wother] input.y:16.19: warning: stray '$' [-Wother] input.y:16.23: warning: stray '@' [-Wother] +input.y:17.19: warning: stray '$' [-Wother] ]]) AT_BISON_OPTION_POPDEFS