From 26c5f48c1c28a7c6c16ca578873bc83941ea4f76 Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Thu, 13 Feb 2020 19:11:16 +0100 Subject: [PATCH 01/19] maint: post-release administrivia * NEWS: Add header line for next release. * .prev-version: Record previous version. * cfg.mk (old_NEWS_hash): Auto-update. --- .prev-version | 2 +- NEWS | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.prev-version b/.prev-version index d5c0c991..87ce4929 100644 --- a/.prev-version +++ b/.prev-version @@ -1 +1 @@ -3.5.1 +3.5.2 diff --git a/NEWS b/NEWS index f2a5b060..d5eaa258 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,8 @@ GNU Bison NEWS +* Noteworthy changes in release ?.? (????-??-??) [?] + + * Noteworthy changes in release 3.5.2 (2020-02-13) [stable] ** Bug fixes From cefb538ab0523437aed094232c4b0c276aa06cfe Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Fri, 6 Mar 2020 08:25:52 +0100 Subject: [PATCH 02/19] gnulib: update --- gnulib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnulib b/gnulib index 4fcedca0..d279bc6d 160000 --- a/gnulib +++ b/gnulib @@ -1 +1 @@ -Subproject commit 4fcedca004fd13aecb5c6f235a988a5548bcb9a4 +Subproject commit d279bc6d9f9323e19ad8c32b6d12ff96dfb0f5ba From b7942f2661b625548ff1f98171ba32639d888cbd Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Wed, 4 Mar 2020 08:19:37 +0100 Subject: [PATCH 03/19] README: point to tests/bison, and document --trace Reported by Victor Morales Cayuela. * README, README-hacking.md: here. --- README | 5 +++++ README-hacking.md | 17 ++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/README b/README index 78a44e60..febdf67f 100644 --- a/README +++ b/README @@ -17,6 +17,11 @@ See the file INSTALL for generic compilation and installation instructions. Bison requires GNU m4 1.4.6 or later. See https://ftp.gnu.org/gnu/m4/m4-1.4.6.tar.gz. 
+## Running a non installed bison +Once you ran `make`, you might want to toy with this fresh bison before +installing it. In that case, do not use `src/bison`: it would use the +*installed* files (skeletons, etc.), not the local ones. Use `tests/bison`. + ## Colored diagnostics As an experimental feature, diagnostics are now colored, controlled by the `--color` and `--style` options. diff --git a/README-hacking.md b/README-hacking.md index f6b3ee6c..2fbca3bc 100644 --- a/README-hacking.md +++ b/README-hacking.md @@ -34,13 +34,28 @@ Only user visible strings are to be translated: error messages, bits of the assert/abort), and all the --trace output which is meant for the maintainers only. -## Horizontal tabs +## Coding style +Follow the GNU Coding Standards. + +Don't reinvent the wheel: we use gnulib, which features many components. +Actually, Bison has legacy code that we should replace with gnulib modules +(e.g., many adhoc implementations of lists). + Do not add horizontal tab characters to any file in Bison's repository except where required. For example, do not use tabs to format C code. However, make files, ChangeLog, and some regular expressions require tabs. Also, test cases might need to contain tabs to check that Bison properly processes tabs in its input. +## Commit messages +Please, imitate the style we use. Use `git log` to get sources of +inspiration. + +## Debugging +Bison supports tracing of its various steps, via the `--trace` option. +Since it is not meant for the end user, it is not displayed by `bison +--help`, nor is it documented in the manual. Instead, run `bison +--trace=help`. # Working from the repository From aab3feb5a18c30c84b5f6cca3f25e54f0cb2e4a2 Mon Sep 17 00:00:00 2001 From: Adrian Vogelsgesang Date: Thu, 27 Feb 2020 09:52:03 +0100 Subject: [PATCH 04/19] typo: succesful -> successful * data/skeletons/lalr1.cc: here * etc/bench.pl.in: here * src/location.c: and here. 
--- data/skeletons/lalr1.cc | 2 +- etc/bench.pl.in | 2 +- src/location.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/data/skeletons/lalr1.cc b/data/skeletons/lalr1.cc index fba6ef8b..dc674a2d 100644 --- a/data/skeletons/lalr1.cc +++ b/data/skeletons/lalr1.cc @@ -1340,7 +1340,7 @@ b4_error_verbose_if([state_type yystate, const symbol_type& yyla], yyarg[yycount++] = yytname_[yytoken];]b4_lac_if([[ #if ]b4_api_PREFIX[DEBUG - // Execute LAC once. We don't care if it is succesful, we + // Execute LAC once. We don't care if it is successful, we // only do it for the sake of debugging output. if (!yy_lac_established_) yy_lac_check_ (yytoken); diff --git a/etc/bench.pl.in b/etc/bench.pl.in index 626be46a..8bc04da6 100755 --- a/etc/bench.pl.in +++ b/etc/bench.pl.in @@ -100,7 +100,7 @@ Artificial grammar with very long rules. =item B<-h>, B<--help> -Display this message and exit succesfully. The more verbose, the more +Display this message and exit successfully. The more verbose, the more details. =item B<-i>, B<--iterations>=I diff --git a/src/location.c b/src/location.c index 62ec9c72..1af8e673 100644 --- a/src/location.c +++ b/src/location.c @@ -317,7 +317,7 @@ caret_getc_internal (mbchar_t *res) /* Move CARET_INFO (which has a valid FILE) to the line number LINE. Compute and cache that line's length in CARET_INFO.LINE_LEN. - Return whether succesful.*/ + Return whether successful. */ static bool caret_set_line (int line) { From b493c173c9cabefd2de5b7c34aac2d8c5cba4ff1 Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Wed, 19 Feb 2020 19:01:14 +0100 Subject: [PATCH 05/19] style: remove useless declarations * src/reader.h: Don't duplicate what parse-gram.h already exposes. * src/lr0.h: Remove useless include. 
--- src/lr0.h | 2 -- src/reader.h | 6 ------ 2 files changed, 8 deletions(-) diff --git a/src/lr0.h b/src/lr0.h index 1c6ea1c2..31366dbd 100644 --- a/src/lr0.h +++ b/src/lr0.h @@ -21,8 +21,6 @@ #ifndef LR0_H_ # define LR0_H_ -# include "state.h" - void generate_states (void); #endif /* !LR0_H_ */ diff --git a/src/reader.h b/src/reader.h index 39c282f4..6b4f57ff 100644 --- a/src/reader.h +++ b/src/reader.h @@ -35,12 +35,6 @@ typedef struct merger_list location type_declaration_loc; } merger_list; -/* From the parser. */ -extern int gram_debug; -int gram_parse (void); - - -/* From reader.c. */ void grammar_start_symbol_set (symbol *sym, location loc); void grammar_current_rule_begin (symbol *lhs, location loc, named_ref *lhs_named_ref); From b437b166037c32ed2820d14e9b15c516c5758990 Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Sun, 1 Mar 2020 07:54:17 +0100 Subject: [PATCH 06/19] examples: use consistently the GFDL header for readmes * examples/c++/README.md, examples/c++/calc++/README.md, * examples/c/calc/README.md, examples/c/lexcalc/README.md, * examples/c/reccalc/README.md: Prefer the GFDL banner to the GPL one. --- README | 18 ++++++------------ examples/c++/README.md | 2 +- examples/c++/calc++/README.md | 22 +++++++--------------- examples/c/README.md | 2 ++ examples/c/calc/README.md | 17 ++++++----------- examples/c/lexcalc/README.md | 18 ++++++------------ examples/c/reccalc/README.md | 19 ++++++------------- 7 files changed, 34 insertions(+), 64 deletions(-) diff --git a/README b/README index febdf67f..83c5f0c4 100644 --- a/README +++ b/README @@ -90,18 +90,12 @@ Software Foundation, Inc. This file is part of GNU bison, the GNU Compiler Compiler. -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. 
- -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . +Permission is granted to copy, distribute and/or modify this document +under the terms of the GNU Free Documentation License, Version 1.3 or +any later version published by the Free Software Foundation; with no +Invariant Sections, with no Front-Cover Texts, and with no Back-Cover +Texts. A copy of the license is included in the "GNU Free +Documentation License" file as part of this distribution. Local Variables: mode: markdown diff --git a/examples/c++/README.md b/examples/c++/README.md index e5a11e01..3b8bad03 100644 --- a/examples/c++/README.md +++ b/examples/c++/README.md @@ -55,5 +55,5 @@ Invariant Sections, with no Front-Cover Texts, and with no Back-Cover Texts. A copy of the license is included in the "GNU Free Documentation License" file as part of this distribution. -# LocalWords: mfcalc calc parsers yy ispell american +LocalWords: mfcalc calc parsers yy ispell american ---> diff --git a/examples/c++/calc++/README.md b/examples/c++/calc++/README.md index a3371a68..1a6a8722 100644 --- a/examples/c++/calc++/README.md +++ b/examples/c++/calc++/README.md @@ -38,20 +38,12 @@ End: Copyright (C) 2018-2020 Free Software Foundation, Inc. -This file is part of Bison, the GNU Compiler Compiler. +Permission is granted to copy, distribute and/or modify this document +under the terms of the GNU Free Documentation License, Version 1.3 or +any later version published by the Free Software Foundation; with no +Invariant Sections, with no Front-Cover Texts, and with no Back-Cover +Texts. A copy of the license is included in the "GNU Free +Documentation License" file as part of this distribution. 
-This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - -# LocalWords: calc parsers yy MERCHANTABILITY Ctrl ispell american +LocalWords: calc parsers yy MERCHANTABILITY Ctrl ispell american ---> diff --git a/examples/c/README.md b/examples/c/README.md index 55fd600e..55b43695 100644 --- a/examples/c/README.md +++ b/examples/c/README.md @@ -47,6 +47,8 @@ End: Copyright (C) 2018-2020 Free Software Foundation, Inc. +This file is part of GNU bison, the GNU Compiler Compiler. + Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with no diff --git a/examples/c/calc/README.md b/examples/c/calc/README.md index 342f45d8..e994826f 100644 --- a/examples/c/calc/README.md +++ b/examples/c/calc/README.md @@ -13,16 +13,11 @@ Copyright (C) 2019-2020 Free Software Foundation, Inc. This file is part of Bison, the GNU Compiler Compiler. -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. 
+Permission is granted to copy, distribute and/or modify this document +under the terms of the GNU Free Documentation License, Version 1.3 or +any later version published by the Free Software Foundation; with no +Invariant Sections, with no Front-Cover Texts, and with no Back-Cover +Texts. A copy of the license is included in the "GNU Free +Documentation License" file as part of this distribution. -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . ---> diff --git a/examples/c/lexcalc/README.md b/examples/c/lexcalc/README.md index ddf693c4..8c95d870 100644 --- a/examples/c/lexcalc/README.md +++ b/examples/c/lexcalc/README.md @@ -13,16 +13,10 @@ Copyright (C) 2018-2020 Free Software Foundation, Inc. This file is part of Bison, the GNU Compiler Compiler. -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . +Permission is granted to copy, distribute and/or modify this document +under the terms of the GNU Free Documentation License, Version 1.3 or +any later version published by the Free Software Foundation; with no +Invariant Sections, with no Front-Cover Texts, and with no Back-Cover +Texts. 
A copy of the license is included in the "GNU Free +Documentation License" file as part of this distribution. ---> diff --git a/examples/c/reccalc/README.md b/examples/c/reccalc/README.md index e894c227..3bfab9e8 100644 --- a/examples/c/reccalc/README.md +++ b/examples/c/reccalc/README.md @@ -28,18 +28,11 @@ End: Copyright (C) 2018-2020 Free Software Foundation, Inc. -This file is part of Bison, the GNU Compiler Compiler. +Permission is granted to copy, distribute and/or modify this document +under the terms of the GNU Free Documentation License, Version 1.3 or +any later version published by the Free Software Foundation; with no +Invariant Sections, with no Front-Cover Texts, and with no Back-Cover +Texts. A copy of the license is included in the "GNU Free +Documentation License" file as part of this distribution. -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . ---> From 666df338a7ee8d3472df6e15f420fd431899affa Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Sat, 22 Feb 2020 20:25:12 +0100 Subject: [PATCH 07/19] style: comment changes * src/symtab.h, src/lr0.c: here. 
--- src/gram.h | 15 +++++++-------- src/lr0.c | 11 ++++++----- src/reader.c | 2 +- src/symtab.h | 3 ++- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/src/gram.h b/src/gram.h index 582bb069..afae95a7 100644 --- a/src/gram.h +++ b/src/gram.h @@ -42,9 +42,9 @@ Internally, we cannot use the number 0 for a rule because for instance RITEM stores both symbol (the RHS) and rule numbers: the - symbols are shorts >= 0, and rule number are stored negative. + symbols are integers >= 0, and rule numbers are stored negative. Therefore 0 cannot be used, since it would be both the rule number - 0, and the token $end). + 0, and the token $end. Actions are accessed via the rule number. @@ -55,8 +55,7 @@ RULES[R].lhs -- the symbol of the left hand side of rule R. - RULES[R].rhs -- the index in RITEM of the beginning of the portion - for rule R. + RULES[R].rhs -- the beginning of the portion of RITEM for rule R. RULES[R].prec -- the symbol providing the precedence level of R. @@ -75,16 +74,16 @@ RULES[R].line -- the line where R was defined. - RULES[R].useful -- whether the rule is used (i.e., false if thrown - away by reduce). + RULES[R].useful -- whether the rule is used. False if thrown away + by reduce(). The right hand side is stored as symbol numbers in a portion of RITEM. The length of the portion is one greater than the number of symbols in the rule's right hand side. The last element in the portion - contains minus R, which identifies it as the end of a portion and - says which rule it is for. + contains -R, which identifies it as the end of a portion and says + which rule it is for. The portions of RITEM come in order of increasing rule number. NRITEMS is the total length of RITEM. Each element of RITEM is diff --git a/src/lr0.c b/src/lr0.c index 079fbdf9..2ebdcf8a 100644 --- a/src/lr0.c +++ b/src/lr0.c @@ -58,10 +58,11 @@ core_print (size_t core_size, item_number *core, FILE *out) } } -/*------------------------------------------------------------------. 
-| A state was just discovered from another state. Queue it for | -| later examination, in order to find its transitions. Return it. | -`------------------------------------------------------------------*/ +/*-----------------------------------------------------------------. +| A state was just discovered by transitioning on SYM from another | +| state. Queue this state for later examination, in order to find | +| its outgoing transitions. Return it. | +`-----------------------------------------------------------------*/ static state * state_list_append (symbol_number sym, size_t core_size, item_number *core) @@ -98,7 +99,7 @@ static state **shiftset; /* KERNEL_BASE[symbol-number] -> list of item numbers (offsets inside - RITEM) of lenngth KERNEL_SIZE[symbol-number]. */ + RITEM) of length KERNEL_SIZE[symbol-number]. */ static item_number **kernel_base; static int *kernel_size; diff --git a/src/reader.c b/src/reader.c index 954b0cc0..80d307c6 100644 --- a/src/reader.c +++ b/src/reader.c @@ -610,7 +610,7 @@ packgram (void) { int itemno = 0; ritem = xnmalloc (nritems + 1, sizeof *ritem); - /* This sentinel is used by build_relations in gram.c. */ + /* This sentinel is used by build_relations() in lalr.c. */ *ritem++ = 0; rule_number ruleno = 0; diff --git a/src/symtab.h b/src/symtab.h index a92e00a9..d6de8023 100644 --- a/src/symtab.h +++ b/src/symtab.h @@ -225,7 +225,8 @@ void symbol_precedence_set (symbol *sym, int prec, assoc a, location loc); /** Set the \c class associated with \c sym. Whether \c declaring means whether this class definition comes - from %nterm or %token (but not %type, prec/assoc, etc.). */ + from %nterm or %token (but not %type, prec/assoc, etc.). A symbol + can have "declaring" set only at most once. 
*/ void symbol_class_set (symbol *sym, symbol_class class, location loc, bool declaring); From a4a3f08c11630a552933f00a92cce08e9c5f8c33 Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Sat, 15 Feb 2020 11:16:52 +0100 Subject: [PATCH 08/19] doc: update recommendation for libtextstyle * README: here. --- README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README b/README index 83c5f0c4..f8d79755 100644 --- a/README +++ b/README @@ -28,7 +28,7 @@ As an experimental feature, diagnostics are now colored, controlled by the To use them, install the libtextstyle library before configuring Bison. It is available from https://alpha.gnu.org/gnu/gettext/, for instance -https://alpha.gnu.org/gnu/gettext/libtextstyle-0.8.tar.gz. +https://alpha.gnu.org/pub/gnu/gettext/libtextstyle-0.20.5.tar.gz. The option --color supports the following arguments: - always, yes: Enable colors. From 192e9fdf77376a8303e4204046fd513446d840d7 Mon Sep 17 00:00:00 2001 From: Akimn Demaille Date: Sat, 15 Feb 2020 10:49:14 +0100 Subject: [PATCH 09/19] build: fix typo * build-aux/cross-options.pl: here. --- build-aux/cross-options.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build-aux/cross-options.pl b/build-aux/cross-options.pl index 7f34c71b..31fe8065 100755 --- a/build-aux/cross-options.pl +++ b/build-aux/cross-options.pl @@ -17,7 +17,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . # -# Written by Akimn Demaille. +# Written by Akim Demaille. use warnings; use 5.005; From 641e326303753575664ca146fee7e9148d6bf5cf Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Fri, 6 Mar 2020 09:05:52 +0100 Subject: [PATCH 10/19] code: be robust to reference with invalid tags Because we want to support $b>$, we must accept -> in type tags, and reject $<->$, as it is unfinished. Reported by Ahcheong Lee. * src/scan-code.l (yylex): Make sure "tag" does not end with -, since -> does not close the tag.
* tests/input.at (Stray $ or @): Check this. --- THANKS | 3 ++- src/scan-code.l | 2 +- tests/input.at | 5 ++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/THANKS b/THANKS index db54776a..d8ef2c0c 100644 --- a/THANKS +++ b/THANKS @@ -4,8 +4,9 @@ it is today without the invaluable help of these people: Aaro Koskinen aaro.koskinen@iki.fi Аскар Сафин safinaskar@mail.ru Adam Sampson ats@offog.org +Ahcheong Lee dkcjd2000@gmail.com Airy Andre Airy.Andre@edf.fr -Akim Demaille akim@lrde.epita.fr +Akim Demaille akim@gnu.org Albert Chin-A-Young china@thewrittenword.com Alexander Belopolsky alexb@rentec.com Alexandre Duret-Lutz adl@lrde.epita.fr diff --git a/src/scan-code.l b/src/scan-code.l index 658c25b1..ef667146 100644 --- a/src/scan-code.l +++ b/src/scan-code.l @@ -81,7 +81,7 @@ static bool untyped_var_seen; historically almost any character is allowed in a tag. We disallow NUL and newline, as this simplifies our implementation. We allow "->" as a means to dereference a pointer. */ -tag ([^\0\n>]|->)+ +tag ([^\0\n>]|->)*[^-] /* Zero or more instances of backslash-newline. Following GCC, allow white space between the backslash and the newline. */ diff --git a/tests/input.at b/tests/input.at index c03b282f..b004ea9e 100644 --- a/tests/input.at +++ b/tests/input.at @@ -2548,7 +2548,9 @@ AT_DATA_GRAMMAR([[input.y]], %printer { $%; @%; } <*> exp TOK; %{ $ @ %} // Should not warn. %% -exp: TOK { $%; @%; $$ = $1; }; +exp: TOK { $%; @%; $$ = $1; } + | 'a' { $<->1; $$ = 1; } + | 'b' { $bar>$; } %% $ @ // Should not warn. 
]]) @@ -2562,6 +2564,7 @@ input.y:13.19: warning: stray '$' [-Wother] input.y:13.23: warning: stray '@' [-Wother] input.y:16.19: warning: stray '$' [-Wother] input.y:16.23: warning: stray '@' [-Wother] +input.y:17.19: warning: stray '$' [-Wother] ]]) AT_BISON_OPTION_POPDEFS From b82b387da9b637a10edc97632435d53f5d340ba8 Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Sat, 7 Mar 2020 07:41:47 +0100 Subject: [PATCH 11/19] muscles: fix incorrect decoding of $ Bug introduced in 458171e6df5a0110a35ee45ad8b2e9f6fb426f1d. https://lists.gnu.org/archive/html/bison-patches/2013-11/msg00009.html Reported by Ahcheong Lee. https://lists.gnu.org/r/bug-bison/2020-03/msg00010.html * src/muscle-tab.c (COMMON_DECODE): "$" is coded as "$][", not "$[][". * tests/input.at ("%define" enum variables): Check that case. --- src/muscle-tab.c | 1 - src/system.h | 6 +++--- tests/input.at | 19 +++++++++++++++++++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/muscle-tab.c b/src/muscle-tab.c index 36773eb6..5778b8f3 100644 --- a/src/muscle-tab.c +++ b/src/muscle-tab.c @@ -292,7 +292,6 @@ muscle_location_grow (char const *key, location loc) #define COMMON_DECODE(Value) \ case '$': \ - ++(Value); aver (*(Value) == '['); \ ++(Value); aver (*(Value) == ']'); \ ++(Value); aver (*(Value) == '['); \ obstack_sgrow (&muscle_obstack, "$"); \ diff --git a/src/system.h b/src/system.h index b0ffb23a..0210f6c6 100644 --- a/src/system.h +++ b/src/system.h @@ -209,10 +209,10 @@ typedef size_t uintptr_t; /* Output Str both quoted for M4 (i.e., embed in [[...]]), and escaped for our postprocessing (i.e., escape M4 special characters). If - Str is empty (or NULL), output "[]" instead of "[[]]" as it make M4 - programming easier (m4_ifval can be used). + Str is empty (or NULL), output "[]" instead of "[[]]" as it makes + M4 programming easier (m4_ifval can be used). - For instance "[foo]" -> "[[@{foo@}]]", "$$" -> "[[$][$][]]". */ + For instance "[foo]" -> "[[@{foo@}]]", "$$" -> "[[$][$][]]". 
*/ # define obstack_quote(Obs, Str) \ do { \ diff --git a/tests/input.at b/tests/input.at index b004ea9e..4c1f5b25 100644 --- a/tests/input.at +++ b/tests/input.at @@ -2069,6 +2069,25 @@ input.y:1.1-34: accepted value: 'consistent' input.y:1.1-34: accepted value: 'accepting' ]]) +# Check escapes. +AT_DATA([[input.y]], +[[%define lr.default-reduction {[$@]} +%% +start: %empty; +]]) +AT_BISON_CHECK([[-fcaret input.y]], [[1]], [[]], +[[input.y:1.1-35: warning: %define variable 'lr.default-reduction' requires keyword values [-Wdeprecated] + 1 | %define lr.default-reduction {[$@]} + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +input.y:1.1-35: error: invalid value for %define variable 'lr.default-reduction': '[$@]' + 1 | %define lr.default-reduction {[$@]} + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +input.y:1.1-35: accepted value: 'most' +input.y:1.1-35: accepted value: 'consistent' +input.y:1.1-35: accepted value: 'accepting' +]]) + + # Back-end. AT_DATA([[input.y]], [[%define api.push-pull neither From e21ff47f5d0b64da693a47b7dd200a1a44a5bbeb Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Sat, 7 Mar 2020 09:57:03 +0100 Subject: [PATCH 12/19] diagnostics: be sure to close the styling when lines are too short bar.y:4.12-17: error: redefining user token number of foo - 4 | %token foo 123 + 4 | %token foo 123 | ^~~~~~ * src/location.c (location_caret): Be sure to close. * tests/diagnostics.at (Line is too short, and then you die): New. --- src/location.c | 6 ++++++ tests/diagnostics.at | 43 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/src/location.c b/src/location.c index 1af8e673..dbcd67ec 100644 --- a/src/location.c +++ b/src/location.c @@ -461,6 +461,12 @@ location_caret (location loc, const char *style, FILE *out) break; } } + // The line is shorter than expected. 
+ if (opened) + { + end_use_class (style, out); + opened = false; + } putc ('\n', out); } diff --git a/tests/diagnostics.at b/tests/diagnostics.at index b0b5ee3e..cbf56b77 100644 --- a/tests/diagnostics.at +++ b/tests/diagnostics.at @@ -152,6 +152,47 @@ input.y: warning: fix-its can be applied. Rerun with option ]]) + +## ------------------------------------- ## +## Line is too short, and then you die. ## +## ------------------------------------- ## + +# We trust the "#line", since that's what allows us to quote the +# actual source from which the gramar file was generated. But #line +# can also be wrong, and point to a line which is shorter that the bad +# one. In which case we can easily forget to close the styling. +# +# Be sure to have #line point to a line long enough to open the +# styling, but not enough to close it. + +AT_TEST([[Line is too short, and then you die]], +[[// Beware that there are 9 lines inserted before (including this one). +#line 12 +%token foo 123 +%token foo 123123 +%token foo 123 +%% +exp: +]], +[1], +[[input.y:13.8-10: warning: symbol foo redeclared [-Wother] + 13 | %token foo 123 + | ^~~ +input.y:12.8-10: previous declaration + 12 | %token foo 123123 + | ^~~ +input.y:13.12-17: error: redefining user token number of foo + 13 | %token foo 123 + | ^~~~~~ +input.y:14.8-10: warning: symbol foo redeclared [-Wother] + 14 | %% + | ^~~ +input.y:12.8-10: previous declaration + 12 | %token foo 123123 + | ^~~ +]]) + + ## -------------------------------------- ## ## Tabulations and multibyte characters. ## ## -------------------------------------- ## @@ -262,7 +303,7 @@ input.y:10.1-27: error: %define variable 'error2' is not used ## ----------------- ## # Carriage-return used to count as a newline in the scanner, and not -# in diagnostics. Resulting in all sort of nice bugs. +# in diagnostics. Resulting in all kinds of nice bugs. 
AT_TEST([[Carriage return]], [[^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M From b6386034773829b10c44ff93ce0a492980684c64 Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Sat, 7 Mar 2020 12:59:09 +0100 Subject: [PATCH 13/19] diagnostics: beware of zero-width characters Currenly we rely on (visual) width of the characters to decide where to open and close the styling of the quoted lines. This breaks when we deal with zero-width characters: we cannot just rely on (visual) columns, we need to know whether we are before, inside, or after the highlighted portion. * src/location.c (location_caret): col_end: no longer add 1, "regular" characters have a width of 1, only 0-width characters have 0-width. opened: replace with 'state', a three-valued enum. Don't reopen the style if we already did. * tests/diagnostics.at (Zero-width characters): New. --- src/location.c | 32 +++++++++++++++++++------------- src/location.h | 10 ++++------ tests/diagnostics.at | 26 ++++++++++++++++++++++---- 3 files changed, 45 insertions(+), 23 deletions(-) diff --git a/src/location.c b/src/location.c index dbcd67ec..9f929c00 100644 --- a/src/location.c +++ b/src/location.c @@ -421,12 +421,14 @@ location_caret (location loc, const char *style, FILE *out) { /* The last column to highlight. Only the first line of multiline locations are quoted, in which case the ending - column is the end of line. Single point locations (with - equal boundaries) denote the character that they - follow. */ - int col_end + column is the end of line. + + We used to work with byte offsets, and that was much + easier. However, we went back to using (visual) columns to + support truncating of long lines. */ + const int col_end = loc.start.line == loc.end.line - ? loc.end.column + (loc.start.column == loc.end.column) + ? loc.end.column : caret_info.line_len; /* Quote the file (at most the first line in the case of multiline locations). 
*/ @@ -436,24 +438,28 @@ location_caret (location loc, const char *style, FILE *out) expected (maybe the file was changed since the scanner ran), we might reach the end before we actually saw the opening column. */ - bool opened = false; + enum { before, inside, after } state = before; while (!mb_iseof (c) && !mb_iseq (c, '\n')) { - if (caret_info.pos.column == loc.start.column) + // We might have already opened (and even closed!) the + // style and yet have the equality of the columns if we + // just saw zero-width characters. + if (state == before + && caret_info.pos.column == loc.start.column) { begin_use_class (style, out); - opened = true; + state = inside; } if (skip < caret_info.pos.column) mb_putc (c, out); boundary_compute (&caret_info.pos, mb_ptr (c), mb_len (c)); caret_getc (c); - if (opened + if (state == inside && (caret_info.pos.column == col_end || width < caret_info.pos.column - skip)) { end_use_class (style, out); - opened = false; + state = after; } if (width < caret_info.pos.column - skip) { @@ -461,11 +467,11 @@ location_caret (location loc, const char *style, FILE *out) break; } } - // The line is shorter than expected. - if (opened) + if (state == inside) { + // The line is shorter than expected. end_use_class (style, out); - opened = false; + state = after; } putc ('\n', out); } diff --git a/src/location.h b/src/location.h index ccb42e3c..cb3025c6 100644 --- a/src/location.h +++ b/src/location.h @@ -42,16 +42,14 @@ typedef struct /* If positive, the column (starting at 1) just after the boundary. This is neither a byte count, nor a character count; it is a - column count. If this is INT_MAX, the column number has + (visual) column count. If this is INT_MAX, the column number has overflowed. - Meaningless and not displayed if nonpositive. - */ + Meaningless and not displayed if nonpositive. */ int column; - /* If nonnegative, the byte number (starting at 0) in the current line. 
- Never displayed, used when printing error messages with colors to - know where colors start and end. */ + /* If nonnegative, the byte number (starting at 0) in the current + line. Not displayed (unless --trace=location). */ int byte; } boundary; diff --git a/tests/diagnostics.at b/tests/diagnostics.at index cbf56b77..1471934f 100644 --- a/tests/diagnostics.at +++ b/tests/diagnostics.at @@ -37,15 +37,15 @@ AT_BISON_OPTION_PUSHDEFS AT_DATA_GRAMMAR([[input.y]], [$2]) +AT_DATA([experr], [$4]) + # For some reason, literal ^M in the input are removed and don't end # in `input.y`. So use the two-character ^M represent it, and let # Perl insert real CR characters. -if grep '\^M' input.y >/dev/null; then - AT_PERL_REQUIRE([-pi -e 's{\^M}{\r}gx' input.y]) +if $EGREP ['\^M|\\[0-9][0-9][0-9]'] input.y experr >/dev/null; then + AT_PERL_REQUIRE([-pi -e 's{\^M}{\r}g;s{\\(\d{3}|.)}{$v = $[]1; $v =~ /\A\d+\z/ ? chr($v) : $v}ge' input.y experr]) fi -AT_DATA([experr], [$4]) - AT_CHECK([LC_ALL="$locale" $5 bison -fcaret --color=debug -Wall input.y], [$3], [], [experr]) # When no style, same messages, but without style. @@ -193,6 +193,24 @@ input.y:12.8-10: previous declaration ]]) +## ----------------------- ## +## Zero-width characters. ## +## ----------------------- ## + +# We used to open twice the styling for characters that have a +# zero-width on display (e.g., \005). + +AT_TEST([[Zero-width characters]], +[[%% +exp: an\005error. +]], +[1], +[[input.y:10.8: error: invalid character: '\\005' + 10 | exp: an\005error. + | ^ +]]) + + ## -------------------------------------- ## ## Tabulations and multibyte characters. ## ## -------------------------------------- ## From 2f02d9beae83156c60fa4ba7da05d78cd01e88da Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Sun, 8 Mar 2020 07:18:34 +0100 Subject: [PATCH 14/19] style: initialize some struct members * src/symtab.c (sym_content_new): Initialize all the location members. 
Not needed by the code, but disturbing values when using a debugger. --- src/symtab.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/symtab.c b/src/symtab.c index 35c0930e..b137bbf0 100644 --- a/src/symtab.c +++ b/src/symtab.c @@ -77,10 +77,12 @@ sym_content_new (symbol *s) res->symbol = s; res->type_name = NULL; + res->type_loc = empty_loc; for (int i = 0; i < CODE_PROPS_SIZE; ++i) code_props_none_init (&res->props[i]); res->number = NUMBER_UNDEFINED; + res->prec_loc = empty_loc; res->prec = 0; res->assoc = undef_assoc; res->user_token_number = USER_NUMBER_UNDEFINED; From cfcd823e160121d8be7463b8b47600ff50c3cbd3 Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Sun, 8 Mar 2020 07:27:57 +0100 Subject: [PATCH 15/19] diagnostics: don't crash because of repeated definitions of error According to https://www.unix.com/man-page/POSIX/1posix/yacc/, the user is allowed to specify her user number for the error token: The token error shall be reserved for error handling. The name error can be used in grammar rules. It indicates places where the parser can recover from a syntax error. The default value of error shall be 256. Its value can be changed using a %token declaration. The lexical analyzer should not return the value of error. I think this feature is useless, the user should not have to deal with that. The intent is probably to give the user a means to use 256 if she wants to, but provided "error" cleared the path first by being assigned another number. In the case of Bison, 256 is assigned to "error" at the end if the user did not use it for a token of hers. So this feature is useless. Yet it is valid, and if the user assigns twice a token number to "error", then the second time we want to complain about it and want to show the original definition. At this point, we try to display the built-in definition of "error", whose location is NULL, and we crash. Rather, the location of the first user definition of "error" should become its defining location.
Reported by Ahcheong Lee. https://lists.gnu.org/r/bug-bison/2020-03/msg00007.html * src/symtab.c (symbol_class_set): If this is a declaration and the symbol was not declared yet, keep this as defining location. * tests/input.at (Redefining the error token): New. --- src/location.c | 2 ++ src/symtab.c | 5 ++++- tests/input.at | 31 +++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/location.c b/src/location.c index 9f929c00..4b7273d2 100644 --- a/src/location.c +++ b/src/location.c @@ -175,6 +175,8 @@ location_print (location loc, FILE *out) } else { + aver (loc.start.file); + aver (loc.end.file); int end_col = 0 != loc.end.column ? loc.end.column - 1 : 0; res += fprintf (out, "%s", quotearg_n_style (3, escape_quoting_style, loc.start.file)); diff --git a/src/symtab.c b/src/symtab.c index b137bbf0..b4106ea0 100644 --- a/src/symtab.c +++ b/src/symtab.c @@ -555,7 +555,10 @@ symbol_class_set (symbol *sym, symbol_class class, location loc, bool declaring) _("previous declaration")); } else - s->status = declared; + { + sym->location = loc; + s->status = declared; + } } } } diff --git a/tests/input.at b/tests/input.at index 4c1f5b25..dd5af739 100644 --- a/tests/input.at +++ b/tests/input.at @@ -298,6 +298,37 @@ input.y:8.14: error: syntax error, unexpected integer AT_CLEANUP +## ---------------------------- ## +## Redefining the error token. ## +## ---------------------------- ## + +AT_SETUP([Redefining the error token]) + +# We used to crash when trying to display the original definition of +# "error", which is a builtin without any location.
+ +AT_DATA([input.y], +[[%token error 123 +%token error 124 +%% +exp: +]]) + +AT_BISON_CHECK([-fcaret input.y], [1], [], +[[input.y:2.8-12: warning: symbol error redeclared [-Wother] + 2 | %token error 124 + | ^~~~~ +input.y:1.8-12: previous declaration + 1 | %token error 123 + | ^~~~~ +input.y:2.14-16: error: redefining user token number of error + 2 | %token error 124 + | ^~~ +]]) + +AT_CLEANUP + + ## ------------------ ## ## Dangling aliases. ## ## ------------------ ## From e3812bb8c317203014f615c6f0cb15fd657ae293 Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Sun, 8 Mar 2020 07:56:39 +0100 Subject: [PATCH 16/19] yacc.c: make sure we properly propagated the user's number for error * data/skeletons/yacc.c (YYERRCODE): Be truthful. * tests/input.at (Redefining the error token): Check that. --- TODO | 9 ++------- data/skeletons/yacc.c | 2 +- tests/input.at | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/TODO b/TODO index 7a3580cc..a7b7b948 100644 --- a/TODO +++ b/TODO @@ -315,13 +315,8 @@ It would be a very nice source of inspiration for the other languages. Valentin Tolmer is working on this. ** YYERRCODE -Defined to 256, but not used, not documented. Probably the token -number for the error token, which POSIX wants to be 256, but which -Bison might renumber if the user used number 256. Keep fix and doc? -Throw away? - -Also, why don't we output the token name of the error token in the -output? It is explicitly skipped: +Why don't we output the token name of the error token in the output? It is +explicitly skipped: /* Skip error token and tokens without identifier. 
*/ if (sym != errtoken && id) diff --git a/data/skeletons/yacc.c b/data/skeletons/yacc.c index 9a083037..add94351 100644 --- a/data/skeletons/yacc.c +++ b/data/skeletons/yacc.c @@ -673,7 +673,7 @@ static const ]b4_int_type_for([b4_toknum])[ yytoknum[] = /* Error token number */ #define YYTERROR 1 -#define YYERRCODE 256 +#define YYERRCODE ]b4_symbol(1, user_number)[ ]b4_locations_if([[ ]b4_yylloc_default_define[ diff --git a/tests/input.at b/tests/input.at index dd5af739..ccab6844 100644 --- a/tests/input.at +++ b/tests/input.at @@ -326,6 +326,21 @@ input.y:2.14-16: error: redefining user token number of error | ^~~ ]]) +# While at it, make sure we properly used the user's number for +# "error". +AT_DATA([input.y], +[[%token error 123 +%% +exp: +]]) + +AT_BISON_CHECK([input.y]) + +AT_CHECK([$EGREP -E '123|256' input.tab.c], [], +[[ 0, 123, 257 +#define YYERRCODE 123 +]]) + AT_CLEANUP From 044ad1288c123d3b07284296a7246db002f43c9b Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Sun, 8 Mar 2020 08:17:13 +0100 Subject: [PATCH 17/19] news: update for 3.5.3 --- NEWS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/NEWS b/NEWS index d5eaa258..8f623fc2 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,13 @@ GNU Bison NEWS * Noteworthy changes in release ?.? (????-??-??) [?] +** Bug fixes + + Error messages could quote lines containing zero-width characters (such as + \005) with incorrect styling. Fixes for similar issues with unexpectedly + short lines (e.g., the file was changed between parsing and diagnosing). + + Several unlikely crashes found by fuzzing have been fixed. * Noteworthy changes in release 3.5.2 (2020-02-13) [stable] From f49684a57705c72389750a3196e82df0d6c0217d Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Sun, 8 Mar 2020 08:30:41 +0100 Subject: [PATCH 18/19] version 3.5.3 * NEWS: Record release date. 
--- NEWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 8f623fc2..7509f1c9 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,6 @@ GNU Bison NEWS -* Noteworthy changes in release ?.? (????-??-??) [?] +* Noteworthy changes in release 3.5.3 (2020-03-08) [stable] ** Bug fixes From 15ea35019fb366ca667ccb785ab138dd7a1aeda0 Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Sun, 8 Mar 2020 08:50:10 +0100 Subject: [PATCH 19/19] maint: post-release administrivia * NEWS: Add header line for next release. * .prev-version: Record previous version. * cfg.mk (old_NEWS_hash): Auto-update. --- .prev-version | 2 +- NEWS | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.prev-version b/.prev-version index 87ce4929..444877d4 100644 --- a/.prev-version +++ b/.prev-version @@ -1 +1 @@ -3.5.2 +3.5.3 diff --git a/NEWS b/NEWS index 7509f1c9..b727f6d4 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,8 @@ GNU Bison NEWS +* Noteworthy changes in release ?.? (????-??-??) [?] + + * Noteworthy changes in release 3.5.3 (2020-03-08) [stable] ** Bug fixes