From 251e1b137fd752be2907703c83de8fa52fd137f2 Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Sat, 13 Jun 2020 11:09:53 +0200 Subject: [PATCH] reports: the column width differs from the byte count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From "number"          shift, and go to state 1 "Ñùṃéℝô"  shift, and go to state 2 to "number"  shift, and go to state 1 "Ñùṃéℝô"  shift, and go to state 2 * src/print.c: Use mbswidth, not strlen, to compute visual columns. * tests/report.at: Adjust. --- NEWS | 20 +++++++++++++++++++- src/print.c | 13 +++++++------ tests/report.at | 27 ++++++++++++++++----------- 3 files changed, 42 insertions(+), 18 deletions(-) diff --git a/NEWS b/NEWS index 1cec2252..1584d49e 100644 --- a/NEWS +++ b/NEWS @@ -27,6 +27,23 @@ GNU Bison NEWS header. This is disabled when the generated header is `y.tab.h`, to comply with Automake's ylwrap. +*** String aliases are faithfully propagated + + Bison used to interpret user strings (i.e., decoding backslash escapes) + when reading them, and to escape them (i.e., issue non-printable + characters as backslash escapes, taking the locale into account) when + outputting them. As a consequence non-ASCII strings (say in UTF-8) ended + up "ciphered" as sequences of backslash escapes. This happened not only + in the generated sources (where the compiler will reinterpret them), but + also in all the generated reports (text, xml, html, dot, etc.). Reports + were therefore not readable when string aliases were not pure ASCII. + Worse yet: the output depended on the user's locale. + + Now Bison faithfully treats the string aliases exactly the way the user + spelled them. This fixes all the aforementioned problems. However, now, + string aliases semantically equivalent but syntactically different (e.g., + "A", "\x41", "\101") are considered to be different. + ** New features *** File prefix mapping @@ -4205,7 +4222,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
LocalWords: yysymbol yytnamerr yyreport ctx ARGMAX yysyntax stderr LPAREN LocalWords: symrec yypcontext TOKENMAX yyexpected YYEMPTY yypstate YYEOF LocalWords: autocompletion bistromathic submessages Cayuela lexcalc hoc - LocalWords: yytoken YYUNDEF YYerror + LocalWords: yytoken YYUNDEF YYerror basename Automake's UTF ifdef ffile + LocalWords: gotos readline Local Variables: ispell-dictionary: "american" diff --git a/src/print.c b/src/print.c index 1da0f9dd..91b44cb3 100644 --- a/src/print.c +++ b/src/print.c @@ -22,6 +22,7 @@ #include "system.h" #include <bitset.h> +#include <mbswidth.h> #include "closure.h" #include "conflicts.h" @@ -49,7 +50,7 @@ static bitset no_reduce_set; static void max_length (size_t *width, const char *str) { - size_t len = strlen (str); + size_t len = mbswidth (str, 0); if (len > *width) *width = len; } @@ -130,7 +131,7 @@ print_transitions (state *s, FILE *out, bool display_transitions_p) state *s1 = trans->states[i]; fprintf (out, " %s", tag); - for (int j = width - strlen (tag); j > 0; --j) + for (int j = width - mbswidth (tag, 0); j > 0; --j) fputc (' ', out); if (display_transitions_p) fprintf (out, _("shift, and go to state %d\n"), s1->number); @@ -168,7 +169,7 @@ print_errs (FILE *out, state *s) { const char *tag = errp->symbols[i]->tag; fprintf (out, " %s", tag); - for (int j = width - strlen (tag); j > 0; --j) + for (int j = width - mbswidth (tag, 0); j > 0; --j) fputc (' ', out); fputs (_("error (nonassociative)\n"), out); } @@ -187,7 +188,7 @@ print_reduction (FILE *out, size_t width, rule *r, bool enabled) { fprintf (out, " %s", lookahead_token); - for (int j = width - strlen (lookahead_token); j > 0; --j) + for (int j = width - mbswidth (lookahead_token, 0); j > 0; --j) fputc (' ', out); if (!enabled) fputc ('[', out); @@ -232,7 +233,7 @@ print_reductions (FILE *out, state *s) /* Compute the width of the lookahead token column. 
*/ size_t width = 0; if (default_reduction) - width = strlen (_("$default")); + width = mbswidth (_("$default"), 0); if (reds->lookahead_tokens) for (int i = 0; i < ntokens; i++) @@ -404,7 +405,7 @@ print_nonterminal_symbols (FILE *out) break; } - int column = 4 + strlen (tag); + int column = 4 + mbswidth (tag, 0); fprintf (out, "%4s%s", "", tag); if (symbols[i]->content->type_name) column += fprintf (out, " <%s>", diff --git a/tests/report.at b/tests/report.at index 27d4e7b8..21708070 100644 --- a/tests/report.at +++ b/tests/report.at @@ -1150,6 +1150,11 @@ AT_SETUP([Reports with conflicts]) AT_KEYWORDS([report]) +# We need UTF-8 support for correct screen-width computation of UTF-8 +# characters. Skip the test if not available. +locale=`locale -a | $EGREP '^en_US\.(UTF-8|utf8)$' | sed 1q` +AT_SKIP_IF([test x == x"$locale"]) + AT_BISON_OPTION_PUSHDEFS AT_DATA([input.y], [[%left "+" @@ -1162,7 +1167,7 @@ exp | "Ñùṃéℝô" ]]) -AT_BISON_CHECK([-o input.cc -rall --graph=input.gv --xml input.y], [], [], +AT_CHECK([LC_ALL="$locale" $5 bison -fno-caret -o input.cc -rall --graph=input.gv --xml input.y], [], [], [[input.y: warning: 3 shift/reduce conflicts [-Wconflicts-sr] input.y: warning: 3 reduce/reduce conflicts [-Wconflicts-rr] input.y: warning: rerun with option '-Wcounterexamples' to generate conflict counterexamples [-Wother] @@ -1219,7 +1224,7 @@ State 0 4 | . "number" 5 | . "Ñùṃéℝô" - "number" shift, and go to state 1 + "number" shift, and go to state 1 "Ñùṃéℝô" shift, and go to state 2 exp go to state 3 @@ -1246,9 +1251,9 @@ State 3 2 | exp . "+" exp 3 | exp . "+" exp - $end shift, and go to state 4 - "+" shift, and go to state 5 - "⊕" shift, and go to state 6 + $end shift, and go to state 4 + "+" shift, and go to state 5 + "⊕" shift, and go to state 6 State 4 @@ -1268,7 +1273,7 @@ State 5 4 | . "number" 5 | . 
"Ñùṃéℝô" - "number" shift, and go to state 1 + "number" shift, and go to state 1 "Ñùṃéℝô" shift, and go to state 2 exp go to state 7 @@ -1283,7 +1288,7 @@ State 6 4 | . "number" 5 | . "Ñùṃéℝô" - "number" shift, and go to state 1 + "number" shift, and go to state 1 "Ñùṃéℝô" shift, and go to state 2 exp go to state 8 @@ -1303,8 +1308,8 @@ State 7 $end [reduce using rule 3 (exp)] "+" reduce using rule 2 (exp) "+" [reduce using rule 3 (exp)] - "⊕" [reduce using rule 2 (exp)] - "⊕" [reduce using rule 3 (exp)] + "⊕" [reduce using rule 2 (exp)] + "⊕" [reduce using rule 3 (exp)] $default reduce using rule 2 (exp) Conflict between rule 2 and token "+" resolved as reduce (%left "+"). @@ -1317,11 +1322,11 @@ State 8 2 | exp . "+" exp 3 | exp . "+" exp - "+" shift, and go to state 5 + "+" shift, and go to state 5 "⊕" shift, and go to state 6 "+" [reduce using rule 1 (exp)] - "⊕" [reduce using rule 1 (exp)] + "⊕" [reduce using rule 1 (exp)] $default reduce using rule 1 (exp) ]])