reports: the column width differs from the byte count

From

    "number"          shift, and go to state 1
    "Ñùṃéℝô"  shift, and go to state 2

to

    "number"  shift, and go to state 1
    "Ñùṃéℝô"  shift, and go to state 2

* src/print.c: Use mbswidth, not strlen, to compute visual columns.
* tests/report.at: Adjust.
2026-06-14 11:42:12 +00:00 · 2020-06-13 11:09:53 +02:00
parent efbcadeca7
commit 251e1b137f
3 changed files with 42 additions and 18 deletions
@@ -27,6 +27,23 @@ GNU Bison NEWS
  header.  This is disabled when the generated header is `y.tab.h`, to
  comply with Automake's ylwrap.
+*** String aliases are faithfully propagated
+  Bison used to interpret user strings (i.e., decoding backslash escapes)
+  when reading them, and to escape them (i.e., issue non-printable
+  characters as backslash escapes, taking the locale into account) when
+  outputting them.  As a consequence non-ASCII strings (say in UTF-8) ended
+  up "ciphered" as sequences of backslash escapes.  This happened not only
+  in the generated sources (where the compiler will reinterpret them), but
+  also in all the generated reports (text, xml, html, dot, etc.).  Reports
+  were therefore not readable when string aliases were not pure ASCII.
+  Worse yet: the output depended on the user's locale.
+  Now Bison faithfully treats the string aliases exactly the way the user
+  spelled them.  This fixes all the aforementioned problems.  However, now,
+  string aliases semantically equivalent but syntactically different (e.g.,
+  "A", "\x41", "\101") are considered to be different.
 ** New features
 *** File prefix mapping
@@ -4205,7 +4222,8 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 LocalWords:  yysymbol yytnamerr yyreport ctx ARGMAX yysyntax stderr LPAREN
 LocalWords:  symrec yypcontext TOKENMAX yyexpected YYEMPTY yypstate YYEOF
 LocalWords:  autocompletion bistromathic submessages Cayuela lexcalc hoc
- LocalWords:  yytoken YYUNDEF YYerror
+ LocalWords:  yytoken YYUNDEF YYerror basename Automake's UTF ifdef ffile
 LocalWords:  gotos readline
 Local Variables:
 ispell-dictionary: "american"
@@ -22,6 +22,7 @@
 #include "system.h"
 #include <bitset.h>
+#include <mbswidth.h>
 #include "closure.h"
 #include "conflicts.h"
@@ -49,7 +50,7 @@ static bitset no_reduce_set;
 static void
 max_length (size_t *width, const char *str)
 {
-  size_t len = strlen (str);
+  size_t len = mbswidth (str, 0);
  if (len > *width)
    *width = len;
 }
@@ -130,7 +131,7 @@ print_transitions (state *s, FILE *out, bool display_transitions_p)
        state *s1 = trans->states[i];
        fprintf (out, "    %s", tag);
-        for (int j = width - strlen (tag); j > 0; --j)
+        for (int j = width - mbswidth (tag, 0); j > 0; --j)
          fputc (' ', out);
        if (display_transitions_p)
          fprintf (out, _("shift, and go to state %d\n"), s1->number);
@@ -168,7 +169,7 @@ print_errs (FILE *out, state *s)
      {
        const char *tag = errp->symbols[i]->tag;
        fprintf (out, "    %s", tag);
-        for (int j = width - strlen (tag); j > 0; --j)
+        for (int j = width - mbswidth (tag, 0); j > 0; --j)
          fputc (' ', out);
        fputs (_("error (nonassociative)\n"), out);
      }
@@ -187,7 +188,7 @@ print_reduction (FILE *out, size_t width,
                 rule *r, bool enabled)
 {
  fprintf (out, "    %s", lookahead_token);
-  for (int j = width - strlen (lookahead_token); j > 0; --j)
+  for (int j = width - mbswidth (lookahead_token, 0); j > 0; --j)
    fputc (' ', out);
  if (!enabled)
    fputc ('[', out);
@@ -232,7 +233,7 @@ print_reductions (FILE *out, state *s)
  /* Compute the width of the lookahead token column.  */
  size_t width = 0;
  if (default_reduction)
-    width = strlen (_("$default"));
+    width = mbswidth (_("$default"), 0);
  if (reds->lookahead_tokens)
    for (int i = 0; i < ntokens; i++)
@@ -404,7 +405,7 @@ print_nonterminal_symbols (FILE *out)
            break;
        }
-      int column = 4 + strlen (tag);
+      int column = 4 + mbswidth (tag, 0);
      fprintf (out, "%4s%s", "", tag);
      if (symbols[i]->content->type_name)
        column += fprintf (out, " <%s>",
@@ -1150,6 +1150,11 @@ AT_SETUP([Reports with conflicts])
 AT_KEYWORDS([report])
+# We need UTF-8 support for correct screen-width computation of UTF-8
+# characters.  Skip the test if not available.
+locale=`locale -a | $EGREP '^en_US\.(UTF-8|utf8)$' | sed 1q`
+AT_SKIP_IF([test x == x"$locale"])
 AT_BISON_OPTION_PUSHDEFS
 AT_DATA([input.y],
 [[%left "+"
@@ -1162,7 +1167,7 @@ exp
 | "Ñùṃéℝô"
 ]])
-AT_BISON_CHECK([-o input.cc -rall --graph=input.gv --xml input.y], [], [],
+AT_CHECK([LC_ALL="$locale" $5 bison -fno-caret -o input.cc -rall --graph=input.gv --xml input.y], [], [],
 [[input.y: warning: 3 shift/reduce conflicts [-Wconflicts-sr]
 input.y: warning: 3 reduce/reduce conflicts [-Wconflicts-rr]
 input.y: warning: rerun with option '-Wcounterexamples' to generate conflict counterexamples [-Wother]
@@ -1219,7 +1224,7 @@ State 0
    4    | . "number"
    5    | . "Ñùṃéℝô"
-    "number"          shift, and go to state 1
+    "number"  shift, and go to state 1
    "Ñùṃéℝô"  shift, and go to state 2
    exp  go to state 3
@@ -1246,9 +1251,9 @@ State 3
    2    | exp . "+" exp
    3    | exp . "+" exp
-    $end   shift, and go to state 4
+    $end  shift, and go to state 4
-    "+"    shift, and go to state 5
+    "+"   shift, and go to state 5
-    "⊕"  shift, and go to state 6
+    "⊕"   shift, and go to state 6
 State 4
@@ -1268,7 +1273,7 @@ State 5
    4    | . "number"
    5    | . "Ñùṃéℝô"
-    "number"          shift, and go to state 1
+    "number"  shift, and go to state 1
    "Ñùṃéℝô"  shift, and go to state 2
    exp  go to state 7
@@ -1283,7 +1288,7 @@ State 6
    4    | . "number"
    5    | . "Ñùṃéℝô"
-    "number"          shift, and go to state 1
+    "number"  shift, and go to state 1
    "Ñùṃéℝô"  shift, and go to state 2
    exp  go to state 8
@@ -1303,8 +1308,8 @@ State 7
    $end      [reduce using rule 3 (exp)]
    "+"       reduce using rule 2 (exp)
    "+"       [reduce using rule 3 (exp)]
-    "⊕"     [reduce using rule 2 (exp)]
+    "⊕"       [reduce using rule 2 (exp)]
-    "⊕"     [reduce using rule 3 (exp)]
+    "⊕"       [reduce using rule 3 (exp)]
    $default  reduce using rule 2 (exp)
    Conflict between rule 2 and token "+" resolved as reduce (%left "+").
@@ -1317,11 +1322,11 @@ State 8
    2    | exp . "+" exp
    3    | exp . "+" exp
-    "+"    shift, and go to state 5
+    "+"  shift, and go to state 5
    "⊕"  shift, and go to state 6
    "+"       [reduce using rule 1 (exp)]
-    "⊕"     [reduce using rule 1 (exp)]
+    "⊕"       [reduce using rule 1 (exp)]
    $default  reduce using rule 1 (exp)
 ]])