diagnostics: fix styling issues

Single point locations (equal boundaries) are troublesome, and we were incorrectly ending the style in their case. Which results in an abort in libtextstyle. There is also a confusion between columns as displayed on the screen (which take into account multibyte characters and tabulations), and the number of bytes. Counting the screen-column incrementally (character by character) is uneasy (because of multibyte characters), and I don't want to maintain a buffer of the current line when displaying the diagnostic. So I believe the simplest solution is to track the byte number in addition to the screen column. * src/location.h, src/location.c (boundary): Add the byte-column. Adjust dependencies. * src/getargs.c, src/scan-gram.l: Adjust. * tests/diagnostics.at: Check zero-width locations.
2026-07-24 23:10:33 +00:00 · 2019-04-23 18:29:10 +02:00
parent 520d474ec6
commit 1b70f687fa
8 changed files with 86 additions and 29 deletions
@@ -577,7 +577,7 @@ command_line_location (void)
 {
  location res;
  /* "<command line>" is used in GCC's messages about -D. */
-  boundary_set (&res.start, uniqstr_new ("<command line>"), optind - 1, -1);
+  boundary_set (&res.start, uniqstr_new ("<command line>"), optind - 1, -1, -1);
  res.end = res.start;
  return res;
 }
@@ -51,6 +51,7 @@ location_compute (location *loc, boundary *cur, char const *token, size_t size)
 {
  int line = cur->line;
  int column = cur->column;
+  int byte = cur->byte;
  char const *p0 = token;
  char const *p = token;
  char const *lim = token + size;
@@ -63,6 +64,7 @@ location_compute (location *loc, boundary *cur, char const *token, size_t size)
      case '\n':
        line += line < INT_MAX;
        column = 1;
+        byte = 1;
        p0 = p + 1;
        break;

@@ -70,14 +72,17 @@ location_compute (location *loc, boundary *cur, char const *token, size_t size)
        column = add_column_width (column, p0, p - p0);
        column = add_column_width (column, NULL, 8 - ((column - 1) & 7));
        p0 = p + 1;
+        byte += byte < INT_MAX;
        break;

      default:
+        byte += byte < INT_MAX;
        break;
      }

  cur->line = line;
  cur->column = column = add_column_width (column, p0, p - p0);
+  cur->byte = byte;

  loc->end = *cur;

@@ -85,6 +90,8 @@ location_compute (location *loc, boundary *cur, char const *token, size_t size)
    complain (loc, Wother, _("line number overflow"));
  if (column == INT_MAX && loc->start.column != INT_MAX)
    complain (loc, Wother, _("column number overflow"));
+  if (byte == INT_MAX && loc->start.byte != INT_MAX)
+    complain (loc, Wother, _("byte number overflow"));
 }


@@ -135,6 +142,7 @@ struct caret_info
 {
  FILE *source;
  size_t line;
+  /* Offset in SOURCE where line LINE starts.  */
  size_t offset;
 };

@@ -153,8 +161,6 @@ caret_free ()
 void
 location_caret (location loc, const char *style, FILE *out)
 {
-  /* FIXME: find a way to support multifile locations, and only open once each
-     file. That would make the procedure future-proof.  */
  if (! (caret_info.source
         || (caret_info.source = fopen (loc.start.file, "r")))
      || loc.start.column == -1 || loc.start.line == -1)
@@ -186,22 +192,25 @@ location_caret (location loc, const char *style, FILE *out)
        /* Quote the file (at most first line in the case of multiline
           location).  Indent by a single column.  */
        fputc (' ', out);
-        int col = 0;
-        do
+        bool single_line = loc.start.line == loc.end.line;
+        /* Consider that single point location (with equal boundaries)
+           actually denote the character that they follow.  */
+        int byte_end = loc.end.byte +
+          (single_line && loc.start.byte == loc.end.byte);
+        /* Byte number.  */
+        int byte = 1;
+        while (c != EOF && c != '\n')
          {
-            ++col;
-            if (col == loc.start.column)
+            if (byte == loc.start.byte)
              begin_use_class (style, out);
            fputc (c, out);
-            if (loc.start.column == loc.end.column
-                ? col == loc.end.column
-                : col + 1 == loc.end.column)
+            c = getc (caret_info.source);
+            ++byte;
+            if (single_line
+                ? byte == byte_end
+                : c == '\n' || c == EOF)
              end_use_class (style, out);
          }
-        while ((c = getc (caret_info.source)) != EOF && c != '\n');
-        /* Close the style, in the case of a multiline location.  */
-        if (loc.start.line != loc.end.line)
-          end_use_class (style, out);
        putc ('\n', out);

        {
@@ -49,15 +49,21 @@ typedef struct
  */
  int column;

+  /* If nonnegative, (origin-0) bytes number in the current line.
+     Never displayed, used when printing error messages with colors to
+     know where colors start and ends.  */
+  int byte;
+
 } boundary;

-/* Set the position of \a a. */
+/* Set the position of \a p. */
 static inline void
-boundary_set (boundary *b, const char *f, int l, int c)
+boundary_set (boundary *p, const char *f, int l, int c, int b)
 {
-  b->file = f;
-  b->line = l;
-  b->column = c;
+  p->file = f;
+  p->line = l;
+  p->column = c;
+  p->byte = b;
 }

 /* Return -1, 0, 1, depending whether a is before, equal, or
@@ -95,7 +101,7 @@ typedef struct

 # define GRAM_LTYPE location

-# define EMPTY_LOCATION_INIT {{NULL, 0, 0}, {NULL, 0, 0}}
+# define EMPTY_LOCATION_INIT {{NULL, 0, 0, 0}, {NULL, 0, 0, 0}}
 extern location const empty_location;

 /* Set *LOC and adjust scanner cursor to account for token TOKEN of
@@ -1787,8 +1787,8 @@ YYLTYPE yylloc = yyloc_default;
 {
  /* Bison's grammar can initial empty locations, hence a default
     location is needed. */
-  boundary_set (&yylloc.start, current_file, 1, 1);
-  boundary_set (&yylloc.end, current_file, 1, 1);
+  boundary_set (&yylloc.start, current_file, 1, 1, 1);
+  boundary_set (&yylloc.end, current_file, 1, 1, 1);
 }


@@ -130,8 +130,8 @@
 {
  /* Bison's grammar can initial empty locations, hence a default
     location is needed. */
-  boundary_set (&@$.start, current_file, 1, 1);
-  boundary_set (&@$.end, current_file, 1, 1);
+  boundary_set (&@$.start, current_file, 1, 1, 1);
+  boundary_set (&@$.end, current_file, 1, 1, 1);
 }

 /* Define the tokens together with their human representation.  */
@@ -71,6 +71,7 @@ static size_t no_cr_read (FILE *, char *, size_t);
 #define ROLLBACK_CURRENT_TOKEN                                  \
  do {                                                          \
    scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);     \
+    scanner_cursor.byte -= yyleng;                              \
    yyless (0);                                                 \
  } while (0)

@@ -78,6 +79,7 @@ static size_t no_cr_read (FILE *, char *, size_t);
  do {                                                          \
    deprecated_directive (loc, yytext, Msg);                    \
    scanner_cursor.column -= mbsnwidth (Msg, strlen (Msg), 0);  \
+    scanner_cursor.byte -= strlen (Msg);                        \
    for (size_t i = strlen (Msg); i != 0; --i)                  \
      unput (Msg[i - 1]);                                       \
  } while (0)
@@ -944,7 +946,7 @@ handle_syncline (char *args, location loc)
      *strchr (file + 1, '"') = '\0';
      current_file = uniqstr_new (file + 1);
    }
-  boundary_set (&scanner_cursor, current_file, lineno, 1);
+  boundary_set (&scanner_cursor, current_file, lineno, 1, 1);
 }


@@ -965,6 +967,7 @@ unexpected_end (boundary start, char const *msgid, char const *token_end)
  /* Adjust scanner cursor so that any later message does not count
     the characters about to be inserted.  */
  scanner_cursor.column -= i;
+  scanner_cursor.byte -= i;

  while (i != 0)
    unput (token_end[--i]);
@@ -28,11 +28,11 @@ AT_BISON_OPTION_PUSHDEFS
 AT_DATA_GRAMMAR([[input.y]], [$2])

 AT_DATA([experr], [$3])
-AT_BISON_CHECK([-fcaret --style=debug -Wother input.y], [], [], [experr])
+AT_BISON_CHECK([-fcaret --style=debug -Wall input.y], [], [], [experr])

 # When no style, same messages, except the style.
 AT_CHECK([perl -pi -e 's{</?\w+>}{}g' experr])
-AT_BISON_CHECK([-fcaret -Wother input.y], [], [], [experr])
+AT_BISON_CHECK([-fcaret -Wall input.y], [], [], [experr])

 AT_BISON_OPTION_POPDEFS

@@ -48,7 +48,7 @@ AT_TEST([[Warnings]],
 [[%token FOO FOO FOO
 %token FOO  FOO  FOO
 %%
-exp:;
+exp: %empty;
 ]],
 [[input.y:9.12-14: <warning>warning:</warning> symbol FOO redeclared [<warning>-Wother</warning>]
 %token FOO <warning>FOO</warning> FOO
@@ -68,4 +68,43 @@ input.y:10.18-20: <warning>warning:</warning> symbol FOO redeclared [<warning>-W
 ]])


+## ------------------------ ##
+## Single point locations.  ##
+## ------------------------ ##
+
+# Single point locations (equal boundaries) are troublesome: it's easy
+# to mess up the opening/closing of style.  They come from the parser,
+# rules with empty rhs.  Their position is therefore debatable
+# (between the previous token and the next one).
+
+AT_TEST([[Single point locations]],
+[[%%
+exp: a b c d e
+a: {}
+b:{
+};
+c:
+d
+:
+e:
+]],
+[[input.y:11.4-5: <warning>warning:</warning> empty rule without %empty [<warning>-Wempty-rule</warning>]
+ a: <warning>{}</warning>
+    <warning>^~</warning>
+input.y:12.3-13.1: <warning>warning:</warning> empty rule without %empty [<warning>-Wempty-rule</warning>]
+ b:<warning>{</warning>
+   <warning>^</warning>
+input.y:14.2: <warning>warning:</warning> empty rule without %empty [<warning>-Wempty-rule</warning>]
+ c<warning>:</warning>
+  <warning>^</warning>
+input.y:15.2: <warning>warning:</warning> empty rule without %empty [<warning>-Wempty-rule</warning>]
+ d
+  <warning>^</warning>
+input.y:17.2: <warning>warning:</warning> empty rule without %empty [<warning>-Wempty-rule</warning>]
+ e<warning>:</warning>
+  <warning>^</warning>
+]])
+
+
+
 m4_popdef([AT_TEST])