diagnostics: fix styling issues

Single point locations (equal boundaries) are troublesome, and we were
incorrectly ending the style in their case.  Which results in an abort
in libtextstyle.

There is also a confusion between columns as displayed on the
screen (which take into account multibyte characters and tabulations),
and the number of bytes.  Counting the screen-column
incrementally (character by character) is uneasy (because of multibyte
characters), and I don't want to maintain a buffer of the current line
when displaying the diagnostic.  So I believe the simplest solution is
to track the byte number in addition to the screen column.

* src/location.h, src/location.c (boundary): Add the byte-column.
Adjust dependencies.
* src/getargs.c, src/scan-gram.l: Adjust.
* tests/diagnostics.at: Check zero-width locations.
This commit is contained in:
Akim Demaille
2019-04-20 18:25:21 +02:00
parent 520d474ec6
commit 1b70f687fa
8 changed files with 86 additions and 29 deletions

2
gnulib

Submodule gnulib updated: 0d8714b7ce...d6af24178c

View File

@@ -577,7 +577,7 @@ command_line_location (void)
{ {
location res; location res;
/* "<command line>" is used in GCC's messages about -D. */ /* "<command line>" is used in GCC's messages about -D. */
boundary_set (&res.start, uniqstr_new ("<command line>"), optind - 1, -1); boundary_set (&res.start, uniqstr_new ("<command line>"), optind - 1, -1, -1);
res.end = res.start; res.end = res.start;
return res; return res;
} }

View File

@@ -51,6 +51,7 @@ location_compute (location *loc, boundary *cur, char const *token, size_t size)
{ {
int line = cur->line; int line = cur->line;
int column = cur->column; int column = cur->column;
int byte = cur->byte;
char const *p0 = token; char const *p0 = token;
char const *p = token; char const *p = token;
char const *lim = token + size; char const *lim = token + size;
@@ -63,6 +64,7 @@ location_compute (location *loc, boundary *cur, char const *token, size_t size)
case '\n': case '\n':
line += line < INT_MAX; line += line < INT_MAX;
column = 1; column = 1;
byte = 1;
p0 = p + 1; p0 = p + 1;
break; break;
@@ -70,14 +72,17 @@ location_compute (location *loc, boundary *cur, char const *token, size_t size)
column = add_column_width (column, p0, p - p0); column = add_column_width (column, p0, p - p0);
column = add_column_width (column, NULL, 8 - ((column - 1) & 7)); column = add_column_width (column, NULL, 8 - ((column - 1) & 7));
p0 = p + 1; p0 = p + 1;
byte += byte < INT_MAX;
break; break;
default: default:
byte += byte < INT_MAX;
break; break;
} }
cur->line = line; cur->line = line;
cur->column = column = add_column_width (column, p0, p - p0); cur->column = column = add_column_width (column, p0, p - p0);
cur->byte = byte;
loc->end = *cur; loc->end = *cur;
@@ -85,6 +90,8 @@ location_compute (location *loc, boundary *cur, char const *token, size_t size)
complain (loc, Wother, _("line number overflow")); complain (loc, Wother, _("line number overflow"));
if (column == INT_MAX && loc->start.column != INT_MAX) if (column == INT_MAX && loc->start.column != INT_MAX)
complain (loc, Wother, _("column number overflow")); complain (loc, Wother, _("column number overflow"));
if (byte == INT_MAX && loc->start.byte != INT_MAX)
complain (loc, Wother, _("byte number overflow"));
} }
@@ -135,6 +142,7 @@ struct caret_info
{ {
FILE *source; FILE *source;
size_t line; size_t line;
/* Offset in SOURCE where line LINE starts. */
size_t offset; size_t offset;
}; };
@@ -153,8 +161,6 @@ caret_free ()
void void
location_caret (location loc, const char *style, FILE *out) location_caret (location loc, const char *style, FILE *out)
{ {
/* FIXME: find a way to support multifile locations, and only open once each
file. That would make the procedure future-proof. */
if (! (caret_info.source if (! (caret_info.source
|| (caret_info.source = fopen (loc.start.file, "r"))) || (caret_info.source = fopen (loc.start.file, "r")))
|| loc.start.column == -1 || loc.start.line == -1) || loc.start.column == -1 || loc.start.line == -1)
@@ -186,22 +192,25 @@ location_caret (location loc, const char *style, FILE *out)
/* Quote the file (at most first line in the case of multiline /* Quote the file (at most first line in the case of multiline
location). Indent by a single column. */ location). Indent by a single column. */
fputc (' ', out); fputc (' ', out);
int col = 0; bool single_line = loc.start.line == loc.end.line;
do /* Consider that single point location (with equal boundaries)
actually denote the character that they follow. */
int byte_end = loc.end.byte +
(single_line && loc.start.byte == loc.end.byte);
/* Byte number. */
int byte = 1;
while (c != EOF && c != '\n')
{ {
++col; if (byte == loc.start.byte)
if (col == loc.start.column)
begin_use_class (style, out); begin_use_class (style, out);
fputc (c, out); fputc (c, out);
if (loc.start.column == loc.end.column c = getc (caret_info.source);
? col == loc.end.column ++byte;
: col + 1 == loc.end.column) if (single_line
? byte == byte_end
: c == '\n' || c == EOF)
end_use_class (style, out); end_use_class (style, out);
} }
while ((c = getc (caret_info.source)) != EOF && c != '\n');
/* Close the style, in the case of a multiline location. */
if (loc.start.line != loc.end.line)
end_use_class (style, out);
putc ('\n', out); putc ('\n', out);
{ {

View File

@@ -49,15 +49,21 @@ typedef struct
*/ */
int column; int column;
/* If nonnegative, (origin-0) bytes number in the current line.
Never displayed, used when printing error messages with colors to
know where colors start and ends. */
int byte;
} boundary; } boundary;
/* Set the position of \a a. */ /* Set the position of \a p. */
static inline void static inline void
boundary_set (boundary *b, const char *f, int l, int c) boundary_set (boundary *p, const char *f, int l, int c, int b)
{ {
b->file = f; p->file = f;
b->line = l; p->line = l;
b->column = c; p->column = c;
p->byte = b;
} }
/* Return -1, 0, 1, depending whether a is before, equal, or /* Return -1, 0, 1, depending whether a is before, equal, or
@@ -95,7 +101,7 @@ typedef struct
# define GRAM_LTYPE location # define GRAM_LTYPE location
# define EMPTY_LOCATION_INIT {{NULL, 0, 0}, {NULL, 0, 0}} # define EMPTY_LOCATION_INIT {{NULL, 0, 0, 0}, {NULL, 0, 0, 0}}
extern location const empty_location; extern location const empty_location;
/* Set *LOC and adjust scanner cursor to account for token TOKEN of /* Set *LOC and adjust scanner cursor to account for token TOKEN of

View File

@@ -1787,8 +1787,8 @@ YYLTYPE yylloc = yyloc_default;
{ {
/* Bison's grammar can initial empty locations, hence a default /* Bison's grammar can initial empty locations, hence a default
location is needed. */ location is needed. */
boundary_set (&yylloc.start, current_file, 1, 1); boundary_set (&yylloc.start, current_file, 1, 1, 1);
boundary_set (&yylloc.end, current_file, 1, 1); boundary_set (&yylloc.end, current_file, 1, 1, 1);
} }

View File

@@ -130,8 +130,8 @@
{ {
/* Bison's grammar can initial empty locations, hence a default /* Bison's grammar can initial empty locations, hence a default
location is needed. */ location is needed. */
boundary_set (&@$.start, current_file, 1, 1); boundary_set (&@$.start, current_file, 1, 1, 1);
boundary_set (&@$.end, current_file, 1, 1); boundary_set (&@$.end, current_file, 1, 1, 1);
} }
/* Define the tokens together with their human representation. */ /* Define the tokens together with their human representation. */

View File

@@ -71,6 +71,7 @@ static size_t no_cr_read (FILE *, char *, size_t);
#define ROLLBACK_CURRENT_TOKEN \ #define ROLLBACK_CURRENT_TOKEN \
do { \ do { \
scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \ scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
scanner_cursor.byte -= yyleng; \
yyless (0); \ yyless (0); \
} while (0) } while (0)
@@ -78,6 +79,7 @@ static size_t no_cr_read (FILE *, char *, size_t);
do { \ do { \
deprecated_directive (loc, yytext, Msg); \ deprecated_directive (loc, yytext, Msg); \
scanner_cursor.column -= mbsnwidth (Msg, strlen (Msg), 0); \ scanner_cursor.column -= mbsnwidth (Msg, strlen (Msg), 0); \
scanner_cursor.byte -= strlen (Msg); \
for (size_t i = strlen (Msg); i != 0; --i) \ for (size_t i = strlen (Msg); i != 0; --i) \
unput (Msg[i - 1]); \ unput (Msg[i - 1]); \
} while (0) } while (0)
@@ -944,7 +946,7 @@ handle_syncline (char *args, location loc)
*strchr (file + 1, '"') = '\0'; *strchr (file + 1, '"') = '\0';
current_file = uniqstr_new (file + 1); current_file = uniqstr_new (file + 1);
} }
boundary_set (&scanner_cursor, current_file, lineno, 1); boundary_set (&scanner_cursor, current_file, lineno, 1, 1);
} }
@@ -965,6 +967,7 @@ unexpected_end (boundary start, char const *msgid, char const *token_end)
/* Adjust scanner cursor so that any later message does not count /* Adjust scanner cursor so that any later message does not count
the characters about to be inserted. */ the characters about to be inserted. */
scanner_cursor.column -= i; scanner_cursor.column -= i;
scanner_cursor.byte -= i;
while (i != 0) while (i != 0)
unput (token_end[--i]); unput (token_end[--i]);

View File

@@ -28,11 +28,11 @@ AT_BISON_OPTION_PUSHDEFS
AT_DATA_GRAMMAR([[input.y]], [$2]) AT_DATA_GRAMMAR([[input.y]], [$2])
AT_DATA([experr], [$3]) AT_DATA([experr], [$3])
AT_BISON_CHECK([-fcaret --style=debug -Wother input.y], [], [], [experr]) AT_BISON_CHECK([-fcaret --style=debug -Wall input.y], [], [], [experr])
# When no style, same messages, except the style. # When no style, same messages, except the style.
AT_CHECK([perl -pi -e 's{</?\w+>}{}g' experr]) AT_CHECK([perl -pi -e 's{</?\w+>}{}g' experr])
AT_BISON_CHECK([-fcaret -Wother input.y], [], [], [experr]) AT_BISON_CHECK([-fcaret -Wall input.y], [], [], [experr])
AT_BISON_OPTION_POPDEFS AT_BISON_OPTION_POPDEFS
@@ -48,7 +48,7 @@ AT_TEST([[Warnings]],
[[%token FOO FOO FOO [[%token FOO FOO FOO
%token FOO FOO FOO %token FOO FOO FOO
%% %%
exp:; exp: %empty;
]], ]],
[[input.y:9.12-14: <warning>warning:</warning> symbol FOO redeclared [<warning>-Wother</warning>] [[input.y:9.12-14: <warning>warning:</warning> symbol FOO redeclared [<warning>-Wother</warning>]
%token FOO <warning>FOO</warning> FOO %token FOO <warning>FOO</warning> FOO
@@ -68,4 +68,43 @@ input.y:10.18-20: <warning>warning:</warning> symbol FOO redeclared [<warning>-W
]]) ]])
## ------------------------ ##
## Single point locations. ##
## ------------------------ ##
# Single point locations (equal boundaries) are troublesome: it's easy
# to mess up the opening/closing of style. They come from the parser,
# rules with empty rhs. Their position is therefore debatable
# (between the previous token and the next one).
AT_TEST([[Single point locations]],
[[%%
exp: a b c d e
a: {}
b:{
};
c:
d
:
e:
]],
[[input.y:11.4-5: <warning>warning:</warning> empty rule without %empty [<warning>-Wempty-rule</warning>]
a: <warning>{}</warning>
<warning>^~</warning>
input.y:12.3-13.1: <warning>warning:</warning> empty rule without %empty [<warning>-Wempty-rule</warning>]
b:<warning>{</warning>
<warning>^</warning>
input.y:14.2: <warning>warning:</warning> empty rule without %empty [<warning>-Wempty-rule</warning>]
c<warning>:</warning>
<warning>^</warning>
input.y:15.2: <warning>warning:</warning> empty rule without %empty [<warning>-Wempty-rule</warning>]
d
<warning>^</warning>
input.y:17.2: <warning>warning:</warning> empty rule without %empty [<warning>-Wempty-rule</warning>]
e<warning>:</warning>
<warning>^</warning>
]])
m4_popdef([AT_TEST]) m4_popdef([AT_TEST])