diagnostics: beware of unexpected EOF when quoting the source file

When the input file contains lone CRs (aka, ^M, \r), the locations see
a new line.  Diagnostics look only at \n as end-of-line, so sometimes
there is an offset in diagnostics.  Worse yet: sometimes we loop
endlessly waiting for \n to come from a continuous stream of EOF.

Fix that:
- check for EOF
- beware not to call end_use_class if begin_use_class was not
  called (which would abort).  This could happen if the actual
  line is shorter than the expected one.

Prompted by a (private) report from Marc Schönefeld.

* src/location.c (location_caret): here.
* tests/diagnostics.at (Carriage return): New.
This commit is contained in:
Akim Demaille
2019-09-09 20:13:04 +02:00
parent 741a58a504
commit d120a07e6b
2 changed files with 70 additions and 29 deletions

View File

@@ -229,7 +229,13 @@ location_caret (location loc, const char *style, FILE *out)
/* Advance to the line's position, keeping track of the offset. */
while (caret_info.line < loc.start.line)
caret_info.line += getc (caret_info.source) == '\n';
{
int c = getc (caret_info.source);
if (c == EOF)
/* Something is wrong, that line number does not exist. */
return;
caret_info.line += c == '\n';
}
caret_info.offset = ftell (caret_info.source);
/* Read the actual line. Don't update the offset, so that we keep a pointer
@@ -238,32 +244,43 @@ location_caret (location loc, const char *style, FILE *out)
int c = getc (caret_info.source);
if (c != EOF)
{
bool single_line = loc.start.line == loc.end.line;
/* Quote the file (at most the first line in the case of
multiline locations). */
fprintf (out, "%5d | ", loc.start.line);
bool single_line = loc.start.line == loc.end.line;
/* Consider that single point location (with equal boundaries)
actually denote the character that they follow. */
int byte_end = loc.end.byte +
(single_line && loc.start.byte == loc.end.byte);
/* Byte number. */
int byte = 1;
while (c != EOF && c != '\n')
{
if (byte == loc.start.byte)
begin_use_class (style, out);
fputc (c, out);
c = getc (caret_info.source);
++byte;
if (single_line
? byte == byte_end
: c == '\n' || c == EOF)
end_use_class (style, out);
}
putc ('\n', out);
{
/* Print the carets with the same indentation as above. */
fprintf (out, "%5d | ", loc.start.line);
/* Consider that single point location (with equal boundaries)
actually denote the character that they follow. */
int byte_end = loc.end.byte +
(single_line && loc.start.byte == loc.end.byte);
/* Byte number. */
int byte = 1;
/* Whether we opened the style. If the line is not as
expected (maybe the file was changed since the scanner
ran), we might reach the end before we actually saw the
opening column. */
bool opened = false;
while (c != EOF && c != '\n')
{
if (byte == loc.start.byte)
{
begin_use_class (style, out);
opened = true;
}
fputc (c, out);
c = getc (caret_info.source);
++byte;
if (opened
&& (single_line
? byte == byte_end
: c == '\n' || c == EOF))
end_use_class (style, out);
}
putc ('\n', out);
}
/* Print the carets with the same indentation as above. */
{
fprintf (out, " | %*s", loc.start.column - 1, "");
begin_use_class (style, out);
putc ('^', out);
@@ -275,8 +292,8 @@ location_caret (location loc, const char *style, FILE *out)
for (int i = loc.start.column + 1; i < len; ++i)
putc ('~', out);
end_use_class (style, out);
putc ('\n', out);
}
putc ('\n', out);
}
}
}

View File

@@ -35,17 +35,23 @@ AT_BISON_OPTION_PUSHDEFS
AT_DATA_GRAMMAR([[input.y]], [$2])
AT_DATA([experr.orig], [$4])
# For some reason, literal ^M characters in the input are removed and
# don't end up in `input.y`.  So use the two-character sequence ^M to
# represent them, and let Perl insert real CR characters.
AT_CHECK([perl -pi -e 's{\^M}{\r}gx' input.y])
AT_DATA([experr], [$4])
AT_CHECK([LC_ALL=en_US.UTF-8 bison -fcaret --color=debug -Wall input.y], [$3], [], [experr])
# When no style, same messages, but without style.
AT_CHECK([perl -p -e 's{</?\w+>}{}g' <experr.orig >experr])
AT_CHECK([perl -pi -e 's{(</?\w+>)}{ $[]1 eq "<tag>" ? $[]1 : "" }ge' experr])
# Cannot use AT_BISON_CHECK easily as we need to change the
# environment.
# FIXME: Enhance AT_BISON_CHECK.
AT_CHECK([LC_ALL=en_US.UTF-8 bison -fcaret -Wall input.y], [$3], [], [experr])
AT_CHECK([cp experr.orig experr])
AT_CHECK([LC_ALL=en_US.UTF-8 bison -fcaret --color=debug -Wall input.y], [$3], [], [experr])
AT_BISON_OPTION_POPDEFS
@@ -255,6 +261,24 @@ input.y: <warning>warning:</warning> fix-its can be applied. Rerun with option
]])
## ----------------- ##
## Carriage return. ##
## ----------------- ##
# Carriage return used to count as a newline in the scanner, but not
# in diagnostics, resulting in all sorts of nice bugs.
AT_TEST([[Carriage return]],
[[^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M
%token "
%%
]],
[1],
[[input.y:37.8-38.0: <error>error:</error> missing '"' at end of line
input.y:37.8-38.0: <error>error:</error> syntax error, unexpected string, expecting char or identifier or <tag>
]])
m4_popdef([AT_TEST])