mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-09 20:33:03 +00:00
diagnostics: beware of unexpected EOF when quoting the source file
When the input file contains lone CRs (aka, ^M, \r), the locations see a new line. Diagnostics look only at \n as end-of-line, so sometimes there is an offset in diagnostics. Worse yet: sometimes we loop endlessly waiting for \n to come from a continuous stream of EOF. Fix that: - check for EOF - beware not to call end_use_class if begin_use_class was not called (which would abort). This could happen if the actual line is shorter that the expected one. Prompted by a (private) report from Marc Schönefeld. * src/location.c (location_caret): here. * tests/diagnostics.at (Carriage return): New.
This commit is contained in:
@@ -229,7 +229,13 @@ location_caret (location loc, const char *style, FILE *out)
|
||||
|
||||
/* Advance to the line's position, keeping track of the offset. */
|
||||
while (caret_info.line < loc.start.line)
|
||||
caret_info.line += getc (caret_info.source) == '\n';
|
||||
{
|
||||
int c = getc (caret_info.source);
|
||||
if (c == EOF)
|
||||
/* Something is wrong, that line number does not exist. */
|
||||
return;
|
||||
caret_info.line += c == '\n';
|
||||
}
|
||||
caret_info.offset = ftell (caret_info.source);
|
||||
|
||||
/* Read the actual line. Don't update the offset, so that we keep a pointer
|
||||
@@ -238,32 +244,43 @@ location_caret (location loc, const char *style, FILE *out)
|
||||
int c = getc (caret_info.source);
|
||||
if (c != EOF)
|
||||
{
|
||||
bool single_line = loc.start.line == loc.end.line;
|
||||
/* Quote the file (at most the first line in the case of
|
||||
multiline locations). */
|
||||
fprintf (out, "%5d | ", loc.start.line);
|
||||
bool single_line = loc.start.line == loc.end.line;
|
||||
/* Consider that single point location (with equal boundaries)
|
||||
actually denote the character that they follow. */
|
||||
int byte_end = loc.end.byte +
|
||||
(single_line && loc.start.byte == loc.end.byte);
|
||||
/* Byte number. */
|
||||
int byte = 1;
|
||||
while (c != EOF && c != '\n')
|
||||
{
|
||||
if (byte == loc.start.byte)
|
||||
begin_use_class (style, out);
|
||||
fputc (c, out);
|
||||
c = getc (caret_info.source);
|
||||
++byte;
|
||||
if (single_line
|
||||
? byte == byte_end
|
||||
: c == '\n' || c == EOF)
|
||||
end_use_class (style, out);
|
||||
}
|
||||
putc ('\n', out);
|
||||
|
||||
{
|
||||
/* Print the carets with the same indentation as above. */
|
||||
fprintf (out, "%5d | ", loc.start.line);
|
||||
/* Consider that single point location (with equal boundaries)
|
||||
actually denote the character that they follow. */
|
||||
int byte_end = loc.end.byte +
|
||||
(single_line && loc.start.byte == loc.end.byte);
|
||||
/* Byte number. */
|
||||
int byte = 1;
|
||||
/* Whether we opened the style. If the line is not as
|
||||
expected (maybe the file was changed since the scanner
|
||||
ran), we might reach the end before we actually saw the
|
||||
opening column. */
|
||||
bool opened = false;
|
||||
while (c != EOF && c != '\n')
|
||||
{
|
||||
if (byte == loc.start.byte)
|
||||
{
|
||||
begin_use_class (style, out);
|
||||
opened = true;
|
||||
}
|
||||
fputc (c, out);
|
||||
c = getc (caret_info.source);
|
||||
++byte;
|
||||
if (opened
|
||||
&& (single_line
|
||||
? byte == byte_end
|
||||
: c == '\n' || c == EOF))
|
||||
end_use_class (style, out);
|
||||
}
|
||||
putc ('\n', out);
|
||||
}
|
||||
|
||||
/* Print the carets with the same indentation as above. */
|
||||
{
|
||||
fprintf (out, " | %*s", loc.start.column - 1, "");
|
||||
begin_use_class (style, out);
|
||||
putc ('^', out);
|
||||
@@ -275,8 +292,8 @@ location_caret (location loc, const char *style, FILE *out)
|
||||
for (int i = loc.start.column + 1; i < len; ++i)
|
||||
putc ('~', out);
|
||||
end_use_class (style, out);
|
||||
putc ('\n', out);
|
||||
}
|
||||
putc ('\n', out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,17 +35,23 @@ AT_BISON_OPTION_PUSHDEFS
|
||||
|
||||
AT_DATA_GRAMMAR([[input.y]], [$2])
|
||||
|
||||
AT_DATA([experr.orig], [$4])
|
||||
# For some reason, literal ^M in the input are removed and don't end
|
||||
# in `input.y`. So use the two-character ^M represent it, and let
|
||||
# Perl insert real CR characters.
|
||||
AT_CHECK([perl -pi -e 's{\^M}{\r}gx' input.y])
|
||||
|
||||
AT_DATA([experr], [$4])
|
||||
|
||||
AT_CHECK([LC_ALL=en_US.UTF-8 bison -fcaret --color=debug -Wall input.y], [$3], [], [experr])
|
||||
|
||||
# When no style, same messages, but without style.
|
||||
AT_CHECK([perl -p -e 's{</?\w+>}{}g' <experr.orig >experr])
|
||||
AT_CHECK([perl -pi -e 's{(</?\w+>)}{ $[]1 eq "<tag>" ? $[]1 : "" }ge' experr])
|
||||
|
||||
# Cannot use AT_BISON_CHECK easily as we need to change the
|
||||
# environment.
|
||||
# FIXME: Enhance AT_BISON_CHECK.
|
||||
AT_CHECK([LC_ALL=en_US.UTF-8 bison -fcaret -Wall input.y], [$3], [], [experr])
|
||||
|
||||
AT_CHECK([cp experr.orig experr])
|
||||
AT_CHECK([LC_ALL=en_US.UTF-8 bison -fcaret --color=debug -Wall input.y], [$3], [], [experr])
|
||||
|
||||
AT_BISON_OPTION_POPDEFS
|
||||
|
||||
@@ -255,6 +261,24 @@ input.y: <warning>warning:</warning> fix-its can be applied. Rerun with option
|
||||
]])
|
||||
|
||||
|
||||
## ----------------- ##
|
||||
## Carriage return. ##
|
||||
## ----------------- ##
|
||||
|
||||
# Carriage-return used to count as a newline in the scanner, and not
|
||||
# in diagnostics. Resulting in all sort of nice bugs.
|
||||
|
||||
AT_TEST([[Carriage return]],
|
||||
[[^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M
|
||||
%token "
|
||||
%%
|
||||
]],
|
||||
[1],
|
||||
[[input.y:37.8-38.0: <error>error:</error> missing '"' at end of line
|
||||
input.y:37.8-38.0: <error>error:</error> syntax error, unexpected string, expecting char or identifier or <tag>
|
||||
]])
|
||||
|
||||
|
||||
|
||||
m4_popdef([AT_TEST])
|
||||
|
||||
|
||||
Reference in New Issue
Block a user