mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-16 15:53:03 +00:00
diagnostics: beware of unexpected EOF when quoting the source file
When the input file contains lone CRs (aka ^M, \r), the locations see a new line. Diagnostics look only at \n as end-of-line, so sometimes there is an offset in diagnostics. Worse yet: sometimes we loop endlessly waiting for \n to come from a continuous stream of EOF. Fix that: - check for EOF - beware not to call end_use_class if begin_use_class was not called (which would abort). This could happen if the actual line is shorter than the expected one. Prompted by a (private) report from Marc Schönefeld. * src/location.c (location_caret): here. * tests/diagnostics.at (Carriage return): New.
This commit is contained in:
@@ -229,7 +229,13 @@ location_caret (location loc, const char *style, FILE *out)
|
|||||||
|
|
||||||
/* Advance to the line's position, keeping track of the offset. */
|
/* Advance to the line's position, keeping track of the offset. */
|
||||||
while (caret_info.line < loc.start.line)
|
while (caret_info.line < loc.start.line)
|
||||||
caret_info.line += getc (caret_info.source) == '\n';
|
{
|
||||||
|
int c = getc (caret_info.source);
|
||||||
|
if (c == EOF)
|
||||||
|
/* Something is wrong, that line number does not exist. */
|
||||||
|
return;
|
||||||
|
caret_info.line += c == '\n';
|
||||||
|
}
|
||||||
caret_info.offset = ftell (caret_info.source);
|
caret_info.offset = ftell (caret_info.source);
|
||||||
|
|
||||||
/* Read the actual line. Don't update the offset, so that we keep a pointer
|
/* Read the actual line. Don't update the offset, so that we keep a pointer
|
||||||
@@ -238,32 +244,43 @@ location_caret (location loc, const char *style, FILE *out)
|
|||||||
int c = getc (caret_info.source);
|
int c = getc (caret_info.source);
|
||||||
if (c != EOF)
|
if (c != EOF)
|
||||||
{
|
{
|
||||||
|
bool single_line = loc.start.line == loc.end.line;
|
||||||
/* Quote the file (at most the first line in the case of
|
/* Quote the file (at most the first line in the case of
|
||||||
multiline locations). */
|
multiline locations). */
|
||||||
fprintf (out, "%5d | ", loc.start.line);
|
|
||||||
bool single_line = loc.start.line == loc.end.line;
|
|
||||||
/* Consider that single point location (with equal boundaries)
|
|
||||||
actually denote the character that they follow. */
|
|
||||||
int byte_end = loc.end.byte +
|
|
||||||
(single_line && loc.start.byte == loc.end.byte);
|
|
||||||
/* Byte number. */
|
|
||||||
int byte = 1;
|
|
||||||
while (c != EOF && c != '\n')
|
|
||||||
{
|
|
||||||
if (byte == loc.start.byte)
|
|
||||||
begin_use_class (style, out);
|
|
||||||
fputc (c, out);
|
|
||||||
c = getc (caret_info.source);
|
|
||||||
++byte;
|
|
||||||
if (single_line
|
|
||||||
? byte == byte_end
|
|
||||||
: c == '\n' || c == EOF)
|
|
||||||
end_use_class (style, out);
|
|
||||||
}
|
|
||||||
putc ('\n', out);
|
|
||||||
|
|
||||||
{
|
{
|
||||||
/* Print the carets with the same indentation as above. */
|
fprintf (out, "%5d | ", loc.start.line);
|
||||||
|
/* Consider that single point location (with equal boundaries)
|
||||||
|
actually denote the character that they follow. */
|
||||||
|
int byte_end = loc.end.byte +
|
||||||
|
(single_line && loc.start.byte == loc.end.byte);
|
||||||
|
/* Byte number. */
|
||||||
|
int byte = 1;
|
||||||
|
/* Whether we opened the style. If the line is not as
|
||||||
|
expected (maybe the file was changed since the scanner
|
||||||
|
ran), we might reach the end before we actually saw the
|
||||||
|
opening column. */
|
||||||
|
bool opened = false;
|
||||||
|
while (c != EOF && c != '\n')
|
||||||
|
{
|
||||||
|
if (byte == loc.start.byte)
|
||||||
|
{
|
||||||
|
begin_use_class (style, out);
|
||||||
|
opened = true;
|
||||||
|
}
|
||||||
|
fputc (c, out);
|
||||||
|
c = getc (caret_info.source);
|
||||||
|
++byte;
|
||||||
|
if (opened
|
||||||
|
&& (single_line
|
||||||
|
? byte == byte_end
|
||||||
|
: c == '\n' || c == EOF))
|
||||||
|
end_use_class (style, out);
|
||||||
|
}
|
||||||
|
putc ('\n', out);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Print the carets with the same indentation as above. */
|
||||||
|
{
|
||||||
fprintf (out, " | %*s", loc.start.column - 1, "");
|
fprintf (out, " | %*s", loc.start.column - 1, "");
|
||||||
begin_use_class (style, out);
|
begin_use_class (style, out);
|
||||||
putc ('^', out);
|
putc ('^', out);
|
||||||
@@ -275,8 +292,8 @@ location_caret (location loc, const char *style, FILE *out)
|
|||||||
for (int i = loc.start.column + 1; i < len; ++i)
|
for (int i = loc.start.column + 1; i < len; ++i)
|
||||||
putc ('~', out);
|
putc ('~', out);
|
||||||
end_use_class (style, out);
|
end_use_class (style, out);
|
||||||
|
putc ('\n', out);
|
||||||
}
|
}
|
||||||
putc ('\n', out);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -35,17 +35,23 @@ AT_BISON_OPTION_PUSHDEFS
|
|||||||
|
|
||||||
AT_DATA_GRAMMAR([[input.y]], [$2])
|
AT_DATA_GRAMMAR([[input.y]], [$2])
|
||||||
|
|
||||||
AT_DATA([experr.orig], [$4])
|
# For some reason, literal ^M in the input are removed and don't end
|
||||||
|
# in `input.y`. So use the two-character ^M to represent it, and let
|
||||||
|
# Perl insert real CR characters.
|
||||||
|
AT_CHECK([perl -pi -e 's{\^M}{\r}gx' input.y])
|
||||||
|
|
||||||
|
AT_DATA([experr], [$4])
|
||||||
|
|
||||||
|
AT_CHECK([LC_ALL=en_US.UTF-8 bison -fcaret --color=debug -Wall input.y], [$3], [], [experr])
|
||||||
|
|
||||||
# When no style, same messages, but without style.
|
# When no style, same messages, but without style.
|
||||||
AT_CHECK([perl -p -e 's{</?\w+>}{}g' <experr.orig >experr])
|
AT_CHECK([perl -pi -e 's{(</?\w+>)}{ $[]1 eq "<tag>" ? $[]1 : "" }ge' experr])
|
||||||
|
|
||||||
# Cannot use AT_BISON_CHECK easily as we need to change the
|
# Cannot use AT_BISON_CHECK easily as we need to change the
|
||||||
# environment.
|
# environment.
|
||||||
# FIXME: Enhance AT_BISON_CHECK.
|
# FIXME: Enhance AT_BISON_CHECK.
|
||||||
AT_CHECK([LC_ALL=en_US.UTF-8 bison -fcaret -Wall input.y], [$3], [], [experr])
|
AT_CHECK([LC_ALL=en_US.UTF-8 bison -fcaret -Wall input.y], [$3], [], [experr])
|
||||||
|
|
||||||
AT_CHECK([cp experr.orig experr])
|
|
||||||
AT_CHECK([LC_ALL=en_US.UTF-8 bison -fcaret --color=debug -Wall input.y], [$3], [], [experr])
|
|
||||||
|
|
||||||
AT_BISON_OPTION_POPDEFS
|
AT_BISON_OPTION_POPDEFS
|
||||||
|
|
||||||
@@ -255,6 +261,24 @@ input.y: <warning>warning:</warning> fix-its can be applied. Rerun with option
|
|||||||
]])
|
]])
|
||||||
|
|
||||||
|
|
||||||
|
## ----------------- ##
|
||||||
|
## Carriage return. ##
|
||||||
|
## ----------------- ##
|
||||||
|
|
||||||
|
# Carriage-return used to count as a newline in the scanner, and not
|
||||||
|
# in diagnostics. Resulting in all sorts of nice bugs.
|
||||||
|
|
||||||
|
AT_TEST([[Carriage return]],
|
||||||
|
[[^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M
|
||||||
|
%token "
|
||||||
|
%%
|
||||||
|
]],
|
||||||
|
[1],
|
||||||
|
[[input.y:37.8-38.0: <error>error:</error> missing '"' at end of line
|
||||||
|
input.y:37.8-38.0: <error>error:</error> syntax error, unexpected string, expecting char or identifier or <tag>
|
||||||
|
]])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
m4_popdef([AT_TEST])
|
m4_popdef([AT_TEST])
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user