diff --git a/src/location.c b/src/location.c index 80e71fb8..40fbc04e 100644 --- a/src/location.c +++ b/src/location.c @@ -229,7 +229,13 @@ location_caret (location loc, const char *style, FILE *out) /* Advance to the line's position, keeping track of the offset. */ while (caret_info.line < loc.start.line) - caret_info.line += getc (caret_info.source) == '\n'; + { + int c = getc (caret_info.source); + if (c == EOF) + /* Something is wrong, that line number does not exist. */ + return; + caret_info.line += c == '\n'; + } caret_info.offset = ftell (caret_info.source); /* Read the actual line. Don't update the offset, so that we keep a pointer @@ -238,32 +244,43 @@ location_caret (location loc, const char *style, FILE *out) int c = getc (caret_info.source); if (c != EOF) { + bool single_line = loc.start.line == loc.end.line; /* Quote the file (at most the first line in the case of multiline locations). */ - fprintf (out, "%5d | ", loc.start.line); - bool single_line = loc.start.line == loc.end.line; - /* Consider that single point location (with equal boundaries) - actually denote the character that they follow. */ - int byte_end = loc.end.byte + - (single_line && loc.start.byte == loc.end.byte); - /* Byte number. */ - int byte = 1; - while (c != EOF && c != '\n') - { - if (byte == loc.start.byte) - begin_use_class (style, out); - fputc (c, out); - c = getc (caret_info.source); - ++byte; - if (single_line - ? byte == byte_end - : c == '\n' || c == EOF) - end_use_class (style, out); - } - putc ('\n', out); - { - /* Print the carets with the same indentation as above. */ + fprintf (out, "%5d | ", loc.start.line); + /* Consider that single point location (with equal boundaries) + actually denote the character that they follow. */ + int byte_end = loc.end.byte + + (single_line && loc.start.byte == loc.end.byte); + /* Byte number. */ + int byte = 1; + /* Whether we opened the style. If the line is not as + expected (maybe the file was changed since the scanner + ran), we might reach the end before we actually saw the + opening column. */ + bool opened = false; + while (c != EOF && c != '\n') + { + if (byte == loc.start.byte) + { + begin_use_class (style, out); + opened = true; + } + fputc (c, out); + c = getc (caret_info.source); + ++byte; + if (opened + && (single_line + ? byte == byte_end + : c == '\n' || c == EOF)) + end_use_class (style, out); + } + putc ('\n', out); + } + + /* Print the carets with the same indentation as above. */ + { fprintf (out, " | %*s", loc.start.column - 1, ""); begin_use_class (style, out); putc ('^', out); @@ -275,8 +292,8 @@ location_caret (location loc, const char *style, FILE *out) for (int i = loc.start.column + 1; i < len; ++i) putc ('~', out); end_use_class (style, out); + putc ('\n', out); } - putc ('\n', out); } } } diff --git a/tests/diagnostics.at b/tests/diagnostics.at index 15815db3..d9398dd5 100644 --- a/tests/diagnostics.at +++ b/tests/diagnostics.at @@ -35,17 +35,23 @@ AT_BISON_OPTION_PUSHDEFS AT_DATA_GRAMMAR([[input.y]], [$2]) -AT_DATA([experr.orig], [$4]) +# For some reason, literal ^M in the input are removed and don't end +# in `input.y`. So use the two-character ^M represent it, and let +# Perl insert real CR characters. +AT_CHECK([perl -pi -e 's{\^M}{\r}gx' input.y]) + +AT_DATA([experr], [$4]) + +AT_CHECK([LC_ALL=en_US.UTF-8 bison -fcaret --color=debug -Wall input.y], [$3], [], [experr]) # When no style, same messages, but without style. -AT_CHECK([perl -p -e 's{}{}g' experr]) +AT_CHECK([perl -pi -e 's{()}{ $[]1 eq "" ? $[]1 : "" }ge' experr]) + # Cannot use AT_BISON_CHECK easily as we need to change the # environment. # FIXME: Enhance AT_BISON_CHECK. AT_CHECK([LC_ALL=en_US.UTF-8 bison -fcaret -Wall input.y], [$3], [], [experr]) -AT_CHECK([cp experr.orig experr]) -AT_CHECK([LC_ALL=en_US.UTF-8 bison -fcaret --color=debug -Wall input.y], [$3], [], [experr]) AT_BISON_OPTION_POPDEFS @@ -255,6 +261,24 @@ input.y: warning: fix-its can be applied. Rerun with option ]]) +## ----------------- ## +## Carriage return. ## +## ----------------- ## + +# Carriage-return used to count as a newline in the scanner, and not +# in diagnostics. Resulting in all sort of nice bugs. + +AT_TEST([[Carriage return]], +[[^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M^M +%token " +%% +]], +[1], +[[input.y:37.8-38.0: error: missing '"' at end of line +input.y:37.8-38.0: error: syntax error, unexpected string, expecting char or identifier or +]]) + + m4_popdef([AT_TEST])