diff --git a/src/location.c b/src/location.c
index dbcd67ec..9f929c00 100644
--- a/src/location.c
+++ b/src/location.c
@@ -421,12 +421,14 @@ location_caret (location loc, const char *style, FILE *out)
{
/* The last column to highlight. Only the first line of
multiline locations are quoted, in which case the ending
- column is the end of line. Single point locations (with
- equal boundaries) denote the character that they
- follow. */
- int col_end
+ column is the end of line.
+
+ We used to work with byte offsets, and that was much
+ easier. However, we went back to using (visual) columns to
+ support truncation of long lines. */
+ const int col_end
= loc.start.line == loc.end.line
- ? loc.end.column + (loc.start.column == loc.end.column)
+ ? loc.end.column
: caret_info.line_len;
/* Quote the file (at most the first line in the case of
multiline locations). */
@@ -436,24 +438,28 @@ location_caret (location loc, const char *style, FILE *out)
expected (maybe the file was changed since the scanner
ran), we might reach the end before we actually saw the
opening column. */
- bool opened = false;
+ enum { before, inside, after } state = before;
while (!mb_iseof (c) && !mb_iseq (c, '\n'))
{
- if (caret_info.pos.column == loc.start.column)
+ // We might have already opened (and even closed!) the
+ // style and yet still see equal columns, if we just saw
+ // zero-width characters.
+ if (state == before
+ && caret_info.pos.column == loc.start.column)
{
begin_use_class (style, out);
- opened = true;
+ state = inside;
}
if (skip < caret_info.pos.column)
mb_putc (c, out);
boundary_compute (&caret_info.pos, mb_ptr (c), mb_len (c));
caret_getc (c);
- if (opened
+ if (state == inside
&& (caret_info.pos.column == col_end
|| width < caret_info.pos.column - skip))
{
end_use_class (style, out);
- opened = false;
+ state = after;
}
if (width < caret_info.pos.column - skip)
{
@@ -461,11 +467,11 @@ location_caret (location loc, const char *style, FILE *out)
break;
}
}
- // The line is shorter than expected.
- if (opened)
+ if (state == inside)
{
+ // The line is shorter than expected.
end_use_class (style, out);
- opened = false;
+ state = after;
}
putc ('\n', out);
}
diff --git a/src/location.h b/src/location.h
index ccb42e3c..cb3025c6 100644
--- a/src/location.h
+++ b/src/location.h
@@ -42,16 +42,14 @@ typedef struct
/* If positive, the column (starting at 1) just after the boundary.
This is neither a byte count, nor a character count; it is a
- column count. If this is INT_MAX, the column number has
+ (visual) column count. If this is INT_MAX, the column number has
overflowed.
- Meaningless and not displayed if nonpositive.
- */
+ Meaningless and not displayed if nonpositive. */
int column;
- /* If nonnegative, the byte number (starting at 0) in the current line.
- Never displayed, used when printing error messages with colors to
- know where colors start and end. */
+ /* If nonnegative, the byte number (starting at 0) in the current
+ line. Not displayed (unless --trace=location). */
int byte;
} boundary;
diff --git a/tests/diagnostics.at b/tests/diagnostics.at
index cbf56b77..1471934f 100644
--- a/tests/diagnostics.at
+++ b/tests/diagnostics.at
@@ -37,15 +37,15 @@ AT_BISON_OPTION_PUSHDEFS
AT_DATA_GRAMMAR([[input.y]], [$2])
+AT_DATA([experr], [$4])
+
# For some reason, literal ^M in the input are removed and don't end
# in `input.y`. So use the two-character ^M represent it, and let
# Perl insert real CR characters.
-if grep '\^M' input.y >/dev/null; then
- AT_PERL_REQUIRE([-pi -e 's{\^M}{\r}gx' input.y])
+if $EGREP ['\^M|\\[0-9][0-9][0-9]'] input.y experr >/dev/null; then
+ AT_PERL_REQUIRE([-pi -e 's{\^M}{\r}g;s{\\(\d{3}|.)}{$v = $[]1; $v =~ /\A\d+\z/ ? chr($v) : $v}ge' input.y experr])
fi
-AT_DATA([experr], [$4])
-
AT_CHECK([LC_ALL="$locale" $5 bison -fcaret --color=debug -Wall input.y], [$3], [], [experr])
# When no style, same messages, but without style.
@@ -193,6 +193,24 @@ input.y:12.8-10: previous declaration
]])
+## ----------------------- ##
+## Zero-width characters. ##
+## ----------------------- ##
+
+# We used to open the styling twice for characters that have zero
+# width on display (e.g., \005).
+
+AT_TEST([[Zero-width characters]],
+[[%%
+exp: an\005error.
+]],
+[1],
+[[input.y:10.8: error: invalid character: '\\005'
+ 10 | exp: an\005error.
+ | ^
+]])
+
+
## -------------------------------------- ##
## Tabulations and multibyte characters. ##
## -------------------------------------- ##