diff --git a/src/location.c b/src/location.c index dbcd67ec..9f929c00 100644 --- a/src/location.c +++ b/src/location.c @@ -421,12 +421,14 @@ location_caret (location loc, const char *style, FILE *out) { /* The last column to highlight. Only the first line of multiline locations are quoted, in which case the ending - column is the end of line. Single point locations (with - equal boundaries) denote the character that they - follow. */ - int col_end + column is the end of line. + + We used to work with byte offsets, and that was much + easier. However, we went back to using (visual) columns to + support truncating of long lines. */ + const int col_end = loc.start.line == loc.end.line - ? loc.end.column + (loc.start.column == loc.end.column) + ? loc.end.column : caret_info.line_len; /* Quote the file (at most the first line in the case of multiline locations). */ @@ -436,24 +438,28 @@ location_caret (location loc, const char *style, FILE *out) expected (maybe the file was changed since the scanner ran), we might reach the end before we actually saw the opening column. */ - bool opened = false; + enum { before, inside, after } state = before; while (!mb_iseof (c) && !mb_iseq (c, '\n')) { - if (caret_info.pos.column == loc.start.column) + // We might have already opened (and even closed!) the + // style and yet have the equality of the columns if we + // just saw zero-width characters. 
+ if (state == before + && caret_info.pos.column == loc.start.column) { begin_use_class (style, out); - opened = true; + state = inside; } if (skip < caret_info.pos.column) mb_putc (c, out); boundary_compute (&caret_info.pos, mb_ptr (c), mb_len (c)); caret_getc (c); - if (opened + if (state == inside && (caret_info.pos.column == col_end || width < caret_info.pos.column - skip)) { end_use_class (style, out); - opened = false; + state = after; } if (width < caret_info.pos.column - skip) { @@ -461,11 +467,11 @@ location_caret (location loc, const char *style, FILE *out) break; } } - // The line is shorter than expected. - if (opened) + if (state == inside) { + // The line is shorter than expected. end_use_class (style, out); - opened = false; + state = after; } putc ('\n', out); } diff --git a/src/location.h b/src/location.h index ccb42e3c..cb3025c6 100644 --- a/src/location.h +++ b/src/location.h @@ -42,16 +42,14 @@ typedef struct /* If positive, the column (starting at 1) just after the boundary. This is neither a byte count, nor a character count; it is a - column count. If this is INT_MAX, the column number has + (visual) column count. If this is INT_MAX, the column number has overflowed. - Meaningless and not displayed if nonpositive. - */ + Meaningless and not displayed if nonpositive. */ int column; - /* If nonnegative, the byte number (starting at 0) in the current line. - Never displayed, used when printing error messages with colors to - know where colors start and end. */ + /* If nonnegative, the byte number (starting at 0) in the current + line. Not displayed (unless --trace=location). */ int byte; } boundary; diff --git a/tests/diagnostics.at b/tests/diagnostics.at index cbf56b77..1471934f 100644 --- a/tests/diagnostics.at +++ b/tests/diagnostics.at @@ -37,15 +37,15 @@ AT_BISON_OPTION_PUSHDEFS AT_DATA_GRAMMAR([[input.y]], [$2]) +AT_DATA([experr], [$4]) + # For some reason, literal ^M in the input are removed and don't end # in `input.y`. 
So use the two-character ^M represent it, and let # Perl insert real CR characters. -if grep '\^M' input.y >/dev/null; then - AT_PERL_REQUIRE([-pi -e 's{\^M}{\r}gx' input.y]) +if $EGREP ['\^M|\\[0-9][0-9][0-9]'] input.y experr >/dev/null; then + AT_PERL_REQUIRE([-pi -e 's{\^M}{\r}g;s{\\(\d{3}|.)}{$v = $[]1; $v =~ /\A\d+\z/ ? chr($v) : $v}ge' input.y experr]) fi -AT_DATA([experr], [$4]) - AT_CHECK([LC_ALL="$locale" $5 bison -fcaret --color=debug -Wall input.y], [$3], [], [experr]) # When no style, same messages, but without style. @@ -193,6 +193,24 @@ input.y:12.8-10: previous declaration ]]) +## ----------------------- ## +## Zero-width characters. ## +## ----------------------- ## + +# We used to open the styling twice for characters that have a +# zero-width on display (e.g., \005). + +AT_TEST([[Zero-width characters]], +[[%% +exp: an\005error. +]], +[1], +[[input.y:10.8: error: invalid character: '\\005' + 10 | exp: an\005error. + | ^ +]]) + + ## -------------------------------------- ## ## Tabulations and multibyte characters. ## ## -------------------------------------- ##