diagnostics: learn how to count column number with multibyte chars

So far diagnostics were cheating: in addition to the 'column' field of
locations (based on the actual screen width of multibyte characters and
on tab expansion), the scanner sets the 'byte' field.
Diagnostics used this byte count to decide where to insert the (color)
style.

We want to be able to truncate the quoted lines when they are too
wide to fit the screen.  This requires that the diagnostics learn how
to count columns: the byte-in-boundary trick no longer works.

Bytes are still used for fix-its.

* bootstrap.conf: We need mbfile for mbf_getc.
* src/location.c (caret_info): We need an mbfile.
(caret_set_file): Initialize it.
(caret_getc): Convert to mbfile.
(location_caret): Instead of relying on the byte position to decide
where to insert the color style, count the current column using
boundary_compute.
This commit is contained in:
Akim Demaille
2019-09-16 08:19:35 +02:00
parent 1ef407d923
commit 945b917da2
4 changed files with 44 additions and 28 deletions

View File

@@ -30,7 +30,7 @@ gnulib_modules='
ldexpl ldexpl
libtextstyle-optional libtextstyle-optional
malloc-gnu malloc-gnu
mbswidth mbfile mbswidth
non-recursive-gnulib-prefix-hack non-recursive-gnulib-prefix-hack
obstack obstack
obstack-printf obstack-printf

5
lib/.gitignore vendored
View File

@@ -133,6 +133,7 @@
/isnanf.c /isnanf.c
/isnanl-nolibm.h /isnanl-nolibm.h
/isnanl.c /isnanl.c
/iswblank.c
/itold.c /itold.c
/ldexpl.c /ldexpl.c
/libc-config.h /libc-config.h
@@ -149,6 +150,10 @@
/math.c /math.c
/math.h /math.h
/math.in.h /math.in.h
/mbchar.c
/mbchar.h
/mbfile.c
/mbfile.h
/mbrtowc.c /mbrtowc.c
/mbsinit.c /mbsinit.c
/mbswidth.c /mbswidth.c

3
m4/.gitignore vendored
View File

@@ -68,6 +68,7 @@
/isnand.m4 /isnand.m4
/isnanf.m4 /isnanf.m4
/isnanl.m4 /isnanl.m4
/iswblank.m4
/javacomp.m4 /javacomp.m4
/javaexec.m4 /javaexec.m4
/largefile.m4 /largefile.m4
@@ -92,6 +93,8 @@
/malloc.m4 /malloc.m4
/malloca.m4 /malloca.m4
/math_h.m4 /math_h.m4
/mbchar.m4
/mbfile.m4
/mbrtowc.m4 /mbrtowc.m4
/mbsinit.m4 /mbsinit.m4
/mbstate_t.m4 /mbstate_t.m4

View File

@@ -21,6 +21,7 @@
#include <config.h> #include <config.h>
#include "system.h" #include "system.h"
#include <mbfile.h>
#include <mbswidth.h> #include <mbswidth.h>
#include <quotearg.h> #include <quotearg.h>
#include <stdio.h> /* fileno */ #include <stdio.h> /* fileno */
@@ -167,6 +168,8 @@ static struct
{ {
/* Raw input file. */ /* Raw input file. */
FILE *file; FILE *file;
/* Input file as a stream of multibyte characters. */
mb_file_t mbfile;
/* The position within the last file we quoted. If POS.FILE is non /* The position within the last file we quoted. If POS.FILE is non
NULL, but FILE is NULL, it means this file is special and should NULL, but FILE is NULL, it means this file is special and should
not be quoted. */ not be quoted. */
@@ -201,7 +204,7 @@ caret_set_file (const char *file)
&& buf.st_mode & S_IFREG) && buf.st_mode & S_IFREG)
{ {
caret_info.pos.line = 1; caret_info.pos.line = 1;
caret_info.offset = 0; mbf_init (caret_info.mbfile, caret_info.file);
} }
else else
caret_free (); caret_free ();
@@ -221,22 +224,23 @@ caret_free (void)
} }
/* Getc, but smash \r\n as \n. */ /* Getc, but smash \r\n as \n. */
static int static void
caret_getc (void) caret_getc_internal (mbchar_t *res)
{ {
FILE *f = caret_info.file; mbf_getc (*res, caret_info.mbfile);
int res = getc (f); if (mb_iseq (*res, '\r'))
if (res == '\r')
{ {
int c = getc (f); mbchar_t c;
if (c == '\n') mbf_getc (c, caret_info.mbfile);
res = c; if (mb_iseq (c, '\n'))
mb_copy (res, &c);
else else
ungetc (c, f); mbf_ungetc (c, caret_info.mbfile);
} }
return res;
} }
#define caret_getc(Var) caret_getc_internal(&Var)
void void
location_caret (location loc, const char *style, FILE *out) location_caret (location loc, const char *style, FILE *out)
{ {
@@ -259,19 +263,25 @@ location_caret (location loc, const char *style, FILE *out)
/* Advance to the line's position, keeping track of the offset. */ /* Advance to the line's position, keeping track of the offset. */
while (caret_info.pos.line < loc.start.line) while (caret_info.pos.line < loc.start.line)
{ {
int c = caret_getc (); mbchar_t c;
if (c == EOF) caret_getc (c);
if (mb_iseof (c))
/* Something is wrong, that line number does not exist. */ /* Something is wrong, that line number does not exist. */
return; return;
caret_info.pos.line += c == '\n'; caret_info.pos.line += mb_iseq (c, '\n');
} }
caret_info.offset = ftell (caret_info.file); caret_info.offset = ftell (caret_info.file);
caret_info.pos.column = 1;
/* Reset mbf's internal state.
FIXME: should be done in mbfile. */
caret_info.mbfile.eof_seen = 0;
/* Read the actual line. Don't update the offset, so that we keep a pointer /* Read the actual line. Don't update the offset, so that we keep a pointer
to the start of the line. */ to the start of the line. */
{ {
int c = caret_getc (); mbchar_t c;
if (c != EOF) caret_getc (c);
if (!mb_iseof (c))
{ {
bool single_line = loc.start.line == loc.end.line; bool single_line = loc.start.line == loc.end.line;
/* Quote the file (at most the first line in the case of /* Quote the file (at most the first line in the case of
@@ -280,29 +290,27 @@ location_caret (location loc, const char *style, FILE *out)
fprintf (out, "%5d | ", loc.start.line); fprintf (out, "%5d | ", loc.start.line);
/* Consider that single point location (with equal boundaries) /* Consider that single point location (with equal boundaries)
actually denote the character that they follow. */ actually denote the character that they follow. */
int byte_end = loc.end.byte + int col_end = loc.end.column +
(single_line && loc.start.byte == loc.end.byte); (single_line && loc.start.column == loc.end.column);
/* Byte number. */
int byte = 1;
/* Whether we opened the style. If the line is not as /* Whether we opened the style. If the line is not as
expected (maybe the file was changed since the scanner expected (maybe the file was changed since the scanner
ran), we might reach the end before we actually saw the ran), we might reach the end before we actually saw the
opening column. */ opening column. */
bool opened = false; bool opened = false;
while (c != EOF && c != '\n') while (!mb_iseof (c) && !mb_iseq (c, '\n'))
{ {
if (byte == loc.start.byte) if (caret_info.pos.column == loc.start.column)
{ {
begin_use_class (style, out); begin_use_class (style, out);
opened = true; opened = true;
} }
fputc (c, out); mb_putc (c, out);
c = caret_getc (); boundary_compute (&caret_info.pos, mb_ptr (c), mb_len (c));
++byte; caret_getc (c);
if (opened if (opened
&& (single_line && (single_line
? byte == byte_end ? caret_info.pos.column == col_end
: c == '\n' || c == EOF)) : mb_iseq (c, '\n') || mb_iseof (c)))
end_use_class (style, out); end_use_class (style, out);
} }
putc ('\n', out); putc ('\n', out);