input: stop treating lone CRs as end-of-lines

We used to treat lone CRs (\r, aka ^M) as regular NLs (\n), probably
to please Classic MacOS.  As of today, it makes more sense to treat \r
like a plain white space character.

https://lists.gnu.org/archive/html/bison-patches/2019-09/msg00027.html

* src/scan-gram.l (no_cr_read): Remove.  Instead, use...
(eol): this new abbreviation denoting end-of-line.
* src/location.c (caret_getc): New.
(location_caret): Use it.
* tests/diagnostics.at (Carriage return): Adjust expectations.
(CR NL): New.
This commit is contained in:
Akim Demaille
2019-09-10 18:51:25 +02:00
parent 5e4133175d
commit 19da501e06
4 changed files with 76 additions and 71 deletions

6
NEWS
View File

@@ -2,6 +2,12 @@ GNU Bison NEWS
* Noteworthy changes in release ?.? (????-??-??) [?]
** Backward incompatible changes
Lone carriage-return characters (aka \r or ^M) in the grammar files are no
longer treated as end-of-lines. This changes the diagnostics, and in
particular their locations.
** Bug fixes
In Java, %define api.prefix was ignored. It now behaves as expected.

View File

@@ -169,7 +169,7 @@ static struct
} caret_info;
void
caret_free ()
caret_free (void)
{
if (caret_info.source)
{
@@ -178,6 +178,23 @@ caret_free ()
}
}
/* Read the next character from caret_info.source, like getc, except
   that a \r immediately followed by \n is reported as a single \n.
   A \r followed by anything else is returned as is, and the
   character read after it is pushed back onto the stream.  */
static int
caret_getc (void)
{
  FILE *src = caret_info.source;
  int ch = getc (src);
  if (ch != '\r')
    return ch;

  /* Look at what follows the \r.  */
  int next = getc (src);
  if (next == '\n')
    return next;

  /* Not a \r\n pair: give the look-ahead back and report the \r.  */
  ungetc (next, src);
  return ch;
}
void
location_caret (location loc, const char *style, FILE *out)
{
@@ -230,7 +247,7 @@ location_caret (location loc, const char *style, FILE *out)
/* Advance to the line's position, keeping track of the offset. */
while (caret_info.line < loc.start.line)
{
int c = getc (caret_info.source);
int c = caret_getc ();
if (c == EOF)
/* Something is wrong, that line number does not exist. */
return;
@@ -241,7 +258,7 @@ location_caret (location loc, const char *style, FILE *out)
/* Read the actual line. Don't update the offset, so that we keep a pointer
to the start of the line. */
{
int c = getc (caret_info.source);
int c = caret_getc ();
if (c != EOF)
{
bool single_line = loc.start.line == loc.end.line;
@@ -268,7 +285,7 @@ location_caret (location loc, const char *style, FILE *out)
opened = true;
}
fputc (c, out);
c = getc (caret_info.source);
c = caret_getc ();
++byte;
if (opened
&& (single_line

View File

@@ -49,9 +49,6 @@ static boundary scanner_cursor;
#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
static size_t no_cr_read (FILE *, char *, size_t);
#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
/* Report that yytext is an extension, and evaluate to its token type. */
#define BISON_DIRECTIVE(Directive) \
(bison_directive (loc, yytext), PERCENT_ ## Directive)
@@ -139,12 +136,14 @@ id {letter}({letter}|[-0-9])*
int [0-9]+
xint 0[xX][0-9abcdefABCDEF]+
eol \n|\r\n
/* UTF-8 Encoded Unicode Code Point, from Flex's documentation. */
mbchar [\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF]([\x80-\xBF]{2})|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x\90-\xBF]([\x80-\xBF]{2})|[\xF1-\xF3]([\x80-\xBF]{3})|\xF4[\x80-\x8F]([\x80-\xBF]{2})
/* Zero or more instances of backslash-newline. Following GCC, allow
white space between the backslash and the newline. */
splice (\\[ \f\t\v]*\n)*
splice (\\[ \f\t\v]*{eol})*
/* An equal sign, with optional leading whitespaces. This is used in some
deprecated constructs. */
@@ -193,7 +192,7 @@ eqopt ({sp}=)?
"," {
complain (loc, Wother, _("stray ',' treated as white space"));
}
[ \f\n\t\v] |
[ \f\t\v\r]|{eol} |
"//".* continue;
"/*" {
token_start = loc->start;
@@ -201,9 +200,7 @@ eqopt ({sp}=)?
BEGIN SC_YACC_COMMENT;
}
/* #line directives are not documented, and may be withdrawn or
modified in future versions of Bison. */
^"#line "{int}(" \"".*"\"")?"\n" {
^"#line "{int}(" \"".*"\"")?{eol} {
handle_syncline (yytext + sizeof "#line " - 1, *loc);
}
}
@@ -329,7 +326,7 @@ eqopt ({sp}=)?
}
/* Semantic predicate. */
"%?"[ \f\n\t\v]*"{" {
"%?"([ \f\t\v]|{eol})*"{" {
nesting = 0;
code_start = loc->start;
BEGIN SC_PREDICATE;
@@ -358,7 +355,7 @@ eqopt ({sp}=)?
BEGIN SC_BRACKETED_ID;
}
[^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\n\t\v]+|. {
[^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\r\n\t\v]+|. {
complain (loc, complaint, "%s: %s",
ngettext ("invalid character", "invalid characters", yyleng),
quote_mem (yytext, yyleng));
@@ -457,7 +454,7 @@ eqopt ({sp}=)?
complain (loc, complaint, _("an identifier expected"));
}
[^\].A-Za-z0-9_/ \f\n\t\v]+|. {
[^\].A-Za-z0-9_/ \f\r\n\t\v]+|. {
complain (loc, complaint, "%s: %s",
ngettext ("invalid character in bracketed name",
"invalid characters in bracketed name", yyleng),
@@ -490,7 +487,7 @@ eqopt ({sp}=)?
<SC_YACC_COMMENT>
{
"*/" BEGIN context_state;
.|\n continue;
.|{eol} continue;
<<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
}
@@ -512,7 +509,7 @@ eqopt ({sp}=)?
<SC_LINE_COMMENT>
{
"\n" STRING_GROW; BEGIN context_state;
{eol} STRING_GROW; BEGIN context_state;
{splice} STRING_GROW;
<<EOF>> BEGIN context_state;
}
@@ -534,7 +531,7 @@ eqopt ({sp}=)?
RETURN_VALUE (STRING, last_string);
}
<<EOF>> unexpected_eof (token_start, "\"");
"\n" unexpected_newline (token_start, "\"");
{eol} unexpected_newline (token_start, "\"");
}
/*----------------------------------------------------------.
@@ -563,7 +560,7 @@ eqopt ({sp}=)?
BEGIN INITIAL;
return CHAR;
}
"\n" unexpected_newline (token_start, "'");
{eol} unexpected_newline (token_start, "'");
<<EOF>> unexpected_eof (token_start, "'");
}
@@ -640,7 +637,7 @@ eqopt ({sp}=)?
else
obstack_1grow (&obstack_for_string, c);
}
\\(.|\n) {
\\(.|{eol}) {
char const *p = yytext + 1;
/* Quote only if escaping won't make the character visible. */
if (c_isspace ((unsigned char) *p) && c_isprint ((unsigned char) *p))
@@ -664,14 +661,14 @@ eqopt ({sp}=)?
<SC_CHARACTER>
{
"'" STRING_GROW; BEGIN context_state;
\n unexpected_newline (token_start, "'");
{eol} unexpected_newline (token_start, "'");
<<EOF>> unexpected_eof (token_start, "'");
}
<SC_STRING>
{
"\"" STRING_GROW; BEGIN context_state;
\n unexpected_newline (token_start, "\"");
{eol} unexpected_newline (token_start, "\"");
<<EOF>> unexpected_eof (token_start, "\"");
}
@@ -808,53 +805,6 @@ eqopt ({sp}=)?
%%
/* Fill BUF with at most SIZE bytes read from FP, and return the number
   of bytes left in BUF once carriage returns have been dealt with:
   every '\r' is stored as '\n', and a '\n' that directly follows a
   '\r' is dropped.  The conversion is done in place.  */
static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  if (nread == 0)
    return nread;

  /* Nothing to rewrite when the chunk holds no '\r' at all.  */
  char *dst = memchr (buf, '\r', nread);
  if (dst == NULL)
    return nread;

  /* DST is the write cursor, SRC the read cursor; both start just
     after the first '\r'.  END is one past the last byte read.  */
  char const *end = buf + nread;
  char const *src = ++dst;

  for (;;)
    {
      /* The byte just before DST is a '\r': turn it into '\n'.  */
      dst[-1] = '\n';

      if (src != end)
        {
          /* Drop the '\n' of a "\r\n" pair.  */
          if (*src == '\n')
            ++src;
        }
      else
        {
          /* The '\r' was the last byte of the chunk: peek into the
             stream so that a '\n' coming right after is swallowed
             too.  Anything else is pushed back; give up if that
             push-back fails.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            return dst - buf;
        }

      /* Shift bytes down until the next '\r' has been copied, or the
         chunk is exhausted.  */
      for (;;)
        {
          if (src == end)
            return dst - buf;
          char ch = *src++;
          *dst++ = ch;
          if (ch == '\r')
            break;
        }
    }
}
/*------------------------------------------------------.
| Scan NUMBER for a base-BASE integer at location LOC. |

View File

@@ -274,11 +274,43 @@ AT_TEST([[Carriage return]],
%%
]],
[1],
[[input.y:37.8-38.0: <error>error:</error> missing '"' at end of line
input.y:37.8-38.0: <error>error:</error> syntax error, unexpected string, expecting char or identifier or <tag>
[[input.y:10.8-11.0: <error>error:</error> missing '"' at end of line
10 | %token <error>"</error>
| <error>^</error>
input.y:10.8-11.0: <error>error:</error> syntax error, unexpected string, expecting char or identifier or <tag>
10 | %token <error>"</error>
| <error>^</error>
]])
## ------- ##
## CR NL. ##
## ------- ##
# Check Windows EOLs.
AT_TEST([[CR NL]],
[[^M
%token ^M FOO^M
%token ^M FOO^M
%%^M
exp:^M
]],
[0],
[[input.y:11.9-11: <warning>warning:</warning> symbol FOO redeclared [<warning>-Wother</warning>]
11 | %token
<warning>FOO</warning>
| <warning>^~~</warning>
input.y:10.9-11: previous declaration
10 | %token
<note>FOO</note>
| <note>^~~</note>
input.y:13.5: <warning>warning:</warning> empty rule without %empty [<warning>-Wempty-rule</warning>]
13 | exp:
| <warning>^</warning>
input.y: <warning>warning:</warning> fix-its can be applied. Rerun with option '--update'. [<warning>-Wother</warning>]
]])