mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-09 12:23:04 +00:00
input: stop treating lone CRs as end-of-lines
We used to treat lone CRs (\r, aka ^M) as regular NLs (\n), probably to
please Classic MacOS.  As of today, it makes more sense to treat \r like
a plain white space character.

https://lists.gnu.org/archive/html/bison-patches/2019-09/msg00027.html

* src/scan-gram.l (no_cr_read): Remove.
Instead, use...
(eol): this new abbreviation denoting end-of-line.
* src/location.c (caret_getc): New.
(location_caret): Use it.
* tests/diagnostics.at (Carriage return): Adjust expectations.
(CR NL): New.
This commit is contained in:
6
NEWS
6
NEWS
@@ -2,6 +2,12 @@ GNU Bison NEWS
|
||||
|
||||
* Noteworthy changes in release ?.? (????-??-??) [?]
|
||||
|
||||
** Backward incompatible changes
|
||||
|
||||
Lone carriage-return characters (aka \r or ^M) in the grammar files are no
|
||||
longer treated as end-of-lines. This changes the diagnostics, and in
|
||||
particular their locations.
|
||||
|
||||
** Bug fixes
|
||||
|
||||
In Java, %define api.prefix was ignored. It now behaves as expected.
|
||||
|
||||
@@ -169,7 +169,7 @@ static struct
|
||||
} caret_info;
|
||||
|
||||
void
|
||||
caret_free ()
|
||||
caret_free (void)
|
||||
{
|
||||
if (caret_info.source)
|
||||
{
|
||||
@@ -178,6 +178,23 @@ caret_free ()
|
||||
}
|
||||
}
|
||||
|
||||
/* Getc, but smash \r\n as \n. */
|
||||
static int
|
||||
caret_getc (void)
|
||||
{
|
||||
FILE *f = caret_info.source;
|
||||
int res = getc (f);
|
||||
if (res == '\r')
|
||||
{
|
||||
int c = getc (f);
|
||||
if (c == '\n')
|
||||
res = c;
|
||||
else
|
||||
ungetc (c, f);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
void
|
||||
location_caret (location loc, const char *style, FILE *out)
|
||||
{
|
||||
@@ -230,7 +247,7 @@ location_caret (location loc, const char *style, FILE *out)
|
||||
/* Advance to the line's position, keeping track of the offset. */
|
||||
while (caret_info.line < loc.start.line)
|
||||
{
|
||||
int c = getc (caret_info.source);
|
||||
int c = caret_getc ();
|
||||
if (c == EOF)
|
||||
/* Something is wrong, that line number does not exist. */
|
||||
return;
|
||||
@@ -241,7 +258,7 @@ location_caret (location loc, const char *style, FILE *out)
|
||||
/* Read the actual line. Don't update the offset, so that we keep a pointer
|
||||
to the start of the line. */
|
||||
{
|
||||
int c = getc (caret_info.source);
|
||||
int c = caret_getc ();
|
||||
if (c != EOF)
|
||||
{
|
||||
bool single_line = loc.start.line == loc.end.line;
|
||||
@@ -268,7 +285,7 @@ location_caret (location loc, const char *style, FILE *out)
|
||||
opened = true;
|
||||
}
|
||||
fputc (c, out);
|
||||
c = getc (caret_info.source);
|
||||
c = caret_getc ();
|
||||
++byte;
|
||||
if (opened
|
||||
&& (single_line
|
||||
|
||||
@@ -49,9 +49,6 @@ static boundary scanner_cursor;
|
||||
|
||||
#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
|
||||
|
||||
static size_t no_cr_read (FILE *, char *, size_t);
|
||||
#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
|
||||
|
||||
/* Report that yytext is an extension, and evaluate to its token type. */
|
||||
#define BISON_DIRECTIVE(Directive) \
|
||||
(bison_directive (loc, yytext), PERCENT_ ## Directive)
|
||||
@@ -139,12 +136,14 @@ id {letter}({letter}|[-0-9])*
|
||||
int [0-9]+
|
||||
xint 0[xX][0-9abcdefABCDEF]+
|
||||
|
||||
eol \n|\r\n
|
||||
|
||||
/* UTF-8 Encoded Unicode Code Point, from Flex's documentation.  After
   a 0xF0 lead byte the second byte must lie in 0x90-0xBF.  */
mbchar    [\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF]([\x80-\xBF]{2})|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF]([\x80-\xBF]{2})|[\xF1-\xF3]([\x80-\xBF]{3})|\xF4[\x80-\x8F]([\x80-\xBF]{2})
|
||||
|
||||
/* Zero or more instances of backslash-newline. Following GCC, allow
|
||||
white space between the backslash and the newline. */
|
||||
splice (\\[ \f\t\v]*\n)*
|
||||
splice (\\[ \f\t\v]*{eol})*
|
||||
|
||||
/* An equal sign, with optional leading whitespaces. This is used in some
|
||||
deprecated constructs. */
|
||||
@@ -193,7 +192,7 @@ eqopt ({sp}=)?
|
||||
"," {
|
||||
complain (loc, Wother, _("stray ',' treated as white space"));
|
||||
}
|
||||
[ \f\n\t\v] |
|
||||
[ \f\t\v\r]|{eol} |
|
||||
"//".* continue;
|
||||
"/*" {
|
||||
token_start = loc->start;
|
||||
@@ -201,9 +200,7 @@ eqopt ({sp}=)?
|
||||
BEGIN SC_YACC_COMMENT;
|
||||
}
|
||||
|
||||
/* #line directives are not documented, and may be withdrawn or
|
||||
modified in future versions of Bison. */
|
||||
^"#line "{int}(" \"".*"\"")?"\n" {
|
||||
^"#line "{int}(" \"".*"\"")?{eol} {
|
||||
handle_syncline (yytext + sizeof "#line " - 1, *loc);
|
||||
}
|
||||
}
|
||||
@@ -329,7 +326,7 @@ eqopt ({sp}=)?
|
||||
}
|
||||
|
||||
/* Semantic predicate. */
|
||||
"%?"[ \f\n\t\v]*"{" {
|
||||
"%?"([ \f\t\v]|{eol})*"{" {
|
||||
nesting = 0;
|
||||
code_start = loc->start;
|
||||
BEGIN SC_PREDICATE;
|
||||
@@ -358,7 +355,7 @@ eqopt ({sp}=)?
|
||||
BEGIN SC_BRACKETED_ID;
|
||||
}
|
||||
|
||||
[^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\n\t\v]+|. {
|
||||
[^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\r\n\t\v]+|. {
|
||||
complain (loc, complaint, "%s: %s",
|
||||
ngettext ("invalid character", "invalid characters", yyleng),
|
||||
quote_mem (yytext, yyleng));
|
||||
@@ -457,7 +454,7 @@ eqopt ({sp}=)?
|
||||
complain (loc, complaint, _("an identifier expected"));
|
||||
}
|
||||
|
||||
[^\].A-Za-z0-9_/ \f\n\t\v]+|. {
|
||||
[^\].A-Za-z0-9_/ \f\r\n\t\v]+|. {
|
||||
complain (loc, complaint, "%s: %s",
|
||||
ngettext ("invalid character in bracketed name",
|
||||
"invalid characters in bracketed name", yyleng),
|
||||
@@ -490,7 +487,7 @@ eqopt ({sp}=)?
|
||||
<SC_YACC_COMMENT>
|
||||
{
|
||||
"*/" BEGIN context_state;
|
||||
.|\n continue;
|
||||
.|{eol} continue;
|
||||
<<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
|
||||
}
|
||||
|
||||
@@ -512,7 +509,7 @@ eqopt ({sp}=)?
|
||||
|
||||
<SC_LINE_COMMENT>
|
||||
{
|
||||
"\n" STRING_GROW; BEGIN context_state;
|
||||
{eol} STRING_GROW; BEGIN context_state;
|
||||
{splice} STRING_GROW;
|
||||
<<EOF>> BEGIN context_state;
|
||||
}
|
||||
@@ -534,7 +531,7 @@ eqopt ({sp}=)?
|
||||
RETURN_VALUE (STRING, last_string);
|
||||
}
|
||||
<<EOF>> unexpected_eof (token_start, "\"");
|
||||
"\n" unexpected_newline (token_start, "\"");
|
||||
{eol} unexpected_newline (token_start, "\"");
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------.
|
||||
@@ -563,7 +560,7 @@ eqopt ({sp}=)?
|
||||
BEGIN INITIAL;
|
||||
return CHAR;
|
||||
}
|
||||
"\n" unexpected_newline (token_start, "'");
|
||||
{eol} unexpected_newline (token_start, "'");
|
||||
<<EOF>> unexpected_eof (token_start, "'");
|
||||
}
|
||||
|
||||
@@ -640,7 +637,7 @@ eqopt ({sp}=)?
|
||||
else
|
||||
obstack_1grow (&obstack_for_string, c);
|
||||
}
|
||||
\\(.|\n) {
|
||||
\\(.|{eol}) {
|
||||
char const *p = yytext + 1;
|
||||
/* Quote only if escaping won't make the character visible. */
|
||||
if (c_isspace ((unsigned char) *p) && c_isprint ((unsigned char) *p))
|
||||
@@ -664,14 +661,14 @@ eqopt ({sp}=)?
|
||||
<SC_CHARACTER>
|
||||
{
|
||||
"'" STRING_GROW; BEGIN context_state;
|
||||
\n unexpected_newline (token_start, "'");
|
||||
{eol} unexpected_newline (token_start, "'");
|
||||
<<EOF>> unexpected_eof (token_start, "'");
|
||||
}
|
||||
|
||||
<SC_STRING>
|
||||
{
|
||||
"\"" STRING_GROW; BEGIN context_state;
|
||||
\n unexpected_newline (token_start, "\"");
|
||||
{eol} unexpected_newline (token_start, "\"");
|
||||
<<EOF>> unexpected_eof (token_start, "\"");
|
||||
}
|
||||
|
||||
@@ -808,53 +805,6 @@ eqopt ({sp}=)?
|
||||
|
||||
%%
|
||||
|
||||
/* Fill BUF with at most SIZE bytes read from FP, and return the number
   of bytes BUF holds once carriage returns have been processed: every
   '\r' is overwritten with '\n', and a '\n' that immediately follows a
   '\r' is dropped, so both "\r\n" and an isolated '\r' end up as a
   single '\n'.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const nread = fread (buf, 1, size, fp);
  if (nread == 0)
    return nread;

  char *first_cr = memchr (buf, '\r', nread);
  if (first_cr == NULL)
    return nread;

  char const *const end = buf + nread;
  /* OUT is the next byte to write, IN the next byte to read.  Both
     start right after the first '\r'; OUT then lags behind IN by one
     byte for each '\n' that gets dropped.  */
  char *out = first_cr + 1;
  char const *in = out;

  for (;;)
    {
      /* The byte just before OUT is a '\r': turn it into '\n'.  */
      out[-1] = '\n';

      /* Swallow a '\n' that directly follows that '\r'.  When the '\r'
         was the last byte of the buffer, the candidate '\n' is still
         in the stream: read it, and push it back if it is something
         else.  */
      if (in != end)
        {
          if (*in == '\n')
            in++;
        }
      else
        {
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            break;
        }

      /* Copy bytes down until another '\r' has been copied, or until
         the input is exhausted.  */
      for (;;)
        {
          if (in == end)
            return out - buf;
          if ((*out++ = *in++) == '\r')
            break;
        }
    }

  return out - buf;
}
|
||||
|
||||
|
||||
|
||||
/*------------------------------------------------------.
|
||||
| Scan NUMBER for a base-BASE integer at location LOC. |
|
||||
|
||||
@@ -274,11 +274,43 @@ AT_TEST([[Carriage return]],
|
||||
%%
|
||||
]],
|
||||
[1],
|
||||
[[input.y:37.8-38.0: <error>error:</error> missing '"' at end of line
|
||||
input.y:37.8-38.0: <error>error:</error> syntax error, unexpected string, expecting char or identifier or <tag>
|
||||
[[input.y:10.8-11.0: <error>error:</error> missing '"' at end of line
|
||||
10 | %token <error>"</error>
|
||||
| <error>^</error>
|
||||
input.y:10.8-11.0: <error>error:</error> syntax error, unexpected string, expecting char or identifier or <tag>
|
||||
10 | %token <error>"</error>
|
||||
| <error>^</error>
|
||||
]])
|
||||
|
||||
|
||||
## ------- ##
|
||||
## CR NL. ##
|
||||
## ------- ##
|
||||
|
||||
# Check Windows EOLs.
|
||||
|
||||
AT_TEST([[CR NL]],
|
||||
[[^M
|
||||
%token ^M FOO^M
|
||||
%token ^M FOO^M
|
||||
%%^M
|
||||
exp:^M
|
||||
]],
|
||||
[0],
|
||||
[[input.y:11.9-11: <warning>warning:</warning> symbol FOO redeclared [<warning>-Wother</warning>]
|
||||
11 | %token
|
||||
<warning>FOO</warning>
|
||||
| <warning>^~~</warning>
|
||||
input.y:10.9-11: previous declaration
|
||||
10 | %token
|
||||
<note>FOO</note>
|
||||
| <note>^~~</note>
|
||||
input.y:13.5: <warning>warning:</warning> empty rule without %empty [<warning>-Wempty-rule</warning>]
|
||||
13 | exp:
|
||||
| <warning>^</warning>
|
||||
input.y: <warning>warning:</warning> fix-its can be applied. Rerun with option '--update'. [<warning>-Wother</warning>]
|
||||
]])
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user