mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-21 02:03:03 +00:00
input: stop treating lone CRs as end-of-lines
We used to treat lone CRs (\r, aka ^M) as regular NLs (\n), probably to please Classic MacOS. As of today, it makes more sense to treat \r like a plain white space character. https://lists.gnu.org/archive/html/bison-patches/2019-09/msg00027.html * src/scan-gram.l (no_cr_read): Remove. Instead, use... (eol): this new abbreviation denoting end-of-line. * src/location.c (caret_getc): New. (location_caret): Use it. * tests/diagnostics.at (Carriage return): Adjust expectations. (CR NL): New.
This commit is contained in:
6
NEWS
6
NEWS
@@ -2,6 +2,12 @@ GNU Bison NEWS
|
|||||||
|
|
||||||
* Noteworthy changes in release ?.? (????-??-??) [?]
|
* Noteworthy changes in release ?.? (????-??-??) [?]
|
||||||
|
|
||||||
|
** Backward incompatible changes
|
||||||
|
|
||||||
|
Lone carriage-return characters (aka \r or ^M) in the grammar files are no
|
||||||
|
longer treated as end-of-lines. This changes the diagnostics, and in
|
||||||
|
particular their locations.
|
||||||
|
|
||||||
** Bug fixes
|
** Bug fixes
|
||||||
|
|
||||||
In Java, %define api.prefix was ignored. It now behaves as expected.
|
In Java, %define api.prefix was ignored. It now behaves as expected.
|
||||||
|
|||||||
@@ -169,7 +169,7 @@ static struct
|
|||||||
} caret_info;
|
} caret_info;
|
||||||
|
|
||||||
void
|
void
|
||||||
caret_free ()
|
caret_free (void)
|
||||||
{
|
{
|
||||||
if (caret_info.source)
|
if (caret_info.source)
|
||||||
{
|
{
|
||||||
@@ -178,6 +178,23 @@ caret_free ()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Read one character from caret_info.source, like getc, but smash \r\n as \n: a CR directly followed by NL is returned as a single '\n', while a CR followed by anything else is returned unchanged. */
|
||||||
|
static int
|
||||||
|
caret_getc (void)
|
||||||
|
{
|
||||||
|
FILE *f = caret_info.source;
|
||||||
|
int res = getc (f);
|
||||||
|
if (res == '\r')
|
||||||
|
{
|
||||||
|
int c = getc (f); /* Look at the character that follows the CR. */
|
||||||
|
if (c == '\n')
|
||||||
|
res = c; /* CR NL pair: report only the NL. */
|
||||||
|
else
|
||||||
|
ungetc (c, f); /* Lone CR: push the lookahead back for the next read. */
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
location_caret (location loc, const char *style, FILE *out)
|
location_caret (location loc, const char *style, FILE *out)
|
||||||
{
|
{
|
||||||
@@ -230,7 +247,7 @@ location_caret (location loc, const char *style, FILE *out)
|
|||||||
/* Advance to the line's position, keeping track of the offset. */
|
/* Advance to the line's position, keeping track of the offset. */
|
||||||
while (caret_info.line < loc.start.line)
|
while (caret_info.line < loc.start.line)
|
||||||
{
|
{
|
||||||
int c = getc (caret_info.source);
|
int c = caret_getc ();
|
||||||
if (c == EOF)
|
if (c == EOF)
|
||||||
/* Something is wrong, that line number does not exist. */
|
/* Something is wrong, that line number does not exist. */
|
||||||
return;
|
return;
|
||||||
@@ -241,7 +258,7 @@ location_caret (location loc, const char *style, FILE *out)
|
|||||||
/* Read the actual line. Don't update the offset, so that we keep a pointer
|
/* Read the actual line. Don't update the offset, so that we keep a pointer
|
||||||
to the start of the line. */
|
to the start of the line. */
|
||||||
{
|
{
|
||||||
int c = getc (caret_info.source);
|
int c = caret_getc ();
|
||||||
if (c != EOF)
|
if (c != EOF)
|
||||||
{
|
{
|
||||||
bool single_line = loc.start.line == loc.end.line;
|
bool single_line = loc.start.line == loc.end.line;
|
||||||
@@ -268,7 +285,7 @@ location_caret (location loc, const char *style, FILE *out)
|
|||||||
opened = true;
|
opened = true;
|
||||||
}
|
}
|
||||||
fputc (c, out);
|
fputc (c, out);
|
||||||
c = getc (caret_info.source);
|
c = caret_getc ();
|
||||||
++byte;
|
++byte;
|
||||||
if (opened
|
if (opened
|
||||||
&& (single_line
|
&& (single_line
|
||||||
|
|||||||
@@ -49,9 +49,6 @@ static boundary scanner_cursor;
|
|||||||
|
|
||||||
#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
|
#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
|
||||||
|
|
||||||
static size_t no_cr_read (FILE *, char *, size_t);
|
|
||||||
#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
|
|
||||||
|
|
||||||
/* Report that yytext is an extension, and evaluate to its token type. */
|
/* Report that yytext is an extension, and evaluate to its token type. */
|
||||||
#define BISON_DIRECTIVE(Directive) \
|
#define BISON_DIRECTIVE(Directive) \
|
||||||
(bison_directive (loc, yytext), PERCENT_ ## Directive)
|
(bison_directive (loc, yytext), PERCENT_ ## Directive)
|
||||||
@@ -139,12 +136,14 @@ id {letter}({letter}|[-0-9])*
|
|||||||
int [0-9]+
|
int [0-9]+
|
||||||
xint 0[xX][0-9abcdefABCDEF]+
|
xint 0[xX][0-9abcdefABCDEF]+
|
||||||
|
|
||||||
|
eol \n|\r\n
|
||||||
|
|
||||||
/* UTF-8 Encoded Unicode Code Point, from Flex's documentation. */
|
/* UTF-8 Encoded Unicode Code Point, from Flex's documentation. */
|
||||||
mbchar [\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF]([\x80-\xBF]{2})|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF]([\x80-\xBF]{2})|[\xF1-\xF3]([\x80-\xBF]{3})|\xF4[\x80-\x8F]([\x80-\xBF]{2})
|
mbchar [\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF]([\x80-\xBF]{2})|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF]([\x80-\xBF]{2})|[\xF1-\xF3]([\x80-\xBF]{3})|\xF4[\x80-\x8F]([\x80-\xBF]{2})
|
||||||
|
|
||||||
/* Zero or more instances of backslash-newline. Following GCC, allow
|
/* Zero or more instances of backslash-newline. Following GCC, allow
|
||||||
white space between the backslash and the newline. */
|
white space between the backslash and the newline. */
|
||||||
splice (\\[ \f\t\v]*\n)*
|
splice (\\[ \f\t\v]*{eol})*
|
||||||
|
|
||||||
/* An equal sign, with optional leading whitespaces. This is used in some
|
/* An equal sign, with optional leading whitespaces. This is used in some
|
||||||
deprecated constructs. */
|
deprecated constructs. */
|
||||||
@@ -193,7 +192,7 @@ eqopt ({sp}=)?
|
|||||||
"," {
|
"," {
|
||||||
complain (loc, Wother, _("stray ',' treated as white space"));
|
complain (loc, Wother, _("stray ',' treated as white space"));
|
||||||
}
|
}
|
||||||
[ \f\n\t\v] |
|
[ \f\t\v\r]|{eol} |
|
||||||
"//".* continue;
|
"//".* continue;
|
||||||
"/*" {
|
"/*" {
|
||||||
token_start = loc->start;
|
token_start = loc->start;
|
||||||
@@ -201,9 +200,7 @@ eqopt ({sp}=)?
|
|||||||
BEGIN SC_YACC_COMMENT;
|
BEGIN SC_YACC_COMMENT;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* #line directives are not documented, and may be withdrawn or
|
^"#line "{int}(" \"".*"\"")?{eol} {
|
||||||
modified in future versions of Bison. */
|
|
||||||
^"#line "{int}(" \"".*"\"")?"\n" {
|
|
||||||
handle_syncline (yytext + sizeof "#line " - 1, *loc);
|
handle_syncline (yytext + sizeof "#line " - 1, *loc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -329,7 +326,7 @@ eqopt ({sp}=)?
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Semantic predicate. */
|
/* Semantic predicate. */
|
||||||
"%?"[ \f\n\t\v]*"{" {
|
"%?"([ \f\t\v]|{eol})*"{" {
|
||||||
nesting = 0;
|
nesting = 0;
|
||||||
code_start = loc->start;
|
code_start = loc->start;
|
||||||
BEGIN SC_PREDICATE;
|
BEGIN SC_PREDICATE;
|
||||||
@@ -358,7 +355,7 @@ eqopt ({sp}=)?
|
|||||||
BEGIN SC_BRACKETED_ID;
|
BEGIN SC_BRACKETED_ID;
|
||||||
}
|
}
|
||||||
|
|
||||||
[^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\n\t\v]+|. {
|
[^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\r\n\t\v]+|. {
|
||||||
complain (loc, complaint, "%s: %s",
|
complain (loc, complaint, "%s: %s",
|
||||||
ngettext ("invalid character", "invalid characters", yyleng),
|
ngettext ("invalid character", "invalid characters", yyleng),
|
||||||
quote_mem (yytext, yyleng));
|
quote_mem (yytext, yyleng));
|
||||||
@@ -457,7 +454,7 @@ eqopt ({sp}=)?
|
|||||||
complain (loc, complaint, _("an identifier expected"));
|
complain (loc, complaint, _("an identifier expected"));
|
||||||
}
|
}
|
||||||
|
|
||||||
[^\].A-Za-z0-9_/ \f\n\t\v]+|. {
|
[^\].A-Za-z0-9_/ \f\r\n\t\v]+|. {
|
||||||
complain (loc, complaint, "%s: %s",
|
complain (loc, complaint, "%s: %s",
|
||||||
ngettext ("invalid character in bracketed name",
|
ngettext ("invalid character in bracketed name",
|
||||||
"invalid characters in bracketed name", yyleng),
|
"invalid characters in bracketed name", yyleng),
|
||||||
@@ -490,7 +487,7 @@ eqopt ({sp}=)?
|
|||||||
<SC_YACC_COMMENT>
|
<SC_YACC_COMMENT>
|
||||||
{
|
{
|
||||||
"*/" BEGIN context_state;
|
"*/" BEGIN context_state;
|
||||||
.|\n continue;
|
.|{eol} continue;
|
||||||
<<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
|
<<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -512,7 +509,7 @@ eqopt ({sp}=)?
|
|||||||
|
|
||||||
<SC_LINE_COMMENT>
|
<SC_LINE_COMMENT>
|
||||||
{
|
{
|
||||||
"\n" STRING_GROW; BEGIN context_state;
|
{eol} STRING_GROW; BEGIN context_state;
|
||||||
{splice} STRING_GROW;
|
{splice} STRING_GROW;
|
||||||
<<EOF>> BEGIN context_state;
|
<<EOF>> BEGIN context_state;
|
||||||
}
|
}
|
||||||
@@ -534,7 +531,7 @@ eqopt ({sp}=)?
|
|||||||
RETURN_VALUE (STRING, last_string);
|
RETURN_VALUE (STRING, last_string);
|
||||||
}
|
}
|
||||||
<<EOF>> unexpected_eof (token_start, "\"");
|
<<EOF>> unexpected_eof (token_start, "\"");
|
||||||
"\n" unexpected_newline (token_start, "\"");
|
{eol} unexpected_newline (token_start, "\"");
|
||||||
}
|
}
|
||||||
|
|
||||||
/*----------------------------------------------------------.
|
/*----------------------------------------------------------.
|
||||||
@@ -563,7 +560,7 @@ eqopt ({sp}=)?
|
|||||||
BEGIN INITIAL;
|
BEGIN INITIAL;
|
||||||
return CHAR;
|
return CHAR;
|
||||||
}
|
}
|
||||||
"\n" unexpected_newline (token_start, "'");
|
{eol} unexpected_newline (token_start, "'");
|
||||||
<<EOF>> unexpected_eof (token_start, "'");
|
<<EOF>> unexpected_eof (token_start, "'");
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -640,7 +637,7 @@ eqopt ({sp}=)?
|
|||||||
else
|
else
|
||||||
obstack_1grow (&obstack_for_string, c);
|
obstack_1grow (&obstack_for_string, c);
|
||||||
}
|
}
|
||||||
\\(.|\n) {
|
\\(.|{eol}) {
|
||||||
char const *p = yytext + 1;
|
char const *p = yytext + 1;
|
||||||
/* Quote only if escaping won't make the character visible. */
|
/* Quote only if escaping won't make the character visible. */
|
||||||
if (c_isspace ((unsigned char) *p) && c_isprint ((unsigned char) *p))
|
if (c_isspace ((unsigned char) *p) && c_isprint ((unsigned char) *p))
|
||||||
@@ -664,14 +661,14 @@ eqopt ({sp}=)?
|
|||||||
<SC_CHARACTER>
|
<SC_CHARACTER>
|
||||||
{
|
{
|
||||||
"'" STRING_GROW; BEGIN context_state;
|
"'" STRING_GROW; BEGIN context_state;
|
||||||
\n unexpected_newline (token_start, "'");
|
{eol} unexpected_newline (token_start, "'");
|
||||||
<<EOF>> unexpected_eof (token_start, "'");
|
<<EOF>> unexpected_eof (token_start, "'");
|
||||||
}
|
}
|
||||||
|
|
||||||
<SC_STRING>
|
<SC_STRING>
|
||||||
{
|
{
|
||||||
"\"" STRING_GROW; BEGIN context_state;
|
"\"" STRING_GROW; BEGIN context_state;
|
||||||
\n unexpected_newline (token_start, "\"");
|
{eol} unexpected_newline (token_start, "\"");
|
||||||
<<EOF>> unexpected_eof (token_start, "\"");
|
<<EOF>> unexpected_eof (token_start, "\"");
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -808,53 +805,6 @@ eqopt ({sp}=)?
|
|||||||
|
|
||||||
%%
|
%%
|
||||||
|
|
||||||
/* Read bytes from FP into buffer BUF of size SIZE. Return the
|
|
||||||
number of bytes read. Remove '\r' from input, treating \r\n
|
|
||||||
and isolated \r as \n. */
|
|
||||||
|
|
||||||
static size_t
|
|
||||||
no_cr_read (FILE *fp, char *buf, size_t size)
|
|
||||||
{
|
|
||||||
size_t bytes_read = fread (buf, 1, size, fp);
|
|
||||||
if (bytes_read)
|
|
||||||
{
|
|
||||||
char *w = memchr (buf, '\r', bytes_read); /* First CR in the data, if any. */
|
|
||||||
if (w)
|
|
||||||
{
|
|
||||||
char const *r = ++w; /* W is the write cursor, R the read cursor; both start just past the CR. */
|
|
||||||
char const *lim = buf + bytes_read; /* One past the last byte read. */
|
|
||||||
|
|
||||||
for (;;)
|
|
||||||
{
|
|
||||||
/* Found an '\r'. Treat it like '\n', but ignore any
|
|
||||||
'\n' that immediately follows. */
|
|
||||||
w[-1] = '\n';
|
|
||||||
if (r == lim)
|
|
||||||
{
|
|
||||||
int ch = getc (fp); /* The CR was the last byte read: look in the stream for its NL. */
|
|
||||||
if (ch != '\n' && ungetc (ch, fp) != ch) /* A NL is dropped; anything else is pushed back, and we stop if that fails. */
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
else if (*r == '\n')
|
|
||||||
r++; /* Skip the NL of a CR NL pair still in the buffer. */
|
|
||||||
|
|
||||||
/* Copy until the next '\r'. */
|
|
||||||
do
|
|
||||||
{
|
|
||||||
if (r == lim)
|
|
||||||
return w - buf; /* All data consumed: W - BUF bytes remain after compaction. */
|
|
||||||
}
|
|
||||||
while ((*w++ = *r++) != '\r');
|
|
||||||
}
|
|
||||||
|
|
||||||
return w - buf;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return bytes_read; /* Nothing read, or no CR in the data: the buffer is left as read. */
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*------------------------------------------------------.
|
/*------------------------------------------------------.
|
||||||
| Scan NUMBER for a base-BASE integer at location LOC. |
|
| Scan NUMBER for a base-BASE integer at location LOC. |
|
||||||
|
|||||||
@@ -274,11 +274,43 @@ AT_TEST([[Carriage return]],
|
|||||||
%%
|
%%
|
||||||
]],
|
]],
|
||||||
[1],
|
[1],
|
||||||
[[input.y:37.8-38.0: <error>error:</error> missing '"' at end of line
|
[[input.y:10.8-11.0: <error>error:</error> missing '"' at end of line
|
||||||
input.y:37.8-38.0: <error>error:</error> syntax error, unexpected string, expecting char or identifier or <tag>
|
10 | %token <error>"</error>
|
||||||
|
| <error>^</error>
|
||||||
|
input.y:10.8-11.0: <error>error:</error> syntax error, unexpected string, expecting char or identifier or <tag>
|
||||||
|
10 | %token <error>"</error>
|
||||||
|
| <error>^</error>
|
||||||
]])
|
]])
|
||||||
|
|
||||||
|
|
||||||
|
## ------- ##
|
||||||
|
## CR NL. ##
|
||||||
|
## ------- ##
|
||||||
|
|
||||||
|
# Check Windows EOLs.
|
||||||
|
|
||||||
|
AT_TEST([[CR NL]],
|
||||||
|
[[^M
|
||||||
|
%token ^M FOO^M
|
||||||
|
%token ^M FOO^M
|
||||||
|
%%^M
|
||||||
|
exp:^M
|
||||||
|
]],
|
||||||
|
[0],
|
||||||
|
[[input.y:11.9-11: <warning>warning:</warning> symbol FOO redeclared [<warning>-Wother</warning>]
|
||||||
|
11 | %token
|
||||||
|
<warning>FOO</warning>
|
||||||
|
| <warning>^~~</warning>
|
||||||
|
input.y:10.9-11: previous declaration
|
||||||
|
10 | %token
|
||||||
|
<note>FOO</note>
|
||||||
|
| <note>^~~</note>
|
||||||
|
input.y:13.5: <warning>warning:</warning> empty rule without %empty [<warning>-Wempty-rule</warning>]
|
||||||
|
13 | exp:
|
||||||
|
| <warning>^</warning>
|
||||||
|
input.y: <warning>warning:</warning> fix-its can be applied. Rerun with option '--update'. [<warning>-Wother</warning>]
|
||||||
|
]])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user