lalr1, yacc: use the default location as initial error location

Currently lalr1.cc makes an out-of-bounds access when trying to read @1
in rules with an empty rhs (i.e., when there is no @1) that raise an
error (YYERROR).

glr.c already gracefully handles this by using @$ as initial location
for the errors.  Let's do that in yacc.c and lalr1.cc.

* data/lalr1.cc, data/yacc.c: Use @$ to initialize the error location.
* tests/actions.at: Check that case.
This commit is contained in:
Akim Demaille
2015-08-11 13:48:57 +02:00
parent 55fb68aabf
commit 476c1cca59
4 changed files with 97 additions and 12 deletions

11
NEWS
View File

@@ -2,6 +2,17 @@ GNU Bison NEWS
* Noteworthy changes in release ?.? (????-??-??) [?]
** Bug fixes
*** Location of errors
In C++ parsers, out-of-bounds errors can happen when a rule with an empty
right-hand side raises a syntax error. The behavior of the default parser
(yacc.c) in such a condition was undefined.
Now all the parsers match the behavior of glr.c: @$ is used as the
location of the error. This handles gracefully rules with and without
rhs.
* Noteworthy changes in release 3.0.4 (2015-01-23) [stable]

View File

@@ -848,6 +848,7 @@ b4_dollar_popdef])[]dnl
{
slice<stack_symbol_type, stack_type> slice (yystack_, yylen);
YYLLOC_DEFAULT (yylhs.location, slice, yylen);
yyerror_range[1].location = yylhs.location;
}]])[
// Perform the reduction.
@@ -918,8 +919,7 @@ b4_dollar_popdef])[]dnl
YYERROR and the label yyerrorlab therefore never appears in user
code. */
if (false)
goto yyerrorlab;]b4_locations_if([[
yyerror_range[1].location = yystack_[yylen - 1].location;]])[
goto yyerrorlab;
/* Do not reclaim the symbols of the rule whose action triggered
this YYERROR. */
yypop_ (yylen);

View File

@@ -1641,8 +1641,9 @@ yyreduce:
yyval = yyvsp[1-yylen];
]b4_locations_if(
[[ /* Default location. */
YYLLOC_DEFAULT (yyloc, (yylsp - yylen), yylen);]])[
[[ /* Default location. */
YYLLOC_DEFAULT (yyloc, (yylsp - yylen), yylen);
yyerror_range[1] = yyloc;]])[
YY_REDUCE_PRINT (yyn);]b4_lac_if([[
{
int yychar_backup = yychar;
@@ -1782,8 +1783,7 @@ yyerrorlab:
if (/*CONSTCOND*/ 0)
goto yyerrorlab;
]b4_locations_if([[ yyerror_range[1] = yylsp[1-yylen];
]])[ /* Do not reclaim the symbols of the rule whose action triggered
/* Do not reclaim the symbols of the rule whose action triggered
this YYERROR. */
YYPOPSTACK (yylen);
yylen = 0;

View File

@@ -475,7 +475,7 @@ AT_DATA_GRAMMAR([[input.y]],
/* Display the symbol type Symbol. */
#define V(Symbol, Value, Location, Sep) \
fprintf (stderr, #Symbol " (%d@%d-%d)" Sep, Value, RANGE(Location))
fprintf (stderr, #Symbol " (%d@%d-%d)%s", Value, RANGE(Location), Sep)
}
$5
@@ -490,15 +490,17 @@ AT_LALR1_CC_IF([typedef yy::location YYLTYPE;])[
]AT_LALR1_CC_IF([], [AT_YYERROR_DECLARE])
[}
]m4_ifval([$6], [%type <ival> '(' 'x' 'y' ')' ';' thing line input END])[
]m4_ifval([$6],
[%type <ival> '(' 'x' 'y' ')' ';' thing line input
'!' raise check-spontaneous-errors END])[
/* FIXME: This %printer isn't actually tested. */
%printer
{
]AT_LALR1_CC_IF([debug_stream () << $$;],
[fprintf (yyoutput, "%d", $$)])[;
]AT_LALR1_CC_IF([yyo << $$;],
[fprintf (yyo, "%d", $$)])[;
}
input line thing 'x' 'y'
'(' 'x' 'y' ')' ';' thing line input '!' raise check-spontaneous-errors END
%destructor
{ fprintf (stderr, "Freeing nterm input (%d@%d-%d)\n", $$, RANGE (@$)); }
@@ -512,6 +514,14 @@ AT_LALR1_CC_IF([typedef yy::location YYLTYPE;])[
{ fprintf (stderr, "Freeing nterm thing (%d@%d-%d)\n", $$, RANGE (@$)); }
thing
%destructor
{ fprintf (stderr, "Freeing raise thing (%d@%d-%d)\n", $$, RANGE (@$)); }
raise
%destructor
{ fprintf (stderr, "Freeing check-spontaneous-errors thing (%d@%d-%d)\n", $$, RANGE (@$)); }
check-spontaneous-errors
%destructor
{ fprintf (stderr, "Freeing token 'x' (%d@%d-%d)\n", $$, RANGE (@$)); }
'x'
@@ -534,7 +544,7 @@ AT_LALR1_CC_IF([typedef yy::location YYLTYPE;])[
*/
input:
/* Nothing. */
%empty
{
$$ = 0;
V(input, $$, @$, ": /* Nothing */\n");
@@ -547,6 +557,38 @@ input:
V(line, $1, @1, " ");
V(input, $2, @2, "\n");
}
| '!' check-spontaneous-errors
{
$$ = $2;
}
;
/* Exercises YYERROR raised from within a reduction.  The two
   alternatives that go through raise call abort if their action is
   ever run, so only the error alternative is expected to complete.  */
check-spontaneous-errors:
raise { abort(); $$ = $1; }
| '(' raise ')' { abort(); $$ = $2; }
| error
{
$$ = 5;
/* Print the value and location of this symbol, then the location of
   the error token on the same line.  */
V(check-spontaneous-errors, $$, @$, ": ");
fprintf (stderr, "error (@%d-%d)\n", RANGE(@1));
}
;
/* Rules whose actions always end with YYERROR: one with an empty
   right-hand side and one with two symbols.  */
raise:
%empty
{
$$ = 4;
V(raise, $$, @$, ": %empty\n");
/* Raise the error from a rule that has no right-hand side symbol.  */
YYERROR;
}
| '!' '!'
{
$$ = 5;
V(raise, $$, @$, ": ");
/* NOTE(review): the next line prints the value of the first symbol
   with the location of the second one; the expected output of this
   test shows the same location twice -- confirm this is intended.  */
V(!, $1, @2, " ");
V(!, $2, @2, "\n");
YYERROR;
}
;
line:
@@ -668,6 +710,38 @@ Freeing nterm input (2@0-29)
Successful parse.
]])
# Check the location of empty reductions raising an error
# -------------------------------------------------------
# Here, the error is after token "!@0-9", so the error is raised from
# @9-9, and the error recovery detects that it starts from @9-9 and
# ends where the next token starts: END@10-19.
#
# So error recovery reports error@9-19.
AT_PARSER_CHECK([./input '!'], 0, [],
[[sending: '!' (0@0-9)
sending: END (1@10-19)
raise (4@9-9): %empty
check-spontaneous-errors (5@9-19): error (@9-19)
Freeing token END (1@10-19)
Freeing nterm input (5@0-19)
Successful parse.
]])
# Check the location of non-empty reductions raising an error
# -----------------------------------------------------------
# This time the error is raised from a rule with 2 rhs symbols: @10-29.
# It is recovered @10-29.
AT_PARSER_CHECK([[./input '!!!']], 0, [],
[[sending: '!' (0@0-9)
sending: '!' (1@10-19)
sending: '!' (2@20-29)
raise (5@10-29): ! (1@20-29) ! (2@20-29)
check-spontaneous-errors (5@10-29): error (@10-29)
sending: END (3@30-39)
Freeing token END (3@30-39)
Freeing nterm input (5@0-29)
Successful parse.
]])
# Check locations in error recovery
# ---------------------------------