mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-09 12:23:04 +00:00
lalr1.cc: add LAC support
Implement lookahead correction (LAC) for the C++ skeleton. LAC is a mechanism to make sure that we report the correct list of expected tokens if a syntax error occurs. So far, LAC was only supported for the C skeleton "yacc.c". * data/skeletons/lalr1.cc: Add LAC support. * doc/bison.texi: Update.
This commit is contained in:
committed by
Akim Demaille
parent
996abe62d7
commit
72d4ae5306
@@ -20,6 +20,14 @@ m4_include(b4_skeletonsdir/[c++.m4])
|
|||||||
# api.value.type=variant is valid.
|
# api.value.type=variant is valid.
|
||||||
m4_define([b4_value_type_setup_variant])
|
m4_define([b4_value_type_setup_variant])
|
||||||
|
|
||||||
|
# Check the value of %define parse.lac, where LAC stands for lookahead
|
||||||
|
# correction.
|
||||||
|
b4_percent_define_default([[parse.lac]], [[none]])
|
||||||
|
b4_define_flag_if([lac])
|
||||||
|
m4_define([b4_lac_flag],
|
||||||
|
[m4_if(b4_percent_define_get([[parse.lac]]),
|
||||||
|
[none], [[0]], [[1]])])
|
||||||
|
|
||||||
|
|
||||||
# b4_integral_parser_table_declare(TABLE-NAME, CONTENT, COMMENT)
|
# b4_integral_parser_table_declare(TABLE-NAME, CONTENT, COMMENT)
|
||||||
# --------------------------------------------------------------
|
# --------------------------------------------------------------
|
||||||
@@ -220,7 +228,18 @@ m4_define([b4_shared_declarations],
|
|||||||
private:
|
private:
|
||||||
/// This class is not copyable.
|
/// This class is not copyable.
|
||||||
]b4_parser_class[ (const ]b4_parser_class[&);
|
]b4_parser_class[ (const ]b4_parser_class[&);
|
||||||
]b4_parser_class[& operator= (const ]b4_parser_class[&);
|
]b4_parser_class[& operator= (const ]b4_parser_class[&);]b4_lac_if([[
|
||||||
|
|
||||||
|
/// Check the lookahead yytoken.
|
||||||
|
/// \returns true iff the token will be eventually shifted.
|
||||||
|
bool yy_lac_check_ (int yytoken) const;
|
||||||
|
/// Establish the initial context if no initial context currently exists.
|
||||||
|
/// \returns true iff the token will be eventually shifted.
|
||||||
|
bool yy_lac_establish_ (int yytoken);
|
||||||
|
/// Discard any previous initial lookahead context because of event.
|
||||||
|
/// \param event the event which caused the lookahead to be discarded.
|
||||||
|
/// Only used for debbuging output.
|
||||||
|
void yy_lac_discard_ (const char* event);]])[
|
||||||
|
|
||||||
/// State numbers.
|
/// State numbers.
|
||||||
typedef int state_type;
|
typedef int state_type;
|
||||||
@@ -344,7 +363,16 @@ m4_define([b4_shared_declarations],
|
|||||||
typedef stack<stack_symbol_type> stack_type;
|
typedef stack<stack_symbol_type> stack_type;
|
||||||
|
|
||||||
/// The stack.
|
/// The stack.
|
||||||
stack_type yystack_;
|
stack_type yystack_;]b4_lac_if([[
|
||||||
|
/// The stack for LAC.
|
||||||
|
/// Logically, the yy_lac_stack's lifetime is confined to the function
|
||||||
|
/// yy_lac_check_. We just store it as a member of this class to hold
|
||||||
|
/// on to the memory and to avoid frequent reallocations.
|
||||||
|
/// Since yy_lac_check_ is const, this member must be mutable.
|
||||||
|
mutable std::vector<state_type> yylac_stack_;
|
||||||
|
/// Whether an initial LAC context was established.
|
||||||
|
bool yy_lac_established_;
|
||||||
|
]])[
|
||||||
|
|
||||||
/// Push a new state on the stack.
|
/// Push a new state on the stack.
|
||||||
/// \param m a debug message to display
|
/// \param m a debug message to display
|
||||||
@@ -774,7 +802,11 @@ m4_if(b4_prefix, [yy], [],
|
|||||||
stack_symbol_type yyerror_range[3];]])[
|
stack_symbol_type yyerror_range[3];]])[
|
||||||
|
|
||||||
/// The return value of parse ().
|
/// The return value of parse ().
|
||||||
int yyresult;
|
int yyresult;]b4_lac_if([[
|
||||||
|
|
||||||
|
/// Discard the LAC context in case there still is one left from a
|
||||||
|
/// previous invocation.
|
||||||
|
yy_lac_discard_ ("init");]])[
|
||||||
|
|
||||||
#if YY_EXCEPTIONS
|
#if YY_EXCEPTIONS
|
||||||
try
|
try
|
||||||
@@ -843,14 +875,21 @@ b4_dollar_popdef])[]dnl
|
|||||||
to detect an error, take that action. */
|
to detect an error, take that action. */
|
||||||
yyn += yyla.type_get ();
|
yyn += yyla.type_get ();
|
||||||
if (yyn < 0 || yylast_ < yyn || yycheck_[yyn] != yyla.type_get ())
|
if (yyn < 0 || yylast_ < yyn || yycheck_[yyn] != yyla.type_get ())
|
||||||
goto yydefault;
|
{]b4_lac_if([[
|
||||||
|
if (!yy_lac_establish_ (yyla.type_get ()))
|
||||||
|
goto yyerrlab;]])[
|
||||||
|
goto yydefault;
|
||||||
|
}
|
||||||
|
|
||||||
// Reduce or error.
|
// Reduce or error.
|
||||||
yyn = yytable_[yyn];
|
yyn = yytable_[yyn];
|
||||||
if (yyn <= 0)
|
if (yyn <= 0)
|
||||||
{
|
{
|
||||||
if (yy_table_value_is_error_ (yyn))
|
if (yy_table_value_is_error_ (yyn))
|
||||||
goto yyerrlab;
|
goto yyerrlab;]b4_lac_if([[
|
||||||
|
if (!yy_lac_establish_ (yyla.type_get ()))
|
||||||
|
goto yyerrlab;
|
||||||
|
]])[
|
||||||
yyn = -yyn;
|
yyn = -yyn;
|
||||||
goto yyreduce;
|
goto yyreduce;
|
||||||
}
|
}
|
||||||
@@ -860,7 +899,8 @@ b4_dollar_popdef])[]dnl
|
|||||||
--yyerrstatus_;
|
--yyerrstatus_;
|
||||||
|
|
||||||
// Shift the lookahead token.
|
// Shift the lookahead token.
|
||||||
yypush_ ("Shifting", yyn, YY_MOVE (yyla));
|
yypush_ ("Shifting", yyn, YY_MOVE (yyla));]b4_lac_if([[
|
||||||
|
yy_lac_discard_ ("shift");]])[
|
||||||
goto yynewstate;
|
goto yynewstate;
|
||||||
|
|
||||||
|
|
||||||
@@ -1020,7 +1060,8 @@ b4_dollar_popdef])[]dnl
|
|||||||
yyerror_range[2].location = yyla.location;
|
yyerror_range[2].location = yyla.location;
|
||||||
YYLLOC_DEFAULT (error_token.location, yyerror_range, 2);]])[
|
YYLLOC_DEFAULT (error_token.location, yyerror_range, 2);]])[
|
||||||
|
|
||||||
// Shift the error token.
|
// Shift the error token.]b4_lac_if([[
|
||||||
|
yy_lac_discard_ ("error recovery");]])[
|
||||||
error_token.state = yyn;
|
error_token.state = yyn;
|
||||||
yypush_ ("Shifting", YY_MOVE (error_token));
|
yypush_ ("Shifting", YY_MOVE (error_token));
|
||||||
}
|
}
|
||||||
@@ -1085,8 +1126,148 @@ b4_dollar_popdef])[]dnl
|
|||||||
{
|
{
|
||||||
error (]b4_join(b4_locations_if([yyexc.location]),
|
error (]b4_join(b4_locations_if([yyexc.location]),
|
||||||
[[yyexc.what ()]])[);
|
[[yyexc.what ()]])[);
|
||||||
|
}]b4_lac_if([[
|
||||||
|
|
||||||
|
bool
|
||||||
|
]b4_parser_class[::yy_lac_check_ (int yytoken) const
|
||||||
|
{
|
||||||
|
// Logically, the yylac_stack's lifetime is confined to this function.
|
||||||
|
// Clear it, to get rid of potential left-overs from previous call.
|
||||||
|
yylac_stack_.clear ();
|
||||||
|
// Reduce until we encounter a shift and thereby accept the token.
|
||||||
|
#if ]b4_api_PREFIX[DEBUG
|
||||||
|
YYCDEBUG << "LAC: checking lookahead " << yytname_[yytoken] << ':';
|
||||||
|
#endif
|
||||||
|
size_t lac_top = 0;
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
state_type top_state = (yylac_stack_.empty ()
|
||||||
|
? yystack_[lac_top].state
|
||||||
|
: yylac_stack_.back ());
|
||||||
|
int yyrule = yypact_[top_state];
|
||||||
|
if (yy_pact_value_is_default_ (yyrule)
|
||||||
|
|| (yyrule += yytoken) < 0 || yylast_ < yyrule
|
||||||
|
|| yycheck_[yyrule] != yytoken)
|
||||||
|
{
|
||||||
|
// Use the default action.
|
||||||
|
yyrule = yydefact_[top_state];
|
||||||
|
if (yyrule == 0)
|
||||||
|
{
|
||||||
|
YYCDEBUG << " Err\n";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Use the action from yytable.
|
||||||
|
yyrule = yytable_[yyrule];
|
||||||
|
if (yy_table_value_is_error_ (yyrule))
|
||||||
|
{
|
||||||
|
YYCDEBUG << " Err\n";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (0 < yyrule)
|
||||||
|
{
|
||||||
|
YYCDEBUG << " S" << yyrule << '\n';
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
yyrule = -yyrule;
|
||||||
|
}
|
||||||
|
// By now we know we have to simulate a reduce.
|
||||||
|
YYCDEBUG << " R" << yyrule - 1;
|
||||||
|
// Pop the corresponding number of values from the stack.
|
||||||
|
{
|
||||||
|
size_t yylen = yyr2_[yyrule];
|
||||||
|
// First pop from the LAC stack as many tokens as possible.
|
||||||
|
size_t lac_size = yylac_stack_.size ();
|
||||||
|
if (yylen < lac_size)
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < yylen; ++i)
|
||||||
|
yylac_stack_.pop_back ();
|
||||||
|
yylen = 0;
|
||||||
|
}
|
||||||
|
else if (lac_size)
|
||||||
|
{
|
||||||
|
yylac_stack_.clear ();
|
||||||
|
yylen -= lac_size;
|
||||||
|
}
|
||||||
|
// Only aftwerwards look at the main stack.
|
||||||
|
// We simulate popping elements by incrementing lac_top.
|
||||||
|
lac_top += yylen;
|
||||||
|
}
|
||||||
|
// Keep top_state in sync with the updated stack.
|
||||||
|
top_state = (yylac_stack_.empty ()
|
||||||
|
? yystack_[lac_top].state
|
||||||
|
: yylac_stack_.back ());
|
||||||
|
// Push the resulting state of the reduction.
|
||||||
|
state_type state = yy_lr_goto_state_ (top_state, yyr1_[yyrule]);
|
||||||
|
YYCDEBUG << " G" << state;
|
||||||
|
yylac_stack_.push_back (state);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Establish the initial context if no initial context currently exists.
|
||||||
|
bool
|
||||||
|
]b4_parser_class[::yy_lac_establish_ (int yytoken)
|
||||||
|
{
|
||||||
|
/* Establish the initial context for the current lookahead if no initial
|
||||||
|
context is currently established.
|
||||||
|
|
||||||
|
We define a context as a snapshot of the parser stacks. We define
|
||||||
|
the initial context for a lookahead as the context in which the
|
||||||
|
parser initially examines that lookahead in order to select a
|
||||||
|
syntactic action. Thus, if the lookahead eventually proves
|
||||||
|
syntactically unacceptable (possibly in a later context reached via a
|
||||||
|
series of reductions), the initial context can be used to determine
|
||||||
|
the exact set of tokens that would be syntactically acceptable in the
|
||||||
|
lookahead's place. Moreover, it is the context after which any
|
||||||
|
further semantic actions would be erroneous because they would be
|
||||||
|
determined by a syntactically unacceptable token.
|
||||||
|
|
||||||
|
yy_lac_establish_ should be invoked when a reduction is about to be
|
||||||
|
performed in an inconsistent state (which, for the purposes of LAC,
|
||||||
|
includes consistent states that don't know they're consistent because
|
||||||
|
their default reductions have been disabled).
|
||||||
|
|
||||||
|
For parse.lac=full, the implementation of yy_lac_establish_ is as
|
||||||
|
follows. If no initial context is currently established for the
|
||||||
|
current lookahead, then check if that lookahead can eventually be
|
||||||
|
shifted if syntactic actions continue from the current context. */
|
||||||
|
if (!yy_lac_established_)
|
||||||
|
{
|
||||||
|
#if ]b4_api_PREFIX[DEBUG
|
||||||
|
YYCDEBUG << "LAC: initial context established for "
|
||||||
|
<< yytname_[yytoken] << '\n';
|
||||||
|
#endif
|
||||||
|
yy_lac_established_ = true;
|
||||||
|
return yy_lac_check_ (yytoken);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Discard any previous initial lookahead context.
|
||||||
|
void
|
||||||
|
]b4_parser_class[::yy_lac_discard_ (const char* evt)
|
||||||
|
{
|
||||||
|
/* Discard any previous initial lookahead context because of Event,
|
||||||
|
which may be a lookahead change or an invalidation of the currently
|
||||||
|
established initial context for the current lookahead.
|
||||||
|
|
||||||
|
The most common example of a lookahead change is a shift. An example
|
||||||
|
of both cases is syntax error recovery. That is, a syntax error
|
||||||
|
occurs when the lookahead is syntactically erroneous for the
|
||||||
|
currently established initial context, so error recovery manipulates
|
||||||
|
the parser stacks to try to find a new initial context in which the
|
||||||
|
current lookahead is syntactically acceptable. If it fails to find
|
||||||
|
such a context, it discards the lookahead. */
|
||||||
|
if (yy_lac_established_)
|
||||||
|
{
|
||||||
|
YYCDEBUG << "LAC: initial context discarded due to "
|
||||||
|
<< evt << '\n';
|
||||||
|
yy_lac_established_ = false;
|
||||||
|
}
|
||||||
|
}]])[
|
||||||
|
|
||||||
// Generate an error message.
|
// Generate an error message.
|
||||||
std::string
|
std::string
|
||||||
]b4_parser_class[::yysyntax_error_ (]dnl
|
]b4_parser_class[::yysyntax_error_ (]dnl
|
||||||
@@ -1115,24 +1296,40 @@ b4_error_verbose_if([state_type yystate, const symbol_type& yyla],
|
|||||||
a consistent state with a default action. There might have
|
a consistent state with a default action. There might have
|
||||||
been a previous inconsistent state, consistent state with a
|
been a previous inconsistent state, consistent state with a
|
||||||
non-default action, or user semantic action that manipulated
|
non-default action, or user semantic action that manipulated
|
||||||
yyla. (However, yyla is currently not documented for users.)
|
yyla. (However, yyla is currently not documented for users.)]b4_lac_if([[
|
||||||
|
In the first two cases, it might appear that the current syntax
|
||||||
|
error should have been detected in the previous state when
|
||||||
|
yy_lac_check was invoked. However, at that time, there might
|
||||||
|
have been a different syntax error that discarded a different
|
||||||
|
initial context during error recovery, leaving behind the
|
||||||
|
current lookahead.]], [[
|
||||||
- Of course, the expected token list depends on states to have
|
- Of course, the expected token list depends on states to have
|
||||||
correct lookahead information, and it depends on the parser not
|
correct lookahead information, and it depends on the parser not
|
||||||
to perform extra reductions after fetching a lookahead from the
|
to perform extra reductions after fetching a lookahead from the
|
||||||
scanner and before detecting a syntax error. Thus, state
|
scanner and before detecting a syntax error. Thus, state merging
|
||||||
merging (from LALR or IELR) and default reductions corrupt the
|
(from LALR or IELR) and default reductions corrupt the expected
|
||||||
expected token list. However, the list is correct for
|
token list. However, the list is correct for canonical LR with
|
||||||
canonical LR with one exception: it will still contain any
|
one exception: it will still contain any token that will not be
|
||||||
token that will not be accepted due to an error action in a
|
accepted due to an error action in a later state.]])[
|
||||||
later state.
|
|
||||||
*/
|
*/
|
||||||
if (!yyla.empty ())
|
if (!yyla.empty ())
|
||||||
{
|
{
|
||||||
int yytoken = yyla.type_get ();
|
int yytoken = yyla.type_get ();
|
||||||
yyarg[yycount++] = yytname_[yytoken];
|
yyarg[yycount++] = yytname_[yytoken];]b4_lac_if([[
|
||||||
|
|
||||||
|
#if ]b4_api_PREFIX[DEBUG
|
||||||
|
// Execute LAC once. We don't care if it is succesful, we
|
||||||
|
// only do it for the sake of debugging output.
|
||||||
|
if (!yy_lac_established_)
|
||||||
|
yy_lac_check_ (yytoken);
|
||||||
|
#endif]])[
|
||||||
|
|
||||||
int yyn = yypact_[yystate];
|
int yyn = yypact_[yystate];
|
||||||
if (!yy_pact_value_is_default_ (yyn))
|
if (!yy_pact_value_is_default_ (yyn))
|
||||||
{
|
{]b4_lac_if([[
|
||||||
|
for (int yyx = 0; yyx < yyntokens_; ++yyx)
|
||||||
|
if (yyx != yyterror_ && yy_lac_check_(yyx))
|
||||||
|
{]], [[
|
||||||
/* Start YYX at -YYN if negative to avoid negative indexes in
|
/* Start YYX at -YYN if negative to avoid negative indexes in
|
||||||
YYCHECK. In other words, skip the first -YYN actions for
|
YYCHECK. In other words, skip the first -YYN actions for
|
||||||
this state because they are default actions. */
|
this state because they are default actions. */
|
||||||
@@ -1143,7 +1340,7 @@ b4_error_verbose_if([state_type yystate, const symbol_type& yyla],
|
|||||||
for (int yyx = yyxbegin; yyx < yyxend; ++yyx)
|
for (int yyx = yyxbegin; yyx < yyxend; ++yyx)
|
||||||
if (yycheck_[yyx + yyn] == yyx && yyx != yyterror_
|
if (yycheck_[yyx + yyn] == yyx && yyx != yyterror_
|
||||||
&& !yy_table_value_is_error_ (yytable_[yyx + yyn]))
|
&& !yy_table_value_is_error_ (yytable_[yyx + yyn]))
|
||||||
{
|
{]])[
|
||||||
if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM)
|
if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM)
|
||||||
{
|
{
|
||||||
yycount = 1;
|
yycount = 1;
|
||||||
|
|||||||
@@ -8697,7 +8697,7 @@ Enable LAC to improve syntax error handling.
|
|||||||
@item @code{none} (default)
|
@item @code{none} (default)
|
||||||
@item @code{full}
|
@item @code{full}
|
||||||
@end itemize
|
@end itemize
|
||||||
This feature is currently only available for deterministic parsers in C.
|
This feature is currently only available for deterministic parsers in C and C++.
|
||||||
@end deffn
|
@end deffn
|
||||||
|
|
||||||
Conceptually, the LAC mechanism is straight-forward. Whenever the parser
|
Conceptually, the LAC mechanism is straight-forward. Whenever the parser
|
||||||
@@ -11685,7 +11685,7 @@ location (if enabled) being @var{yylval} and @var{yylloc}. Invocations of
|
|||||||
Note that when using variants, the interface for @code{yylex} is the same,
|
Note that when using variants, the interface for @code{yylex} is the same,
|
||||||
but @code{yylval} is handled differently.
|
but @code{yylval} is handled differently.
|
||||||
|
|
||||||
Regular union-based code in Lex scanner typically look like:
|
Regular union-based code in Lex scanner typically looks like:
|
||||||
|
|
||||||
@example
|
@example
|
||||||
[0-9]+ @{
|
[0-9]+ @{
|
||||||
|
|||||||
Reference in New Issue
Block a user