mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-09 12:23:04 +00:00
lalr1.cc: add LAC support
Implement lookahead correction (LAC) for the C++ skeleton. LAC is a mechanism to make sure that we report the correct list of expected tokens if a syntax error occurs. So far, LAC was only supported for the C skeleton "yacc.c". * data/skeletons/lalr1.cc: Add LAC support. * doc/bison.texi: Update.
This commit is contained in:
committed by
Akim Demaille
parent
996abe62d7
commit
72d4ae5306
@@ -20,6 +20,14 @@ m4_include(b4_skeletonsdir/[c++.m4])
|
||||
# api.value.type=variant is valid.
|
||||
m4_define([b4_value_type_setup_variant])
|
||||
|
||||
# Check the value of %define parse.lac, where LAC stands for lookahead
|
||||
# correction.
|
||||
b4_percent_define_default([[parse.lac]], [[none]])
|
||||
b4_define_flag_if([lac])
|
||||
m4_define([b4_lac_flag],
|
||||
[m4_if(b4_percent_define_get([[parse.lac]]),
|
||||
[none], [[0]], [[1]])])
|
||||
|
||||
|
||||
# b4_integral_parser_table_declare(TABLE-NAME, CONTENT, COMMENT)
|
||||
# --------------------------------------------------------------
|
||||
@@ -220,7 +228,18 @@ m4_define([b4_shared_declarations],
|
||||
private:
|
||||
/// This class is not copyable.
|
||||
]b4_parser_class[ (const ]b4_parser_class[&);
|
||||
]b4_parser_class[& operator= (const ]b4_parser_class[&);
|
||||
]b4_parser_class[& operator= (const ]b4_parser_class[&);]b4_lac_if([[
|
||||
|
||||
/// Check the lookahead yytoken.
|
||||
/// \returns true iff the token will be eventually shifted.
|
||||
bool yy_lac_check_ (int yytoken) const;
|
||||
/// Establish the initial context if no initial context currently exists.
|
||||
/// \returns true iff the token will be eventually shifted.
|
||||
bool yy_lac_establish_ (int yytoken);
|
||||
/// Discard any previous initial lookahead context because of event.
|
||||
/// \param event the event which caused the lookahead to be discarded.
|
||||
/// Only used for debbuging output.
|
||||
void yy_lac_discard_ (const char* event);]])[
|
||||
|
||||
/// State numbers.
|
||||
typedef int state_type;
|
||||
@@ -344,7 +363,16 @@ m4_define([b4_shared_declarations],
|
||||
typedef stack<stack_symbol_type> stack_type;
|
||||
|
||||
/// The stack.
|
||||
stack_type yystack_;
|
||||
stack_type yystack_;]b4_lac_if([[
|
||||
/// The stack for LAC.
|
||||
/// Logically, the yy_lac_stack's lifetime is confined to the function
|
||||
/// yy_lac_check_. We just store it as a member of this class to hold
|
||||
/// on to the memory and to avoid frequent reallocations.
|
||||
/// Since yy_lac_check_ is const, this member must be mutable.
|
||||
mutable std::vector<state_type> yylac_stack_;
|
||||
/// Whether an initial LAC context was established.
|
||||
bool yy_lac_established_;
|
||||
]])[
|
||||
|
||||
/// Push a new state on the stack.
|
||||
/// \param m a debug message to display
|
||||
@@ -774,7 +802,11 @@ m4_if(b4_prefix, [yy], [],
|
||||
stack_symbol_type yyerror_range[3];]])[
|
||||
|
||||
/// The return value of parse ().
|
||||
int yyresult;
|
||||
int yyresult;]b4_lac_if([[
|
||||
|
||||
/// Discard the LAC context in case there still is one left from a
|
||||
/// previous invocation.
|
||||
yy_lac_discard_ ("init");]])[
|
||||
|
||||
#if YY_EXCEPTIONS
|
||||
try
|
||||
@@ -843,14 +875,21 @@ b4_dollar_popdef])[]dnl
|
||||
to detect an error, take that action. */
|
||||
yyn += yyla.type_get ();
|
||||
if (yyn < 0 || yylast_ < yyn || yycheck_[yyn] != yyla.type_get ())
|
||||
goto yydefault;
|
||||
{]b4_lac_if([[
|
||||
if (!yy_lac_establish_ (yyla.type_get ()))
|
||||
goto yyerrlab;]])[
|
||||
goto yydefault;
|
||||
}
|
||||
|
||||
// Reduce or error.
|
||||
yyn = yytable_[yyn];
|
||||
if (yyn <= 0)
|
||||
{
|
||||
if (yy_table_value_is_error_ (yyn))
|
||||
goto yyerrlab;
|
||||
goto yyerrlab;]b4_lac_if([[
|
||||
if (!yy_lac_establish_ (yyla.type_get ()))
|
||||
goto yyerrlab;
|
||||
]])[
|
||||
yyn = -yyn;
|
||||
goto yyreduce;
|
||||
}
|
||||
@@ -860,7 +899,8 @@ b4_dollar_popdef])[]dnl
|
||||
--yyerrstatus_;
|
||||
|
||||
// Shift the lookahead token.
|
||||
yypush_ ("Shifting", yyn, YY_MOVE (yyla));
|
||||
yypush_ ("Shifting", yyn, YY_MOVE (yyla));]b4_lac_if([[
|
||||
yy_lac_discard_ ("shift");]])[
|
||||
goto yynewstate;
|
||||
|
||||
|
||||
@@ -1020,7 +1060,8 @@ b4_dollar_popdef])[]dnl
|
||||
yyerror_range[2].location = yyla.location;
|
||||
YYLLOC_DEFAULT (error_token.location, yyerror_range, 2);]])[
|
||||
|
||||
// Shift the error token.
|
||||
// Shift the error token.]b4_lac_if([[
|
||||
yy_lac_discard_ ("error recovery");]])[
|
||||
error_token.state = yyn;
|
||||
yypush_ ("Shifting", YY_MOVE (error_token));
|
||||
}
|
||||
@@ -1085,8 +1126,148 @@ b4_dollar_popdef])[]dnl
|
||||
{
|
||||
error (]b4_join(b4_locations_if([yyexc.location]),
|
||||
[[yyexc.what ()]])[);
|
||||
}]b4_lac_if([[
|
||||
|
||||
bool
|
||||
]b4_parser_class[::yy_lac_check_ (int yytoken) const
|
||||
{
|
||||
// Logically, the yylac_stack's lifetime is confined to this function.
|
||||
// Clear it, to get rid of potential left-overs from previous call.
|
||||
yylac_stack_.clear ();
|
||||
// Reduce until we encounter a shift and thereby accept the token.
|
||||
#if ]b4_api_PREFIX[DEBUG
|
||||
YYCDEBUG << "LAC: checking lookahead " << yytname_[yytoken] << ':';
|
||||
#endif
|
||||
size_t lac_top = 0;
|
||||
while (true)
|
||||
{
|
||||
state_type top_state = (yylac_stack_.empty ()
|
||||
? yystack_[lac_top].state
|
||||
: yylac_stack_.back ());
|
||||
int yyrule = yypact_[top_state];
|
||||
if (yy_pact_value_is_default_ (yyrule)
|
||||
|| (yyrule += yytoken) < 0 || yylast_ < yyrule
|
||||
|| yycheck_[yyrule] != yytoken)
|
||||
{
|
||||
// Use the default action.
|
||||
yyrule = yydefact_[top_state];
|
||||
if (yyrule == 0)
|
||||
{
|
||||
YYCDEBUG << " Err\n";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Use the action from yytable.
|
||||
yyrule = yytable_[yyrule];
|
||||
if (yy_table_value_is_error_ (yyrule))
|
||||
{
|
||||
YYCDEBUG << " Err\n";
|
||||
return false;
|
||||
}
|
||||
if (0 < yyrule)
|
||||
{
|
||||
YYCDEBUG << " S" << yyrule << '\n';
|
||||
return true;
|
||||
}
|
||||
yyrule = -yyrule;
|
||||
}
|
||||
// By now we know we have to simulate a reduce.
|
||||
YYCDEBUG << " R" << yyrule - 1;
|
||||
// Pop the corresponding number of values from the stack.
|
||||
{
|
||||
size_t yylen = yyr2_[yyrule];
|
||||
// First pop from the LAC stack as many tokens as possible.
|
||||
size_t lac_size = yylac_stack_.size ();
|
||||
if (yylen < lac_size)
|
||||
{
|
||||
for (size_t i = 0; i < yylen; ++i)
|
||||
yylac_stack_.pop_back ();
|
||||
yylen = 0;
|
||||
}
|
||||
else if (lac_size)
|
||||
{
|
||||
yylac_stack_.clear ();
|
||||
yylen -= lac_size;
|
||||
}
|
||||
// Only aftwerwards look at the main stack.
|
||||
// We simulate popping elements by incrementing lac_top.
|
||||
lac_top += yylen;
|
||||
}
|
||||
// Keep top_state in sync with the updated stack.
|
||||
top_state = (yylac_stack_.empty ()
|
||||
? yystack_[lac_top].state
|
||||
: yylac_stack_.back ());
|
||||
// Push the resulting state of the reduction.
|
||||
state_type state = yy_lr_goto_state_ (top_state, yyr1_[yyrule]);
|
||||
YYCDEBUG << " G" << state;
|
||||
yylac_stack_.push_back (state);
|
||||
}
|
||||
}
|
||||
|
||||
// Establish the initial context if no initial context currently exists.
|
||||
bool
|
||||
]b4_parser_class[::yy_lac_establish_ (int yytoken)
|
||||
{
|
||||
/* Establish the initial context for the current lookahead if no initial
|
||||
context is currently established.
|
||||
|
||||
We define a context as a snapshot of the parser stacks. We define
|
||||
the initial context for a lookahead as the context in which the
|
||||
parser initially examines that lookahead in order to select a
|
||||
syntactic action. Thus, if the lookahead eventually proves
|
||||
syntactically unacceptable (possibly in a later context reached via a
|
||||
series of reductions), the initial context can be used to determine
|
||||
the exact set of tokens that would be syntactically acceptable in the
|
||||
lookahead's place. Moreover, it is the context after which any
|
||||
further semantic actions would be erroneous because they would be
|
||||
determined by a syntactically unacceptable token.
|
||||
|
||||
yy_lac_establish_ should be invoked when a reduction is about to be
|
||||
performed in an inconsistent state (which, for the purposes of LAC,
|
||||
includes consistent states that don't know they're consistent because
|
||||
their default reductions have been disabled).
|
||||
|
||||
For parse.lac=full, the implementation of yy_lac_establish_ is as
|
||||
follows. If no initial context is currently established for the
|
||||
current lookahead, then check if that lookahead can eventually be
|
||||
shifted if syntactic actions continue from the current context. */
|
||||
if (!yy_lac_established_)
|
||||
{
|
||||
#if ]b4_api_PREFIX[DEBUG
|
||||
YYCDEBUG << "LAC: initial context established for "
|
||||
<< yytname_[yytoken] << '\n';
|
||||
#endif
|
||||
yy_lac_established_ = true;
|
||||
return yy_lac_check_ (yytoken);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Discard any previous initial lookahead context.
|
||||
void
|
||||
]b4_parser_class[::yy_lac_discard_ (const char* evt)
|
||||
{
|
||||
/* Discard any previous initial lookahead context because of Event,
|
||||
which may be a lookahead change or an invalidation of the currently
|
||||
established initial context for the current lookahead.
|
||||
|
||||
The most common example of a lookahead change is a shift. An example
|
||||
of both cases is syntax error recovery. That is, a syntax error
|
||||
occurs when the lookahead is syntactically erroneous for the
|
||||
currently established initial context, so error recovery manipulates
|
||||
the parser stacks to try to find a new initial context in which the
|
||||
current lookahead is syntactically acceptable. If it fails to find
|
||||
such a context, it discards the lookahead. */
|
||||
if (yy_lac_established_)
|
||||
{
|
||||
YYCDEBUG << "LAC: initial context discarded due to "
|
||||
<< evt << '\n';
|
||||
yy_lac_established_ = false;
|
||||
}
|
||||
}]])[
|
||||
|
||||
// Generate an error message.
|
||||
std::string
|
||||
]b4_parser_class[::yysyntax_error_ (]dnl
|
||||
@@ -1115,24 +1296,40 @@ b4_error_verbose_if([state_type yystate, const symbol_type& yyla],
|
||||
a consistent state with a default action. There might have
|
||||
been a previous inconsistent state, consistent state with a
|
||||
non-default action, or user semantic action that manipulated
|
||||
yyla. (However, yyla is currently not documented for users.)
|
||||
yyla. (However, yyla is currently not documented for users.)]b4_lac_if([[
|
||||
In the first two cases, it might appear that the current syntax
|
||||
error should have been detected in the previous state when
|
||||
yy_lac_check was invoked. However, at that time, there might
|
||||
have been a different syntax error that discarded a different
|
||||
initial context during error recovery, leaving behind the
|
||||
current lookahead.]], [[
|
||||
- Of course, the expected token list depends on states to have
|
||||
correct lookahead information, and it depends on the parser not
|
||||
to perform extra reductions after fetching a lookahead from the
|
||||
scanner and before detecting a syntax error. Thus, state
|
||||
merging (from LALR or IELR) and default reductions corrupt the
|
||||
expected token list. However, the list is correct for
|
||||
canonical LR with one exception: it will still contain any
|
||||
token that will not be accepted due to an error action in a
|
||||
later state.
|
||||
scanner and before detecting a syntax error. Thus, state merging
|
||||
(from LALR or IELR) and default reductions corrupt the expected
|
||||
token list. However, the list is correct for canonical LR with
|
||||
one exception: it will still contain any token that will not be
|
||||
accepted due to an error action in a later state.]])[
|
||||
*/
|
||||
if (!yyla.empty ())
|
||||
{
|
||||
int yytoken = yyla.type_get ();
|
||||
yyarg[yycount++] = yytname_[yytoken];
|
||||
yyarg[yycount++] = yytname_[yytoken];]b4_lac_if([[
|
||||
|
||||
#if ]b4_api_PREFIX[DEBUG
|
||||
// Execute LAC once. We don't care if it is succesful, we
|
||||
// only do it for the sake of debugging output.
|
||||
if (!yy_lac_established_)
|
||||
yy_lac_check_ (yytoken);
|
||||
#endif]])[
|
||||
|
||||
int yyn = yypact_[yystate];
|
||||
if (!yy_pact_value_is_default_ (yyn))
|
||||
{
|
||||
{]b4_lac_if([[
|
||||
for (int yyx = 0; yyx < yyntokens_; ++yyx)
|
||||
if (yyx != yyterror_ && yy_lac_check_(yyx))
|
||||
{]], [[
|
||||
/* Start YYX at -YYN if negative to avoid negative indexes in
|
||||
YYCHECK. In other words, skip the first -YYN actions for
|
||||
this state because they are default actions. */
|
||||
@@ -1143,7 +1340,7 @@ b4_error_verbose_if([state_type yystate, const symbol_type& yyla],
|
||||
for (int yyx = yyxbegin; yyx < yyxend; ++yyx)
|
||||
if (yycheck_[yyx + yyn] == yyx && yyx != yyterror_
|
||||
&& !yy_table_value_is_error_ (yytable_[yyx + yyn]))
|
||||
{
|
||||
{]])[
|
||||
if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM)
|
||||
{
|
||||
yycount = 1;
|
||||
|
||||
@@ -8697,7 +8697,7 @@ Enable LAC to improve syntax error handling.
|
||||
@item @code{none} (default)
|
||||
@item @code{full}
|
||||
@end itemize
|
||||
This feature is currently only available for deterministic parsers in C.
|
||||
This feature is currently only available for deterministic parsers in C and C++.
|
||||
@end deffn
|
||||
|
||||
Conceptually, the LAC mechanism is straight-forward. Whenever the parser
|
||||
@@ -11685,7 +11685,7 @@ location (if enabled) being @var{yylval} and @var{yylloc}. Invocations of
|
||||
Note that when using variants, the interface for @code{yylex} is the same,
|
||||
but @code{yylval} is handled differently.
|
||||
|
||||
Regular union-based code in Lex scanner typically look like:
|
||||
Regular union-based code in Lex scanner typically looks like:
|
||||
|
||||
@example
|
||||
[0-9]+ @{
|
||||
|
||||
Reference in New Issue
Block a user