java: add push-parser support

* data/lalr1.java: Capture the declarations as m4 macros to avoid
duplication.  When push parsing, the declarations occur at the class
instance level rather than within the parse() function.

Change the way that the parser state is initialized.  For
push-parsing, the parse state declarations are moved to
"push_parse_initialize()", which is called on the first invocation of
"push_parse()". The %initial-action code is also inserted after the
invocation of "push_parse_initialize()".

The body of the parse loop is modified to return values at appropriate
points when doing push parsing.  In order to make push parsing work,
it is necessary to divide YYNEWSTATE into two states: YYNEWSTATE and
YYGETTOKEN. On the first call to push_parse(), the state is
YYNEWSTATE. On all later entries, the state is set to YYGETTOKEN. The
YYNEWSTATE switch arm falls through into YYGETTOKEN. YYGETTOKEN
indicates that a new token is potentially needed.  Normally, with a
pull parser, this new token would be obtained by calling "yylex()". In
the push parser, the value YYMORE is returned to the caller. On the
next call to push_parse(), the parser will return to the YYGETTOKEN
state and continue operation.

* tests/javapush.at: New test file for java push parsing.
* tests/testsuite.at: Use it.
* tests/local.mk: Adjust.
* doc/bison.texi (Java Push Parser Interface): New.

Signed-off-by: Akim Demaille <akim@lrde.epita.fr>
This commit is contained in:
Dennis Heimbigner
2013-06-13 10:08:19 +02:00
committed by Akim Demaille
parent 0fcc2e9a74
commit aa94def12d
6 changed files with 1141 additions and 53 deletions

View File

@@ -20,7 +20,8 @@ m4_include(b4_pkgdatadir/[java.m4])
b4_defines_if([b4_fatal([%s: %%defines does not make sense in Java],
[b4_skeleton])])
# We don't depend on %debug in Java, but pacify warnings about non-used flags.
# We do not depend on %debug in Java, but pacify warnings about
# non-used flags.
b4_parse_trace_if([0], [0])
m4_define([b4_symbol_no_destructor_assert],
@@ -30,6 +31,57 @@ m4_define([b4_symbol_no_destructor_assert],
[b4_symbol_action_location([$1], [destructor])])])])
b4_symbol_foreach([b4_symbol_no_destructor_assert])
# Setup some macros for api.push-pull.
b4_percent_define_default([[api.push-pull]], [[pull]])
b4_percent_define_check_values([[[[api.push-pull]],
[[pull]], [[push]], [[both]]]])
# Define m4 conditional macros that encode the value
# of the api.push-pull flag.
b4_define_flag_if([pull]) m4_define([b4_pull_flag], [[1]])
b4_define_flag_if([push]) m4_define([b4_push_flag], [[1]])
m4_case(b4_percent_define_get([[api.push-pull]]),
[pull], [m4_define([b4_push_flag], [[0]])],
[push], [m4_define([b4_pull_flag], [[0]])])
# Define a macro to be true when api.push-pull has the value "both".
m4_define([b4_both_if],[b4_push_if([b4_pull_if([$1],[$2])],[$2])])
# Handle BISON_USE_PUSH_FOR_PULL for the test suite. So that push parsing
# tests function as written, do not let BISON_USE_PUSH_FOR_PULL modify the
# behavior of Bison at all when push parsing is already requested.
b4_define_flag_if([use_push_for_pull])
b4_use_push_for_pull_if([
b4_push_if([m4_define([b4_use_push_for_pull_flag], [[0]])],
[m4_define([b4_push_flag], [[1]])])])
# Define a macro to encapsulate the parse state variables.
# This allows them to be defined either in parse() when doing
# pull parsing, or as class instance variable when doing push parsing.
m4_define([b4_define_state],[[
/* Parser state. These declarations are emitted inside parse when pull
 * parsing and at class level when push parsing. */
/* Lookahead and lookahead in internal form. */
int yychar = yyempty_;
int yytoken = 0;
/* State. */
int yyn = 0;
int yylen = 0;
int yystate = 0;
YYStack yystack = new YYStack ();
/* Arm of the parse loop switch to run next. */
int label = YYNEWSTATE;
/* Error handling. */
int yynerrs_ = 0;
]b4_locations_if([/* The location where the error started. */
b4_location_type yyerrloc = null;
/* Location. */
b4_location_type yylloc = new b4_location_type (null, null);])[
/* Semantic value of the lookahead. */
]b4_yystype[ yylval = null;
]])
b4_output_begin([b4_parser_file_name])
b4_copyright([Skeleton implementation for Bison LALR(1) parsers in Java],
[2007-2013])
@@ -363,6 +415,12 @@ b4_lexer_if([[
*/
public static final int YYABORT = 1;
]b4_push_if([
/**
* Returned by a Bison action in order to request a new token.
*/
public static final int YYMORE = 4;])[
/**
* Returned by a Bison action in order to start error recovery without
* printing an error message.
@@ -379,9 +437,12 @@ b4_lexer_if([[
private static final int YYREDUCE = 6;
private static final int YYERRLAB1 = 7;
private static final int YYRETURN = 8;
]b4_push_if([[ private static final int YYGETTOKEN = 9; /* Signify that a new token is expected when doing push-parsing. */]])[
private int yyerrstatus_ = 0;
]b4_push_if([dnl
b4_define_state])[
/**
* Return whether error recovery is being done. In this state, the parser
* reads token until it reaches a known state, and then restarts normal
@@ -486,6 +547,7 @@ b4_lexer_if([[
+ (yyvaluep == null ? "(null)" : yyvaluep.toString ()) + ")");
}
]b4_push_if([],[[
/**
* Parse input from the scanner that was specified at object construction
* time. Return whether the end of the input was reached successfully.
@@ -493,46 +555,53 @@ b4_lexer_if([[
* @@return <tt>true</tt> if the parsing succeeds. Note that this does not
* imply that there were no syntax errors.
*/
public boolean parse () ]b4_maybe_throws([b4_list2([b4_lex_throws], [b4_throws])])[
public boolean parse () ]b4_maybe_throws([b4_list2([b4_lex_throws], [b4_throws])])[]])[
]b4_push_if([
/**
* Push Parse input from external lexer
*
* @@param yylextoken current token
* @@param yylexval current lval
]b4_locations_if([ * @@param yylexloc current position])[
*
* @@return <tt>YYACCEPT, YYABORT, YYMORE</tt>
*/
public int push_parse (int yylextoken, b4_yystype yylexval[]b4_locations_if([, b4_location_type yylexloc]))
b4_maybe_throws([b4_list2([b4_lex_throws], [b4_throws])])])[
{
/// Lookahead and lookahead in internal form.
int yychar = yyempty_;
int yytoken = 0;
/* State. */
int yyn = 0;
int yylen = 0;
int yystate = 0;
YYStack yystack = new YYStack ();
/* Error handling. */
int yynerrs_ = 0;
]b4_locations_if([/// The location where the error started.
]b4_location_type[ yyerrloc = null;
/// ]b4_location_type[ of the lookahead.
]b4_location_type[ yylloc = new ]b4_location_type[ (null, null);
/// @@$.
]b4_location_type[ yyloc;])
/// Semantic value of the lookahead.
b4_yystype[ yylval = null;
]b4_locations_if([/* @@$. */
b4_location_type yyloc;])[
]b4_push_if([],[[
]b4_define_state[
yycdebug ("Starting parse\n");
yyerrstatus_ = 0;
/* Initialize the stack. */
yystack.push (yystate, yylval ]b4_locations_if([, yylloc])[);
]m4_ifdef([b4_initial_action], [
b4_dollar_pushdef([yylval], [], [yylloc])dnl
/* User initialization code. */
b4_user_initial_action
b4_dollar_popdef])[]dnl
b4_dollar_popdef[]dnl
])[
]])[
]b4_push_if([[
if (!this.push_parse_initialized)
{
push_parse_initialize ();
]m4_ifdef([b4_initial_action], [
b4_dollar_pushdef([yylval], [], [yylloc])dnl
/* User initialization code. */
b4_user_initial_action
b4_dollar_popdef[]dnl
])[
yycdebug ("Starting parse\n");
yyerrstatus_ = 0;
} else
label = YYGETTOKEN;
[ /* Initialize the stack. */
yystack.push (yystate, yylval]b4_locations_if([, yylloc])[);
int label = YYNEWSTATE;
boolean push_token_consumed = true;
]])[
for (;;)
switch (label)
{
@@ -545,7 +614,8 @@ b4_dollar_popdef])[]dnl
/* Accept? */
if (yystate == yyfinal_)
return true;
]b4_push_if([{label = YYACCEPT; break;}],
[return true;])[
/* Take a decision. First try without lookahead. */
yyn = yypact_[yystate];
@@ -554,16 +624,27 @@ b4_dollar_popdef])[]dnl
label = YYDEFAULT;
break;
}
]b4_push_if([ /* Fall Through */
case YYGETTOKEN:])[
/* Read a lookahead token. */
if (yychar == yyempty_)
{
]b4_push_if([[
if (!push_token_consumed)
return YYMORE;
yycdebug ("Reading a token: ");
yychar = yylexer.yylex ();]
b4_locations_if([[
yylloc = new ]b4_location_type[(yylexer.getStartPos (),
yylexer.getEndPos ());]])
yylval = yylexer.getLVal ();[
yychar = yylextoken;
yylval = yylexval;]b4_locations_if([
yylloc = yylexloc;])[
push_token_consumed = false;]])[
]b4_push_if([],[[
yycdebug ("Reading a token: ");
yychar = yylexer.yylex ();
yylval = yylexer.getLVal ();]b4_locations_if([
yylloc = new b4_location_type (yylexer.getStartPos (),
yylexer.getEndPos ());])[
]])[
}
/* Convert token to internal form. */
@@ -660,10 +741,10 @@ b4_dollar_popdef])[]dnl
{
/* Return failure if at end of input. */
if (yychar == Lexer.EOF)
return false;
]b4_push_if([{label = YYABORT; break;}],[return false;])[
}
else
yychar = yyempty_;
yychar = yyempty_;
}
/* Else will try to reuse lookahead token after shifting the error
@@ -671,9 +752,9 @@ b4_dollar_popdef])[]dnl
label = YYERRLAB1;
break;
/*---------------------------------------------------.
/*-------------------------------------------------.
| errorlab -- error raised explicitly by YYERROR. |
`---------------------------------------------------*/
`-------------------------------------------------*/
case YYERROR:
]b4_locations_if([yyerrloc = yystack.locationAt (yylen - 1);])[
@@ -705,9 +786,10 @@ b4_dollar_popdef])[]dnl
}
}
/* Pop the current state because it cannot handle the error token. */
/* Pop the current state because it cannot handle the
* error token. */
if (yystack.height == 0)
return false;
]b4_push_if([{label = YYABORT; break;}],[return false;])[
]b4_locations_if([yyerrloc = yystack.locationAt (0);])[
yystack.pop ();
@@ -716,7 +798,11 @@ b4_dollar_popdef])[]dnl
yystack.print (yyDebugStream);
}
]b4_locations_if([
if (label == YYABORT)
/* Leave the switch. */
break;
]b4_locations_if([
/* Muck with the stack to setup for yylloc. */
yystack.push (0, null, yylloc);
yystack.push (0, null, yyerrloc);
@@ -734,13 +820,92 @@ b4_dollar_popdef])[]dnl
/* Accept. */
case YYACCEPT:
return true;
]b4_push_if([this.push_parse_initialized = false; return YYACCEPT;],
[return true;])[
/* Abort. */
case YYABORT:
return false;
]b4_push_if([this.push_parse_initialized = false; return YYABORT;],
[return false;])[
}
}
]b4_push_if([[
/* True once push_parse_initialize has run for the current parse. It is
 * cleared again when push_parse returns YYACCEPT or YYABORT. */
boolean push_parse_initialized = false;
/**
* (Re-)Initialize the state of the push parser.
*
* Every parse state field is reset to its starting value, a fresh stack
* is created and seeded with the initial state, and the parser is marked
* as initialized. push_parse calls this itself whenever the parser is not
* yet initialized.
*/
public void push_parse_initialize()
{
/* Lookahead and lookahead in internal form. */
this.yychar = yyempty_;
this.yytoken = 0;
/* State. */
this.yyn = 0;
this.yylen = 0;
this.yystate = 0;
this.yystack = new YYStack ();
/* The parse loop starts in the YYNEWSTATE arm. */
this.label = YYNEWSTATE;
/* Error handling. */
this.yynerrs_ = 0;
]b4_locations_if([/* The location where the error started. */
this.yyerrloc = null;
this.yylloc = new b4_location_type (null, null);])[
/* Semantic value of the lookahead. */
this.yylval = null;
/* Seed the new stack with the initial state. */
yystack.push (this.yystate, this.yylval]b4_locations_if([, this.yylloc])[);
this.push_parse_initialized = true;
}
]b4_locations_if([
/**
* Push parse given input from an external lexer.
*
* Convenience overload taking a single position: the position is wrapped
* in a location and passed on to the location-based push_parse.
*
* @@param yylextoken current token
* @@param yylexval current lval
* @@param yylexpos current position
*
* @@return <tt>YYACCEPT, YYABORT, YYMORE</tt>
*/
public int push_parse (int yylextoken, b4_yystype yylexval, b4_position_type yylexpos)
b4_maybe_throws([b4_list2([b4_lex_throws], [b4_throws])])
{
return push_parse (yylextoken, yylexval, new b4_location_type (yylexpos));
}
])[]])
b4_both_if([[
/**
* Parse input from the scanner that was specified at object construction
* time. Return whether the end of the input was reached successfully.
* This version of parse () is defined only when api.push-pull=both.
* It is a pull-style driver built on top of push_parse.
*
* @@return <tt>true</tt> if the parsing succeeds. Note that this does not
* imply that there were no syntax errors.
* @@throws NullPointerException if yylexer is null
*/
public boolean parse () ]b4_maybe_throws([b4_list2([b4_lex_throws], [b4_throws])])[
{
if (yylexer == null)
throw new NullPointerException("Null Lexer");
int status;
/* Feed one scanner token per iteration to push_parse and keep going
 * for as long as it answers YYMORE. */
do {
int token = yylexer.yylex();
]b4_yystype[ lval = yylexer.getLVal();
]b4_locations_if([dnl
b4_location_type yyloc = new b4_location_type (yylexer.getStartPos (),
yylexer.getEndPos ());])[
/* NOTE(review): yyerrstatus_ is cleared before every token handed to
 * push_parse. Confirm this does not cut error recovery short. */
this.yyerrstatus_ = 0;
]b4_locations_if([status = push_parse(token,lval,yyloc);],[
status = push_parse(token,lval);])[
} while (status == YYMORE);
/* Any final status other than YYACCEPT is reported as failure. */
return (status == YYACCEPT);
}
]])[
// Generate an error message.
private String yysyntax_error (int yystate, int tok)
@@ -849,6 +1014,7 @@ b4_dollar_popdef])[]dnl
]b4_integral_parser_table_define([rline], [b4_rline],
[[YYRLINE[YYN] -- Source line where rule number YYN was defined.]])[
// Report on the debug stream that the rule yyrule is going to be reduced.
private void yy_reduce_print (int yyrule, YYStack yystack)
{