mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-09 12:23:04 +00:00
multistart: use b4_accept instead of action post-processing
For each start symbol, generate a parsing function with a richer
return value than the usual one of yyparse. Reserve a place for the
returned semantic value, in order to avoid having to pass a pointer as
argument to "return" that value. This also makes the call to the
parsing function independent of whether a given start-symbol is typed.
For instance, if the grammar file contains:
%type <int> expression
%start input expression
(so "input" is valueless) we get
typedef struct
{
int yystatus;
} yyparse_input_t;
yyparse_input_t yyparse_input (void);
typedef struct
{
int yyvalue;
int yystatus;
} yyparse_expression_t;
yyparse_expression_t yyparse_expression (void);
This commit also changes the implementation of the parser termination:
when there are multiple start symbols, it is the initial rules that
explicitly YYACCEPT. They do that after having exported the
start-symbol's value (if it is typed):
switch (yyn)
{
case 1: /* $accept: YY_EXPRESSION expression $end */
{ ((*yyvalue).TOK_expression) = (yyvsp[-1].TOK_expression); YYACCEPT; }
break;
case 2: /* $accept: YY_INPUT input $end */
{ YYACCEPT; }
break;
I have tried several ways to deal with termination, and this is the
one that appears best to me. It is also the most natural.
* src/scan-code.h, src/scan-code.l (obstack_for_actions): New.
* src/reader.c (grammar_rule_check_and_complete): Generate the actions
of the rules for each start symbol.
* data/skeletons/bison.m4 (b4_symbol_slot): New, with safer semantics
than type and type_tag.
* data/skeletons/yacc.c (b4_accept): New.
Generates the body of the action of the start rules.
(_b4_declare_sub_yyparse): For each start symbol define a dedicated
return type for its parsing function.
Adjust the declaration of its parsing function.
(_b4_define_sub_yyparse): Adjust the definition of the function.
* examples/c/lexcalc/parse.y: Check the case of valueless symbols.
* examples/c/lexcalc/lexcalc.test: Check start symbols.
This commit is contained in:
4
TODO
4
TODO
@@ -240,6 +240,10 @@ functions.
|
||||
states/nstates, rules/nrules, ..., ritem/nritems
|
||||
Fix the latter.
|
||||
|
||||
*** m4: slot, type, type_tag
|
||||
The meaning of type_tag varies depending on api.value.type. We should avoid
|
||||
that and use clear definitions with stable semantics.
|
||||
|
||||
* D programming language
|
||||
There are a number of features that are missing, here sorted in _suggested_
|
||||
order of implementation.
|
||||
|
||||
@@ -142,11 +142,17 @@ The macro `b4_symbol(NUM, FIELD)` gives access to the following FIELDS:
|
||||
When api.value.type=union, the generated name for the union member.
|
||||
yytype_INT etc. for symbols that has_id, otherwise yytype_1 etc.
|
||||
|
||||
- `type`
|
||||
- `type`: string
|
||||
If it has a semantic value, its type tag, or, if variants are used,
|
||||
its type.
|
||||
In the case of api.value.type=union, type is the real type (e.g. int).
|
||||
|
||||
- `slot`: string
|
||||
If it has a semantic value, the name of the union member (i.e., bounces to
|
||||
either `type_tag` or `type`). It would be better to fix our mess and
|
||||
always use `type` for the true type of the member, and `type_tag` for the
|
||||
name of the union member.
|
||||
|
||||
- `has_printer`: 0, 1
|
||||
- `printer`: string
|
||||
- `printer_file`: string
|
||||
|
||||
@@ -465,6 +465,19 @@ m4_case([$1],
|
||||
# but are S_YYEMPTY and symbol_kind::S_YYEMPTY in C++.
|
||||
m4_copy([b4_symbol_kind_base], [b4_symbol_kind])
|
||||
|
||||
|
||||
# b4_symbol_slot(NUM)
|
||||
# -------------------
|
||||
# The name of the union member that contains the value of these symbols.
|
||||
# Currently, we are messy: this should actually be type_tag, but type_tag
|
||||
# has several meanings.
|
||||
m4_define([b4_symbol_slot],
|
||||
[m4_case(b4_percent_define_get([[api.value.type]]),
|
||||
[union], [b4_symbol([$1], [type_tag])],
|
||||
[variant], [b4_symbol([$1], [type_tag])],
|
||||
[b4_symbol([$1], [type])])])
|
||||
|
||||
|
||||
# b4_symbol(NUM, FIELD)
|
||||
# ---------------------
|
||||
# Fetch FIELD of symbol #NUM (or "orig NUM"). Fail if undefined.
|
||||
@@ -475,6 +488,7 @@ m4_define([b4_symbol],
|
||||
[id], [b4_symbol_token_kind([$1])],
|
||||
[kind_base], [b4_symbol_kind_base([$1])],
|
||||
[kind], [b4_symbol_kind([$1])],
|
||||
[slot], [b4_symbol_slot([$1])],
|
||||
[_b4_symbol($@)])])
|
||||
|
||||
|
||||
|
||||
@@ -116,6 +116,16 @@ m4_ifset([b4_parse_param], [b4_args(b4_parse_param), ])])
|
||||
## ----------------- ##
|
||||
|
||||
|
||||
# b4_accept([SYMBOL-NUM])
|
||||
# -----------------------
|
||||
# Used in actions of the rules of accept, the initial symbol, to call
|
||||
# YYACCEPT. If SYMBOL-NUM is specified, run "yyvalue->SLOT = $2;"
|
||||
# before, using the slot of SYMBOL-NUM.
|
||||
m4_define([b4_accept],
|
||||
[m4_ifval([$1],
|
||||
[b4_symbol_value((*yyvalue), [$1]) = b4_rhs_value(2, 1, [$1]); ])YYACCEPT])
|
||||
|
||||
|
||||
# b4_lhs_value(SYMBOL-NUM, [TYPE])
|
||||
# --------------------------------
|
||||
# See README.
|
||||
@@ -155,21 +165,48 @@ m4_define([b4_rhs_location],
|
||||
## Declarations. ##
|
||||
## -------------- ##
|
||||
|
||||
# _b4_declare_sub_yyparse(START-SYMBOL-NUM)
|
||||
# -----------------------------------
|
||||
# _b4_declare_sub_yyparse(START-SYMBOL-NUM, SWITCHING-TOKEN-SYMBOL-NUM)
|
||||
# ---------------------------------------------------------------------
|
||||
# Define the return type of the parsing function for START-SYMBOL-NUM, and
|
||||
# declare its parsing function.
|
||||
m4_define([_b4_declare_sub_yyparse],
|
||||
[[int ]b4_prefix[parse_]_b4_symbol($1, id)[ (]m4_ifset([b4_parse_param], [b4_formals(b4_parse_param)], [void])[);]])
|
||||
[[
|
||||
// Return type when parsing one ]_b4_symbol($1, tag)[.
|
||||
typedef struct
|
||||
{]b4_symbol_if([$1], [has_type], [[
|
||||
]_b4_symbol($1, type)[ yyvalue;]])[
|
||||
int yystatus;
|
||||
} ]b4_prefix[parse_]_b4_symbol($1, id)[_t;
|
||||
|
||||
// Parse one ]_b4_symbol($1, tag)[.
|
||||
]b4_prefix[parse_]_b4_symbol($1, id)[_t ]b4_prefix[parse_]_b4_symbol($1, id)[ (]m4_ifset([b4_parse_param], [b4_formals(b4_parse_param)], [void])[);
|
||||
]])
|
||||
|
||||
|
||||
# _b4_first_switching_token
|
||||
# -------------------------
|
||||
m4_define([b4_first], [$1])
|
||||
m4_define([b4_second], [$2])
|
||||
m4_define([_b4_first_switching_token],
|
||||
[b4_second(b4_first(b4_start_symbols))])
|
||||
|
||||
|
||||
# _b4_define_sub_yyparse(START-SYMBOL-NUM, SWITCHING-TOKEN-SYMBOL-NUM)
|
||||
# --------------------------------------------------------------------
|
||||
# Define the parsing function for START-SYMBOL-NUM.
|
||||
m4_define([_b4_define_sub_yyparse],
|
||||
[[int
|
||||
[[
|
||||
]b4_prefix[parse_]_b4_symbol($1, id)[_t
|
||||
yyparse_]_b4_symbol($1, id)[ (]m4_ifset([b4_parse_param], [b4_formals(b4_parse_param)], [void])[)
|
||||
{
|
||||
return yyparse_impl (]b4_symbol($2, id)[]m4_ifset([b4_parse_param],
|
||||
[[, ]b4_args(b4_parse_param)])[);
|
||||
}]])
|
||||
]b4_prefix[parse_]_b4_symbol($1, id)[_t yyres;
|
||||
YYSTYPE yyvalue;
|
||||
yyres.yystatus = yy_parse_impl (]b4_symbol($2, id)[, &yyvalue]m4_ifset([b4_parse_param],
|
||||
[[, ]b4_args(b4_parse_param)])[);]b4_symbol_if([$1], [has_type], [[
|
||||
yyres.yyvalue = yyvalue.]b4_symbol($1, slot)[;]])[
|
||||
return yyres;
|
||||
}
|
||||
]])
|
||||
|
||||
|
||||
# b4_declare_scanner_communication_variables
|
||||
@@ -179,8 +216,8 @@ yyparse_]_b4_symbol($1, id)[ (]m4_ifset([b4_parse_param], [b4_formals(b4_parse_p
|
||||
m4_define([b4_declare_scanner_communication_variables], [[
|
||||
]m4_ifdef([b4_start_symbols], [],
|
||||
[[/* Lookahead token kind. */
|
||||
int yychar;]])[
|
||||
|
||||
int yychar;
|
||||
]])[
|
||||
]b4_pure_if([[
|
||||
/* The semantic value of the lookahead symbol. */
|
||||
/* Default value used for initialization, for pacifying older GCCs
|
||||
@@ -1560,20 +1597,19 @@ yypush_parse (yypstate *yyps]b4_pure_if([[,
|
||||
]m4_ifdef([b4_start_symbols],
|
||||
[[
|
||||
static int
|
||||
yyparse_impl (int yychar]m4_ifset([b4_parse_param], [, b4_formals(b4_parse_param)])[);
|
||||
yy_parse_impl (int yychar, YYSTYPE *yyvalue]m4_ifset([b4_parse_param], [, b4_formals(b4_parse_param)])[);
|
||||
|
||||
]m4_map([_b4_define_sub_yyparse], m4_defn([b4_start_symbols]))[
|
||||
|
||||
int
|
||||
yyparse (]m4_ifset([b4_parse_param], [b4_formals(b4_parse_param)], [void])[)
|
||||
{
|
||||
/* ]b4_symbol(-2, id)[ causes a token to be read. */
|
||||
return yyparse_impl (]b4_symbol(-2, id)[]m4_ifset([b4_parse_param],
|
||||
return yy_parse_impl (]b4_symbol(_b4_first_switching_token, id)[, YY_NULLPTR]m4_ifset([b4_parse_param],
|
||||
[[, ]b4_args(b4_parse_param)])[);
|
||||
}
|
||||
|
||||
static int
|
||||
yyparse_impl (int yychar]m4_ifset([b4_parse_param], [, b4_formals(b4_parse_param)])[)]],
|
||||
yy_parse_impl (int yychar, YYSTYPE *yyvalue]m4_ifset([b4_parse_param], [, b4_formals(b4_parse_param)])[)]],
|
||||
[[int
|
||||
yyparse (]m4_ifset([b4_parse_param], [b4_formals(b4_parse_param)], [void])[)]])])[
|
||||
{]b4_pure_if([b4_declare_scanner_communication_variables
|
||||
@@ -1812,9 +1848,7 @@ yyread_pushed_token:]])[
|
||||
{
|
||||
if (yytable_value_is_error (yyn))
|
||||
goto yyerrlab;
|
||||
yyn = -yyn;]m4_ifdef([b4_start_symbols], [[
|
||||
if (yyr1[yyn] == YYNTOKENS)
|
||||
YYACCEPT;]])[]b4_lac_if([[
|
||||
yyn = -yyn;]b4_lac_if([[
|
||||
YY_LAC_ESTABLISH;]])[
|
||||
goto yyreduce;
|
||||
}
|
||||
@@ -1844,9 +1878,7 @@ yyread_pushed_token:]])[
|
||||
yydefault:
|
||||
yyn = yydefact[yystate];
|
||||
if (yyn == 0)
|
||||
goto yyerrlab;]m4_ifdef([b4_start_symbols], [[
|
||||
else if (yyr1[yyn] == YYNTOKENS)
|
||||
YYACCEPT;]])[
|
||||
goto yyerrlab;
|
||||
goto yyreduce;
|
||||
|
||||
|
||||
|
||||
@@ -46,15 +46,15 @@ EOF
|
||||
run 1 'err: 1.1-11: error: division by zero'
|
||||
|
||||
|
||||
# Multistart: parse "line" instead of "input".
|
||||
# Multistart: parse "expression" instead of "input".
|
||||
cat >input <<EOF
|
||||
1+2*3
|
||||
EOF
|
||||
run 0 7 -l
|
||||
run 0 'expression: 7' -e
|
||||
|
||||
cat >input <<EOF
|
||||
1
|
||||
2
|
||||
EOF
|
||||
run 1 '1
|
||||
err: 2.1: syntax error, unexpected number, expecting end of file' -l
|
||||
run 1 'expression: failure
|
||||
err: 2.1: syntax error, unexpected number, expecting end of file' -e
|
||||
|
||||
@@ -76,10 +76,10 @@
|
||||
;
|
||||
|
||||
%token <int> NUM "number"
|
||||
%type <int> exp
|
||||
%type <int> exp expression line
|
||||
%printer { fprintf (yyo, "%d", $$); } <int>
|
||||
|
||||
%start input line
|
||||
%start input expression
|
||||
|
||||
// Precedence (from lowest to highest) and associativity.
|
||||
%left "+" "-"
|
||||
@@ -93,8 +93,12 @@ input:
|
||||
;
|
||||
|
||||
line:
|
||||
exp EOL { printf ("%d\n", $exp); }
|
||||
| error EOL { yyerrok; }
|
||||
exp EOL { $$ = $exp; printf ("%d\n", $$); }
|
||||
| error EOL { $$ = 0; yyerrok; }
|
||||
;
|
||||
|
||||
expression:
|
||||
exp EOL { $$ = $exp; }
|
||||
;
|
||||
|
||||
exp:
|
||||
@@ -129,16 +133,22 @@ int main (int argc, const char *argv[])
|
||||
int nerrs = 0;
|
||||
// Possibly enable parser runtime debugging.
|
||||
yydebug = !!getenv ("YYDEBUG");
|
||||
int parse_expression_p = 0;
|
||||
// Enable parse traces on option -p.
|
||||
int parse_line_p = 0;
|
||||
for (int i = 0; i < argc; ++i)
|
||||
if (1 < argc && strcmp (argv[1], "-p") == 0)
|
||||
yydebug = 1;
|
||||
else if (strcmp (argv[i], "-l") == 0)
|
||||
parse_line_p = 1;
|
||||
else if (strcmp (argv[i], "-e") == 0)
|
||||
parse_expression_p = 1;
|
||||
|
||||
if (parse_line_p)
|
||||
yyparse_line (&nerrs);
|
||||
if (parse_expression_p)
|
||||
{
|
||||
yyparse_expression_t res = yyparse_expression (&nerrs);
|
||||
if (res.yystatus == 0)
|
||||
printf ("expression: %d\n", res.yyvalue);
|
||||
else
|
||||
printf ("expression: failure\n");
|
||||
}
|
||||
else
|
||||
yyparse_input (&nerrs);
|
||||
// Exit on failure if there were errors.
|
||||
|
||||
@@ -90,6 +90,7 @@ main (int argc, char *argv[])
|
||||
uniqstrs_new ();
|
||||
muscle_init ();
|
||||
complain_init ();
|
||||
code_scanner_init ();
|
||||
|
||||
getargs (argc, argv);
|
||||
|
||||
|
||||
31
src/reader.c
31
src/reader.c
@@ -267,6 +267,8 @@ static void
|
||||
grammar_rule_check_and_complete (symbol_list *r)
|
||||
{
|
||||
const symbol *lhs = r->content.sym;
|
||||
const symbol *first_rhs = r->next->content.sym;
|
||||
|
||||
/* Type check.
|
||||
|
||||
If there is an action, then there is nothing we can do: the user
|
||||
@@ -276,7 +278,6 @@ grammar_rule_check_and_complete (symbol_list *r)
|
||||
value can't be used. */
|
||||
if (!r->action_props.code && lhs->content->type_name)
|
||||
{
|
||||
symbol *first_rhs = r->next->content.sym;
|
||||
/* If $$ is being set in default way, report if any type mismatch. */
|
||||
if (first_rhs)
|
||||
{
|
||||
@@ -312,6 +313,29 @@ grammar_rule_check_and_complete (symbol_list *r)
|
||||
_("empty rule for typed nonterminal, and no action"));
|
||||
}
|
||||
|
||||
/* For each start symbol, build the action of its start rule. Use
|
||||
the same obstack as the one used by scan-code, which is in charge
|
||||
of actions. */
|
||||
const bool multistart = start_symbols && start_symbols->next;
|
||||
if (multistart && lhs == acceptsymbol)
|
||||
{
|
||||
const symbol *start = r->next->next->content.sym;
|
||||
if (start->content->type_name)
|
||||
obstack_printf (obstack_for_actions,
|
||||
"{ ]b4_accept([orig %d])[; }",
|
||||
start->content->number);
|
||||
else
|
||||
obstack_printf (obstack_for_actions,
|
||||
"{ ]b4_accept[; }");
|
||||
code_props_rule_action_init (&r->action_props,
|
||||
obstack_finish0 (obstack_for_actions),
|
||||
r->rhs_loc, r,
|
||||
/* name */ NULL,
|
||||
/* type */ NULL,
|
||||
/* is_predicate */ false);
|
||||
}
|
||||
|
||||
|
||||
/* Check that symbol values that should be used are in fact used.
|
||||
Don't check the generated start rules. It has no action, so some
|
||||
rhs symbols may appear unused, but the parsing algorithm ensures
|
||||
@@ -772,6 +796,11 @@ create_start_rule (symbol *swtok, symbol *start)
|
||||
symbol_list *p = initial_rule;
|
||||
if (swtok)
|
||||
{
|
||||
// Cannot create the action now, as the symbols have not yet
|
||||
// been assigned their number (by symbol_pack), which we need to
|
||||
// know the type name. So the action is created in
|
||||
// grammar_rule_check_and_complete, which is run after
|
||||
// symbol_pack.
|
||||
p->next = symbol_list_sym_new (swtok, empty_loc);
|
||||
p = p->next;
|
||||
}
|
||||
|
||||
@@ -34,6 +34,11 @@ struct symbol_list;
|
||||
*/
|
||||
extern int max_left_semantic_context;
|
||||
|
||||
/**
|
||||
* The obstack used to store the translated actions.
|
||||
*/
|
||||
extern struct obstack *obstack_for_actions;
|
||||
|
||||
/**
|
||||
* A code passage captured from the grammar file and possibly translated,
|
||||
* and/or properties associated with such a code passage. Don't break
|
||||
@@ -191,6 +196,8 @@ void code_props_translate_code (code_props *self);
|
||||
*/
|
||||
void code_scanner_last_string_free (void);
|
||||
|
||||
void code_scanner_init (void);
|
||||
|
||||
/**
|
||||
* \pre
|
||||
* - None.
|
||||
|
||||
@@ -40,6 +40,8 @@
|
||||
#undef code_wrap
|
||||
#define code_wrap() 1
|
||||
|
||||
struct obstack *obstack_for_actions = &obstack_for_string;
|
||||
|
||||
/* The current calling start condition: SC_RULE_ACTION or
|
||||
SC_SYMBOL_ACTION. */
|
||||
# define YY_DECL static char *code_lex (code_props *self, int sc_context)
|
||||
@@ -756,19 +758,10 @@ handle_action_at (symbol_list *rule, char *text, const location *at_loc)
|
||||
static char const *
|
||||
translate_action (code_props *self, int sc_context)
|
||||
{
|
||||
static bool initialized = false;
|
||||
if (!initialized)
|
||||
{
|
||||
obstack_init (&obstack_for_string);
|
||||
yy_flex_debug = 0;
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
loc->start = loc->end = self->location.start;
|
||||
yy_switch_to_buffer (yy_scan_string (self->code));
|
||||
char *res = code_lex (self, sc_context);
|
||||
yy_delete_buffer (YY_CURRENT_BUFFER);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -845,6 +838,13 @@ code_scanner_last_string_free (void)
|
||||
STRING_FREE ();
|
||||
}
|
||||
|
||||
void
|
||||
code_scanner_init (void)
|
||||
{
|
||||
obstack_init (&obstack_for_string);
|
||||
yy_flex_debug = 0;
|
||||
}
|
||||
|
||||
void
|
||||
code_scanner_free (void)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user