multistart: use b4_accept instead of action post-processing

For each start symbol, generate a parsing function with a richer
return value than the usual one of yyparse.  Reserve a place for the
returned semantic value, in order to avoid having to pass a pointer as
argument to "return" that value.  This also makes the call to the
parsing function independent of whether a given start-symbol is typed.

For instance, if the grammar file contains:

    %type <int> expression
    %start input expression

(so "input" is valueless), we get:

    typedef struct
    {
      int yystatus;
    } yyparse_input_t;

    yyparse_input_t yyparse_input (void);

    typedef struct
    {
      int yyvalue;
      int yystatus;
    } yyparse_expression_t;

    yyparse_expression_t yyparse_expression (void);

This commit also changes the implementation of the parser termination:
when there are multiple start symbols, it is the initial rules that
explicitly YYACCEPT.  They do that after having exported the
start-symbol's value (if it is typed):

  switch (yyn)
    {
  case 1: /* $accept: YY_EXPRESSION expression $end  */
  { ((*yyvalue).TOK_expression) = (yyvsp[-1].TOK_expression); YYACCEPT; }
    break;

  case 2: /* $accept: YY_INPUT input $end  */
  { YYACCEPT; }
    break;

I have tried several ways to deal with termination, and this is the
one that seems best to me.  It is also the most natural.

* src/scan-code.h, src/scan-code.l (obstack_for_actions): New.
* src/reader.c (grammar_rule_check_and_complete): Generate the actions
of the rules for each start symbol.

* data/skeletons/bison.m4 (b4_symbol_slot): New, with safer semantics
than type and type_tag.
* data/skeletons/yacc.c (b4_accept): New.
Generates the body of the action of the start rules.
(_b4_declare_sub_yyparse): For each start symbol define a dedicated
return type for its parsing function.
Adjust the declaration of its parsing function.
(_b4_define_sub_yyparse): Adjust the definition of the function.

* examples/c/lexcalc/parse.y: Check the case of valueless symbols.
* examples/c/lexcalc/lexcalc.test: Check start symbols.
This commit is contained in:
Akim Demaille
2020-07-05 08:00:20 +02:00
parent a6805bb8d9
commit d9cf99b6a5
10 changed files with 146 additions and 43 deletions

View File

@@ -90,6 +90,7 @@ main (int argc, char *argv[])
uniqstrs_new ();
muscle_init ();
complain_init ();
code_scanner_init ();
getargs (argc, argv);

View File

@@ -267,6 +267,8 @@ static void
grammar_rule_check_and_complete (symbol_list *r)
{
const symbol *lhs = r->content.sym;
const symbol *first_rhs = r->next->content.sym;
/* Type check.
If there is an action, then there is nothing we can do: the user
@@ -276,7 +278,6 @@ grammar_rule_check_and_complete (symbol_list *r)
value can't be used. */
if (!r->action_props.code && lhs->content->type_name)
{
symbol *first_rhs = r->next->content.sym;
/* If $$ is being set in default way, report if any type mismatch. */
if (first_rhs)
{
@@ -312,6 +313,29 @@ grammar_rule_check_and_complete (symbol_list *r)
_("empty rule for typed nonterminal, and no action"));
}
/* For each start symbol, build the action of its start rule. Use
the same obstack as the one used by scan-code, which is in charge
of actions. */
const bool multistart = start_symbols && start_symbols->next;
if (multistart && lhs == acceptsymbol)
{
const symbol *start = r->next->next->content.sym;
if (start->content->type_name)
obstack_printf (obstack_for_actions,
"{ ]b4_accept([orig %d])[; }",
start->content->number);
else
obstack_printf (obstack_for_actions,
"{ ]b4_accept[; }");
code_props_rule_action_init (&r->action_props,
obstack_finish0 (obstack_for_actions),
r->rhs_loc, r,
/* name */ NULL,
/* type */ NULL,
/* is_predicate */ false);
}
/* Check that symbol values that should be used are in fact used.
Don't check the generated start rules. It has no action, so some
rhs symbols may appear unused, but the parsing algorithm ensures
@@ -772,6 +796,11 @@ create_start_rule (symbol *swtok, symbol *start)
symbol_list *p = initial_rule;
if (swtok)
{
// Cannot create the action now, as the symbols have not yet
// been assigned their number (by symbol_pack), which we need to
// know the type name. So the action is created in
// grammar_rule_check_and_complete, which is run after
// symbol_pack.
p->next = symbol_list_sym_new (swtok, empty_loc);
p = p->next;
}

View File

@@ -34,6 +34,11 @@ struct symbol_list;
*/
extern int max_left_semantic_context;
/**
* The obstack used to store the translated actions.
*/
extern struct obstack *obstack_for_actions;
/**
* A code passage captured from the grammar file and possibly translated,
* and/or properties associated with such a code passage. Don't break
@@ -191,6 +196,8 @@ void code_props_translate_code (code_props *self);
*/
void code_scanner_last_string_free (void);
void code_scanner_init (void);
/**
* \pre
* - None.

View File

@@ -40,6 +40,8 @@
#undef code_wrap
#define code_wrap() 1
struct obstack *obstack_for_actions = &obstack_for_string;
/* The current calling start condition: SC_RULE_ACTION or
SC_SYMBOL_ACTION. */
# define YY_DECL static char *code_lex (code_props *self, int sc_context)
@@ -756,19 +758,10 @@ handle_action_at (symbol_list *rule, char *text, const location *at_loc)
static char const *
translate_action (code_props *self, int sc_context)
{
static bool initialized = false;
if (!initialized)
{
obstack_init (&obstack_for_string);
yy_flex_debug = 0;
initialized = true;
}
loc->start = loc->end = self->location.start;
yy_switch_to_buffer (yy_scan_string (self->code));
char *res = code_lex (self, sc_context);
yy_delete_buffer (YY_CURRENT_BUFFER);
return res;
}
@@ -845,6 +838,13 @@ code_scanner_last_string_free (void)
STRING_FREE ();
}
/* Set up the code scanner: initialize obstack_for_string (the obstack
   that obstack_for_actions points to) and turn off the scanner's
   debug traces.  */
void
code_scanner_init (void)
{
/* Prepare the obstack in which translated code is accumulated.  */
obstack_init (&obstack_for_string);
/* Keep the generated scanner from emitting debug traces.  */
yy_flex_debug = 0;
}
void
code_scanner_free (void)
{