multistart: turn start symbols into rules on $accept

Now that the parser can read several start symbols, let's process
them, and create the corresponding rules.

* src/parse-gram.y (grammar_declaration): Accept a list of start symbols.
* src/reader.h, src/reader.c (grammar_start_symbol_set): Rename as...
(grammar_start_symbols_set): this.

* src/reader.h, src/reader.c (start_flag): Replace with...
(start_symbols): this.
* src/reader.c (grammar_start_symbols_set): Build a list of start
symbols.
(switching_token, create_start_rules): New.
(check_and_convert_grammar): Use them to turn the list of start
symbols into a set of rules.
* src/reduce.c (nonterminals_reduce): Don't complain about $accept,
it's an internal detail.
(reduce_grammar): Complain about all the start symbols that don't
derive sentences.

* src/symtab.c (startsymbol, startsymbol_loc): Remove, replaced by
start_symbols.
(symbols_pack): Move the check about the start symbols
to...
* src/reader.c (check_start_symbols): here.
Adjust to multiple start symbols.
* tests/reduce.at (Empty Language): Generalize into...
(Bad start symbols): this.
This commit is contained in:
Akim Demaille
2020-02-20 18:11:29 +01:00
parent db68f61595
commit 8eaddf326b
8 changed files with 194 additions and 54 deletions

11
TODO
View File

@@ -657,6 +657,17 @@ happen with yy_start: stmt | expr). Then adjust the skeletons so that this
initial token (YY_START_STMT, YY_START_EXPR) be shifted first in the initial token (YY_START_STMT, YY_START_EXPR) be shifted first in the
corresponding parse function. corresponding parse function.
*** Number of useless symbols
AT_TEST(
[[%start exp;
exp: exp;]],
[[input.y: warning: 2 nonterminals useless in grammar [-Wother]
input.y: warning: 2 rules useless in grammar [-Wother]
input.y:2.8-10: error: start symbol exp does not derive any sentence]])
We should say "1 nonterminal": the other one is $accept, which should not
participate in the count.
** %include ** %include
This is a popular demand. We already made many changes in the parser that This is a popular demand. We already made many changes in the parser that
should make this reasonably easy to implement. should make this reasonably easy to implement.

View File

@@ -381,9 +381,9 @@ params:
grammar_declaration: grammar_declaration:
symbol_declaration symbol_declaration
| "%start" symbol | "%start" symbols.1
{ {
grammar_start_symbol_set ($2, @2); grammar_start_symbols_set ($2);
} }
| code_props_type "{...}" generic_symlist | code_props_type "{...}" generic_symlist
{ {

View File

@@ -21,7 +21,9 @@
#include <config.h> #include <config.h>
#include "system.h" #include "system.h"
#include <c-ctype.h>
#include <quote.h> #include <quote.h>
#include <vasnprintf.h>
#include "complain.h" #include "complain.h"
#include "conflicts.h" #include "conflicts.h"
@@ -40,7 +42,7 @@ static void prepare_percent_define_front_end_variables (void);
static void check_and_convert_grammar (void); static void check_and_convert_grammar (void);
static symbol_list *grammar = NULL; static symbol_list *grammar = NULL;
static bool start_flag = false; symbol_list *start_symbols = NULL;
merger_list *merge_functions; merger_list *merge_functions;
/* Was %union seen? */ /* Was %union seen? */
@@ -54,16 +56,9 @@ bool default_prec = true;
`-----------------------*/ `-----------------------*/
void void
grammar_start_symbol_set (symbol *sym, location loc) grammar_start_symbols_set (symbol_list *syms)
{ {
if (start_flag) start_symbols = symbol_list_append (start_symbols, syms);
complain (&loc, complaint, _("multiple %s declarations"), "%start");
else
{
start_flag = true;
startsymbol = sym;
startsymbol_loc = loc;
}
} }
@@ -791,6 +786,95 @@ create_start_rule (symbol *swtok, symbol *start)
grammar = initial_rule; grammar = initial_rule;
} }
/* Return the token named "YY_PARSE_foo" that selects start symbol
   "foo", fetching it from the symbol table or creating it.  When
   START has an alias, the alias' tag is used in place of START's own
   tag.

   The plain "YY_FOO" spelling is deliberately avoided: (i) it could
   clash with some of our own symbols (e.g., cast => YY_CAST), and
   (ii) upcasing could make terminal FOO and nonterminal foo collide.  */
symbol *
switching_token (const symbol *start)
{
  const char *tag = start->alias ? start->alias->tag : start->tag;

  /* Format into a stack buffer; asnprintf may hand back a different
     pointer, which is then ours to free (see below).  */
  char stack_buf[100];
  size_t size = sizeof stack_buf;
  char *tok_name = asnprintf (stack_buf, &size, "YY_PARSE_%s", tag);
  if (!tok_name)
    xalloc_die ();

  // Setting the location ensures deterministic symbol numbers.
  symbol *tok = symbol_get (tok_name, start->location);

  /* Release the name only when it does not live in the stack buffer.  */
  if (tok_name != stack_buf)
    free (tok_name);

  symbol_class_set (tok, token_sym, start->location, false);
  return tok;
}
/* Create one start rule per symbol of START_SYMS, each introduced by
   its switching token.  The list is walked recursively so that the
   last symbol is handled first: rules are inserted at the top of the
   grammar, hence creating them in reverse order makes the resulting
   rules follow the order in which the symbols were given to %start.  */
static void
create_multiple_start_rules (symbol_list *start_syms)
{
  if (!start_syms)
    return;

  /* Tail first, so that this symbol's rule ends up above the others.  */
  create_multiple_start_rules (start_syms->next);

  assert (start_syms->content_type == SYMLIST_SYMBOL);
  symbol *sym = start_syms->content.sym;
  symbol *selector = switching_token (sym);
  create_start_rule (selector, sym);
}
/* Insert the initial rule(s) on $accept.

   If no start symbol was declared, default to the one returned by
   find_start_symbol.  With a single start symbol "foo", no switching
   token is used (create_start_rule is passed NULL).  With several
   start symbols, each "foo" gets its own rule, introduced by its
   switching token "YY_PARSE_foo": "$accept: YY_PARSE_foo foo $end".  */
static void
create_start_rules (void)
{
  if (!start_symbols)
    {
      symbol *dflt = find_start_symbol ();
      start_symbols = symbol_list_sym_new (dflt, dflt->location);
    }

  /* More than one element in the list?  */
  if (start_symbols->next)
    create_multiple_start_rules (start_symbols);
  else
    create_start_rule (NULL, start_symbols->content.sym);
}
/* Sanity checks on the start symbols: none may be of unknown class,
   and none may be a token.  Exit with EXIT_FAILURE afterwards when
   the complaint status is status_complaint.  */
static void
check_start_symbols (void)
{
  symbol_list *node = start_symbols;
  while (node)
    {
      const symbol *sym = node->content.sym;
      if (sym->content->class == unknown_sym)
        {
          complain (&sym->location, complaint,
                    _("the start symbol %s is undefined"),
                    sym->tag);
          // I claim this situation is unreachable.  This is caught
          // before, and we get "symbol 'foo' is used, but is not
          // defined as a token and has no rules".
          abort ();
        }
      else if (sym->content->class == token_sym)
        complain (&sym->location, complaint,
                  _("the start symbol %s is a token"),
                  sym->tag);
      node = node->next;
    }

  if (complaint_status == status_complaint)
    exit (EXIT_FAILURE);
}
/*-------------------------------------------------------------. /*-------------------------------------------------------------.
| Check the grammar that has just been read, and convert it to | | Check the grammar that has just been read, and convert it to |
| internal form. | | internal form. |
@@ -818,19 +902,12 @@ check_and_convert_grammar (void)
} }
} }
/* Insert the initial rule(s). */
create_start_rules ();
/* Report any undefined symbols and consider them nonterminals. */ /* Report any undefined symbols and consider them nonterminals. */
symbols_check_defined (); symbols_check_defined ();
/* Find the start symbol if no %start. */
if (!start_flag)
{
symbol *start = find_start_symbol ();
grammar_start_symbol_set (start, start->location);
}
/* Insert the initial rule. */
create_start_rule (NULL, startsymbol);
if (SYMBOL_NUMBER_MAXIMUM - nnterms < ntokens) if (SYMBOL_NUMBER_MAXIMUM - nnterms < ntokens)
complain (NULL, fatal, "too many symbols in input grammar (limit is %d)", complain (NULL, fatal, "too many symbols in input grammar (limit is %d)",
SYMBOL_NUMBER_MAXIMUM); SYMBOL_NUMBER_MAXIMUM);
@@ -840,6 +917,8 @@ check_and_convert_grammar (void)
/* Assign the symbols their symbol numbers. */ /* Assign the symbols their symbol numbers. */
symbols_pack (); symbols_pack ();
check_start_symbols ();
/* Scan rule actions after invoking symbol_check_alias_consistency /* Scan rule actions after invoking symbol_check_alias_consistency
(in symbols_pack above) so that token types are set correctly (in symbols_pack above) so that token types are set correctly
before the rule action type checking. before the rule action type checking.

View File

@@ -38,7 +38,18 @@ typedef struct merger_list
void free_merger_functions (void); void free_merger_functions (void);
extern merger_list *merge_functions; extern merger_list *merge_functions;
void grammar_start_symbol_set (symbol *sym, location loc); /* List of the start symbols. */
extern symbol_list *start_symbols;
/* Fetch (or create) a token "YY_PARSE_foo" for start symbol "foo".
We don't use the simple "YY_FOO" because (i) we might get clashes
with some of our symbols (e.g., cast => YY_CAST), and (ii) upcasing
introduces possible clashes between terminal FOO and nonterminal
foo. */
symbol *switching_token (const symbol *start);
void grammar_start_symbols_set (symbol_list *syms);
void grammar_current_rule_begin (symbol *lhs, location loc, void grammar_current_rule_begin (symbol *lhs, location loc,
named_ref *lhs_named_ref); named_ref *lhs_named_ref);

View File

@@ -275,7 +275,8 @@ nonterminals_reduce (void)
if (!bitset_test (V, i)) if (!bitset_test (V, i))
{ {
nterm_map[i - ntokens] = n++; nterm_map[i - ntokens] = n++;
if (symbols[i]->content->status != used) if (symbols[i]->content->status != used
&& symbols[i] != acceptsymbol)
complain (&symbols[i]->location, Wother, complain (&symbols[i]->location, Wother,
_("nonterminal useless in grammar: %s"), _("nonterminal useless in grammar: %s"),
symbols[i]->tag); symbols[i]->tag);
@@ -381,10 +382,18 @@ reduce_grammar (void)
{ {
reduce_print (); reduce_print ();
if (!bitset_test (N, acceptsymbol->content->number - ntokens)) // Check that start symbols have non-empty languages.
complain (&startsymbol_loc, fatal, bool failure = false;
_("start symbol %s does not derive any sentence"), for (symbol_list *list = start_symbols; list; list = list->next)
startsymbol->tag); if (!bitset_test (N, list->content.sym->content->number - ntokens))
{
failure = true;
complain (&list->sym_loc, complaint,
_("start symbol %s does not derive any sentence"),
list->content.sym->tag);
}
if (failure)
exit (EXIT_FAILURE);
/* First reduce the nonterminals, as they renumber themselves in the /* First reduce the nonterminals, as they renumber themselves in the
whole grammar. If you change the order, nonterms would be whole grammar. If you change the order, nonterms would be

View File

@@ -60,8 +60,6 @@ symbol *errtoken = NULL;
symbol *undeftoken = NULL; symbol *undeftoken = NULL;
symbol *eoftoken = NULL; symbol *eoftoken = NULL;
symbol *acceptsymbol = NULL; symbol *acceptsymbol = NULL;
symbol *startsymbol = NULL;
location startsymbol_loc;
/* Precedence relation graph. */ /* Precedence relation graph. */
static symgraph **prec_nodes; static symgraph **prec_nodes;
@@ -1146,15 +1144,6 @@ symbols_pack (void)
symbols_token_translations_init (); symbols_token_translations_init ();
if (startsymbol->content->class == unknown_sym)
complain (&startsymbol_loc, fatal,
_("the start symbol %s is undefined"),
startsymbol->tag);
else if (startsymbol->content->class == token_sym)
complain (&startsymbol_loc, fatal,
_("the start symbol %s is a token"),
startsymbol->tag);
// If some user tokens are internationalized, the internal ones // If some user tokens are internationalized, the internal ones
// should be too. // should be too.
if (has_translations ()) if (has_translations ())

View File

@@ -247,11 +247,6 @@ extern symbol *eoftoken;
$accept: start-symbol $end */ $accept: start-symbol $end */
extern symbol *acceptsymbol; extern symbol *acceptsymbol;
/** The user start symbol. */
extern symbol *startsymbol;
/** The location of the \c \%start declaration. */
extern location startsymbol_loc;
/** Whether a symbol declared with a type tag. */ /** Whether a symbol declared with a type tag. */
extern bool tag_seen; extern bool tag_seen;

View File

@@ -445,23 +445,69 @@ AT_CLEANUP
## ---------------- ## ## ------------------- ##
## Empty Language. ## ## Bad start symbols. ##
## ---------------- ## ## ------------------- ##
AT_SETUP([Empty Language]) AT_SETUP([Bad start symbols])
m4_pushdef([AT_TEST],
[
AT_DATA([[input.y]], AT_DATA([[input.y]],
[[%output "input.c" [%%
%% $1
exp: exp; ])
]])
AT_BISON_CHECK([[input.y]], 1, [], AT_BISON_CHECK([[input.y]], 1, [],
[$2
])
])
AT_TEST(
[[exp: exp;]],
[[input.y: warning: 2 nonterminals useless in grammar [-Wother] [[input.y: warning: 2 nonterminals useless in grammar [-Wother]
input.y: warning: 2 rules useless in grammar [-Wother] input.y: warning: 2 rules useless in grammar [-Wother]
input.y:3.1-3: fatal error: start symbol exp does not derive any sentence input.y:2.1-3: error: start symbol exp does not derive any sentence]])
]])
AT_TEST(
[[%start exp;
exp: exp;]],
[[input.y: warning: 2 nonterminals useless in grammar [-Wother]
input.y: warning: 2 rules useless in grammar [-Wother]
input.y:2.8-10: error: start symbol exp does not derive any sentence]])
AT_TEST(
[[%start exp stmt;
exp: exp;
stmt: "stmt"]],
[[input.y: warning: 1 nonterminal useless in grammar [-Wother]
input.y: warning: 2 rules useless in grammar [-Wother]
input.y:2.8-10: error: start symbol exp does not derive any sentence]])
AT_TEST(
[[%start exp stmt;
exp: exp;
stmt: stmt]],
[[input.y: warning: 3 nonterminals useless in grammar [-Wother]
input.y: warning: 4 rules useless in grammar [-Wother]
input.y:2.8-10: error: start symbol exp does not derive any sentence
input.y:2.12-15: error: start symbol stmt does not derive any sentence]])
AT_TEST(
[[%start exp;
stmt: stmt]],
[[input.y:2.8-10: warning: symbol 'exp' is used, but is not defined as a token and has no rules [-Wother]
input.y: warning: 3 nonterminals useless in grammar [-Wother]
input.y: warning: 2 rules useless in grammar [-Wother]
input.y:2.8-10: error: start symbol exp does not derive any sentence]])
AT_TEST(
[[%token FOO;
%start FOO;
stmt: FOO]],
[[input.y:2.8-10: error: the start symbol FOO is a token]])
m4_popdef([AT_TEST])
AT_CLEANUP AT_CLEANUP