mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-09 04:13:03 +00:00
multistart: turn start symbols into rules on $accept
Now that the parser can read several start symbols, let's process them, and create the corresponding rules. * src/parse-gram.y (grammar_declaration): Accept a list of start symbols. * src/reader.h, src/reader.c (grammar_start_symbol_set): Rename as... (grammar_start_symbols_set): this. * src/reader.h, src/reader.c (start_flag): Replace with... (start_symbols): this. * src/reader.c (grammar_start_symbols_set): Build a list of start symbols. (switching_token, create_start_rules): New. (check_and_convert_grammar): Use them to turn the list of start symbols into a set of rules. * src/reduce.c (nonterminals_reduce): Don't complain about $accept, it's an internal detail. (reduce_grammar): Complain about all the start symbols that don't derive sentences. * src/symtab.c (startsymbol, startsymbol_loc): Remove, replaced by start_symbols. (symbols_pack): Move the check about the start symbols to... * src/reader.c (check_start_symbols): here. Adjust to multiple start symbols. * tests/reduce.at (Empty Language): Generalize into... (Bad start symbols): this.
This commit is contained in:
11
TODO
11
TODO
@@ -657,6 +657,17 @@ happen with yy_start: stmt | expr). Then adjust the skeletons so that this
|
||||
initial token (YY_START_STMT, YY_START_EXPR) be shifted first in the
|
||||
corresponding parse function.
|
||||
|
||||
*** Number of useless symbols
|
||||
AT_TEST(
|
||||
[[%start exp;
|
||||
exp: exp;]],
|
||||
[[input.y: warning: 2 nonterminals useless in grammar [-Wother]
|
||||
input.y: warning: 2 rules useless in grammar [-Wother]
|
||||
input.y:2.8-10: error: start symbol exp does not derive any sentence]])
|
||||
|
||||
We should say "1 nonterminal": the other one is $accept, which should not
|
||||
participate in the count.
|
||||
|
||||
** %include
|
||||
This is a popular demand. We already made many changes in the parser that
|
||||
should make this reasonably easy to implement.
|
||||
|
||||
@@ -381,9 +381,9 @@ params:
|
||||
|
||||
grammar_declaration:
|
||||
symbol_declaration
|
||||
| "%start" symbol
|
||||
| "%start" symbols.1
|
||||
{
|
||||
grammar_start_symbol_set ($2, @2);
|
||||
grammar_start_symbols_set ($2);
|
||||
}
|
||||
| code_props_type "{...}" generic_symlist
|
||||
{
|
||||
|
||||
119
src/reader.c
119
src/reader.c
@@ -21,7 +21,9 @@
|
||||
#include <config.h>
|
||||
#include "system.h"
|
||||
|
||||
#include <c-ctype.h>
|
||||
#include <quote.h>
|
||||
#include <vasnprintf.h>
|
||||
|
||||
#include "complain.h"
|
||||
#include "conflicts.h"
|
||||
@@ -40,7 +42,7 @@ static void prepare_percent_define_front_end_variables (void);
|
||||
static void check_and_convert_grammar (void);
|
||||
|
||||
static symbol_list *grammar = NULL;
|
||||
static bool start_flag = false;
|
||||
symbol_list *start_symbols = NULL;
|
||||
merger_list *merge_functions;
|
||||
|
||||
/* Was %union seen? */
|
||||
@@ -54,16 +56,9 @@ bool default_prec = true;
|
||||
`-----------------------*/
|
||||
|
||||
void
|
||||
grammar_start_symbol_set (symbol *sym, location loc)
|
||||
grammar_start_symbols_set (symbol_list *syms)
|
||||
{
|
||||
if (start_flag)
|
||||
complain (&loc, complaint, _("multiple %s declarations"), "%start");
|
||||
else
|
||||
{
|
||||
start_flag = true;
|
||||
startsymbol = sym;
|
||||
startsymbol_loc = loc;
|
||||
}
|
||||
start_symbols = symbol_list_append (start_symbols, syms);
|
||||
}
|
||||
|
||||
|
||||
@@ -791,6 +786,95 @@ create_start_rule (symbol *swtok, symbol *start)
|
||||
grammar = initial_rule;
|
||||
}
|
||||
|
||||
/* Fetch (or create) a token "YY_PARSE_foo" for start symbol "foo".
|
||||
|
||||
We don't use the simple "YY_FOO" because (i) we might get clashes
|
||||
with some of our symbols (e.g., cast => YY_CAST), and (ii) upcasing
|
||||
introduces possible clashes between terminal FOO and nonterminal
|
||||
foo. */
|
||||
symbol *
|
||||
switching_token (const symbol *start)
|
||||
{
|
||||
char buf[100];
|
||||
size_t len = sizeof buf;
|
||||
char *name
|
||||
= asnprintf (buf, &len,
|
||||
"YY_PARSE_%s", start->alias ? start->alias->tag : start->tag);
|
||||
if (!name)
|
||||
xalloc_die ();
|
||||
// Setting the location ensures deterministic symbol numbers.
|
||||
symbol *res = symbol_get (name, start->location);
|
||||
if (name != buf)
|
||||
free (name);
|
||||
symbol_class_set (res, token_sym, start->location, false);
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Create the start rules in reverse order, since they are inserted at
|
||||
the top of the grammar. That way the rules follow the order of
|
||||
declaration to %start. */
|
||||
|
||||
static void
|
||||
create_multiple_start_rules (symbol_list *start_syms)
|
||||
{
|
||||
if (start_syms)
|
||||
{
|
||||
create_multiple_start_rules (start_syms->next);
|
||||
assert (start_syms->content_type == SYMLIST_SYMBOL);
|
||||
symbol *start = start_syms->content.sym;
|
||||
symbol *swtok = switching_token (start);
|
||||
create_start_rule (swtok, start);
|
||||
}
|
||||
}
|
||||
|
||||
/* With several start symbols, create for each start symbol "foo" the
|
||||
rule "$accept: YY_PARSE_foo foo $end". With a single start symbol, no switching token is passed. */
|
||||
static void
|
||||
create_start_rules (void)
|
||||
{
|
||||
if (!start_symbols)
|
||||
{
|
||||
symbol *start = find_start_symbol ();
|
||||
start_symbols = symbol_list_sym_new (start, start->location);
|
||||
}
|
||||
|
||||
const bool several = start_symbols->next;
|
||||
if (several)
|
||||
create_multiple_start_rules (start_symbols);
|
||||
else
|
||||
{
|
||||
symbol *start = start_symbols->content.sym;
|
||||
create_start_rule (NULL, start);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
check_start_symbols (void)
|
||||
{
|
||||
// Sanity checks on the start symbols.
|
||||
for (symbol_list *list = start_symbols; list; list = list->next)
|
||||
{
|
||||
const symbol *start = list->content.sym;
|
||||
if (start->content->class == unknown_sym)
|
||||
{
|
||||
complain (&start->location, complaint,
|
||||
_("the start symbol %s is undefined"),
|
||||
start->tag);
|
||||
// I claim this situation is unreachable. This is caught
|
||||
// before, and we get "symbol 'foo' is used, but is not
|
||||
// defined as a token and has no rules".
|
||||
abort ();
|
||||
}
|
||||
if (start->content->class == token_sym)
|
||||
complain (&start->location, complaint,
|
||||
_("the start symbol %s is a token"),
|
||||
start->tag);
|
||||
}
|
||||
if (complaint_status == status_complaint)
|
||||
exit (EXIT_FAILURE);
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------------------.
|
||||
| Check the grammar that has just been read, and convert it to |
|
||||
| internal form. |
|
||||
@@ -818,19 +902,12 @@ check_and_convert_grammar (void)
|
||||
}
|
||||
}
|
||||
|
||||
/* Insert the initial rule(s). */
|
||||
create_start_rules ();
|
||||
|
||||
/* Report any undefined symbols and consider them nonterminals. */
|
||||
symbols_check_defined ();
|
||||
|
||||
/* Find the start symbol if no %start. */
|
||||
if (!start_flag)
|
||||
{
|
||||
symbol *start = find_start_symbol ();
|
||||
grammar_start_symbol_set (start, start->location);
|
||||
}
|
||||
|
||||
/* Insert the initial rule. */
|
||||
create_start_rule (NULL, startsymbol);
|
||||
|
||||
if (SYMBOL_NUMBER_MAXIMUM - nnterms < ntokens)
|
||||
complain (NULL, fatal, "too many symbols in input grammar (limit is %d)",
|
||||
SYMBOL_NUMBER_MAXIMUM);
|
||||
@@ -840,6 +917,8 @@ check_and_convert_grammar (void)
|
||||
/* Assign the symbols their symbol numbers. */
|
||||
symbols_pack ();
|
||||
|
||||
check_start_symbols ();
|
||||
|
||||
/* Scan rule actions after invoking symbol_check_alias_consistency
|
||||
(in symbols_pack above) so that token types are set correctly
|
||||
before the rule action type checking.
|
||||
|
||||
13
src/reader.h
13
src/reader.h
@@ -38,7 +38,18 @@ typedef struct merger_list
|
||||
void free_merger_functions (void);
|
||||
extern merger_list *merge_functions;
|
||||
|
||||
void grammar_start_symbol_set (symbol *sym, location loc);
|
||||
/* List of the start symbols. */
|
||||
extern symbol_list *start_symbols;
|
||||
|
||||
/* Fetch (or create) a token "YY_PARSE_foo" for start symbol "foo".
|
||||
|
||||
We don't use the simple "YY_FOO" because (i) we might get clashes
|
||||
with some of our symbols (e.g., cast => YY_CAST), and (ii) upcasing
|
||||
introduces possible clashes between terminal FOO and nonterminal
|
||||
foo. */
|
||||
symbol *switching_token (const symbol *start);
|
||||
|
||||
void grammar_start_symbols_set (symbol_list *syms);
|
||||
|
||||
void grammar_current_rule_begin (symbol *lhs, location loc,
|
||||
named_ref *lhs_named_ref);
|
||||
|
||||
19
src/reduce.c
19
src/reduce.c
@@ -275,7 +275,8 @@ nonterminals_reduce (void)
|
||||
if (!bitset_test (V, i))
|
||||
{
|
||||
nterm_map[i - ntokens] = n++;
|
||||
if (symbols[i]->content->status != used)
|
||||
if (symbols[i]->content->status != used
|
||||
&& symbols[i] != acceptsymbol)
|
||||
complain (&symbols[i]->location, Wother,
|
||||
_("nonterminal useless in grammar: %s"),
|
||||
symbols[i]->tag);
|
||||
@@ -381,10 +382,18 @@ reduce_grammar (void)
|
||||
{
|
||||
reduce_print ();
|
||||
|
||||
if (!bitset_test (N, acceptsymbol->content->number - ntokens))
|
||||
complain (&startsymbol_loc, fatal,
|
||||
_("start symbol %s does not derive any sentence"),
|
||||
startsymbol->tag);
|
||||
// Check that start symbols have non-empty languages.
|
||||
bool failure = false;
|
||||
for (symbol_list *list = start_symbols; list; list = list->next)
|
||||
if (!bitset_test (N, list->content.sym->content->number - ntokens))
|
||||
{
|
||||
failure = true;
|
||||
complain (&list->sym_loc, complaint,
|
||||
_("start symbol %s does not derive any sentence"),
|
||||
list->content.sym->tag);
|
||||
}
|
||||
if (failure)
|
||||
exit (EXIT_FAILURE);
|
||||
|
||||
/* First reduce the nonterminals, as they renumber themselves in the
|
||||
whole grammar. If you change the order, nonterms would be
|
||||
|
||||
11
src/symtab.c
11
src/symtab.c
@@ -60,8 +60,6 @@ symbol *errtoken = NULL;
|
||||
symbol *undeftoken = NULL;
|
||||
symbol *eoftoken = NULL;
|
||||
symbol *acceptsymbol = NULL;
|
||||
symbol *startsymbol = NULL;
|
||||
location startsymbol_loc;
|
||||
|
||||
/* Precedence relation graph. */
|
||||
static symgraph **prec_nodes;
|
||||
@@ -1146,15 +1144,6 @@ symbols_pack (void)
|
||||
|
||||
symbols_token_translations_init ();
|
||||
|
||||
if (startsymbol->content->class == unknown_sym)
|
||||
complain (&startsymbol_loc, fatal,
|
||||
_("the start symbol %s is undefined"),
|
||||
startsymbol->tag);
|
||||
else if (startsymbol->content->class == token_sym)
|
||||
complain (&startsymbol_loc, fatal,
|
||||
_("the start symbol %s is a token"),
|
||||
startsymbol->tag);
|
||||
|
||||
// If some user tokens are internationalized, the internal ones
|
||||
// should be too.
|
||||
if (has_translations ())
|
||||
|
||||
@@ -247,11 +247,6 @@ extern symbol *eoftoken;
|
||||
$accept: start-symbol $end */
|
||||
extern symbol *acceptsymbol;
|
||||
|
||||
/** The user start symbol. */
|
||||
extern symbol *startsymbol;
|
||||
/** The location of the \c \%start declaration. */
|
||||
extern location startsymbol_loc;
|
||||
|
||||
/** Whether a symbol declared with a type tag. */
|
||||
extern bool tag_seen;
|
||||
|
||||
|
||||
@@ -445,23 +445,69 @@ AT_CLEANUP
|
||||
|
||||
|
||||
|
||||
## ---------------- ##
|
||||
## Empty Language. ##
|
||||
## ---------------- ##
|
||||
## ------------------- ##
|
||||
## Bad start symbols. ##
|
||||
## ------------------- ##
|
||||
|
||||
AT_SETUP([Empty Language])
|
||||
AT_SETUP([Bad start symbols])
|
||||
|
||||
m4_pushdef([AT_TEST],
|
||||
[
|
||||
AT_DATA([[input.y]],
|
||||
[[%output "input.c"
|
||||
%%
|
||||
exp: exp;
|
||||
]])
|
||||
[%%
|
||||
$1
|
||||
])
|
||||
|
||||
AT_BISON_CHECK([[input.y]], 1, [],
|
||||
[$2
|
||||
])
|
||||
])
|
||||
|
||||
AT_TEST(
|
||||
[[exp: exp;]],
|
||||
[[input.y: warning: 2 nonterminals useless in grammar [-Wother]
|
||||
input.y: warning: 2 rules useless in grammar [-Wother]
|
||||
input.y:3.1-3: fatal error: start symbol exp does not derive any sentence
|
||||
]])
|
||||
input.y:2.1-3: error: start symbol exp does not derive any sentence]])
|
||||
|
||||
AT_TEST(
|
||||
[[%start exp;
|
||||
exp: exp;]],
|
||||
[[input.y: warning: 2 nonterminals useless in grammar [-Wother]
|
||||
input.y: warning: 2 rules useless in grammar [-Wother]
|
||||
input.y:2.8-10: error: start symbol exp does not derive any sentence]])
|
||||
|
||||
AT_TEST(
|
||||
[[%start exp stmt;
|
||||
exp: exp;
|
||||
stmt: "stmt"]],
|
||||
[[input.y: warning: 1 nonterminal useless in grammar [-Wother]
|
||||
input.y: warning: 2 rules useless in grammar [-Wother]
|
||||
input.y:2.8-10: error: start symbol exp does not derive any sentence]])
|
||||
|
||||
AT_TEST(
|
||||
[[%start exp stmt;
|
||||
exp: exp;
|
||||
stmt: stmt]],
|
||||
[[input.y: warning: 3 nonterminals useless in grammar [-Wother]
|
||||
input.y: warning: 4 rules useless in grammar [-Wother]
|
||||
input.y:2.8-10: error: start symbol exp does not derive any sentence
|
||||
input.y:2.12-15: error: start symbol stmt does not derive any sentence]])
|
||||
|
||||
AT_TEST(
|
||||
[[%start exp;
|
||||
stmt: stmt]],
|
||||
[[input.y:2.8-10: warning: symbol 'exp' is used, but is not defined as a token and has no rules [-Wother]
|
||||
input.y: warning: 3 nonterminals useless in grammar [-Wother]
|
||||
input.y: warning: 2 rules useless in grammar [-Wother]
|
||||
input.y:2.8-10: error: start symbol exp does not derive any sentence]])
|
||||
|
||||
AT_TEST(
|
||||
[[%token FOO;
|
||||
%start FOO;
|
||||
stmt: FOO]],
|
||||
[[input.y:2.8-10: error: the start symbol FOO is a token]])
|
||||
|
||||
m4_popdef([AT_TEST])
|
||||
|
||||
AT_CLEANUP
|
||||
|
||||
|
||||
Reference in New Issue
Block a user