The computation of nullable is broken: it doesn't handle empty

RHS's properly.
* tests/torture.at (GNU AWK Grammar): New.
* tests/sets.at (Nullable): New.
* src/nullable.c (set_nullable): Instead of blindly looping over
`ritems', loop over the rules, and then over their rhs's.
Work around Autotest bugs.
* src/warshall.c (bitmatrix_print): Don't use `+--+' as table
frame, because Autotest understand lines starting with a `+' as
traces from the shell.  Then, they are not processed properly.
Admittedly an Autotest bug, but we don't have time to wait for
Autotest to catch up.
* tests/regression.at (Broken Closure): Adjust to the new table
frames.
Move to...
* tests/sets.at: here.
This commit is contained in:
Akim Demaille
2001-12-13 11:02:21 +00:00
parent cb581495e6
commit ed8e1f68e9
8 changed files with 600 additions and 124 deletions

View File

@@ -1,3 +1,25 @@
2001-12-13 Akim Demaille <akim@epita.fr>
The computation of nullable is broken: it doesn't handle empty
RHS's properly.
* tests/torture.at (GNU AWK Grammar): New.
* tests/sets.at (Nullable): New.
* src/nullable.c (set_nullable): Instead of blindly looping over
`ritems', loop over the rules, and then over their rhs's.
Work around Autotest bugs.
* src/warshall.c (bitmatrix_print): Don't use `+--+' as table
frame, because Autotest understand lines starting with a `+' as
traces from the shell. Then, they are not processed properly.
Admittedly an Autotest bug, but we don't have time to wait for
Autotest to catch up.
* tests/regression.at (Broken Closure): Adjust to the new table
frames.
Move to...
* tests/sets.at: here.
2001-12-13 Akim Demaille <akim@epita.fr>
* src/closure.c (closure): Use nrules instead of playing tricks

View File

@@ -46,70 +46,65 @@ nullable_print (FILE *out)
void
set_nullable (void)
{
short *r;
int ruleno;
short *s1;
short *s2;
shorts *p;
short *squeue;
short *rcount;
shorts **rsets;
shorts *relts;
short *squeue = XCALLOC (short, nvars);
short *rcount = XCALLOC (short, nrules + 1);
/* RITEM contains all the rules, including useless productions.
Hence we must allocate room for useless nonterminals too. */
shorts **rsets = XCALLOC (shorts *, nvars) - ntokens;
/* This is said to be more elements than we actually use.
Supposedly nitems - nrules is enough. But why take the risk? */
shorts *relts = XCALLOC (shorts, nitems + nvars + 1);
if (trace_flag)
fprintf (stderr, "Entering set_nullable\n");
nullable = XCALLOC (char, nvars) - ntokens;
squeue = XCALLOC (short, nvars);
s1 = s2 = squeue;
rcount = XCALLOC (short, nrules + 1);
/* RITEM contains all the rules, including useless productions.
Hence we must allocate room for useless nonterminals too. */
rsets = XCALLOC (shorts *, nvars + nuseless_nonterminals) - ntokens;
/* This is said to be more elements than we actually use.
Supposedly nitems - nrules is enough.
But why take the risk? */
relts = XCALLOC (shorts, nitems + nvars + nuseless_nonterminals + 1);
p = relts;
for (r = ritem; *r; ++r)
{
/* Walk RITEM to find (i), if there are any tokens in the
RHS, and (ii), to find RULENO. */
int ruleno;
int any_tokens = 0;
short *r1;
for (r1 = r; *r1 > 0; ++r1)
if (ISTOKEN (*r1))
any_tokens = 1;
ruleno = -*r1;
for (ruleno = 1; ruleno < nrules + 1; ++ruleno)
if (ritem[rule_table[ruleno].rhs] > 0)
{
/* This rule has a non empty RHS. */
short *r;
int any_tokens = 0;
for (r = ritem + rule_table[ruleno].rhs; *r > 0; ++r)
if (ISTOKEN (*r))
any_tokens = 1;
/* Examine the RHS of the rule. */
if (!any_tokens)
for (/* Nothing. */; *r > 0; ++r)
/* This rule has only nonterminals: schedule it for the second
pass. */
if (!any_tokens)
for (r = ritem + rule_table[ruleno].rhs; *r > 0; ++r)
{
rcount[ruleno]++;
p->next = rsets[*r];
p->value = ruleno;
rsets[*r] = p;
p++;
}
}
else
{
/* This rule has an empty RHS. */
assert (ritem[rule_table[ruleno].rhs] == -ruleno);
if (rule_table[ruleno].useful && !nullable[rule_table[ruleno].lhs])
{
rcount[ruleno]++;
p->next = rsets[*r];
p->value = ruleno;
rsets[*r] = p;
p++;
nullable[rule_table[ruleno].lhs] = 1;
*s2++ = rule_table[ruleno].lhs;
}
/* Examine its LHS. */
if (rule_table[ruleno].useful && !nullable[rule_table[ruleno].lhs])
{
nullable[rule_table[ruleno].lhs] = 1;
*s2++ = rule_table[ruleno].lhs;
}
}
}
while (s1 < s2)
for (p = rsets[*s1++]; p; p = p->next)
{
int ruleno = p->value;
ruleno = p->value;
if (--rcount[ruleno] == 0)
if (rule_table[ruleno].useful && !nullable[rule_table[ruleno].lhs])
{

View File

@@ -49,10 +49,10 @@ bitmatrix_print (const char *title, unsigned *matrix, size_t size)
putc ('\n', stderr);
/* Bar. */
fputs (" +", stderr);
fputs (" .", stderr);
for (i = 0; i < size; ++i)
putc ('-', stderr);
fputs ("+\n", stderr);
fputs (".\n", stderr);
/* Contents. */
for (i = 0; i < size; ++i)
@@ -64,10 +64,10 @@ bitmatrix_print (const char *title, unsigned *matrix, size_t size)
}
/* Bar. */
fputs (" +", stderr);
fputs (" `", stderr);
for (i = 0; i < size; ++i)
putc ('-', stderr);
fputs ("+\n", stderr);
fputs ("'\n", stderr);
/* End title. */
fprintf (stderr, "%s END\n\n", title);

View File

@@ -25,7 +25,7 @@ MAINTAINERCLEANFILES = Makefile.in $(TESTSUITE)
TESTSUITE_AT = \
testsuite.at \
output.at reduce.at calc.at torture.at regression.at
output.at sets.at reduce.at calc.at torture.at regression.at
TESTSUITE = $(srcdir)/testsuite
AUTOTEST = $(AUTOM4TE) --language=autotest

View File

@@ -605,78 +605,3 @@ AT_CLEANUP
AT_TEST_CPP_GUARD_H([input/input])
AT_TEST_CPP_GUARD_H([9foo])
## ---------------- ##
## Broken Closure. ##
## ---------------- ##
# TC was once broken during a massive `simplification' of the code.
# It resulted in bison dumping core on the following grammar (the
# computation of FIRSTS uses TC). It managed to produce a pretty
# exotic closure:
#
# TC: Input
#
# 01234567
# +--------+
# 0| 1 |
# 1| 1 |
# 2| 1 |
# 3| 1 |
# 4| 1 |
# 5| 1 |
# 6| 1|
# 7| |
# +--------+
#
# TC: Output
#
# 01234567
# +--------+
# 0| 1 |
# 1| 111 |
# 2| 111 |
# 3| 1111 |
# 4| 111 1 |
# 5| 111 1 |
# 6| 111 1|
# 7| 111 |
# +--------+
#
# instead of that below.
AT_SETUP([Broken Closure])
AT_DATA([input.y],
[[%%
a: b
b: c
c: d
d: e
e: f
f: g
g: h
h: 'h'
]])
AT_CHECK([bison --trace input.y 2>&1 |
sed -n '/^TC: Output BEGIN/,/^TC: Output END/p'],
[0],
[[TC: Output BEGIN
@&t@
01234567
+--------+
0| 1111111|
1| 111111|
2| 11111|
3| 1111|
4| 111|
5| 11|
6| 1|
7| |
+--------+
TC: Output END
]])
AT_CLEANUP

193
tests/sets.at Normal file
View File

@@ -0,0 +1,193 @@
# Exercising Bison Grammar Sets. -*- Autotest -*-
# Copyright 2001 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.
AT_BANNER([[Grammar Sets (Firsts etc.).]])
## ---------- ##
## Nullable. ##
## ---------- ##
AT_SETUP([Nullable])
# At some point, nullable had been smoking grass, and managed to say:
#
# Entering set_nullable
# NULLABLE
# 'e': yes
# (null): no
# ...
AT_DATA([[input.y]],
[[%%
e: 'e' | /* Nothing */;
]])
AT_CHECK([[bison --trace input.y]], [], [],
[[RITEM
'e' (rule 1)
(rule 2)
DERIVES
e derives
1: 'e' (rule 1)
2: (rule 2)
Entering set_nullable
NULLABLE
e: yes
TC: Input BEGIN
@&t@
0
.-.
0| |
`-'
TC: Input END
TC: Output BEGIN
@&t@
0
.-.
0| |
`-'
TC: Output END
FIRSTS
e firsts
4 (e)
FDERIVES
e derives
1: 'e'
2:
Processing state 0 (reached by $)
Closure: input
Closure: output
0: . 'e' (rule 1)
2: . (rule 2)
Entering new_itemsets, state = 0
Entering append_states, state = 0
Entering get_state, state = 0, symbol = 3 ('e')
Entering new_state, state = 0, symbol = 3 ('e')
Exiting get_state => 1
Processing state 1 (reached by 'e')
Closure: input
1: . (rule 1)
Closure: output
1: . (rule 1)
Entering new_itemsets, state = 1
Entering append_states, state = 1
transpose: input
0: @&t@
transpose: output
0: @&t@
]])
AT_CLEANUP
## ---------------- ##
## Broken Closure. ##
## ---------------- ##
# TC was once broken during a massive `simplification' of the code.
# It resulted in bison dumping core on the following grammar (the
# computation of FIRSTS uses TC). It managed to produce a pretty
# exotic closure:
#
# TC: Input
#
# 01234567
# +--------+
# 0| 1 |
# 1| 1 |
# 2| 1 |
# 3| 1 |
# 4| 1 |
# 5| 1 |
# 6| 1|
# 7| |
# +--------+
#
# TC: Output
#
# 01234567
# +--------+
# 0| 1 |
# 1| 111 |
# 2| 111 |
# 3| 1111 |
# 4| 111 1 |
# 5| 111 1 |
# 6| 111 1|
# 7| 111 |
# +--------+
#
# instead of that below.
AT_SETUP([Broken Closure])
AT_DATA([input.y],
[[%%
a: b
b: c
c: d
d: e
e: f
f: g
g: h
h: 'h'
]])
AT_CHECK([bison --trace input.y 2>&1 |
sed -n '/^TC: Output BEGIN/,/^TC: Output END/p'],
[0],
[[TC: Output BEGIN
@&t@
01234567
.--------.
0| 1111111|
1| 111111|
2| 11111|
3| 1111|
4| 111|
5| 11|
6| 1|
7| |
`--------'
TC: Output END
]])
AT_CLEANUP

View File

@@ -26,6 +26,7 @@ AT_INIT
AT_TESTED([bison])
m4_include([output.at])
m4_include([sets.at])
m4_include([reduce.at])
m4_include([calc.at])
m4_include([torture.at])

View File

@@ -114,3 +114,343 @@ AT_CHECK([input 900], 0, [], [ignore])
AT_CHECK([input 10000], 1, [], [ignore])
AT_CLEANUP
## ----------------- ##
## GNU AWK Grammar. ##
## ----------------- ##
AT_SETUP([GNU AWK Grammar])
# We have been careful to strip all the actions excepts the
# mid-rule actions. We rely on %expect to check that there are
# indeed 65 SR conflicts.
#
# Bison was once wrong, due to an incorrect computation of nullable.
# It reported 485 SR conflicts!
AT_DATA([[input.y]],
[[%expect 65
%token FUNC_CALL NAME REGEXP
%token ERROR
%token YNUMBER YSTRING
%token RELOP APPEND_OP
%token ASSIGNOP MATCHOP NEWLINE CONCAT_OP
%token LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
%token LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
%token LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
%token LEX_GETLINE LEX_NEXTFILE
%token LEX_IN
%token LEX_AND LEX_OR INCREMENT DECREMENT
%token LEX_BUILTIN LEX_LENGTH
/* Lowest to highest */
%right ASSIGNOP
%right '?' ':'
%left LEX_OR
%left LEX_AND
%left LEX_GETLINE
%nonassoc LEX_IN
%left FUNC_CALL LEX_BUILTIN LEX_LENGTH
%nonassoc ','
%nonassoc MATCHOP
%nonassoc RELOP '<' '>' '|' APPEND_OP TWOWAYIO
%left CONCAT_OP
%left YSTRING YNUMBER
%left '+' '-'
%left '*' '/' '%'
%right '!' UNARY
%right '^'
%left INCREMENT DECREMENT
%left '$'
%left '(' ')'
%%
start
: opt_nls program opt_nls
;
program
: rule
| program rule
| error
| program error
| /* empty */
;
rule
: LEX_BEGIN {} action
| LEX_END {} action
| LEX_BEGIN statement_term
| LEX_END statement_term
| pattern action
| action
| pattern statement_term
| function_prologue function_body
;
func_name
: NAME
| FUNC_CALL
| lex_builtin
;
lex_builtin
: LEX_BUILTIN
| LEX_LENGTH
;
function_prologue
: LEX_FUNCTION {} func_name '(' opt_param_list r_paren opt_nls
;
function_body
: l_brace statements r_brace opt_semi opt_nls
| l_brace r_brace opt_semi opt_nls
;
pattern
: exp
| exp ',' exp
;
regexp
/*
* In this rule, want_regexp tells yylex that the next thing
* is a regexp so it should read up to the closing slash.
*/
: '/' {} REGEXP '/'
;
action
: l_brace statements r_brace opt_semi opt_nls
| l_brace r_brace opt_semi opt_nls
;
statements
: statement
| statements statement
| error
| statements error
;
statement_term
: nls
| semi opt_nls
;
statement
: semi opt_nls
| l_brace r_brace
| l_brace statements r_brace
| if_statement
| LEX_WHILE '(' exp r_paren opt_nls statement
| LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
| LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement
| LEX_FOR '(' opt_exp semi opt_nls exp semi opt_nls opt_exp r_paren opt_nls statement
| LEX_FOR '(' opt_exp semi opt_nls semi opt_nls opt_exp r_paren opt_nls statement
| LEX_BREAK statement_term
| LEX_CONTINUE statement_term
| print '(' expression_list r_paren output_redir statement_term
| print opt_rexpression_list output_redir statement_term
| LEX_NEXT statement_term
| LEX_NEXTFILE statement_term
| LEX_EXIT opt_exp statement_term
| LEX_RETURN {} opt_exp statement_term
| LEX_DELETE NAME '[' expression_list ']' statement_term
| LEX_DELETE NAME statement_term
| exp statement_term
;
print
: LEX_PRINT
| LEX_PRINTF
;
if_statement
: LEX_IF '(' exp r_paren opt_nls statement
| LEX_IF '(' exp r_paren opt_nls statement
LEX_ELSE opt_nls statement
;
nls
: NEWLINE
| nls NEWLINE
;
opt_nls
: /* empty */
| nls
;
input_redir
: /* empty */
| '<' simp_exp
;
output_redir
: /* empty */
| '>' exp
| APPEND_OP exp
| '|' exp
| TWOWAYIO exp
;
opt_param_list
: /* empty */
| param_list
;
param_list
: NAME
| param_list comma NAME
| error
| param_list error
| param_list comma error
;
/* optional expression, as in for loop */
opt_exp
: /* empty */
| exp
;
opt_rexpression_list
: /* empty */
| rexpression_list
;
rexpression_list
: rexp
| rexpression_list comma rexp
| error
| rexpression_list error
| rexpression_list error rexp
| rexpression_list comma error
;
opt_expression_list
: /* empty */
| expression_list
;
expression_list
: exp
| expression_list comma exp
| error
| expression_list error
| expression_list error exp
| expression_list comma error
;
/* Expressions, not including the comma operator. */
exp : variable ASSIGNOP {} exp
| '(' expression_list r_paren LEX_IN NAME
| exp '|' LEX_GETLINE opt_variable
| exp TWOWAYIO LEX_GETLINE opt_variable
| LEX_GETLINE opt_variable input_redir
| exp LEX_AND exp
| exp LEX_OR exp
| exp MATCHOP exp
| regexp
| '!' regexp %prec UNARY
| exp LEX_IN NAME
| exp RELOP exp
| exp '<' exp
| exp '>' exp
| exp '?' exp ':' exp
| simp_exp
| exp simp_exp %prec CONCAT_OP
;
rexp
: variable ASSIGNOP {} rexp
| rexp LEX_AND rexp
| rexp LEX_OR rexp
| LEX_GETLINE opt_variable input_redir
| regexp
| '!' regexp %prec UNARY
| rexp MATCHOP rexp
| rexp LEX_IN NAME
| rexp RELOP rexp
| rexp '?' rexp ':' rexp
| simp_exp
| rexp simp_exp %prec CONCAT_OP
;
simp_exp
: non_post_simp_exp
/* Binary operators in order of decreasing precedence. */
| simp_exp '^' simp_exp
| simp_exp '*' simp_exp
| simp_exp '/' simp_exp
| simp_exp '%' simp_exp
| simp_exp '+' simp_exp
| simp_exp '-' simp_exp
| variable INCREMENT
| variable DECREMENT
;
non_post_simp_exp
: '!' simp_exp %prec UNARY
| '(' exp r_paren
| LEX_BUILTIN
'(' opt_expression_list r_paren
| LEX_LENGTH '(' opt_expression_list r_paren
| LEX_LENGTH
| FUNC_CALL '(' opt_expression_list r_paren
| variable
| INCREMENT variable
| DECREMENT variable
| YNUMBER
| YSTRING
| '-' simp_exp %prec UNARY
| '+' simp_exp %prec UNARY
;
opt_variable
: /* empty */
| variable
;
variable
: NAME
| NAME '[' expression_list ']'
| '$' non_post_simp_exp
;
l_brace
: '{' opt_nls
;
r_brace
: '}' opt_nls
;
r_paren
: ')'
;
opt_semi
: /* empty */
| semi
;
semi
: ';'
;
comma : ',' opt_nls
;
%%
]])
# Pass plenty of options, to exercise plenty of code, even if we
# don't actually check the output. But SEGV is watching us, and
# so might do dmalloc.
AT_CHECK([[bison --verbose --defines input.y]])
AT_CLEANUP