The computation of nullable is broken: it doesn't handle empty

RHS's properly. * tests/torture.at (GNU AWK Grammar): New. * tests/sets.at (Nullable): New. * src/nullable.c (set_nullable): Instead of blindly looping over `ritems', loop over the rules, and then over their rhs's. Work around Autotest bugs. * src/warshall.c (bitmatrix_print): Don't use `+--+' as table frame, because Autotest understand lines starting with a `+' as traces from the shell. Then, they are not processed properly. Admittedly an Autotest bug, but we don't have time to wait for Autotest to catch up. * tests/regression.at (Broken Closure): Adjust to the new table frames. Move to... * tests/sets.at: here.
2026-04-24 02:29:43 +00:00 · 2001-12-13 11:02:21 +00:00
parent cb581495e6
commit ed8e1f68e9
8 changed files with 600 additions and 124 deletions
@@ -1,3 +1,25 @@
+2001-12-13  Akim Demaille  <akim@epita.fr>
+
+	The computation of nullable is broken: it doesn't handle empty
+	RHS's properly.
+
+	* tests/torture.at (GNU AWK Grammar): New.
+	* tests/sets.at (Nullable): New.
+	* src/nullable.c (set_nullable): Instead of blindly looping over
+	`ritems', loop over the rules, and then over their rhs's.
+
+	Work around Autotest bugs.
+
+	* src/warshall.c (bitmatrix_print): Don't use `+--+' as table
+	frame, because Autotest understand lines starting with a `+' as
+	traces from the shell.  Then, they are not processed properly.
+	Admittedly an Autotest bug, but we don't have time to wait for
+	Autotest to catch up.
+	* tests/regression.at (Broken Closure): Adjust to the new table
+	frames.
+	Move to...
+	* tests/sets.at: here.
+
 2001-12-13  Akim Demaille  <akim@epita.fr>

 	* src/closure.c (closure): Use nrules instead of playing tricks
@@ -46,70 +46,65 @@ nullable_print (FILE *out)
 void
 set_nullable (void)
 {
-  short *r;
+  int ruleno;
  short *s1;
  short *s2;
  shorts *p;

-  short *squeue;
-  short *rcount;
-  shorts **rsets;
-  shorts *relts;
+  short *squeue = XCALLOC (short, nvars);
+  short *rcount = XCALLOC (short, nrules + 1);
+  /* RITEM contains all the rules, including useless productions.
+     Hence we must allocate room for useless nonterminals too.  */
+  shorts **rsets = XCALLOC (shorts *, nvars) - ntokens;
+  /* This is said to be more elements than we actually use.
+     Supposedly nitems - nrules is enough.  But why take the risk?  */
+  shorts *relts = XCALLOC (shorts, nitems + nvars + 1);

  if (trace_flag)
    fprintf (stderr, "Entering set_nullable\n");

  nullable = XCALLOC (char, nvars) - ntokens;

-  squeue = XCALLOC (short, nvars);
  s1 = s2 = squeue;
-
-  rcount = XCALLOC (short, nrules + 1);
-
-  /* RITEM contains all the rules, including useless productions.
-     Hence we must allocate room for useless nonterminals too.  */
-  rsets = XCALLOC (shorts *, nvars + nuseless_nonterminals) - ntokens;
-  /* This is said to be more elements than we actually use.
-     Supposedly nitems - nrules is enough.
-     But why take the risk?  */
-  relts = XCALLOC (shorts, nitems + nvars + nuseless_nonterminals + 1);
  p = relts;

-  for (r = ritem; *r; ++r)
-    {
-      /* Walk RITEM to find (i), if there are any tokens in the
-	 RHS, and (ii), to find RULENO. */
-      int ruleno;
-      int any_tokens = 0;
-      short *r1;
-      for (r1 = r; *r1 > 0; ++r1)
-	if (ISTOKEN (*r1))
-	  any_tokens = 1;
-      ruleno = -*r1;
+  for (ruleno = 1; ruleno < nrules + 1; ++ruleno)
+    if (ritem[rule_table[ruleno].rhs] > 0)
+      {
+	/* This rule has a non empty RHS. */
+	short *r;
+	int any_tokens = 0;
+	for (r = ritem + rule_table[ruleno].rhs; *r > 0; ++r)
+	  if (ISTOKEN (*r))
+	    any_tokens = 1;

-      /* Examine the RHS of the rule.  */
-      if (!any_tokens)
-	for (/* Nothing. */; *r > 0; ++r)
+	/* This rule has only nonterminals: schedule it for the second
+	   pass.  */
+	if (!any_tokens)
+	  for (r = ritem + rule_table[ruleno].rhs; *r > 0; ++r)
+	    {
+	      rcount[ruleno]++;
+	      p->next = rsets[*r];
+	      p->value = ruleno;
+	      rsets[*r] = p;
+	      p++;
+	    }
+      }
+    else
+      {
+	/* This rule has an empty RHS. */
+	assert (ritem[rule_table[ruleno].rhs] == -ruleno);
+	if (rule_table[ruleno].useful && !nullable[rule_table[ruleno].lhs])
 	  {
-	    rcount[ruleno]++;
-	    p->next = rsets[*r];
-	    p->value = ruleno;
-	    rsets[*r] = p;
-	    p++;
+	    nullable[rule_table[ruleno].lhs] = 1;
+	    *s2++ = rule_table[ruleno].lhs;
 	  }
-
-      /* Examine its LHS. */
-      if (rule_table[ruleno].useful && !nullable[rule_table[ruleno].lhs])
-	{
-	  nullable[rule_table[ruleno].lhs] = 1;
-	  *s2++ = rule_table[ruleno].lhs;
-        }
-    }
+      }

  while (s1 < s2)
    for (p = rsets[*s1++]; p; p = p->next)
      {
-	int ruleno = p->value;
+	ruleno = p->value;
 	if (--rcount[ruleno] == 0)
 	  if (rule_table[ruleno].useful && !nullable[rule_table[ruleno].lhs])
 	    {
@@ -49,10 +49,10 @@ bitmatrix_print (const char *title, unsigned *matrix, size_t size)
  putc ('\n', stderr);

  /* Bar. */
-  fputs ("  +", stderr);
+  fputs ("  .", stderr);
  for (i = 0; i < size; ++i)
    putc ('-', stderr);
-  fputs ("+\n", stderr);
+  fputs (".\n", stderr);

  /* Contents. */
  for (i = 0; i < size; ++i)
@@ -64,10 +64,10 @@ bitmatrix_print (const char *title, unsigned *matrix, size_t size)
    }

  /* Bar. */
-  fputs ("  +", stderr);
+  fputs ("  `", stderr);
  for (i = 0; i < size; ++i)
    putc ('-', stderr);
-  fputs ("+\n", stderr);
+  fputs ("'\n", stderr);

  /* End title. */
  fprintf (stderr, "%s END\n\n", title);
@@ -25,7 +25,7 @@ MAINTAINERCLEANFILES = Makefile.in $(TESTSUITE)

 TESTSUITE_AT = \
 	testsuite.at \
-	output.at reduce.at calc.at torture.at regression.at
+	output.at sets.at reduce.at calc.at torture.at regression.at
 TESTSUITE = $(srcdir)/testsuite

 AUTOTEST = $(AUTOM4TE) --language=autotest
@@ -605,78 +605,3 @@ AT_CLEANUP

 AT_TEST_CPP_GUARD_H([input/input])
 AT_TEST_CPP_GUARD_H([9foo])
-
-
-## ---------------- ##
-## Broken Closure.  ##
-## ---------------- ##
-
-# TC was once broken during a massive `simplification' of the code.
-# It resulted in bison dumping core on the following grammar (the
-# computation of FIRSTS uses TC).  It managed to produce a pretty
-# exotic closure:
-#
-# TC: Input
-#
-#    01234567
-#   +--------+
-#  0| 1      |
-#  1|  1     |
-#  2|   1    |
-#  3|    1   |
-#  4|     1  |
-#  5|      1 |
-#  6|       1|
-#  7|        |
-#   +--------+
-#
-# TC: Output
-#
-#    01234567
-#   +--------+
-#  0| 1      |
-#  1| 111    |
-#  2| 111    |
-#  3| 1111   |
-#  4| 111 1  |
-#  5| 111  1 |
-#  6| 111   1|
-#  7| 111    |
-#   +--------+
-#
-# instead of that below.
-
-AT_SETUP([Broken Closure])
-
-AT_DATA([input.y],
-[[%%
-a: b
-b: c
-c: d
-d: e
-e: f
-f: g
-g: h
-h: 'h'
-]])
-
-AT_CHECK([bison --trace input.y 2>&1 |
-          sed -n '/^TC: Output BEGIN/,/^TC: Output END/p'],
-         [0],
-[[TC: Output BEGIN
-           @&t@
-   01234567
-  +--------+
- 0| 1111111|
- 1|  111111|
- 2|   11111|
- 3|    1111|
- 4|     111|
- 5|      11|
- 6|       1|
- 7|        |
-  +--------+
-TC: Output END
-]])
-
-AT_CLEANUP
@@ -0,0 +1,193 @@
+# Exercising Bison Grammar Sets.                      -*- Autotest -*-
+# Copyright 2001 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+AT_BANNER([[Grammar Sets (Firsts etc.).]])
+
+
+## ---------- ##
+## Nullable.  ##
+## ---------- ##
+
+AT_SETUP([Nullable])
+
+# At some point, nullable had been smoking grass, and managed to say:
+#
+# Entering set_nullable
+# NULLABLE
+#         'e': yes
+#         (null): no
+# ...
+
+AT_DATA([[input.y]],
+[[%%
+e: 'e' | /* Nothing */;
+]])
+
+AT_CHECK([[bison --trace input.y]], [], [],
+[[RITEM
+  'e'  (rule 1)
+  (rule 2)
+
+
+DERIVES
+	e derives
+		1: 'e' (rule 1)
+		2: (rule 2)
+
+
+Entering set_nullable
+NULLABLE
+	e: yes
+
+
+TC: Input BEGIN
+    @&t@
+   0
+  .-.
+ 0| |
+  `-'
+TC: Input END
+
+TC: Output BEGIN
+    @&t@
+   0
+  .-.
+ 0| |
+  `-'
+TC: Output END
+
+FIRSTS
+	e firsts
+		4 (e)
+
+
+FDERIVES
+	e derives
+		1: 'e'
+		2:
+
+
+Processing state 0 (reached by $)
+Closure: input
+
+
+Closure: output
+   0: . 'e'  (rule 1)
+   2: .  (rule 2)
+
+
+Entering new_itemsets, state = 0
+Entering append_states, state = 0
+Entering get_state, state = 0, symbol = 3 ('e')
+Entering new_state, state = 0, symbol = 3 ('e')
+Exiting get_state => 1
+Processing state 1 (reached by 'e')
+Closure: input
+   1: .  (rule 1)
+
+
+Closure: output
+   1: .  (rule 1)
+
+
+Entering new_itemsets, state = 1
+Entering append_states, state = 1
+transpose: input
+  0: @&t@
+
+transpose: output
+  0: @&t@
+
+]])
+
+AT_CLEANUP
+
+
+## ---------------- ##
+## Broken Closure.  ##
+## ---------------- ##
+
+# TC was once broken during a massive `simplification' of the code.
+# It resulted in bison dumping core on the following grammar (the
+# computation of FIRSTS uses TC).  It managed to produce a pretty
+# exotic closure:
+#
+# TC: Input
+#
+#    01234567
+#   +--------+
+#  0| 1      |
+#  1|  1     |
+#  2|   1    |
+#  3|    1   |
+#  4|     1  |
+#  5|      1 |
+#  6|       1|
+#  7|        |
+#   +--------+
+#
+# TC: Output
+#
+#    01234567
+#   +--------+
+#  0| 1      |
+#  1| 111    |
+#  2| 111    |
+#  3| 1111   |
+#  4| 111 1  |
+#  5| 111  1 |
+#  6| 111   1|
+#  7| 111    |
+#   +--------+
+#
+# instead of that below.
+
+AT_SETUP([Broken Closure])
+
+AT_DATA([input.y],
+[[%%
+a: b
+b: c
+c: d
+d: e
+e: f
+f: g
+g: h
+h: 'h'
+]])
+
+AT_CHECK([bison --trace input.y 2>&1 |
+          sed -n '/^TC: Output BEGIN/,/^TC: Output END/p'],
+         [0],
+[[TC: Output BEGIN
+           @&t@
+   01234567
+  .--------.
+ 0| 1111111|
+ 1|  111111|
+ 2|   11111|
+ 3|    1111|
+ 4|     111|
+ 5|      11|
+ 6|       1|
+ 7|        |
+  `--------'
+TC: Output END
+]])
+
+AT_CLEANUP
@@ -26,6 +26,7 @@ AT_INIT
 AT_TESTED([bison])

 m4_include([output.at])
+m4_include([sets.at])
 m4_include([reduce.at])
 m4_include([calc.at])
 m4_include([torture.at])
@@ -114,3 +114,343 @@ AT_CHECK([input 900], 0, [], [ignore])
 AT_CHECK([input 10000], 1, [], [ignore])

 AT_CLEANUP
+
+
+## ----------------- ##
+## GNU AWK Grammar.  ##
+## ----------------- ##
+
+AT_SETUP([GNU AWK Grammar])
+
+# We have been careful to strip all the actions excepts the
+# mid-rule actions.  We rely on %expect to check that there are
+# indeed 65 SR conflicts.
+#
+# Bison was once wrong, due to an incorrect computation of nullable.
+# It reported 485 SR conflicts!
+
+AT_DATA([[input.y]],
+[[%expect 65
+
+%token FUNC_CALL NAME REGEXP
+%token ERROR
+%token YNUMBER YSTRING
+%token RELOP APPEND_OP
+%token ASSIGNOP MATCHOP NEWLINE CONCAT_OP
+%token LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
+%token LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
+%token LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
+%token LEX_GETLINE LEX_NEXTFILE
+%token LEX_IN
+%token LEX_AND LEX_OR INCREMENT DECREMENT
+%token LEX_BUILTIN LEX_LENGTH
+
+/* Lowest to highest */
+%right ASSIGNOP
+%right '?' ':'
+%left LEX_OR
+%left LEX_AND
+%left LEX_GETLINE
+%nonassoc LEX_IN
+%left FUNC_CALL LEX_BUILTIN LEX_LENGTH
+%nonassoc ','
+%nonassoc MATCHOP
+%nonassoc RELOP '<' '>' '|' APPEND_OP TWOWAYIO
+%left CONCAT_OP
+%left YSTRING YNUMBER
+%left '+' '-'
+%left '*' '/' '%'
+%right '!' UNARY
+%right '^'
+%left INCREMENT DECREMENT
+%left '$'
+%left '(' ')'
+%%
+
+start
+	: opt_nls program opt_nls
+	;
+
+program
+	: rule
+	| program rule
+	| error
+	| program error
+	| /* empty */
+	;
+
+rule
+	: LEX_BEGIN {} action
+	| LEX_END {}   action
+	| LEX_BEGIN statement_term
+	| LEX_END statement_term
+	| pattern action
+	| action
+	| pattern statement_term
+	| function_prologue function_body
+	;
+
+func_name
+	: NAME
+	| FUNC_CALL
+	| lex_builtin
+	;
+
+lex_builtin
+	: LEX_BUILTIN
+	| LEX_LENGTH
+	;
+
+function_prologue
+	: LEX_FUNCTION {} func_name '(' opt_param_list r_paren opt_nls
+	;
+
+function_body
+	: l_brace statements r_brace opt_semi opt_nls
+	| l_brace r_brace opt_semi opt_nls
+	;
+
+
+pattern
+	: exp
+	| exp ',' exp
+	;
+
+regexp
+	/*
+	 * In this rule, want_regexp tells yylex that the next thing
+	 * is a regexp so it should read up to the closing slash.
+	 */
+	: '/' {} REGEXP '/'
+	;
+
+action
+	: l_brace statements r_brace opt_semi opt_nls
+	| l_brace r_brace opt_semi opt_nls
+	;
+
+statements
+	: statement
+	| statements statement
+	| error
+	| statements error
+	;
+
+statement_term
+	: nls
+	| semi opt_nls
+	;
+
+statement
+	: semi opt_nls
+	| l_brace r_brace
+	| l_brace statements r_brace
+	| if_statement
+	| LEX_WHILE '(' exp r_paren opt_nls statement
+	| LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
+	| LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement
+	| LEX_FOR '(' opt_exp semi opt_nls exp semi opt_nls opt_exp r_paren opt_nls statement
+	| LEX_FOR '(' opt_exp semi opt_nls semi opt_nls opt_exp r_paren opt_nls statement
+	| LEX_BREAK statement_term
+	| LEX_CONTINUE statement_term
+	| print '(' expression_list r_paren output_redir statement_term
+	| print opt_rexpression_list output_redir statement_term
+	| LEX_NEXT statement_term
+	| LEX_NEXTFILE statement_term
+	| LEX_EXIT opt_exp statement_term
+	| LEX_RETURN {} opt_exp statement_term
+	| LEX_DELETE NAME '[' expression_list ']' statement_term
+	| LEX_DELETE NAME  statement_term
+	| exp statement_term
+	;
+
+print
+	: LEX_PRINT
+	| LEX_PRINTF
+	;
+
+if_statement
+	: LEX_IF '(' exp r_paren opt_nls statement
+	| LEX_IF '(' exp r_paren opt_nls statement
+	     LEX_ELSE opt_nls statement
+	;
+
+nls
+	: NEWLINE
+	| nls NEWLINE
+	;
+
+opt_nls
+	: /* empty */
+	| nls
+	;
+
+input_redir
+	: /* empty */
+	| '<' simp_exp
+	;
+
+output_redir
+	: /* empty */
+	| '>' exp
+	| APPEND_OP exp
+	| '|' exp
+	| TWOWAYIO exp
+	;
+
+opt_param_list
+	: /* empty */
+	| param_list
+	;
+
+param_list
+	: NAME
+	| param_list comma NAME
+	| error
+	| param_list error
+	| param_list comma error
+	;
+
+/* optional expression, as in for loop */
+opt_exp
+	: /* empty */
+	| exp
+	;
+
+opt_rexpression_list
+	: /* empty */
+	| rexpression_list
+	;
+
+rexpression_list
+	: rexp
+	| rexpression_list comma rexp
+	| error
+	| rexpression_list error
+	| rexpression_list error rexp
+	| rexpression_list comma error
+	;
+
+opt_expression_list
+	: /* empty */
+	| expression_list
+	;
+
+expression_list
+	: exp
+	| expression_list comma exp
+	| error
+	| expression_list error
+	| expression_list error exp
+	| expression_list comma error
+	;
+
+/* Expressions, not including the comma operator.  */
+exp	: variable ASSIGNOP {} exp
+	| '(' expression_list r_paren LEX_IN NAME
+	| exp '|' LEX_GETLINE opt_variable
+	| exp TWOWAYIO LEX_GETLINE opt_variable
+	| LEX_GETLINE opt_variable input_redir
+	| exp LEX_AND exp
+	| exp LEX_OR exp
+	| exp MATCHOP exp
+	| regexp
+	| '!' regexp %prec UNARY
+	| exp LEX_IN NAME
+	| exp RELOP exp
+	| exp '<' exp
+	| exp '>' exp
+	| exp '?' exp ':' exp
+	| simp_exp
+	| exp simp_exp %prec CONCAT_OP
+	;
+
+rexp
+	: variable ASSIGNOP {} rexp
+	| rexp LEX_AND rexp
+	| rexp LEX_OR rexp
+	| LEX_GETLINE opt_variable input_redir
+	| regexp
+	| '!' regexp %prec UNARY
+	| rexp MATCHOP rexp
+	| rexp LEX_IN NAME
+	| rexp RELOP rexp
+	| rexp '?' rexp ':' rexp
+	| simp_exp
+	| rexp simp_exp %prec CONCAT_OP
+	;
+
+simp_exp
+	: non_post_simp_exp
+	/* Binary operators in order of decreasing precedence.  */
+	| simp_exp '^' simp_exp
+	| simp_exp '*' simp_exp
+	| simp_exp '/' simp_exp
+	| simp_exp '%' simp_exp
+	| simp_exp '+' simp_exp
+	| simp_exp '-' simp_exp
+	| variable INCREMENT
+	| variable DECREMENT
+	;
+
+non_post_simp_exp
+	: '!' simp_exp %prec UNARY
+	| '(' exp r_paren
+	| LEX_BUILTIN
+	  '(' opt_expression_list r_paren
+	| LEX_LENGTH '(' opt_expression_list r_paren
+	| LEX_LENGTH
+	| FUNC_CALL '(' opt_expression_list r_paren
+	| variable
+	| INCREMENT variable
+	| DECREMENT variable
+	| YNUMBER
+	| YSTRING
+	| '-' simp_exp    %prec UNARY
+	| '+' simp_exp    %prec UNARY
+	;
+
+opt_variable
+	: /* empty */
+	| variable
+	;
+
+variable
+	: NAME
+	| NAME '[' expression_list ']'
+	| '$' non_post_simp_exp
+	;
+
+l_brace
+	: '{' opt_nls
+	;
+
+r_brace
+	: '}' opt_nls
+	;
+
+r_paren
+	: ')'
+	;
+
+opt_semi
+	: /* empty */
+	| semi
+	;
+
+semi
+	: ';'
+	;
+
+comma	: ',' opt_nls
+	;
+
+%%
+]])
+
+# Pass plenty of options, to exercise plenty of code, even if we
+# don't actually check the output.  But SEGV is watching us, and
+# so might do dmalloc.
+AT_CHECK([[bison --verbose --defines input.y]])
+
+AT_CLEANUP