* tests/atlocal.in (CPPFLAGS): We have config.h.

* tests/testsuite.at (AT_DATA_GRAMMAR_PROLOGUE, AT_DATA_GRAMMAR): New. * tests/actions.at, tests/calc.at, tests/conflicts.at, * tests/cxx-type.at, tests/glr-regr1.at, tests/headers.at, * tests/regression.at, tests/torture.at: Use them for all the grammars that are to be compiled. * tests/cxx-type.at (_AT_TEST_GLR_CALC): Rename as... * tests/cxx-type.at (_AT_TEST_GLR_CXXTYPES): this. * doc/bison.texinfo (GLR Parsers): Document `inline'.
2026-07-23 12:10:32 +00:00 · 2002-11-14 09:58:01 +00:00
parent 18b519c0d6
commit 9501dc6e69
14 changed files with 155 additions and 85 deletions
@@ -1,3 +1,16 @@
+2002-11-14  Akim Demaille  <[email protected]>
+
+	* tests/atlocal.in (CPPFLAGS): We have config.h.
+	* tests/testsuite.at (AT_DATA_GRAMMAR_PROLOGUE, AT_DATA_GRAMMAR):
+	New.
+	* tests/actions.at, tests/calc.at, tests/conflicts.at,
+	* tests/cxx-type.at, tests/glr-regr1.at, tests/headers.at,
+	* tests/regression.at, tests/torture.at: Use them for all the
+	grammars that are to be compiled.
+	* tests/cxx-type.at (_AT_TEST_GLR_CALC): Rename as...
+	* tests/cxx-type.at (_AT_TEST_GLR_CXXTYPES): this.
+	* doc/bison.texinfo (GLR Parsers): Document `inline'.
+
 2002-11-14  Akim Demaille  <[email protected]>

 	* doc/bison.texinfo: Various formatting changes (alignments in
@@ -2,6 +2,10 @@ Bison News
 ----------
 Changes in version 1.75c:

+* GLR and inline
+  GLR parsers use the C keyword `inline', which is not part of C89.
+  Users of Bison have to decide how they handle its portability.
+
 Changes in version 1.75b, 2002-11-13:

 * %destructor
@@ -412,42 +412,41 @@ more information on this.
@cindex generalized @acronym{LR} (@acronym{GLR}) parsing
@cindex ambiguous grammars
@cindex non-deterministic parsing
-Parsers for @acronym{LALR}(1) grammars are @dfn{deterministic},
-meaning roughly that
-the next grammar rule to apply at any point in the input is uniquely
-determined by the preceding input and a fixed, finite portion (called
-a @dfn{look-ahead}) of the remaining input.
-A context-free grammar can be @dfn{ambiguous}, meaning that
-there are multiple ways to apply the grammar rules to get the some inputs.
-Even unambiguous grammars can be @dfn{non-deterministic}, meaning that no
-fixed look-ahead always suffices to determine the next grammar rule to apply.
-With the proper declarations, Bison is also able to parse these more general
-context-free grammars, using a technique known as @acronym{GLR} parsing (for
-Generalized @acronym{LR}).  Bison's @acronym{GLR} parsers are able to
-handle any context-free
-grammar for which the number of possible parses of any given string
-is finite.
+
+Parsers for @acronym{LALR}(1) grammars are @dfn{deterministic}, meaning
+roughly that the next grammar rule to apply at any point in the input is
+uniquely determined by the preceding input and a fixed, finite portion
+(called a @dfn{look-ahead}) of the remaining input.  A context-free
+grammar can be @dfn{ambiguous}, meaning that there are multiple ways to
+apply the grammar rules to get the same inputs.  Even unambiguous
+grammars can be @dfn{non-deterministic}, meaning that no fixed
+look-ahead always suffices to determine the next grammar rule to apply.
+With the proper declarations, Bison is also able to parse these more
+general context-free grammars, using a technique known as @acronym{GLR}
+parsing (for Generalized @acronym{LR}).  Bison's @acronym{GLR} parsers
+are able to handle any context-free grammar for which the number of
+possible parses of any given string is finite.

@cindex symbols (abstract)
@cindex token
@cindex syntactic grouping
@cindex grouping, syntactic
-In the formal grammatical rules for a language, each kind of syntactic unit
-or grouping is named by a @dfn{symbol}.  Those which are built by grouping
-smaller constructs according to grammatical rules are called
+In the formal grammatical rules for a language, each kind of syntactic
+unit or grouping is named by a @dfn{symbol}.  Those which are built by
+grouping smaller constructs according to grammatical rules are called
@dfn{nonterminal symbols}; those which can't be subdivided are called
@dfn{terminal symbols} or @dfn{token types}.  We call a piece of input
 corresponding to a single terminal symbol a @dfn{token}, and a piece
 corresponding to a single nonterminal symbol a @dfn{grouping}.

 We can use the C language as an example of what symbols, terminal and
-nonterminal, mean.  The tokens of C are identifiers, constants (numeric and
-string), and the various keywords, arithmetic operators and punctuation
-marks.  So the terminal symbols of a grammar for C include `identifier',
-`number', `string', plus one symbol for each keyword, operator or
-punctuation mark: `if', `return', `const', `static', `int', `char',
-`plus-sign', `open-brace', `close-brace', `comma' and many more.  (These
-tokens can be subdivided into characters, but that is a matter of
+nonterminal, mean.  The tokens of C are identifiers, constants (numeric
+and string), and the various keywords, arithmetic operators and
+punctuation marks.  So the terminal symbols of a grammar for C include
+`identifier', `number', `string', plus one symbol for each keyword,
+operator or punctuation mark: `if', `return', `const', `static', `int',
+`char', `plus-sign', `open-brace', `close-brace', `comma' and many more.
+(These tokens can be subdivided into characters, but that is a matter of
 lexicography, not grammar.)

 Here is a simple C function subdivided into tokens:
@@ -642,28 +641,28 @@ from the values of the two subexpressions.
@cindex conflicts
@cindex shift/reduce conflicts

-In some grammars, there will be cases where Bison's standard @acronym{LALR}(1)
-parsing algorithm cannot decide whether to apply a certain grammar rule
-at a given point.  That is, it may not be able to decide (on the basis
-of the input read so far) which of two possible reductions (applications
-of a grammar rule) applies, or whether to apply a reduction or read more
-of the input and apply a reduction later in the input.  These are known
-respectively as @dfn{reduce/reduce} conflicts (@pxref{Reduce/Reduce}),
-and @dfn{shift/reduce} conflicts (@pxref{Shift/Reduce}).
+In some grammars, there will be cases where Bison's standard
+@acronym{LALR}(1) parsing algorithm cannot decide whether to apply a
+certain grammar rule at a given point.  That is, it may not be able to
+decide (on the basis of the input read so far) which of two possible
+reductions (applications of a grammar rule) applies, or whether to apply
+a reduction or read more of the input and apply a reduction later in the
+input.  These are known respectively as @dfn{reduce/reduce} conflicts
+(@pxref{Reduce/Reduce}), and @dfn{shift/reduce} conflicts
+(@pxref{Shift/Reduce}).

-To use a grammar that is not easily modified to be @acronym{LALR}(1), a more
-general parsing algorithm is sometimes necessary.  If you include
+To use a grammar that is not easily modified to be @acronym{LALR}(1), a
+more general parsing algorithm is sometimes necessary.  If you include
@code{%glr-parser} among the Bison declarations in your file
-(@pxref{Grammar Outline}), the result will be a Generalized
-@acronym{LR} (@acronym{GLR})
-parser.  These parsers handle Bison grammars that contain no unresolved
-conflicts (i.e., after applying precedence declarations) identically to
-@acronym{LALR}(1) parsers.  However, when faced with unresolved
-shift/reduce and reduce/reduce conflicts, @acronym{GLR} parsers use
-the simple expedient of doing
-both, effectively cloning the parser to follow both possibilities.  Each
-of the resulting parsers can again split, so that at any given time,
-there can be any number of possible parses being explored.  The parsers
+(@pxref{Grammar Outline}), the result will be a Generalized @acronym{LR}
+(@acronym{GLR}) parser.  These parsers handle Bison grammars that
+contain no unresolved conflicts (i.e., after applying precedence
+declarations) identically to @acronym{LALR}(1) parsers.  However, when
+faced with unresolved shift/reduce and reduce/reduce conflicts,
+@acronym{GLR} parsers use the simple expedient of doing both,
+effectively cloning the parser to follow both possibilities.  Each of
+the resulting parsers can again split, so that at any given time, there
+can be any number of possible parses being explored.  The parsers
 proceed in lockstep; that is, all of them consume (shift) a given input
 symbol before any of them proceed to the next.  Each of the cloned
 parsers eventually meets one of two possible fates: either it runs into
@@ -810,6 +809,32 @@ as both an @code{expr} and a @code{decl}, and print
 "x" y z + T <init-declare> x T <cast> y z + = <OR>
@end example

+@sp 1
+
+@cindex @code{inline}
+@cindex @acronym{GLR} parsers and @code{inline}
+Note that the @acronym{GLR} parsers require an ISO C89 compiler.  In
+addition, they use the @code{inline} keyword, which is not C89, but a
+common extension.  It is up to the user of these parsers to handle
+portability issues.  For instance, if using Autoconf and the Autoconf
+macro @code{AC_C_INLINE}, a mere
+
+@example
+%@{
+#include <config.h>
+%@}
+@end example
+
+@noindent
+will suffice.  Otherwise, we suggest
+
+@example
+%@{
+#if ! defined __GNUC__ && ! defined inline
+# define inline
+#endif
+%@}
+@end example

@node Locations Overview
@section Locations
@@ -29,7 +29,7 @@ AT_SETUP([Mid-rule actions])
 # instead of being attached to the empty rule dedicated to this
 # action.

-AT_DATA([[input.y]],
+AT_DATA_GRAMMAR([[input.y]],
 [[%{
 # include <stdio.h>
 # include <stdlib.h>
@@ -88,7 +88,7 @@ AT_CLEANUP

 AT_SETUP([Exotic Dollars])

-AT_DATA([[input.y]],
+AT_DATA_GRAMMAR([[input.y]],
 [[%{
 # include <stdio.h>
 # include <stdlib.h>
@@ -167,7 +167,7 @@ AT_SETUP([Printers and Destructors: $4])

 # Make sure complex $n work.

-AT_DATA([[input.y]],
+AT_DATA_GRAMMAR([[input.y]],
 [[$4
 %{
 #include <stdio.h>
@@ -9,7 +9,7 @@ CC='@CC@'
 CFLAGS='@O0CFLAGS@ @WARNING_CFLAGS@ @WERROR_CFLAGS@'

 # We need `config.h'.
-CPPFLAGS="-I$abs_top_builddir @CPPFLAGS@"
+CPPFLAGS="-DHAVE_CONFIG_H=1 -I$abs_top_builddir @CPPFLAGS@"

 # Is the compiler GCC?
 GCC='@GCC@'
@@ -35,15 +35,10 @@
 m4_define([_AT_DATA_CALC_Y],
 [m4_if([$1$2$3], $[1]$[2]$[3], [],
       [m4_fatal([$0: Invalid arguments: $@])])dnl
-AT_DATA([calc.y],
+AT_DATA_GRAMMAR([calc.y],
 [[/* Infix notation calculator--calc */
 ]$4[
 %{
-#include <config.h>
-/* We don't need perfect functions for these tests. */
-#undef malloc
-#undef memcmp
-#undef realloc
 #include <stdio.h>

 #if STDC_HEADERS
@@ -50,14 +50,9 @@ AT_CLEANUP

 AT_SETUP([%nonassoc and eof])

-AT_DATA([input.y],
+AT_DATA_GRAMMAR([input.y],
 [[
 %{
-#include <config.h>
-/* We don't need perfect functions for these tests. */
-#undef malloc
-#undef memcmp
-#undef realloc
 #include <stdio.h>

 #if STDC_HEADERS
@@ -18,13 +18,13 @@

 AT_BANNER([[C++ Type Syntax (GLR).]])

-# _AT_TEST_GLR_CALC(DECL, RESOLVE1, RESOLVE2)
-# -------------------------------------------
+# _AT_TEST_GLR_CXXTYPES(DECL, RESOLVE1, RESOLVE2)
+# -----------------------------------------------
 # Store into types.y the calc program, with DECL inserted as a declaration,
 # and with RESOLVE1 and RESOLVE2 as annotations on the conflicted rule for
 # stmt.  Then compile the result.
-m4_define([_AT_TEST_GLR_CALC],
-[AT_DATA([types.y],
+m4_define([_AT_TEST_GLR_CXXTYPES],
+[AT_DATA_GRAMMAR([types.y],
 [[/* Simplified C++ Type and Expression Grammar.  */

 $1
@@ -253,57 +253,62 @@ m4_define([_AT_VERBOSE_GLR_STDERR],
 ## ---------------------------------------------------- ##

 AT_SETUP([GLR: Resolve ambiguity, impure, no locations])
-_AT_TEST_GLR_CALC([],[%dprec 1],[%dprec 2])
+_AT_TEST_GLR_CXXTYPES([],
+                      [%dprec 1], [%dprec 2])
 AT_PARSER_CHECK([[./types test-input | sed 's/  *$//']], 0,
                _AT_RESOLVED_GLR_OUTPUT, _AT_GLR_STDERR)
 AT_CLEANUP

 AT_SETUP([GLR: Resolve ambiguity, impure, locations])
-_AT_TEST_GLR_CALC([%locations],[%dprec 1],[%dprec 2])
+_AT_TEST_GLR_CXXTYPES([%locations],[%dprec 1],[%dprec 2])
 AT_PARSER_CHECK([[./types test-input | sed 's/  *$//']], 0,
                _AT_RESOLVED_GLR_OUTPUT, _AT_GLR_STDERR)
 AT_CLEANUP

 AT_SETUP([GLR: Resolve ambiguity, pure, no locations])
-_AT_TEST_GLR_CALC([%pure-parser],[%dprec 1],[%dprec 2])
+_AT_TEST_GLR_CXXTYPES([%pure-parser],
+                      [%dprec 1], [%dprec 2])
 AT_PARSER_CHECK([[./types test-input | sed 's/  *$//']], 0,
                _AT_RESOLVED_GLR_OUTPUT, _AT_GLR_STDERR)
 AT_CLEANUP

 AT_SETUP([GLR: Resolve ambiguity, pure, locations])
-_AT_TEST_GLR_CALC([%pure-parser
-%locations],[%dprec 1],[%dprec 2])
+_AT_TEST_GLR_CXXTYPES([%pure-parser %locations],
+                      [%dprec 1], [%dprec 2])
 AT_PARSER_CHECK([[./types test-input | sed 's/  *$//']], 0,
                _AT_RESOLVED_GLR_OUTPUT, _AT_GLR_STDERR)
 AT_CLEANUP

 AT_SETUP([GLR: Merge conflicting parses, impure, no locations])
-_AT_TEST_GLR_CALC([],[%merge <stmtMerge>],[%merge <stmtMerge>])
+_AT_TEST_GLR_CXXTYPES([],
+                      [%merge <stmtMerge>], [%merge <stmtMerge>])
 AT_PARSER_CHECK([[./types test-input | sed 's/  *$//']], 0,
                _AT_AMBIG_GLR_OUTPUT, _AT_GLR_STDERR)
 AT_CLEANUP

 AT_SETUP([GLR: Merge conflicting parses, impure, locations])
-_AT_TEST_GLR_CALC([%locations],[%merge <stmtMerge>],[%merge <stmtMerge>])
+_AT_TEST_GLR_CXXTYPES([%locations],
+                      [%merge <stmtMerge>], [%merge <stmtMerge>])
 AT_PARSER_CHECK([[./types test-input | sed 's/  *$//']], 0,
                _AT_AMBIG_GLR_OUTPUT, _AT_GLR_STDERR)
 AT_CLEANUP

 AT_SETUP([GLR: Merge conflicting parses, pure, no locations])
-_AT_TEST_GLR_CALC([%pure-parser],[%merge <stmtMerge>],[%merge <stmtMerge>])
+_AT_TEST_GLR_CXXTYPES([%pure-parser],
+                      [%merge <stmtMerge>], [%merge <stmtMerge>])
 AT_PARSER_CHECK([[./types test-input | sed 's/  *$//']], 0,
                _AT_AMBIG_GLR_OUTPUT, _AT_GLR_STDERR)
 AT_CLEANUP
 AT_SETUP([GLR: Merge conflicting parses, pure, locations])
-_AT_TEST_GLR_CALC([%pure-parser
-%locations],[%merge <stmtMerge>],[%merge <stmtMerge>])
+_AT_TEST_GLR_CXXTYPES([%pure-parser %locations],
+                      [%merge <stmtMerge>],[%merge <stmtMerge>])
 AT_PARSER_CHECK([[./types test-input | sed 's/  *$//']], 0,
                _AT_AMBIG_GLR_OUTPUT, _AT_GLR_STDERR)
 AT_CLEANUP

 AT_SETUP([GLR: Verbose messages, resolve ambiguity, impure, no locations])
-_AT_TEST_GLR_CALC([%error-verbose],
-[%merge <stmtMerge>],[%merge <stmtMerge>])
+_AT_TEST_GLR_CXXTYPES([%error-verbose],
+                      [%merge <stmtMerge>], [%merge <stmtMerge>])
 AT_PARSER_CHECK([[./types test-input | sed 's/  *$//']], 0,
 	        _AT_AMBIG_GLR_OUTPUT, _AT_VERBOSE_GLR_STDERR)
 AT_CLEANUP
@@ -20,7 +20,7 @@ AT_BANNER([[GLR Regression Test #1.]])

 AT_SETUP([Badly Collapsed GLR States])

-AT_DATA([glr-regr1.y],
+AT_DATA_GRAMMAR([glr-regr1.y],
 [[/* Regression Test: Improper state compression */
 /* Reported by Scott McPeak */

@@ -55,7 +55,7 @@ m4_define([AT_TEST_CPP_GUARD_H],
 dirname=`AS_DIRNAME([$1])`
 AS_MKDIR_P([$dirname])

-AT_DATA([$1.y],
+AT_DATA_GRAMMAR([$1.y],
 [%%
 dummy:;
 ])
@@ -80,7 +80,7 @@ AT_TEST_CPP_GUARD_H([9foo])

 AT_SETUP([export YYLTYPE])

-AT_DATA([input.y],
+AT_DATA_GRAMMAR([input.y],
 [%locations

 %name-prefix="my_"
@@ -92,7 +92,7 @@ AT_CLEANUP

 AT_SETUP([Torturing the Scanner])

-AT_DATA([input.y],
+AT_DATA_GRAMMAR([input.y],
 [[%{
 /* This is seen in GCC: a %{ and %} in middle of a comment. */
 const char *foo = "So %{ and %} can be here too.";
@@ -188,6 +188,8 @@ yyerror (const char *msg)
 }
 ]])

+# Pacify Emacs' font-lock-mode: "
+
 AT_DATA([main.c],
 [[typedef int value_t;
 #include "input.h"
@@ -29,7 +29,7 @@ AT_SETUP([Early token definitions])
 # Found in GCJ: they expect the tokens to be defined before the user
 # prologue, so that they can use the token definitions in it.

-AT_DATA([input.y],
+AT_DATA_GRAMMAR([input.y],
 [[%{
 void yyerror (const char *s);
 int yylex (void);
@@ -313,7 +313,7 @@ AT_CLEANUP
 AT_SETUP([Token definitions])

 # Bison managed, when fed with `%token 'f' "f"' to #define 'f'!
-AT_DATA([input.y],
+AT_DATA_GRAMMAR([input.y],
 [%{
 void yyerror (const char *s);
 int yylex (void);
@@ -340,7 +340,7 @@ AT_CLEANUP

 AT_SETUP([Characters Escapes])

-AT_DATA([input.y],
+AT_DATA_GRAMMAR([input.y],
 [%{
 void yyerror (const char *s);
 int yylex (void);
@@ -352,6 +352,7 @@ exp:
 | '"'  "'"
 ;
 ]])
+# Pacify font-lock-mode: "

 AT_CHECK([bison -o input.c input.y])
 AT_COMPILE([input.o], [-c input.c])
@@ -22,6 +22,34 @@
 m4_version_prereq([2.52g])


+# AT_DATA_GRAMMAR_PROLOGUE
+# ------------------------
+# The prologue that should be included in any grammar whose parser is
+# meant to be compiled.
+m4_define([AT_DATA_GRAMMAR_PROLOGUE],
+[[%{
+#if HAVE_CONFIG_H
+# include <config.h>
+  /* We don't need perfect functions for these tests. */
+# undef malloc
+# undef memcmp
+# undef realloc
+#endif
+%}]
+])
+
+
+# AT_DATA_GRAMMAR(NAME, CONTENT)
+# ------------------------------
+# Generate the file NAME, whose CONTENT is preceded by
+# AT_DATA_GRAMMAR_PROLOGUE.
+m4_define([AT_DATA_GRAMMAR],
+[AT_DATA([$1],
+[AT_DATA_GRAMMAR_PROLOGUE
+$2])
+])
+
+
 # AT_COMPILE(OUTPUT, [SOURCES = OUTPUT.c])
 # ----------------------------------------
 #
@@ -77,7 +105,7 @@ m4_include([existing.at])
 # Some old bugs.
 m4_include([regression.at])

-# GLR tests: 
+# GLR tests:
 # C++ types, simplified
 m4_include([cxx-type.at])
 # Regression test for state-folding optimization
@@ -48,6 +48,7 @@ use strict;
 my $max = $ARGV[0] || 10;

 print <<EOF;
+]AT_DATA_GRAMMAR_PROLOGUE[
 %{
 #include <stdio.h>
 #include <stdlib.h>
@@ -163,6 +164,7 @@ use strict;
 my $max = $ARGV[0] || 10;

 print <<EOF;
+]AT_DATA_GRAMMAR_PROLOGUE[
 %{
 #include <stdio.h>
 #include <stdlib.h>