mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-19 09:13:04 +00:00
Merge remote-tracking branch 'origin/maint'
* origin/maint: yacc.c: always initialize yylloc scanner: issue a single error for groups of invalid characters tests: formatting changes doc: one of the fixes for an ambiguous grammar was ambiguous too doc: fix the dangling else with precedence directives doc: prefer "token" to TOKEN doc: formatting changes scanner: use explicit "ignore" statements Conflicts: src/scan-gram.l
This commit is contained in:
11
NEWS
11
NEWS
@@ -296,6 +296,17 @@ GNU Bison NEWS
|
|||||||
|
|
||||||
Two nodes were added to the documentation: Xml and Graphviz.
|
Two nodes were added to the documentation: Xml and Graphviz.
|
||||||
|
|
||||||
|
* Noteworthy changes in release ?.? (????-??-??) [?]
|
||||||
|
|
||||||
|
** Bug fixes
|
||||||
|
|
||||||
|
Warnings about uninitialized yylloc in yyparse have been fixed.
|
||||||
|
|
||||||
|
** Documentation
|
||||||
|
|
||||||
|
The sections about shift/reduce and reduce/reduce conflict resolution
|
||||||
|
have been fixed and extended.
|
||||||
|
|
||||||
* Noteworthy changes in release 2.6.5 (2012-11-07) [stable]
|
* Noteworthy changes in release 2.6.5 (2012-11-07) [stable]
|
||||||
|
|
||||||
We consider compiler warnings about Bison generated parsers to be bugs.
|
We consider compiler warnings about Bison generated parsers to be bugs.
|
||||||
|
|||||||
4
THANKS
4
THANKS
@@ -1,8 +1,9 @@
|
|||||||
Bison was originally written by Robert Corbett. It would not be what
|
Bison was originally written by Robert Corbett. It would not be what
|
||||||
it is today without the invaluable help of these people:
|
it is today without the invaluable help of these people:
|
||||||
|
|
||||||
|
Аскар Сафин safinaskar@mail.ru
|
||||||
Airy Andre Airy.Andre@edf.fr
|
Airy Andre Airy.Andre@edf.fr
|
||||||
Akim Demaille akim@freefriends.org
|
Akim Demaille akim@lrde.epita.fr
|
||||||
Albert Chin-A-Young china@thewrittenword.com
|
Albert Chin-A-Young china@thewrittenword.com
|
||||||
Alexander Belopolsky alexb@rentec.com
|
Alexander Belopolsky alexb@rentec.com
|
||||||
Alexandre Duret-Lutz adl@lrde.epita.fr
|
Alexandre Duret-Lutz adl@lrde.epita.fr
|
||||||
@@ -89,6 +90,7 @@ Pascal Bart pascal.bart@epita.fr
|
|||||||
Paul Eggert eggert@cs.ucla.edu
|
Paul Eggert eggert@cs.ucla.edu
|
||||||
Paul Hilfinger Hilfinger@CS.Berkeley.EDU
|
Paul Hilfinger Hilfinger@CS.Berkeley.EDU
|
||||||
Per Allansson per@appgate.com
|
Per Allansson per@appgate.com
|
||||||
|
Peter Eisentraut peter_e@gmx.net
|
||||||
Peter Fales psfales@lucent.com
|
Peter Fales psfales@lucent.com
|
||||||
Peter Hamorsky hamo@upjs.sk
|
Peter Hamorsky hamo@upjs.sk
|
||||||
Peter Simons simons@cryp.to
|
Peter Simons simons@cryp.to
|
||||||
|
|||||||
11
data/yacc.c
11
data/yacc.c
@@ -186,7 +186,8 @@ int yychar;
|
|||||||
or non-GCC compilers. */
|
or non-GCC compilers. */
|
||||||
static YYSTYPE yyval_default;
|
static YYSTYPE yyval_default;
|
||||||
# define YY_INITIAL_VALUE(Value) = Value
|
# define YY_INITIAL_VALUE(Value) = Value
|
||||||
#endif]])[
|
#endif]b4_locations_if([[
|
||||||
|
static YYLTYPE yyloc_default][]b4_yyloc_default[;]])])[
|
||||||
#ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
|
#ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
|
||||||
# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
|
# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
|
||||||
# define YY_IGNORE_MAYBE_UNINITIALIZED_END
|
# define YY_IGNORE_MAYBE_UNINITIALIZED_END
|
||||||
@@ -199,7 +200,7 @@ static YYSTYPE yyval_default;
|
|||||||
YYSTYPE yylval YY_INITIAL_VALUE(yyval_default);]b4_locations_if([[
|
YYSTYPE yylval YY_INITIAL_VALUE(yyval_default);]b4_locations_if([[
|
||||||
|
|
||||||
/* Location data for the lookahead symbol. */
|
/* Location data for the lookahead symbol. */
|
||||||
YYLTYPE yylloc][]b4_yyloc_default[;
|
YYLTYPE yylloc]b4_pure_if([ = yyloc_default], [b4_yyloc_default])[;
|
||||||
]])b4_pure_if([], [[
|
]])b4_pure_if([], [[
|
||||||
|
|
||||||
/* Number of syntax errors so far. */
|
/* Number of syntax errors so far. */
|
||||||
@@ -1307,7 +1308,8 @@ b4_function_define([[yyparse]], [[int]], b4_parse_param)[
|
|||||||
yypstate *yyps_local;]b4_pure_if([[
|
yypstate *yyps_local;]b4_pure_if([[
|
||||||
int yychar;
|
int yychar;
|
||||||
YYSTYPE yylval;]b4_locations_if([[
|
YYSTYPE yylval;]b4_locations_if([[
|
||||||
YYLTYPE yylloc][]b4_yyloc_default[;]])])[
|
static YYLTYPE yyloc_default][]b4_yyloc_default[;
|
||||||
|
YYLTYPE yylloc = yyloc_default;]])])[
|
||||||
if (yyps)
|
if (yyps)
|
||||||
yyps_local = yyps;
|
yyps_local = yyps;
|
||||||
else
|
else
|
||||||
@@ -1451,8 +1453,7 @@ b4_function_define([[yyparse]], [[int]], b4_parse_param)[
|
|||||||
yychar = YYEMPTY; /* Cause a token to be read. */
|
yychar = YYEMPTY; /* Cause a token to be read. */
|
||||||
]m4_ifdef([b4_initial_action], [
|
]m4_ifdef([b4_initial_action], [
|
||||||
b4_dollar_pushdef([m4_define([b4_dollar_dollar_used])yylval], [],
|
b4_dollar_pushdef([m4_define([b4_dollar_dollar_used])yylval], [],
|
||||||
[m4_define([b4_at_dollar_used])dnl
|
[b4_push_if([b4_pure_if([*])yypushed_loc], [yylloc])])dnl
|
||||||
b4_push_if([b4_pure_if([*])yypushed_loc], [yylloc])])dnl
|
|
||||||
/* User initialization code. */
|
/* User initialization code. */
|
||||||
b4_user_initial_action
|
b4_user_initial_action
|
||||||
b4_dollar_popdef[]dnl
|
b4_dollar_popdef[]dnl
|
||||||
|
|||||||
181
doc/bison.texi
181
doc/bison.texi
@@ -280,6 +280,7 @@ Operator Precedence
|
|||||||
* Precedence Only:: How to specify precedence only.
|
* Precedence Only:: How to specify precedence only.
|
||||||
* Precedence Examples:: How these features are used in the previous example.
|
* Precedence Examples:: How these features are used in the previous example.
|
||||||
* How Precedence:: How they work.
|
* How Precedence:: How they work.
|
||||||
|
* Non Operators:: Using precedence for general conflicts.
|
||||||
|
|
||||||
Tuning LR
|
Tuning LR
|
||||||
|
|
||||||
@@ -6875,7 +6876,7 @@ expr:
|
|||||||
term:
|
term:
|
||||||
'(' expr ')'
|
'(' expr ')'
|
||||||
| term '!'
|
| term '!'
|
||||||
| NUMBER
|
| "number"
|
||||||
;
|
;
|
||||||
@end group
|
@end group
|
||||||
@end example
|
@end example
|
||||||
@@ -6914,20 +6915,20 @@ statements, with a pair of rules like this:
|
|||||||
@example
|
@example
|
||||||
@group
|
@group
|
||||||
if_stmt:
|
if_stmt:
|
||||||
IF expr THEN stmt
|
"if" expr "then" stmt
|
||||||
| IF expr THEN stmt ELSE stmt
|
| "if" expr "then" stmt "else" stmt
|
||||||
;
|
;
|
||||||
@end group
|
@end group
|
||||||
@end example
|
@end example
|
||||||
|
|
||||||
@noindent
|
@noindent
|
||||||
Here we assume that @code{IF}, @code{THEN} and @code{ELSE} are
|
Here @code{"if"}, @code{"then"} and @code{"else"} are terminal symbols for
|
||||||
terminal symbols for specific keyword tokens.
|
specific keyword tokens.
|
||||||
|
|
||||||
When the @code{ELSE} token is read and becomes the lookahead token, the
|
When the @code{"else"} token is read and becomes the lookahead token, the
|
||||||
contents of the stack (assuming the input is valid) are just right for
|
contents of the stack (assuming the input is valid) are just right for
|
||||||
reduction by the first rule. But it is also legitimate to shift the
|
reduction by the first rule. But it is also legitimate to shift the
|
||||||
@code{ELSE}, because that would lead to eventual reduction by the second
|
@code{"else"}, because that would lead to eventual reduction by the second
|
||||||
rule.
|
rule.
|
||||||
|
|
||||||
This situation, where either a shift or a reduction would be valid, is
|
This situation, where either a shift or a reduction would be valid, is
|
||||||
@@ -6936,14 +6937,14 @@ these conflicts by choosing to shift, unless otherwise directed by
|
|||||||
operator precedence declarations. To see the reason for this, let's
|
operator precedence declarations. To see the reason for this, let's
|
||||||
contrast it with the other alternative.
|
contrast it with the other alternative.
|
||||||
|
|
||||||
Since the parser prefers to shift the @code{ELSE}, the result is to attach
|
Since the parser prefers to shift the @code{"else"}, the result is to attach
|
||||||
the else-clause to the innermost if-statement, making these two inputs
|
the else-clause to the innermost if-statement, making these two inputs
|
||||||
equivalent:
|
equivalent:
|
||||||
|
|
||||||
@example
|
@example
|
||||||
if x then if y then win (); else lose;
|
if x then if y then win; else lose;
|
||||||
|
|
||||||
if x then do; if y then win (); else lose; end;
|
if x then do; if y then win; else lose; end;
|
||||||
@end example
|
@end example
|
||||||
|
|
||||||
But if the parser chose to reduce when possible rather than shift, the
|
But if the parser chose to reduce when possible rather than shift, the
|
||||||
@@ -6951,9 +6952,9 @@ result would be to attach the else-clause to the outermost if-statement,
|
|||||||
making these two inputs equivalent:
|
making these two inputs equivalent:
|
||||||
|
|
||||||
@example
|
@example
|
||||||
if x then if y then win (); else lose;
|
if x then if y then win; else lose;
|
||||||
|
|
||||||
if x then do; if y then win (); end; else lose;
|
if x then do; if y then win; end; else lose;
|
||||||
@end example
|
@end example
|
||||||
|
|
||||||
The conflict exists because the grammar as written is ambiguous: either
|
The conflict exists because the grammar as written is ambiguous: either
|
||||||
@@ -6966,11 +6967,16 @@ This particular ambiguity was first encountered in the specifications of
|
|||||||
Algol 60 and is called the ``dangling @code{else}'' ambiguity.
|
Algol 60 and is called the ``dangling @code{else}'' ambiguity.
|
||||||
|
|
||||||
To avoid warnings from Bison about predictable, legitimate shift/reduce
|
To avoid warnings from Bison about predictable, legitimate shift/reduce
|
||||||
conflicts, use the @code{%expect @var{n}} declaration.
|
conflicts, you can use the @code{%expect @var{n}} declaration.
|
||||||
There will be no warning as long as the number of shift/reduce conflicts
|
There will be no warning as long as the number of shift/reduce conflicts
|
||||||
is exactly @var{n}, and Bison will report an error if there is a
|
is exactly @var{n}, and Bison will report an error if there is a
|
||||||
different number.
|
different number.
|
||||||
@xref{Expect Decl, ,Suppressing Conflict Warnings}.
|
@xref{Expect Decl, ,Suppressing Conflict Warnings}. However, we don't
|
||||||
|
recommend the use of @code{%expect} (except @samp{%expect 0}!), as an equal
|
||||||
|
number of conflicts does not mean that they are the @emph{same}. When
|
||||||
|
possible, you should rather use precedence directives to @emph{fix} the
|
||||||
|
conflicts explicitly (@pxref{Non Operators,, Using Precedence For Non
|
||||||
|
Operators}).
|
||||||
|
|
||||||
The definition of @code{if_stmt} above is solely to blame for the
|
The definition of @code{if_stmt} above is solely to blame for the
|
||||||
conflict, but the conflict does not actually appear without additional
|
conflict, but the conflict does not actually appear without additional
|
||||||
@@ -6979,7 +6985,6 @@ the conflict:
|
|||||||
|
|
||||||
@example
|
@example
|
||||||
@group
|
@group
|
||||||
%token IF THEN ELSE variable
|
|
||||||
%%
|
%%
|
||||||
@end group
|
@end group
|
||||||
@group
|
@group
|
||||||
@@ -6991,13 +6996,13 @@ stmt:
|
|||||||
|
|
||||||
@group
|
@group
|
||||||
if_stmt:
|
if_stmt:
|
||||||
IF expr THEN stmt
|
"if" expr "then" stmt
|
||||||
| IF expr THEN stmt ELSE stmt
|
| "if" expr "then" stmt "else" stmt
|
||||||
;
|
;
|
||||||
@end group
|
@end group
|
||||||
|
|
||||||
expr:
|
expr:
|
||||||
variable
|
"identifier"
|
||||||
;
|
;
|
||||||
@end example
|
@end example
|
||||||
|
|
||||||
@@ -7017,6 +7022,7 @@ shift and when to reduce.
|
|||||||
* Precedence Only:: How to specify precedence only.
|
* Precedence Only:: How to specify precedence only.
|
||||||
* Precedence Examples:: How these features are used in the previous example.
|
* Precedence Examples:: How these features are used in the previous example.
|
||||||
* How Precedence:: How they work.
|
* How Precedence:: How they work.
|
||||||
|
* Non Operators:: Using precedence for general conflicts.
|
||||||
@end menu
|
@end menu
|
||||||
|
|
||||||
@node Why Precedence
|
@node Why Precedence
|
||||||
@@ -7155,16 +7161,11 @@ would declare them in groups of equal precedence. For example, @code{'+'} is
|
|||||||
declared with @code{'-'}:
|
declared with @code{'-'}:
|
||||||
|
|
||||||
@example
|
@example
|
||||||
%left '<' '>' '=' NE LE GE
|
%left '<' '>' '=' "!=" "<=" ">="
|
||||||
%left '+' '-'
|
%left '+' '-'
|
||||||
%left '*' '/'
|
%left '*' '/'
|
||||||
@end example
|
@end example
|
||||||
|
|
||||||
@noindent
|
|
||||||
(Here @code{NE} and so on stand for the operators for ``not equal''
|
|
||||||
and so on. We assume that these tokens are more than one character long
|
|
||||||
and therefore are represented by names, not character literals.)
|
|
||||||
|
|
||||||
@node How Precedence
|
@node How Precedence
|
||||||
@subsection How Precedence Works
|
@subsection How Precedence Works
|
||||||
|
|
||||||
@@ -7187,6 +7188,44 @@ resolved.
|
|||||||
Not all rules and not all tokens have precedence. If either the rule or
|
Not all rules and not all tokens have precedence. If either the rule or
|
||||||
the lookahead token has no precedence, then the default is to shift.
|
the lookahead token has no precedence, then the default is to shift.
|
||||||
|
|
||||||
|
@node Non Operators
|
||||||
|
@subsection Using Precedence For Non Operators
|
||||||
|
|
||||||
|
Using precedence and associativity directives properly can help fix
|
||||||
|
shift/reduce conflicts that do not involve arithmetic-like operators. For
|
||||||
|
instance, the ``dangling @code{else}'' problem (@pxref{Shift/Reduce, ,
|
||||||
|
Shift/Reduce Conflicts}) can be solved elegantly in two different ways.
|
||||||
|
|
||||||
|
In the present case, the conflict is between the token @code{"else"} willing
|
||||||
|
to be shifted, and the rule @samp{if_stmt: "if" expr "then" stmt}, asking
|
||||||
|
for reduction. By default, the precedence of a rule is that of its last
|
||||||
|
token, here @code{"then"}, so the conflict will be solved appropriately
|
||||||
|
by giving @code{"else"} a precedence higher than that of @code{"then"}, for
|
||||||
|
instance as follows:
|
||||||
|
|
||||||
|
@example
|
||||||
|
@group
|
||||||
|
%nonassoc "then"
|
||||||
|
%nonassoc "else"
|
||||||
|
@end group
|
||||||
|
@end example
|
||||||
|
|
||||||
|
Alternatively, you may give both tokens the same precedence, in which case
|
||||||
|
associativity is used to solve the conflict. To preserve the shift action,
|
||||||
|
use right associativity:
|
||||||
|
|
||||||
|
@example
|
||||||
|
%right "then" "else"
|
||||||
|
@end example
|
||||||
|
|
||||||
|
Neither solution is perfect however. Since Bison does not provide, so far,
|
||||||
|
support for ``scoped'' precedence, both force you to declare the precedence
|
||||||
|
of these keywords with respect to the other operators of your grammar.
|
||||||
|
Therefore, instead of being warned about new conflicts you would be unaware
|
||||||
|
of (e.g., a shift/reduce conflict due to @samp{if test then 1 else 2 + 3}
|
||||||
|
being ambiguous: @samp{if test then 1 else (2 + 3)} or @samp{(if test then 1
|
||||||
|
else 2) + 3}?), the conflict will already be ``fixed''.
|
||||||
|
|
||||||
@node Contextual Precedence
|
@node Contextual Precedence
|
||||||
@section Context-Dependent Precedence
|
@section Context-Dependent Precedence
|
||||||
@cindex context-dependent precedence
|
@cindex context-dependent precedence
|
||||||
@@ -7347,30 +7386,38 @@ reduce/reduce conflict must be studied and usually eliminated. Here is the
|
|||||||
proper way to define @code{sequence}:
|
proper way to define @code{sequence}:
|
||||||
|
|
||||||
@example
|
@example
|
||||||
|
@group
|
||||||
sequence:
|
sequence:
|
||||||
/* empty */ @{ printf ("empty sequence\n"); @}
|
/* empty */ @{ printf ("empty sequence\n"); @}
|
||||||
| sequence word @{ printf ("added word %s\n", $2); @}
|
| sequence word @{ printf ("added word %s\n", $2); @}
|
||||||
;
|
;
|
||||||
|
@end group
|
||||||
@end example
|
@end example
|
||||||
|
|
||||||
Here is another common error that yields a reduce/reduce conflict:
|
Here is another common error that yields a reduce/reduce conflict:
|
||||||
|
|
||||||
@example
|
@example
|
||||||
sequence:
|
sequence:
|
||||||
|
@group
|
||||||
/* empty */
|
/* empty */
|
||||||
| sequence words
|
| sequence words
|
||||||
| sequence redirects
|
| sequence redirects
|
||||||
;
|
;
|
||||||
|
@end group
|
||||||
|
|
||||||
|
@group
|
||||||
words:
|
words:
|
||||||
/* empty */
|
/* empty */
|
||||||
| words word
|
| words word
|
||||||
;
|
;
|
||||||
|
@end group
|
||||||
|
|
||||||
|
@group
|
||||||
redirects:
|
redirects:
|
||||||
/* empty */
|
/* empty */
|
||||||
| redirects redirect
|
| redirects redirect
|
||||||
;
|
;
|
||||||
|
@end group
|
||||||
@end example
|
@end example
|
||||||
|
|
||||||
@noindent
|
@noindent
|
||||||
@@ -7423,6 +7470,58 @@ redirects:
|
|||||||
@end group
|
@end group
|
||||||
@end example
|
@end example
|
||||||
|
|
||||||
|
Yet this proposal introduces another kind of ambiguity! The input
|
||||||
|
@samp{word word} can be parsed as a single @code{words} composed of two
|
||||||
|
@samp{word}s, or as two one-@code{word} @code{words} (and likewise for
|
||||||
|
@code{redirect}/@code{redirects}). However this ambiguity is now a
|
||||||
|
shift/reduce conflict, and therefore it can now be addressed with precedence
|
||||||
|
directives.
|
||||||
|
|
||||||
|
To simplify the matter, we will proceed with @code{word} and @code{redirect}
|
||||||
|
being tokens: @code{"word"} and @code{"redirect"}.
|
||||||
|
|
||||||
|
To prefer the longest @code{words}, the conflict between the token
|
||||||
|
@code{"word"} and the rule @samp{sequence: sequence words} must be resolved
|
||||||
|
as a shift. To this end, we use the same techniques as described above, see
|
||||||
|
@ref{Non Operators,, Using Precedence For Non Operators}. One solution
|
||||||
|
relies on precedences: use @code{%prec} to give a lower precedence to the
|
||||||
|
rule:
|
||||||
|
|
||||||
|
@example
|
||||||
|
%nonassoc "word"
|
||||||
|
%nonassoc "sequence"
|
||||||
|
%%
|
||||||
|
@group
|
||||||
|
sequence:
|
||||||
|
/* empty */
|
||||||
|
| sequence word %prec "sequence"
|
||||||
|
| sequence redirect %prec "sequence"
|
||||||
|
;
|
||||||
|
@end group
|
||||||
|
|
||||||
|
@group
|
||||||
|
words:
|
||||||
|
word
|
||||||
|
| words "word"
|
||||||
|
;
|
||||||
|
@end group
|
||||||
|
@end example
|
||||||
|
|
||||||
|
Another solution relies on associativity: provide both the token and the
|
||||||
|
rule with the same precedence, but make them right-associative:
|
||||||
|
|
||||||
|
@example
|
||||||
|
%right "word" "redirect"
|
||||||
|
%%
|
||||||
|
@group
|
||||||
|
sequence:
|
||||||
|
/* empty */
|
||||||
|
| sequence word %prec "word"
|
||||||
|
| sequence redirect %prec "redirect"
|
||||||
|
;
|
||||||
|
@end group
|
||||||
|
@end example
|
||||||
|
|
||||||
@node Mysterious Conflicts
|
@node Mysterious Conflicts
|
||||||
@section Mysterious Conflicts
|
@section Mysterious Conflicts
|
||||||
@cindex Mysterious Conflicts
|
@cindex Mysterious Conflicts
|
||||||
@@ -7432,8 +7531,6 @@ Here is an example:
|
|||||||
|
|
||||||
@example
|
@example
|
||||||
@group
|
@group
|
||||||
%token ID
|
|
||||||
|
|
||||||
%%
|
%%
|
||||||
def: param_spec return_spec ',';
|
def: param_spec return_spec ',';
|
||||||
param_spec:
|
param_spec:
|
||||||
@@ -7448,10 +7545,10 @@ return_spec:
|
|||||||
;
|
;
|
||||||
@end group
|
@end group
|
||||||
@group
|
@group
|
||||||
type: ID;
|
type: "id";
|
||||||
@end group
|
@end group
|
||||||
@group
|
@group
|
||||||
name: ID;
|
name: "id";
|
||||||
name_list:
|
name_list:
|
||||||
name
|
name
|
||||||
| name ',' name_list
|
| name ',' name_list
|
||||||
@@ -7459,16 +7556,16 @@ name_list:
|
|||||||
@end group
|
@end group
|
||||||
@end example
|
@end example
|
||||||
|
|
||||||
It would seem that this grammar can be parsed with only a single token
|
It would seem that this grammar can be parsed with only a single token of
|
||||||
of lookahead: when a @code{param_spec} is being read, an @code{ID} is
|
lookahead: when a @code{param_spec} is being read, an @code{"id"} is a
|
||||||
a @code{name} if a comma or colon follows, or a @code{type} if another
|
@code{name} if a comma or colon follows, or a @code{type} if another
|
||||||
@code{ID} follows. In other words, this grammar is LR(1).
|
@code{"id"} follows. In other words, this grammar is LR(1).
|
||||||
|
|
||||||
@cindex LR
|
@cindex LR
|
||||||
@cindex LALR
|
@cindex LALR
|
||||||
However, for historical reasons, Bison cannot by default handle all
|
However, for historical reasons, Bison cannot by default handle all
|
||||||
LR(1) grammars.
|
LR(1) grammars.
|
||||||
In this grammar, two contexts, that after an @code{ID} at the beginning
|
In this grammar, two contexts, that after an @code{"id"} at the beginning
|
||||||
of a @code{param_spec} and likewise at the beginning of a
|
of a @code{param_spec} and likewise at the beginning of a
|
||||||
@code{return_spec}, are similar enough that Bison assumes they are the
|
@code{return_spec}, are similar enough that Bison assumes they are the
|
||||||
same.
|
same.
|
||||||
@@ -7499,27 +7596,24 @@ distinct. In the above example, adding one rule to
|
|||||||
|
|
||||||
@example
|
@example
|
||||||
@group
|
@group
|
||||||
%token BOGUS
|
|
||||||
@dots{}
|
|
||||||
%%
|
|
||||||
@dots{}
|
@dots{}
|
||||||
return_spec:
|
return_spec:
|
||||||
type
|
type
|
||||||
| name ':' type
|
| name ':' type
|
||||||
| ID BOGUS /* This rule is never used. */
|
| "id" "bogus" /* This rule is never used. */
|
||||||
;
|
;
|
||||||
@end group
|
@end group
|
||||||
@end example
|
@end example
|
||||||
|
|
||||||
This corrects the problem because it introduces the possibility of an
|
This corrects the problem because it introduces the possibility of an
|
||||||
additional active rule in the context after the @code{ID} at the beginning of
|
additional active rule in the context after the @code{"id"} at the beginning of
|
||||||
@code{return_spec}. This rule is not active in the corresponding context
|
@code{return_spec}. This rule is not active in the corresponding context
|
||||||
in a @code{param_spec}, so the two contexts receive distinct parser states.
|
in a @code{param_spec}, so the two contexts receive distinct parser states.
|
||||||
As long as the token @code{BOGUS} is never generated by @code{yylex},
|
As long as the token @code{"bogus"} is never generated by @code{yylex},
|
||||||
the added rule cannot alter the way actual input is parsed.
|
the added rule cannot alter the way actual input is parsed.
|
||||||
|
|
||||||
In this particular example, there is another way to solve the problem:
|
In this particular example, there is another way to solve the problem:
|
||||||
rewrite the rule for @code{return_spec} to use @code{ID} directly
|
rewrite the rule for @code{return_spec} to use @code{"id"} directly
|
||||||
instead of via @code{name}. This also causes the two confusing
|
instead of via @code{name}. This also causes the two confusing
|
||||||
contexts to have different sets of active rules, because the one for
|
contexts to have different sets of active rules, because the one for
|
||||||
@code{return_spec} activates the altered rule for @code{return_spec}
|
@code{return_spec} activates the altered rule for @code{return_spec}
|
||||||
@@ -7532,7 +7626,7 @@ param_spec:
|
|||||||
;
|
;
|
||||||
return_spec:
|
return_spec:
|
||||||
type
|
type
|
||||||
| ID ':' type
|
| "id" ':' type
|
||||||
;
|
;
|
||||||
@end example
|
@end example
|
||||||
|
|
||||||
@@ -12746,7 +12840,10 @@ London, Department of Computer Science, TR-00-12 (December 2000).
|
|||||||
@c LocalWords: subdirectory Solaris nonassociativity perror schemas Malloy ints
|
@c LocalWords: subdirectory Solaris nonassociativity perror schemas Malloy ints
|
||||||
@c LocalWords: Scannerless ispell american ChangeLog smallexample CSTYPE CLTYPE
|
@c LocalWords: Scannerless ispell american ChangeLog smallexample CSTYPE CLTYPE
|
||||||
@c LocalWords: clval CDEBUG cdebug deftypeopx yyterminate LocationType
|
@c LocalWords: clval CDEBUG cdebug deftypeopx yyterminate LocationType
|
||||||
@c LocalWords: errorVerbose
|
@c LocalWords: parsers parser's
|
||||||
|
@c LocalWords: associativity subclasses precedences unresolvable runnable
|
||||||
|
@c LocalWords: allocators subunit initializations unreferenced untyped
|
||||||
|
@c LocalWords: errorVerbose subtype subtypes
|
||||||
|
|
||||||
@c Local Variables:
|
@c Local Variables:
|
||||||
@c ispell-dictionary: "american"
|
@c ispell-dictionary: "american"
|
||||||
|
|||||||
@@ -131,8 +131,8 @@ static void unexpected_newline (boundary, char const *);
|
|||||||
%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
|
%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
|
||||||
|
|
||||||
letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
|
letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
|
||||||
|
notletter [^.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]{-}[%\{]
|
||||||
id {letter}({letter}|[-0-9])*
|
id {letter}({letter}|[-0-9])*
|
||||||
directive %{id}
|
|
||||||
int [0-9]+
|
int [0-9]+
|
||||||
|
|
||||||
/* POSIX says that a tag must be both an id and a C union member, but
|
/* POSIX says that a tag must be both an id and a C union member, but
|
||||||
@@ -184,7 +184,7 @@ eqopt ([[:space:]]*=)?
|
|||||||
complain (loc, Wother, _("stray ',' treated as white space"));
|
complain (loc, Wother, _("stray ',' treated as white space"));
|
||||||
}
|
}
|
||||||
[ \f\n\t\v] |
|
[ \f\n\t\v] |
|
||||||
"//".* ;
|
"//".* continue;
|
||||||
"/*" {
|
"/*" {
|
||||||
token_start = loc->start;
|
token_start = loc->start;
|
||||||
context_state = YY_START;
|
context_state = YY_START;
|
||||||
@@ -269,7 +269,7 @@ eqopt ([[:space:]]*=)?
|
|||||||
"%pure"[-_]"parser" DEPRECATED("%pure-parser");
|
"%pure"[-_]"parser" DEPRECATED("%pure-parser");
|
||||||
"%token"[-_]"table" DEPRECATED("%token-table");
|
"%token"[-_]"table" DEPRECATED("%token-table");
|
||||||
|
|
||||||
{directive} {
|
"%"{id}|"%"{notletter}([[:graph:]])+ {
|
||||||
complain (loc, complaint, _("invalid directive: %s"), quote (yytext));
|
complain (loc, complaint, _("invalid directive: %s"), quote (yytext));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -353,8 +353,9 @@ eqopt ([[:space:]]*=)?
|
|||||||
BEGIN SC_BRACKETED_ID;
|
BEGIN SC_BRACKETED_ID;
|
||||||
}
|
}
|
||||||
|
|
||||||
. {
|
[^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\n\t\v]+|. {
|
||||||
complain (loc, complaint, _("invalid character: %s"),
|
complain (loc, complaint, "%s: %s",
|
||||||
|
ngettext ("invalid character", "invalid characters", yyleng),
|
||||||
quote_mem (yytext, yyleng));
|
quote_mem (yytext, yyleng));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -449,10 +450,14 @@ eqopt ([[:space:]]*=)?
|
|||||||
else
|
else
|
||||||
complain (loc, complaint, _("an identifier expected"));
|
complain (loc, complaint, _("an identifier expected"));
|
||||||
}
|
}
|
||||||
. {
|
|
||||||
complain (loc, complaint, _("invalid character in bracketed name: %s"),
|
[^\].A-Za-z0-9_/ \f\n\t\v]+|. {
|
||||||
|
complain (loc, complaint, "%s: %s",
|
||||||
|
ngettext ("invalid character in bracketed name",
|
||||||
|
"invalid characters in bracketed name", yyleng),
|
||||||
quote_mem (yytext, yyleng));
|
quote_mem (yytext, yyleng));
|
||||||
}
|
}
|
||||||
|
|
||||||
<<EOF>> {
|
<<EOF>> {
|
||||||
BEGIN bracketed_id_context_state;
|
BEGIN bracketed_id_context_state;
|
||||||
unexpected_eof (bracketed_id_start, "]");
|
unexpected_eof (bracketed_id_start, "]");
|
||||||
@@ -479,7 +484,7 @@ eqopt ([[:space:]]*=)?
|
|||||||
<SC_YACC_COMMENT>
|
<SC_YACC_COMMENT>
|
||||||
{
|
{
|
||||||
"*/" BEGIN context_state;
|
"*/" BEGIN context_state;
|
||||||
.|\n ;
|
.|\n continue;
|
||||||
<<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
|
<<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -73,8 +73,8 @@ AT_CLEANUP
|
|||||||
## Initial location. ##
|
## Initial location. ##
|
||||||
## ------------------ ##
|
## ------------------ ##
|
||||||
|
|
||||||
# AT_TEST(SKELETON-NAME, DIRECTIVES)
|
# AT_TEST(SKELETON-NAME, DIRECTIVES, [MORE-DIRECTIVES], [LOCATION = 1.1])
|
||||||
# ----------------------------------
|
# -----------------------------------------------------------------------
|
||||||
# Check that the initial location is correct.
|
# Check that the initial location is correct.
|
||||||
m4_pushdef([AT_TEST],
|
m4_pushdef([AT_TEST],
|
||||||
[AT_SETUP([Initial location: $1 $2])
|
[AT_SETUP([Initial location: $1 $2])
|
||||||
@@ -85,7 +85,8 @@ AT_DATA_GRAMMAR([[input.y]],
|
|||||||
%locations
|
%locations
|
||||||
%debug
|
%debug
|
||||||
%skeleton "$1"
|
%skeleton "$1"
|
||||||
$2
|
]$2[
|
||||||
|
]$3[
|
||||||
%parse-param { int x } // Useless, but used to force yyerror purity.
|
%parse-param { int x } // Useless, but used to force yyerror purity.
|
||||||
%code
|
%code
|
||||||
{
|
{
|
||||||
@@ -122,8 +123,8 @@ main (void)
|
|||||||
|
|
||||||
AT_FULL_COMPILE([input])
|
AT_FULL_COMPILE([input])
|
||||||
AT_PARSER_CHECK([./input], 1, [],
|
AT_PARSER_CHECK([./input], 1, [],
|
||||||
[[1.1
|
[m4_default([$4], [1.1])
|
||||||
1.1: syntax error
|
m4_default([$4], [1.1])[: syntax error
|
||||||
]])
|
]])
|
||||||
AT_BISON_OPTION_POPDEFS
|
AT_BISON_OPTION_POPDEFS
|
||||||
AT_CLEANUP
|
AT_CLEANUP
|
||||||
@@ -138,6 +139,36 @@ AT_TEST([glr.c])
|
|||||||
AT_TEST([lalr1.cc])
|
AT_TEST([lalr1.cc])
|
||||||
AT_TEST([glr.cc])
|
AT_TEST([glr.cc])
|
||||||
|
|
||||||
|
## A very different test, based on PostgreSQL's implementation of the
|
||||||
|
## locations. See
|
||||||
|
## http://lists.gnu.org/archive/html/bug-bison/2012-11/msg00023.html
|
||||||
|
##
|
||||||
|
## Weirdly enough, to trigger the warning with GCC 4.7, we must not
|
||||||
|
## use fprintf, so run the test twice: once to check the warning
|
||||||
|
## (absence thereof), and another time to check the value.
|
||||||
|
AT_TEST([yacc.c], [%define api.pure],
|
||||||
|
[[%{
|
||||||
|
# define YYLTYPE int
|
||||||
|
# define YY_LOCATION_PRINT(Stream, Loc) \
|
||||||
|
(void) (Loc)
|
||||||
|
# define YYLLOC_DEFAULT(Current, Rhs, N) \
|
||||||
|
(Current) = ((Rhs)[N ? 1 : 0])
|
||||||
|
%}
|
||||||
|
]],
|
||||||
|
[@&t@])
|
||||||
|
|
||||||
|
AT_TEST([yacc.c], [%define api.pure],
|
||||||
|
[[%{
|
||||||
|
# define YYLTYPE int
|
||||||
|
# define YY_LOCATION_PRINT(Stream, Loc) \
|
||||||
|
fprintf ((Stream), "%d", (Loc))
|
||||||
|
# define YYLLOC_DEFAULT(Current, Rhs, N) \
|
||||||
|
(Current) = ((Rhs)[N ? 1 : 0])
|
||||||
|
%}
|
||||||
|
]],
|
||||||
|
[0])
|
||||||
|
|
||||||
|
|
||||||
m4_popdef([AT_TEST])
|
m4_popdef([AT_TEST])
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -39,11 +39,7 @@ default: 'a' }
|
|||||||
AT_CHECK([[$PERL -pi -e 's/\\(\d{3})/chr(oct($1))/ge' input.y || exit 77]])
|
AT_CHECK([[$PERL -pi -e 's/\\(\d{3})/chr(oct($1))/ge' input.y || exit 77]])
|
||||||
|
|
||||||
AT_BISON_CHECK([input.y], [1], [],
|
AT_BISON_CHECK([input.y], [1], [],
|
||||||
[[input.y:1.1: error: invalid character: '\0'
|
[[input.y:1.1-2: error: invalid characters: '\0\001\002\377?'
|
||||||
input.y:1.1: error: invalid character: '\001'
|
|
||||||
input.y:1.1: error: invalid character: '\002'
|
|
||||||
input.y:1.1: error: invalid character: '\377'
|
|
||||||
input.y:1.2: error: invalid character: '?'
|
|
||||||
input.y:3.1: error: invalid character: '?'
|
input.y:3.1: error: invalid character: '?'
|
||||||
input.y:4.14: error: invalid character: '}'
|
input.y:4.14: error: invalid character: '}'
|
||||||
input.y:5.1: error: invalid character: '%'
|
input.y:5.1: error: invalid character: '%'
|
||||||
|
|||||||
@@ -55,12 +55,12 @@ static int power (int base, int exponent);
|
|||||||
%%
|
%%
|
||||||
input:
|
input:
|
||||||
line
|
line
|
||||||
| input line { }
|
| input line {}
|
||||||
;
|
;
|
||||||
|
|
||||||
line:
|
line:
|
||||||
'\n'
|
'\n'
|
||||||
| exp '\n' { }
|
| exp '\n' {}
|
||||||
;
|
;
|
||||||
|
|
||||||
exp:
|
exp:
|
||||||
@@ -72,12 +72,12 @@ exp:
|
|||||||
$$ = $l;
|
$$ = $l;
|
||||||
}
|
}
|
||||||
| exp[x] '+' { $<ival>$ = $x; } [l] exp[r] { $$ = $<ival>l + $r; }
|
| exp[x] '+' { $<ival>$ = $x; } [l] exp[r] { $$ = $<ival>l + $r; }
|
||||||
| exp[l] '-' exp[r] { $$ = $l - $r; }
|
| exp[l] '-' exp[r] { $$ = $l - $r; }
|
||||||
| exp[l] '*' exp[r] { $$ = $l * $r; }
|
| exp[l] '*' exp[r] { $$ = $l * $r; }
|
||||||
| exp[l] '/' exp[r] { $$ = $l / $r; }
|
| exp[l] '/' exp[r] { $$ = $l / $r; }
|
||||||
| '-' exp %prec NEG { $$ = -$2; }
|
| '-' exp %prec NEG { $$ = -$2; }
|
||||||
| exp[l] '^' exp[r] { $$ = power ($l, $r); }
|
| exp[l] '^' exp[r] { $$ = power ($l, $r); }
|
||||||
| '(' exp[e] ')' { $$ = $e; }
|
| '(' exp[e] ')' { $$ = $e; }
|
||||||
| '(' error ')' { $$ = 1111; yyerrok; }
|
| '(' error ')' { $$ = 1111; yyerrok; }
|
||||||
| '!' { $$ = 0; YYERROR; }
|
| '!' { $$ = 0; YYERROR; }
|
||||||
| '-' error { $$ = 0; YYERROR; }
|
| '-' error { $$ = 0; YYERROR; }
|
||||||
@@ -220,12 +220,12 @@ static int power (int base, int exponent);
|
|||||||
%%
|
%%
|
||||||
input:
|
input:
|
||||||
line
|
line
|
||||||
| input line { }
|
| input line {}
|
||||||
;
|
;
|
||||||
|
|
||||||
line:
|
line:
|
||||||
'\n'
|
'\n'
|
||||||
| exp '\n' { }
|
| exp '\n' {}
|
||||||
;
|
;
|
||||||
|
|
||||||
exp:
|
exp:
|
||||||
@@ -241,7 +241,7 @@ exp:
|
|||||||
| exp[x] '*' { $<ival>$ = $x; } [l] exp[r] { $$ = $l * $r; }
|
| exp[x] '*' { $<ival>$ = $x; } [l] exp[r] { $$ = $l * $r; }
|
||||||
| exp[l] '/' exp[r] { $$ = $l / $r; }
|
| exp[l] '/' exp[r] { $$ = $l / $r; }
|
||||||
| '-' exp %prec NEG { $$ = -$2; }
|
| '-' exp %prec NEG { $$ = -$2; }
|
||||||
| exp[l] '^' exp[r] { $$ = power ($l, $r12); }
|
| exp[l] '^' exp[r] { $$ = power ($l, $r12); }
|
||||||
| '(' exp ')' { $$ = $expo; }
|
| '(' exp ')' { $$ = $expo; }
|
||||||
| '(' error ')' { $$ = 1111; yyerrok; }
|
| '(' error ')' { $$ = 1111; yyerrok; }
|
||||||
| '!' { $$ = 0; YYERROR; }
|
| '!' { $$ = 0; YYERROR; }
|
||||||
@@ -258,8 +258,8 @@ test.y:42.1-3: refers to: $exp at $$
|
|||||||
test.y:51.7: possibly meant: $x, hiding $exp at $1
|
test.y:51.7: possibly meant: $x, hiding $exp at $1
|
||||||
test.y:51.41: possibly meant: $r, hiding $exp at $4
|
test.y:51.41: possibly meant: $r, hiding $exp at $4
|
||||||
test.y:52.51-52: error: $l of 'exp' has no declared type
|
test.y:52.51-52: error: $l of 'exp' has no declared type
|
||||||
test.y:55.46-49: error: invalid reference: '$r12'
|
test.y:55.40-43: error: invalid reference: '$r12'
|
||||||
test.y:55.3-53: symbol not found in production: r12
|
test.y:55.3-47: symbol not found in production: r12
|
||||||
test.y:56.29-33: error: invalid reference: '$expo'
|
test.y:56.29-33: error: invalid reference: '$expo'
|
||||||
test.y:56.3-46: symbol not found in production: expo
|
test.y:56.3-46: symbol not found in production: expo
|
||||||
]])
|
]])
|
||||||
@@ -443,19 +443,14 @@ AT_SETUP([Stray symbols in brackets])
|
|||||||
AT_DATA_GRAMMAR([test.y],
|
AT_DATA_GRAMMAR([test.y],
|
||||||
[[
|
[[
|
||||||
%%
|
%%
|
||||||
start: foo[ /* aaa */ *&-.+\000\001\002\377 ] bar
|
start: foo[ % /* aaa */ *&-.+\000\001\002\377 ] bar
|
||||||
{ s = $foo; }
|
{ s = $foo; }
|
||||||
]])
|
]])
|
||||||
AT_CHECK([[$PERL -pi -e 's/\\(\d{3})/chr(oct($1))/ge' test.y || exit 77]])
|
AT_CHECK([[$PERL -pi -e 's/\\(\d{3})/chr(oct($1))/ge' test.y || exit 77]])
|
||||||
AT_BISON_CHECK([-o test.c test.y], 1, [],
|
AT_BISON_CHECK([-o test.c test.y], 1, [],
|
||||||
[[test.y:11.23: error: invalid character in bracketed name: '*'
|
[[test.y:11.13: error: invalid character in bracketed name: '%'
|
||||||
test.y:11.24: error: invalid character in bracketed name: '&'
|
test.y:11.25-27: error: invalid characters in bracketed name: '*&-'
|
||||||
test.y:11.25: error: invalid character in bracketed name: '-'
|
test.y:11.29-30: error: invalid characters in bracketed name: '+\0\001\002\377'
|
||||||
test.y:11.27: error: invalid character in bracketed name: '+'
|
|
||||||
test.y:11.28: error: invalid character in bracketed name: '\0'
|
|
||||||
test.y:11.28: error: invalid character in bracketed name: '\001'
|
|
||||||
test.y:11.28: error: invalid character in bracketed name: '\002'
|
|
||||||
test.y:11.28: error: invalid character in bracketed name: '\377'
|
|
||||||
]])
|
]])
|
||||||
AT_CLEANUP
|
AT_CLEANUP
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user