mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-19 17:23:02 +00:00
* doc/bison.texinfo (Location Tracking Calc): New node.
This commit is contained in:
@@ -1,3 +1,7 @@
|
|||||||
|
2001-08-29 Robert Anisko <anisko_r@epita.fr>
|
||||||
|
|
||||||
|
* doc/bison.texinfo (Location Tracking Calc): New node.
|
||||||
|
|
||||||
2001-08-29 Paul Eggert <eggert@twinsun.com>
|
2001-08-29 Paul Eggert <eggert@twinsun.com>
|
||||||
|
|
||||||
* src/output.c (output): Do not define const, as this now
|
* src/output.c (output): Do not define const, as this now
|
||||||
|
|||||||
208
doc/bison.info
208
doc/bison.info
@@ -31,111 +31,115 @@ instead of in the original English.
|
|||||||
|
|
||||||
Indirect:
|
Indirect:
|
||||||
bison.info-1: 1313
|
bison.info-1: 1313
|
||||||
bison.info-2: 50345
|
bison.info-2: 50688
|
||||||
bison.info-3: 99970
|
bison.info-3: 100578
|
||||||
bison.info-4: 148168
|
bison.info-4: 150128
|
||||||
bison.info-5: 191130
|
bison.info-5: 197515
|
||||||
|
|
||||||
Tag Table:
|
Tag Table:
|
||||||
(Indirect)
|
(Indirect)
|
||||||
Node: Top1313
|
Node: Top1313
|
||||||
Node: Introduction8686
|
Node: Introduction8966
|
||||||
Node: Conditions9961
|
Node: Conditions10241
|
||||||
Node: Copying11425
|
Node: Copying11705
|
||||||
Node: Concepts30628
|
Node: Concepts30908
|
||||||
Node: Language and Grammar31707
|
Node: Language and Grammar31987
|
||||||
Node: Grammar in Bison36723
|
Node: Grammar in Bison37003
|
||||||
Node: Semantic Values38647
|
Node: Semantic Values38927
|
||||||
Node: Semantic Actions40748
|
Node: Semantic Actions41028
|
||||||
Node: Locations Overview41937
|
Node: Locations Overview42217
|
||||||
Node: Bison Parser43384
|
Node: Bison Parser43664
|
||||||
Node: Stages45696
|
Node: Stages45976
|
||||||
Node: Grammar Layout46979
|
Node: Grammar Layout47259
|
||||||
Node: Examples48236
|
Node: Examples48516
|
||||||
Node: RPN Calc49371
|
Node: RPN Calc49714
|
||||||
Node: Rpcalc Decls50345
|
Node: Rpcalc Decls50688
|
||||||
Node: Rpcalc Rules51932
|
Node: Rpcalc Rules52275
|
||||||
Node: Rpcalc Input53732
|
Node: Rpcalc Input54075
|
||||||
Node: Rpcalc Line55193
|
Node: Rpcalc Line55536
|
||||||
Node: Rpcalc Expr56308
|
Node: Rpcalc Expr56651
|
||||||
Node: Rpcalc Lexer58253
|
Node: Rpcalc Lexer58596
|
||||||
Node: Rpcalc Main60825
|
Node: Rpcalc Main61168
|
||||||
Node: Rpcalc Error61223
|
Node: Rpcalc Error61566
|
||||||
Node: Rpcalc Gen62231
|
Node: Rpcalc Gen62574
|
||||||
Node: Rpcalc Compile63380
|
Node: Rpcalc Compile63723
|
||||||
Node: Infix Calc64255
|
Node: Infix Calc64598
|
||||||
Node: Simple Error Recovery66962
|
Node: Simple Error Recovery67305
|
||||||
Node: Multi-function Calc68848
|
Node: Location Tracking Calc69194
|
||||||
Node: Mfcalc Decl70414
|
Node: Ltcalc Decls69924
|
||||||
Node: Mfcalc Rules72437
|
Node: Ltcalc Rules70833
|
||||||
Node: Mfcalc Symtab73817
|
Node: Ltcalc Lexer72894
|
||||||
Node: Exercises80190
|
Node: Multi-function Calc75232
|
||||||
Node: Grammar File80696
|
Node: Mfcalc Decl76799
|
||||||
Node: Grammar Outline81544
|
Node: Mfcalc Rules78822
|
||||||
Node: C Declarations82278
|
Node: Mfcalc Symtab80202
|
||||||
Node: Bison Declarations82858
|
Node: Exercises86575
|
||||||
Node: Grammar Rules83270
|
Node: Grammar File87081
|
||||||
Node: C Code83730
|
Node: Grammar Outline87929
|
||||||
Node: Symbols84660
|
Node: C Declarations88663
|
||||||
Node: Rules89741
|
Node: Bison Declarations89243
|
||||||
Node: Recursion91380
|
Node: Grammar Rules89655
|
||||||
Node: Semantics93099
|
Node: C Code90115
|
||||||
Node: Value Type94193
|
Node: Symbols91045
|
||||||
Node: Multiple Types94865
|
Node: Rules96126
|
||||||
Node: Actions95882
|
Node: Recursion97765
|
||||||
Node: Action Types98667
|
Node: Semantics99484
|
||||||
Node: Mid-Rule Actions99970
|
Node: Value Type100578
|
||||||
Node: Locations105540
|
Node: Multiple Types101250
|
||||||
Node: Location Type106188
|
Node: Actions102267
|
||||||
Node: Actions and Locations106746
|
Node: Action Types105052
|
||||||
Node: Location Default Action108902
|
Node: Mid-Rule Actions106355
|
||||||
Node: Declarations110365
|
Node: Locations111925
|
||||||
Node: Token Decl111684
|
Node: Location Type112573
|
||||||
Node: Precedence Decl113697
|
Node: Actions and Locations113131
|
||||||
Node: Union Decl115248
|
Node: Location Default Action115287
|
||||||
Node: Type Decl116092
|
Node: Declarations116750
|
||||||
Node: Expect Decl116998
|
Node: Token Decl118069
|
||||||
Node: Start Decl118544
|
Node: Precedence Decl120082
|
||||||
Node: Pure Decl118922
|
Node: Union Decl121633
|
||||||
Node: Decl Summary120599
|
Node: Type Decl122477
|
||||||
Node: Multiple Parsers125982
|
Node: Expect Decl123383
|
||||||
Node: Interface127476
|
Node: Start Decl124929
|
||||||
Node: Parser Function128348
|
Node: Pure Decl125307
|
||||||
Node: Lexical129183
|
Node: Decl Summary126984
|
||||||
Node: Calling Convention130589
|
Node: Multiple Parsers132367
|
||||||
Node: Token Values133360
|
Node: Interface133861
|
||||||
Node: Token Positions134509
|
Node: Parser Function134733
|
||||||
Node: Pure Calling135394
|
Node: Lexical135568
|
||||||
Node: Error Reporting138326
|
Node: Calling Convention136974
|
||||||
Node: Action Features140448
|
Node: Token Values139745
|
||||||
Node: Algorithm143743
|
Node: Token Positions140894
|
||||||
Node: Look-Ahead146036
|
Node: Pure Calling141779
|
||||||
Node: Shift/Reduce148168
|
Node: Error Reporting144711
|
||||||
Node: Precedence151080
|
Node: Action Features146833
|
||||||
Node: Why Precedence151731
|
Node: Algorithm150128
|
||||||
Node: Using Precedence153596
|
Node: Look-Ahead152421
|
||||||
Node: Precedence Examples154564
|
Node: Shift/Reduce154553
|
||||||
Node: How Precedence155265
|
Node: Precedence157465
|
||||||
Node: Contextual Precedence156414
|
Node: Why Precedence158116
|
||||||
Node: Parser States158205
|
Node: Using Precedence159981
|
||||||
Node: Reduce/Reduce159448
|
Node: Precedence Examples160949
|
||||||
Node: Mystery Conflicts163009
|
Node: How Precedence161650
|
||||||
Node: Stack Overflow166395
|
Node: Contextual Precedence162799
|
||||||
Node: Error Recovery167768
|
Node: Parser States164590
|
||||||
Node: Context Dependency172904
|
Node: Reduce/Reduce165833
|
||||||
Node: Semantic Tokens173752
|
Node: Mystery Conflicts169394
|
||||||
Node: Lexical Tie-ins176769
|
Node: Stack Overflow172780
|
||||||
Node: Tie-in Recovery178317
|
Node: Error Recovery174153
|
||||||
Node: Debugging180489
|
Node: Context Dependency179289
|
||||||
Node: Invocation183790
|
Node: Semantic Tokens180137
|
||||||
Node: Bison Options185042
|
Node: Lexical Tie-ins183154
|
||||||
Node: Environment Variables188654
|
Node: Tie-in Recovery184702
|
||||||
Node: Option Cross Key189502
|
Node: Debugging186874
|
||||||
Node: VMS Invocation190346
|
Node: Invocation190175
|
||||||
Node: Table of Symbols191130
|
Node: Bison Options191427
|
||||||
Node: Glossary198769
|
Node: Environment Variables195039
|
||||||
Node: Copying This Manual205073
|
Node: Option Cross Key195887
|
||||||
Node: GNU Free Documentation License205282
|
Node: VMS Invocation196731
|
||||||
Node: Index225147
|
Node: Table of Symbols197515
|
||||||
|
Node: Glossary205154
|
||||||
|
Node: Copying This Manual211458
|
||||||
|
Node: GNU Free Documentation License211667
|
||||||
|
Node: Index231532
|
||||||
|
|
||||||
End Tag Table
|
End Tag Table
|
||||||
|
|||||||
@@ -83,6 +83,7 @@ Examples
|
|||||||
* Infix Calc:: Infix (algebraic) notation calculator.
|
* Infix Calc:: Infix (algebraic) notation calculator.
|
||||||
Operator precedence is introduced.
|
Operator precedence is introduced.
|
||||||
* Simple Error Recovery:: Continuing after syntax errors.
|
* Simple Error Recovery:: Continuing after syntax errors.
|
||||||
|
* Location Tracking Calc:: Demonstrating the use of @N and @$.
|
||||||
* Multi-function Calc:: Calculator with memory and trig functions.
|
* Multi-function Calc:: Calculator with memory and trig functions.
|
||||||
It uses multiple data-types for semantic values.
|
It uses multiple data-types for semantic values.
|
||||||
* Exercises:: Ideas for improving the multi-function calculator.
|
* Exercises:: Ideas for improving the multi-function calculator.
|
||||||
@@ -103,6 +104,12 @@ Grammar Rules for `rpcalc'
|
|||||||
* Rpcalc Line::
|
* Rpcalc Line::
|
||||||
* Rpcalc Expr::
|
* Rpcalc Expr::
|
||||||
|
|
||||||
|
Location Tracking Calculator: `ltcalc'
|
||||||
|
|
||||||
|
* Decls: Ltcalc Decls. Bison and C declarations for ltcalc.
|
||||||
|
* Rules: Ltcalc Rules. Grammar rules for ltcalc, with explanations.
|
||||||
|
* Lexer: Ltcalc Lexer. The lexical analyzer.
|
||||||
|
|
||||||
Multi-Function Calculator: `mfcalc'
|
Multi-Function Calculator: `mfcalc'
|
||||||
|
|
||||||
* Decl: Mfcalc Decl. Bison declarations for multi-function calculator.
|
* Decl: Mfcalc Decl. Bison declarations for multi-function calculator.
|
||||||
@@ -1036,6 +1043,7 @@ the Info file and into a source file to try them.
|
|||||||
* Infix Calc:: Infix (algebraic) notation calculator.
|
* Infix Calc:: Infix (algebraic) notation calculator.
|
||||||
Operator precedence is introduced.
|
Operator precedence is introduced.
|
||||||
* Simple Error Recovery:: Continuing after syntax errors.
|
* Simple Error Recovery:: Continuing after syntax errors.
|
||||||
|
* Location Tracking Calc:: Demonstrating the use of @N and @$.
|
||||||
* Multi-function Calc:: Calculator with memory and trig functions.
|
* Multi-function Calc:: Calculator with memory and trig functions.
|
||||||
It uses multiple data-types for semantic values.
|
It uses multiple data-types for semantic values.
|
||||||
* Exercises:: Ideas for improving the multi-function calculator.
|
* Exercises:: Ideas for improving the multi-function calculator.
|
||||||
|
|||||||
333
doc/bison.info-2
333
doc/bison.info-2
@@ -494,7 +494,7 @@ Precedence.
|
|||||||
9
|
9
|
||||||
|
|
||||||
|
|
||||||
File: bison.info, Node: Simple Error Recovery, Next: Multi-function Calc, Prev: Infix Calc, Up: Examples
|
File: bison.info, Node: Simple Error Recovery, Next: Location Tracking Calc, Prev: Infix Calc, Up: Examples
|
||||||
|
|
||||||
Simple Error Recovery
|
Simple Error Recovery
|
||||||
=====================
|
=====================
|
||||||
@@ -533,7 +533,192 @@ the current line of input. We won't discuss this issue further because
|
|||||||
it is not specific to Bison programs.
|
it is not specific to Bison programs.
|
||||||
|
|
||||||
|
|
||||||
File: bison.info, Node: Multi-function Calc, Next: Exercises, Prev: Simple Error Recovery, Up: Examples
|
File: bison.info, Node: Location Tracking Calc, Next: Multi-function Calc, Prev: Simple Error Recovery, Up: Examples
|
||||||
|
|
||||||
|
Location Tracking Calculator: `ltcalc'
|
||||||
|
======================================
|
||||||
|
|
||||||
|
This example extends the infix notation calculator with location
|
||||||
|
tracking. This feature will be used to improve error reporting, and
|
||||||
|
provide better error messages.
|
||||||
|
|
||||||
|
For the sake of clarity, we will switch for this example to an
|
||||||
|
integer calculator, since most of the work needed to use locations will
|
||||||
|
be done in the lexical analyzer.
|
||||||
|
|
||||||
|
* Menu:
|
||||||
|
|
||||||
|
* Decls: Ltcalc Decls. Bison and C declarations for ltcalc.
|
||||||
|
* Rules: Ltcalc Rules. Grammar rules for ltcalc, with explanations.
|
||||||
|
* Lexer: Ltcalc Lexer. The lexical analyzer.
|
||||||
|
|
||||||
|
|
||||||
|
File: bison.info, Node: Ltcalc Decls, Next: Ltcalc Rules, Up: Location Tracking Calc
|
||||||
|
|
||||||
|
Declarations for `ltcalc'
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
The C and Bison declarations for the location tracking calculator
|
||||||
|
are the same as the declarations for the infix notation calculator.
|
||||||
|
|
||||||
|
/* Location tracking calculator. */
|
||||||
|
|
||||||
|
%{
|
||||||
|
#define YYSTYPE int
|
||||||
|
#include <math.h>
|
||||||
|
%}
|
||||||
|
|
||||||
|
/* Bison declarations. */
|
||||||
|
%token NUM
|
||||||
|
|
||||||
|
%left '-' '+'
|
||||||
|
%left '*' '/'
|
||||||
|
%left NEG
|
||||||
|
%right '^'
|
||||||
|
|
||||||
|
%% /* Grammar follows */
|
||||||
|
|
||||||
|
In the code above, there are no declarations specific to locations.
|
||||||
|
Defining a data type for storing locations is not needed: we will use
|
||||||
|
the type provided by default (*note Data Types of Locations: Location
|
||||||
|
Type.), which is a four member structure with the following integer
|
||||||
|
fields: `first_line', `first_column', `last_line' and `last_column'.
|
||||||
|
|
||||||
|
|
||||||
|
File: bison.info, Node: Ltcalc Rules, Next: Ltcalc Lexer, Prev: Ltcalc Decls, Up: Location Tracking Calc
|
||||||
|
|
||||||
|
Grammar Rules for `ltcalc'
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
Whether you choose to handle locations or not has no effect on the
|
||||||
|
syntax of your language. Therefore, grammar rules for this example
|
||||||
|
will be very close to those of the previous example: we will only
|
||||||
|
modify them to benefit from the new information we will have.
|
||||||
|
|
||||||
|
Here, we will use locations to report divisions by zero, and locate
|
||||||
|
the wrong expressions or subexpressions.
|
||||||
|
|
||||||
|
input : /* empty */
|
||||||
|
| input line
|
||||||
|
;
|
||||||
|
|
||||||
|
line : '\n'
|
||||||
|
| exp '\n' { printf ("%d\n", $1); }
|
||||||
|
;
|
||||||
|
|
||||||
|
exp : NUM { $$ = $1; }
|
||||||
|
| exp '+' exp { $$ = $1 + $3; }
|
||||||
|
| exp '-' exp { $$ = $1 - $3; }
|
||||||
|
| exp '*' exp { $$ = $1 * $3; }
|
||||||
|
| exp '/' exp
|
||||||
|
{
|
||||||
|
if ($3)
|
||||||
|
$$ = $1 / $3;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
$$ = 1;
|
||||||
|
printf("Division by zero, l%d,c%d-l%d,c%d",
|
||||||
|
@3.first_line, @3.first_column,
|
||||||
|
@3.last_line, @3.last_column);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
| '-' exp %prec NEG { $$ = -$2; }
|
||||||
|
| exp '^' exp { $$ = pow ($1, $3); }
|
||||||
|
| '(' exp ')' { $$ = $2; }
|
||||||
|
|
||||||
|
This code shows how to reach locations inside of semantic actions, by
|
||||||
|
using the pseudo-variables `@N' for rule components, and the
|
||||||
|
pseudo-variable `@$' for groupings.
|
||||||
|
|
||||||
|
In this example, we never assign a value to `@$', because the output
|
||||||
|
parser can do this automatically. By default, before executing the C
|
||||||
|
code of each action, `@$' is set to range from the beginning of `@1' to
|
||||||
|
the end of `@N', for a rule with N components.
|
||||||
|
|
||||||
|
Of course, this behavior can be redefined (*note Default Action for
|
||||||
|
Locations: Location Default Action.), and for very specific rules, `@$'
|
||||||
|
can be computed by hand.
|
||||||
|
|
||||||
|
|
||||||
|
File: bison.info, Node: Ltcalc Lexer, Prev: Ltcalc Rules, Up: Location Tracking Calc
|
||||||
|
|
||||||
|
The `ltcalc' Lexical Analyzer.
|
||||||
|
------------------------------
|
||||||
|
|
||||||
|
Until now, we relied on Bison's defaults to enable location
|
||||||
|
tracking. The next step is to rewrite the lexical analyzer, and make it
|
||||||
|
able to feed the parser with locations of tokens, as it already does
|
||||||
|
for semantic values.
|
||||||
|
|
||||||
|
To do so, we must take into account every single character of the
|
||||||
|
input text, to keep the computed locations from being fuzzy or wrong:
|
||||||
|
|
||||||
|
int
|
||||||
|
yylex (void)
|
||||||
|
{
|
||||||
|
int c;
|
||||||
|
|
||||||
|
/* skip white space */
|
||||||
|
while ((c = getchar ()) == ' ' || c == '\t')
|
||||||
|
++yylloc.last_column;
|
||||||
|
|
||||||
|
/* step */
|
||||||
|
yylloc.first_line = yylloc.last_line;
|
||||||
|
yylloc.first_column = yylloc.last_column;
|
||||||
|
|
||||||
|
/* process numbers */
|
||||||
|
if (isdigit (c))
|
||||||
|
{
|
||||||
|
yylval = c - '0';
|
||||||
|
++yylloc.last_column;
|
||||||
|
while (isdigit (c = getchar ()))
|
||||||
|
{
|
||||||
|
++yylloc.last_column;
|
||||||
|
yylval = yylval * 10 + c - '0';
|
||||||
|
}
|
||||||
|
ungetc (c, stdin);
|
||||||
|
return NUM;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* return end-of-file */
|
||||||
|
if (c == EOF)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* return single chars and update location */
|
||||||
|
if (c == '\n')
|
||||||
|
{
|
||||||
|
++yylloc.last_line;
|
||||||
|
yylloc.last_column = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
++yylloc.last_column;
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
Basically, the lexical analyzer does the same processing as before:
|
||||||
|
it skips blanks and tabs, and reads numbers or single-character tokens.
|
||||||
|
In addition to this, it updates the `yylloc' global variable (of type
|
||||||
|
`YYLTYPE'), where the location of tokens is stored.
|
||||||
|
|
||||||
|
Now, each time this function returns a token, the parser has its
|
||||||
|
number as well as its semantic value, and its position in the text.
|
||||||
|
The last needed change is to initialize `yylloc', for example in the
|
||||||
|
controlling function:
|
||||||
|
|
||||||
|
int
|
||||||
|
main (void)
|
||||||
|
{
|
||||||
|
yylloc.first_line = yylloc.last_line = 1;
|
||||||
|
yylloc.first_column = yylloc.last_column = 0;
|
||||||
|
return yyparse ();
|
||||||
|
}
|
||||||
|
|
||||||
|
Remember that computing locations is not a matter of syntax. Every
|
||||||
|
character must be associated with a location update, whether it is in
|
||||||
|
valid input, in comments, in literal strings, and so on...
|
||||||
|
|
||||||
|
|
||||||
|
File: bison.info, Node: Multi-function Calc, Next: Exercises, Prev: Location Tracking Calc, Up: Examples
|
||||||
|
|
||||||
Multi-Function Calculator: `mfcalc'
|
Multi-Function Calculator: `mfcalc'
|
||||||
===================================
|
===================================
|
||||||
@@ -1240,147 +1425,3 @@ associated with X and Y.
|
|||||||
This says when, why and how to use the exceptional
|
This says when, why and how to use the exceptional
|
||||||
action in the middle of a rule.
|
action in the middle of a rule.
|
||||||
|
|
||||||
|
|
||||||
File: bison.info, Node: Value Type, Next: Multiple Types, Up: Semantics
|
|
||||||
|
|
||||||
Data Types of Semantic Values
|
|
||||||
-----------------------------
|
|
||||||
|
|
||||||
In a simple program it may be sufficient to use the same data type
|
|
||||||
for the semantic values of all language constructs. This was true in
|
|
||||||
the RPN and infix calculator examples (*note Reverse Polish Notation
|
|
||||||
Calculator: RPN Calc.).
|
|
||||||
|
|
||||||
Bison's default is to use type `int' for all semantic values. To
|
|
||||||
specify some other type, define `YYSTYPE' as a macro, like this:
|
|
||||||
|
|
||||||
#define YYSTYPE double
|
|
||||||
|
|
||||||
This macro definition must go in the C declarations section of the
|
|
||||||
grammar file (*note Outline of a Bison Grammar: Grammar Outline.).
|
|
||||||
|
|
||||||
|
|
||||||
File: bison.info, Node: Multiple Types, Next: Actions, Prev: Value Type, Up: Semantics
|
|
||||||
|
|
||||||
More Than One Value Type
|
|
||||||
------------------------
|
|
||||||
|
|
||||||
In most programs, you will need different data types for different
|
|
||||||
kinds of tokens and groupings. For example, a numeric constant may
|
|
||||||
need type `int' or `long', while a string constant needs type `char *',
|
|
||||||
and an identifier might need a pointer to an entry in the symbol table.
|
|
||||||
|
|
||||||
To use more than one data type for semantic values in one parser,
|
|
||||||
Bison requires you to do two things:
|
|
||||||
|
|
||||||
* Specify the entire collection of possible data types, with the
|
|
||||||
`%union' Bison declaration (*note The Collection of Value Types:
|
|
||||||
Union Decl.).
|
|
||||||
|
|
||||||
* Choose one of those types for each symbol (terminal or
|
|
||||||
nonterminal) for which semantic values are used. This is done for
|
|
||||||
tokens with the `%token' Bison declaration (*note Token Type
|
|
||||||
Names: Token Decl.) and for groupings with the `%type' Bison
|
|
||||||
declaration (*note Nonterminal Symbols: Type Decl.).
|
|
||||||
|
|
||||||
|
|
||||||
File: bison.info, Node: Actions, Next: Action Types, Prev: Multiple Types, Up: Semantics
|
|
||||||
|
|
||||||
Actions
|
|
||||||
-------
|
|
||||||
|
|
||||||
An action accompanies a syntactic rule and contains C code to be
|
|
||||||
executed each time an instance of that rule is recognized. The task of
|
|
||||||
most actions is to compute a semantic value for the grouping built by
|
|
||||||
the rule from the semantic values associated with tokens or smaller
|
|
||||||
groupings.
|
|
||||||
|
|
||||||
An action consists of C statements surrounded by braces, much like a
|
|
||||||
compound statement in C. It can be placed at any position in the rule;
|
|
||||||
it is executed at that position. Most rules have just one action at
|
|
||||||
the end of the rule, following all the components. Actions in the
|
|
||||||
middle of a rule are tricky and used only for special purposes (*note
|
|
||||||
Actions in Mid-Rule: Mid-Rule Actions.).
|
|
||||||
|
|
||||||
The C code in an action can refer to the semantic values of the
|
|
||||||
components matched by the rule with the construct `$N', which stands for
|
|
||||||
the value of the Nth component. The semantic value for the grouping
|
|
||||||
being constructed is `$$'. (Bison translates both of these constructs
|
|
||||||
into array element references when it copies the actions into the parser
|
|
||||||
file.)
|
|
||||||
|
|
||||||
Here is a typical example:
|
|
||||||
|
|
||||||
exp: ...
|
|
||||||
| exp '+' exp
|
|
||||||
{ $$ = $1 + $3; }
|
|
||||||
|
|
||||||
This rule constructs an `exp' from two smaller `exp' groupings
|
|
||||||
connected by a plus-sign token. In the action, `$1' and `$3' refer to
|
|
||||||
the semantic values of the two component `exp' groupings, which are the
|
|
||||||
first and third symbols on the right hand side of the rule. The sum is
|
|
||||||
stored into `$$' so that it becomes the semantic value of the
|
|
||||||
addition-expression just recognized by the rule. If there were a
|
|
||||||
useful semantic value associated with the `+' token, it could be
|
|
||||||
referred to as `$2'.
|
|
||||||
|
|
||||||
If you don't specify an action for a rule, Bison supplies a default:
|
|
||||||
`$$ = $1'. Thus, the value of the first symbol in the rule becomes the
|
|
||||||
value of the whole rule. Of course, the default rule is valid only if
|
|
||||||
the two data types match. There is no meaningful default action for an
|
|
||||||
empty rule; every empty rule must have an explicit action unless the
|
|
||||||
rule's value does not matter.
|
|
||||||
|
|
||||||
`$N' with N zero or negative is allowed for reference to tokens and
|
|
||||||
groupings on the stack _before_ those that match the current rule.
|
|
||||||
This is a very risky practice, and to use it reliably you must be
|
|
||||||
certain of the context in which the rule is applied. Here is a case in
|
|
||||||
which you can use this reliably:
|
|
||||||
|
|
||||||
foo: expr bar '+' expr { ... }
|
|
||||||
| expr bar '-' expr { ... }
|
|
||||||
;
|
|
||||||
|
|
||||||
bar: /* empty */
|
|
||||||
{ previous_expr = $0; }
|
|
||||||
;
|
|
||||||
|
|
||||||
As long as `bar' is used only in the fashion shown here, `$0' always
|
|
||||||
refers to the `expr' which precedes `bar' in the definition of `foo'.
|
|
||||||
|
|
||||||
|
|
||||||
File: bison.info, Node: Action Types, Next: Mid-Rule Actions, Prev: Actions, Up: Semantics
|
|
||||||
|
|
||||||
Data Types of Values in Actions
|
|
||||||
-------------------------------
|
|
||||||
|
|
||||||
If you have chosen a single data type for semantic values, the `$$'
|
|
||||||
and `$N' constructs always have that data type.
|
|
||||||
|
|
||||||
If you have used `%union' to specify a variety of data types, then
|
|
||||||
you must declare a choice among these types for each terminal or
|
|
||||||
nonterminal symbol that can have a semantic value. Then each time you
|
|
||||||
use `$$' or `$N', its data type is determined by which symbol it refers
|
|
||||||
to in the rule. In this example,
|
|
||||||
|
|
||||||
exp: ...
|
|
||||||
| exp '+' exp
|
|
||||||
{ $$ = $1 + $3; }
|
|
||||||
|
|
||||||
`$1' and `$3' refer to instances of `exp', so they all have the data
|
|
||||||
type declared for the nonterminal symbol `exp'. If `$2' were used, it
|
|
||||||
would have the data type declared for the terminal symbol `'+'',
|
|
||||||
whatever that might be.
|
|
||||||
|
|
||||||
Alternatively, you can specify the data type when you refer to the
|
|
||||||
value, by inserting `<TYPE>' after the `$' at the beginning of the
|
|
||||||
reference. For example, if you have defined types as shown here:
|
|
||||||
|
|
||||||
%union {
|
|
||||||
int itype;
|
|
||||||
double dtype;
|
|
||||||
}
|
|
||||||
|
|
||||||
then you can write `$<itype>1' to refer to the first subunit of the
|
|
||||||
rule as an integer, or `$<dtype>1' to refer to it as a double.
|
|
||||||
|
|
||||||
|
|||||||
251
doc/bison.info-3
251
doc/bison.info-3
@@ -28,6 +28,150 @@ License", "Conditions for Using Bison" and this permission notice may be
|
|||||||
included in translations approved by the Free Software Foundation
|
included in translations approved by the Free Software Foundation
|
||||||
instead of in the original English.
|
instead of in the original English.
|
||||||
|
|
||||||
|
|
||||||
|
File: bison.info, Node: Value Type, Next: Multiple Types, Up: Semantics
|
||||||
|
|
||||||
|
Data Types of Semantic Values
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
In a simple program it may be sufficient to use the same data type
|
||||||
|
for the semantic values of all language constructs. This was true in
|
||||||
|
the RPN and infix calculator examples (*note Reverse Polish Notation
|
||||||
|
Calculator: RPN Calc.).
|
||||||
|
|
||||||
|
Bison's default is to use type `int' for all semantic values. To
|
||||||
|
specify some other type, define `YYSTYPE' as a macro, like this:
|
||||||
|
|
||||||
|
#define YYSTYPE double
|
||||||
|
|
||||||
|
This macro definition must go in the C declarations section of the
|
||||||
|
grammar file (*note Outline of a Bison Grammar: Grammar Outline.).
|
||||||
|
|
||||||
|
|
||||||
|
File: bison.info, Node: Multiple Types, Next: Actions, Prev: Value Type, Up: Semantics
|
||||||
|
|
||||||
|
More Than One Value Type
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
In most programs, you will need different data types for different
|
||||||
|
kinds of tokens and groupings. For example, a numeric constant may
|
||||||
|
need type `int' or `long', while a string constant needs type `char *',
|
||||||
|
and an identifier might need a pointer to an entry in the symbol table.
|
||||||
|
|
||||||
|
To use more than one data type for semantic values in one parser,
|
||||||
|
Bison requires you to do two things:
|
||||||
|
|
||||||
|
* Specify the entire collection of possible data types, with the
|
||||||
|
`%union' Bison declaration (*note The Collection of Value Types:
|
||||||
|
Union Decl.).
|
||||||
|
|
||||||
|
* Choose one of those types for each symbol (terminal or
|
||||||
|
nonterminal) for which semantic values are used. This is done for
|
||||||
|
tokens with the `%token' Bison declaration (*note Token Type
|
||||||
|
Names: Token Decl.) and for groupings with the `%type' Bison
|
||||||
|
declaration (*note Nonterminal Symbols: Type Decl.).
|
||||||
|
|
||||||
|
|
||||||
|
File: bison.info, Node: Actions, Next: Action Types, Prev: Multiple Types, Up: Semantics
|
||||||
|
|
||||||
|
Actions
|
||||||
|
-------
|
||||||
|
|
||||||
|
An action accompanies a syntactic rule and contains C code to be
|
||||||
|
executed each time an instance of that rule is recognized. The task of
|
||||||
|
most actions is to compute a semantic value for the grouping built by
|
||||||
|
the rule from the semantic values associated with tokens or smaller
|
||||||
|
groupings.
|
||||||
|
|
||||||
|
An action consists of C statements surrounded by braces, much like a
|
||||||
|
compound statement in C. It can be placed at any position in the rule;
|
||||||
|
it is executed at that position. Most rules have just one action at
|
||||||
|
the end of the rule, following all the components. Actions in the
|
||||||
|
middle of a rule are tricky and used only for special purposes (*note
|
||||||
|
Actions in Mid-Rule: Mid-Rule Actions.).
|
||||||
|
|
||||||
|
The C code in an action can refer to the semantic values of the
|
||||||
|
components matched by the rule with the construct `$N', which stands for
|
||||||
|
the value of the Nth component. The semantic value for the grouping
|
||||||
|
being constructed is `$$'. (Bison translates both of these constructs
|
||||||
|
into array element references when it copies the actions into the parser
|
||||||
|
file.)
|
||||||
|
|
||||||
|
Here is a typical example:
|
||||||
|
|
||||||
|
exp: ...
|
||||||
|
| exp '+' exp
|
||||||
|
{ $$ = $1 + $3; }
|
||||||
|
|
||||||
|
This rule constructs an `exp' from two smaller `exp' groupings
|
||||||
|
connected by a plus-sign token. In the action, `$1' and `$3' refer to
|
||||||
|
the semantic values of the two component `exp' groupings, which are the
|
||||||
|
first and third symbols on the right hand side of the rule. The sum is
|
||||||
|
stored into `$$' so that it becomes the semantic value of the
|
||||||
|
addition-expression just recognized by the rule. If there were a
|
||||||
|
useful semantic value associated with the `+' token, it could be
|
||||||
|
referred to as `$2'.
|
||||||
|
|
||||||
|
If you don't specify an action for a rule, Bison supplies a default:
|
||||||
|
`$$ = $1'. Thus, the value of the first symbol in the rule becomes the
|
||||||
|
value of the whole rule. Of course, the default rule is valid only if
|
||||||
|
the two data types match. There is no meaningful default action for an
|
||||||
|
empty rule; every empty rule must have an explicit action unless the
|
||||||
|
rule's value does not matter.
|
||||||
|
|
||||||
|
`$N' with N zero or negative is allowed for reference to tokens and
|
||||||
|
groupings on the stack _before_ those that match the current rule.
|
||||||
|
This is a very risky practice, and to use it reliably you must be
|
||||||
|
certain of the context in which the rule is applied. Here is a case in
|
||||||
|
which you can use this reliably:
|
||||||
|
|
||||||
|
foo: expr bar '+' expr { ... }
|
||||||
|
| expr bar '-' expr { ... }
|
||||||
|
;
|
||||||
|
|
||||||
|
bar: /* empty */
|
||||||
|
{ previous_expr = $0; }
|
||||||
|
;
|
||||||
|
|
||||||
|
As long as `bar' is used only in the fashion shown here, `$0' always
|
||||||
|
refers to the `expr' which precedes `bar' in the definition of `foo'.
|
||||||
|
|
||||||
|
|
||||||
|
File: bison.info, Node: Action Types, Next: Mid-Rule Actions, Prev: Actions, Up: Semantics
|
||||||
|
|
||||||
|
Data Types of Values in Actions
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
If you have chosen a single data type for semantic values, the `$$'
|
||||||
|
and `$N' constructs always have that data type.
|
||||||
|
|
||||||
|
If you have used `%union' to specify a variety of data types, then
|
||||||
|
you must declare a choice among these types for each terminal or
|
||||||
|
nonterminal symbol that can have a semantic value. Then each time you
|
||||||
|
use `$$' or `$N', its data type is determined by which symbol it refers
|
||||||
|
to in the rule. In this example,
|
||||||
|
|
||||||
|
exp: ...
|
||||||
|
| exp '+' exp
|
||||||
|
{ $$ = $1 + $3; }
|
||||||
|
|
||||||
|
`$1' and `$3' refer to instances of `exp', so they all have the data
|
||||||
|
type declared for the nonterminal symbol `exp'. If `$2' were used, it
|
||||||
|
would have the data type declared for the terminal symbol `'+'',
|
||||||
|
whatever that might be.
|
||||||
|
|
||||||
|
Alternatively, you can specify the data type when you refer to the
|
||||||
|
value, by inserting `<TYPE>' after the `$' at the beginning of the
|
||||||
|
reference. For example, if you have defined types as shown here:
|
||||||
|
|
||||||
|
%union {
|
||||||
|
int itype;
|
||||||
|
double dtype;
|
||||||
|
}
|
||||||
|
|
||||||
|
then you can write `$<itype>1' to refer to the first subunit of the
|
||||||
|
rule as an integer, or `$<dtype>1' to refer to it as a double.
|
||||||
|
|
||||||
|
|
||||||
File: bison.info, Node: Mid-Rule Actions, Prev: Action Types, Up: Semantics
|
File: bison.info, Node: Mid-Rule Actions, Prev: Action Types, Up: Semantics
|
||||||
|
|
||||||
@@ -1171,110 +1315,3 @@ useful in actions.
|
|||||||
textual position of the Nth component of the current rule. *Note
|
textual position of the Nth component of the current rule. *Note
|
||||||
Tracking Locations: Locations.
|
Tracking Locations: Locations.
|
||||||
|
|
||||||
|
|
||||||
File: bison.info, Node: Algorithm, Next: Error Recovery, Prev: Interface, Up: Top
|
|
||||||
|
|
||||||
The Bison Parser Algorithm
|
|
||||||
**************************
|
|
||||||
|
|
||||||
As Bison reads tokens, it pushes them onto a stack along with their
|
|
||||||
semantic values. The stack is called the "parser stack". Pushing a
|
|
||||||
token is traditionally called "shifting".
|
|
||||||
|
|
||||||
For example, suppose the infix calculator has read `1 + 5 *', with a
|
|
||||||
`3' to come. The stack will have four elements, one for each token
|
|
||||||
that was shifted.
|
|
||||||
|
|
||||||
But the stack does not always have an element for each token read.
|
|
||||||
When the last N tokens and groupings shifted match the components of a
|
|
||||||
grammar rule, they can be combined according to that rule. This is
|
|
||||||
called "reduction". Those tokens and groupings are replaced on the
|
|
||||||
stack by a single grouping whose symbol is the result (left hand side)
|
|
||||||
of that rule. Running the rule's action is part of the process of
|
|
||||||
reduction, because this is what computes the semantic value of the
|
|
||||||
resulting grouping.
|
|
||||||
|
|
||||||
For example, if the infix calculator's parser stack contains this:
|
|
||||||
|
|
||||||
1 + 5 * 3
|
|
||||||
|
|
||||||
and the next input token is a newline character, then the last three
|
|
||||||
elements can be reduced to 15 via the rule:
|
|
||||||
|
|
||||||
expr: expr '*' expr;
|
|
||||||
|
|
||||||
Then the stack contains just these three elements:
|
|
||||||
|
|
||||||
1 + 15
|
|
||||||
|
|
||||||
At this point, another reduction can be made, resulting in the single
|
|
||||||
value 16. Then the newline token can be shifted.
|
|
||||||
|
|
||||||
The parser tries, by shifts and reductions, to reduce the entire
|
|
||||||
input down to a single grouping whose symbol is the grammar's
|
|
||||||
start-symbol (*note Languages and Context-Free Grammars: Language and
|
|
||||||
Grammar.).
|
|
||||||
|
|
||||||
This kind of parser is known in the literature as a bottom-up parser.
|
|
||||||
|
|
||||||
* Menu:
|
|
||||||
|
|
||||||
* Look-Ahead:: Parser looks one token ahead when deciding what to do.
|
|
||||||
* Shift/Reduce:: Conflicts: when either shifting or reduction is valid.
|
|
||||||
* Precedence:: Operator precedence works by resolving conflicts.
|
|
||||||
* Contextual Precedence:: When an operator's precedence depends on context.
|
|
||||||
* Parser States:: The parser is a finite-state-machine with stack.
|
|
||||||
* Reduce/Reduce:: When two rules are applicable in the same situation.
|
|
||||||
* Mystery Conflicts:: Reduce/reduce conflicts that look unjustified.
|
|
||||||
* Stack Overflow:: What happens when stack gets full. How to avoid it.
|
|
||||||
|
|
||||||
|
|
||||||
File: bison.info, Node: Look-Ahead, Next: Shift/Reduce, Up: Algorithm
|
|
||||||
|
|
||||||
Look-Ahead Tokens
|
|
||||||
=================
|
|
||||||
|
|
||||||
The Bison parser does _not_ always reduce immediately as soon as the
|
|
||||||
last N tokens and groupings match a rule. This is because such a
|
|
||||||
simple strategy is inadequate to handle most languages. Instead, when a
|
|
||||||
reduction is possible, the parser sometimes "looks ahead" at the next
|
|
||||||
token in order to decide what to do.
|
|
||||||
|
|
||||||
When a token is read, it is not immediately shifted; first it
|
|
||||||
becomes the "look-ahead token", which is not on the stack. Now the
|
|
||||||
parser can perform one or more reductions of tokens and groupings on
|
|
||||||
the stack, while the look-ahead token remains off to the side. When no
|
|
||||||
more reductions should take place, the look-ahead token is shifted onto
|
|
||||||
the stack. This does not mean that all possible reductions have been
|
|
||||||
done; depending on the token type of the look-ahead token, some rules
|
|
||||||
may choose to delay their application.
|
|
||||||
|
|
||||||
Here is a simple case where look-ahead is needed. These three rules
|
|
||||||
define expressions which contain binary addition operators and postfix
|
|
||||||
unary factorial operators (`!'), and allow parentheses for grouping.
|
|
||||||
|
|
||||||
expr: term '+' expr
|
|
||||||
| term
|
|
||||||
;
|
|
||||||
|
|
||||||
term: '(' expr ')'
|
|
||||||
| term '!'
|
|
||||||
| NUMBER
|
|
||||||
;
|
|
||||||
|
|
||||||
Suppose that the tokens `1 + 2' have been read and shifted; what
|
|
||||||
should be done? If the following token is `)', then the first three
|
|
||||||
tokens must be reduced to form an `expr'. This is the only valid
|
|
||||||
course, because shifting the `)' would produce a sequence of symbols
|
|
||||||
`term ')'', and no rule allows this.
|
|
||||||
|
|
||||||
If the following token is `!', then it must be shifted immediately so
|
|
||||||
that `2 !' can be reduced to make a `term'. If instead the parser were
|
|
||||||
to reduce before shifting, `1 + 2' would become an `expr'. It would
|
|
||||||
then be impossible to shift the `!' because doing so would produce on
|
|
||||||
the stack the sequence of symbols `expr '!''. No rule allows that
|
|
||||||
sequence.
|
|
||||||
|
|
||||||
The current look-ahead token is stored in the variable `yychar'.
|
|
||||||
*Note Special Features for Use in Actions: Action Features.
|
|
||||||
|
|
||||||
|
|||||||
107
doc/bison.info-4
107
doc/bison.info-4
@@ -28,6 +28,113 @@ License", "Conditions for Using Bison" and this permission notice may be
|
|||||||
included in translations approved by the Free Software Foundation
|
included in translations approved by the Free Software Foundation
|
||||||
instead of in the original English.
|
instead of in the original English.
|
||||||
|
|
||||||
|
|
||||||
|
File: bison.info, Node: Algorithm, Next: Error Recovery, Prev: Interface, Up: Top
|
||||||
|
|
||||||
|
The Bison Parser Algorithm
|
||||||
|
**************************
|
||||||
|
|
||||||
|
As Bison reads tokens, it pushes them onto a stack along with their
|
||||||
|
semantic values. The stack is called the "parser stack". Pushing a
|
||||||
|
token is traditionally called "shifting".
|
||||||
|
|
||||||
|
For example, suppose the infix calculator has read `1 + 5 *', with a
|
||||||
|
`3' to come. The stack will have four elements, one for each token
|
||||||
|
that was shifted.
|
||||||
|
|
||||||
|
But the stack does not always have an element for each token read.
|
||||||
|
When the last N tokens and groupings shifted match the components of a
|
||||||
|
grammar rule, they can be combined according to that rule. This is
|
||||||
|
called "reduction". Those tokens and groupings are replaced on the
|
||||||
|
stack by a single grouping whose symbol is the result (left hand side)
|
||||||
|
of that rule. Running the rule's action is part of the process of
|
||||||
|
reduction, because this is what computes the semantic value of the
|
||||||
|
resulting grouping.
|
||||||
|
|
||||||
|
For example, if the infix calculator's parser stack contains this:
|
||||||
|
|
||||||
|
1 + 5 * 3
|
||||||
|
|
||||||
|
and the next input token is a newline character, then the last three
|
||||||
|
elements can be reduced to 15 via the rule:
|
||||||
|
|
||||||
|
expr: expr '*' expr;
|
||||||
|
|
||||||
|
Then the stack contains just these three elements:
|
||||||
|
|
||||||
|
1 + 15
|
||||||
|
|
||||||
|
At this point, another reduction can be made, resulting in the single
|
||||||
|
value 16. Then the newline token can be shifted.
|
||||||
|
|
||||||
|
The parser tries, by shifts and reductions, to reduce the entire
|
||||||
|
input down to a single grouping whose symbol is the grammar's
|
||||||
|
start-symbol (*note Languages and Context-Free Grammars: Language and
|
||||||
|
Grammar.).
|
||||||
|
|
||||||
|
This kind of parser is known in the literature as a bottom-up parser.
|
||||||
|
|
||||||
|
* Menu:
|
||||||
|
|
||||||
|
* Look-Ahead:: Parser looks one token ahead when deciding what to do.
|
||||||
|
* Shift/Reduce:: Conflicts: when either shifting or reduction is valid.
|
||||||
|
* Precedence:: Operator precedence works by resolving conflicts.
|
||||||
|
* Contextual Precedence:: When an operator's precedence depends on context.
|
||||||
|
* Parser States:: The parser is a finite-state-machine with stack.
|
||||||
|
* Reduce/Reduce:: When two rules are applicable in the same situation.
|
||||||
|
* Mystery Conflicts:: Reduce/reduce conflicts that look unjustified.
|
||||||
|
* Stack Overflow:: What happens when stack gets full. How to avoid it.
|
||||||
|
|
||||||
|
|
||||||
|
File: bison.info, Node: Look-Ahead, Next: Shift/Reduce, Up: Algorithm
|
||||||
|
|
||||||
|
Look-Ahead Tokens
|
||||||
|
=================
|
||||||
|
|
||||||
|
The Bison parser does _not_ always reduce immediately as soon as the
|
||||||
|
last N tokens and groupings match a rule. This is because such a
|
||||||
|
simple strategy is inadequate to handle most languages. Instead, when a
|
||||||
|
reduction is possible, the parser sometimes "looks ahead" at the next
|
||||||
|
token in order to decide what to do.
|
||||||
|
|
||||||
|
When a token is read, it is not immediately shifted; first it
|
||||||
|
becomes the "look-ahead token", which is not on the stack. Now the
|
||||||
|
parser can perform one or more reductions of tokens and groupings on
|
||||||
|
the stack, while the look-ahead token remains off to the side. When no
|
||||||
|
more reductions should take place, the look-ahead token is shifted onto
|
||||||
|
the stack. This does not mean that all possible reductions have been
|
||||||
|
done; depending on the token type of the look-ahead token, some rules
|
||||||
|
may choose to delay their application.
|
||||||
|
|
||||||
|
Here is a simple case where look-ahead is needed. These three rules
|
||||||
|
define expressions which contain binary addition operators and postfix
|
||||||
|
unary factorial operators (`!'), and allow parentheses for grouping.
|
||||||
|
|
||||||
|
expr: term '+' expr
|
||||||
|
| term
|
||||||
|
;
|
||||||
|
|
||||||
|
term: '(' expr ')'
|
||||||
|
| term '!'
|
||||||
|
| NUMBER
|
||||||
|
;
|
||||||
|
|
||||||
|
Suppose that the tokens `1 + 2' have been read and shifted; what
|
||||||
|
should be done? If the following token is `)', then the first three
|
||||||
|
tokens must be reduced to form an `expr'. This is the only valid
|
||||||
|
course, because shifting the `)' would produce a sequence of symbols
|
||||||
|
`term ')'', and no rule allows this.
|
||||||
|
|
||||||
|
If the following token is `!', then it must be shifted immediately so
|
||||||
|
that `2 !' can be reduced to make a `term'. If instead the parser were
|
||||||
|
to reduce before shifting, `1 + 2' would become an `expr'. It would
|
||||||
|
then be impossible to shift the `!' because doing so would produce on
|
||||||
|
the stack the sequence of symbols `expr '!''. No rule allows that
|
||||||
|
sequence.
|
||||||
|
|
||||||
|
The current look-ahead token is stored in the variable `yychar'.
|
||||||
|
*Note Special Features for Use in Actions: Action Features.
|
||||||
|
|
||||||
|
|
||||||
File: bison.info, Node: Shift/Reduce, Next: Precedence, Prev: Look-Ahead, Up: Algorithm
|
File: bison.info, Node: Shift/Reduce, Next: Precedence, Prev: Look-Ahead, Up: Algorithm
|
||||||
|
|
||||||
|
|||||||
@@ -855,6 +855,7 @@ Index
|
|||||||
* C-language interface: Interface.
|
* C-language interface: Interface.
|
||||||
* calc: Infix Calc.
|
* calc: Infix Calc.
|
||||||
* calculator, infix notation: Infix Calc.
|
* calculator, infix notation: Infix Calc.
|
||||||
|
* calculator, location tracking: Location Tracking Calc.
|
||||||
* calculator, multi-function: Multi-function Calc.
|
* calculator, multi-function: Multi-function Calc.
|
||||||
* calculator, simple: RPN Calc.
|
* calculator, simple: RPN Calc.
|
||||||
* character token: Symbols.
|
* character token: Symbols.
|
||||||
@@ -925,8 +926,10 @@ Index
|
|||||||
* location <1>: Locations.
|
* location <1>: Locations.
|
||||||
* location: Locations Overview.
|
* location: Locations Overview.
|
||||||
* location actions: Actions and Locations.
|
* location actions: Actions and Locations.
|
||||||
|
* location tracking calculator: Location Tracking Calc.
|
||||||
* look-ahead token: Look-Ahead.
|
* look-ahead token: Look-Ahead.
|
||||||
* LR(1): Mystery Conflicts.
|
* LR(1): Mystery Conflicts.
|
||||||
|
* ltcalc: Location Tracking Calc.
|
||||||
* main function in simple example: Rpcalc Main.
|
* main function in simple example: Rpcalc Main.
|
||||||
* mfcalc: Multi-function Calc.
|
* mfcalc: Multi-function Calc.
|
||||||
* mid-rule actions: Mid-Rule Actions.
|
* mid-rule actions: Mid-Rule Actions.
|
||||||
|
|||||||
@@ -184,6 +184,7 @@ Examples
|
|||||||
* Infix Calc:: Infix (algebraic) notation calculator.
|
* Infix Calc:: Infix (algebraic) notation calculator.
|
||||||
Operator precedence is introduced.
|
Operator precedence is introduced.
|
||||||
* Simple Error Recovery:: Continuing after syntax errors.
|
* Simple Error Recovery:: Continuing after syntax errors.
|
||||||
|
* Location Tracking Calc:: Demonstrating the use of @@@var{n} and @@$.
|
||||||
* Multi-function Calc:: Calculator with memory and trig functions.
|
* Multi-function Calc:: Calculator with memory and trig functions.
|
||||||
It uses multiple data-types for semantic values.
|
It uses multiple data-types for semantic values.
|
||||||
* Exercises:: Ideas for improving the multi-function calculator.
|
* Exercises:: Ideas for improving the multi-function calculator.
|
||||||
@@ -204,6 +205,12 @@ Grammar Rules for @code{rpcalc}
|
|||||||
* Rpcalc Line::
|
* Rpcalc Line::
|
||||||
* Rpcalc Expr::
|
* Rpcalc Expr::
|
||||||
|
|
||||||
|
Location Tracking Calculator: @code{ltcalc}
|
||||||
|
|
||||||
|
* Decls: Ltcalc Decls. Bison and C declarations for ltcalc.
|
||||||
|
* Rules: Ltcalc Rules. Grammar rules for ltcalc, with explanations.
|
||||||
|
* Lexer: Ltcalc Lexer. The lexical analyzer.
|
||||||
|
|
||||||
Multi-Function Calculator: @code{mfcalc}
|
Multi-Function Calculator: @code{mfcalc}
|
||||||
|
|
||||||
* Decl: Mfcalc Decl. Bison declarations for multi-function calculator.
|
* Decl: Mfcalc Decl. Bison declarations for multi-function calculator.
|
||||||
@@ -794,6 +801,7 @@ to try them.
|
|||||||
* Infix Calc:: Infix (algebraic) notation calculator.
|
* Infix Calc:: Infix (algebraic) notation calculator.
|
||||||
Operator precedence is introduced.
|
Operator precedence is introduced.
|
||||||
* Simple Error Recovery:: Continuing after syntax errors.
|
* Simple Error Recovery:: Continuing after syntax errors.
|
||||||
|
* Location Tracking Calc:: Demonstrating the use of @@@var{n} and @@$.
|
||||||
* Multi-function Calc:: Calculator with memory and trig functions.
|
* Multi-function Calc:: Calculator with memory and trig functions.
|
||||||
It uses multiple data-types for semantic values.
|
It uses multiple data-types for semantic values.
|
||||||
* Exercises:: Ideas for improving the multi-function calculator.
|
* Exercises:: Ideas for improving the multi-function calculator.
|
||||||
@@ -1358,6 +1366,204 @@ input lines; it would also have to discard the rest of the current line of
|
|||||||
input. We won't discuss this issue further because it is not specific to
|
input. We won't discuss this issue further because it is not specific to
|
||||||
Bison programs.
|
Bison programs.
|
||||||
|
|
||||||
|
@node Location Tracking Calc
|
||||||
|
@section Location Tracking Calculator: @code{ltcalc}
|
||||||
|
@cindex location tracking calculator
|
||||||
|
@cindex @code{ltcalc}
|
||||||
|
@cindex calculator, location tracking
|
||||||
|
|
||||||
|
This example extends the infix notation calculator with location tracking.
|
||||||
|
This feature will be used to improve error reporting, and provide better
|
||||||
|
error messages.
|
||||||
|
|
||||||
|
For the sake of clarity, we will switch for this example to an integer
|
||||||
|
calculator, since most of the work needed to use locations will be done
|
||||||
|
in the lexical analyzer.
|
||||||
|
|
||||||
|
@menu
|
||||||
|
* Decls: Ltcalc Decls. Bison and C declarations for ltcalc.
|
||||||
|
* Rules: Ltcalc Rules. Grammar rules for ltcalc, with explanations.
|
||||||
|
* Lexer: Ltcalc Lexer. The lexical analyzer.
|
||||||
|
@end menu
|
||||||
|
|
||||||
|
@node Ltcalc Decls
|
||||||
|
@subsection Declarations for @code{ltcalc}
|
||||||
|
|
||||||
|
The C and Bison declarations for the location tracking calculator are the same
|
||||||
|
as the declarations for the infix notation calculator.
|
||||||
|
|
||||||
|
@example
|
||||||
|
/* Location tracking calculator. */
|
||||||
|
|
||||||
|
%@{
|
||||||
|
#define YYSTYPE int
|
||||||
|
#include <math.h>
|
||||||
|
%@}
|
||||||
|
|
||||||
|
/* Bison declarations. */
|
||||||
|
%token NUM
|
||||||
|
|
||||||
|
%left '-' '+'
|
||||||
|
%left '*' '/'
|
||||||
|
%left NEG
|
||||||
|
%right '^'
|
||||||
|
|
||||||
|
%% /* Grammar follows */
|
||||||
|
@end example
|
||||||
|
|
||||||
|
In the code above, there are no declarations specific to locations. Defining
|
||||||
|
a data type for storing locations is not needed: we will use the type provided
|
||||||
|
by default (@pxref{Location Type, ,Data Types of Locations}), which is a four
|
||||||
|
member structure with the following integer fields: @code{first_line},
|
||||||
|
@code{first_column}, @code{last_line} and @code{last_column}.
|
||||||
|
|
||||||
|
@node Ltcalc Rules
|
||||||
|
@subsection Grammar Rules for @code{ltcalc}
|
||||||
|
|
||||||
|
Whether you choose to handle locations or not has no effect on the syntax of
|
||||||
|
your language. Therefore, grammar rules for this example will be very close to
|
||||||
|
those of the previous example: we will only modify them to benefit from the new
|
||||||
|
information we will have.
|
||||||
|
|
||||||
|
Here, we will use locations to report divisions by zero, and locate the wrong
|
||||||
|
expressions or subexpressions.
|
||||||
|
|
||||||
|
@example
|
||||||
|
@group
|
||||||
|
input : /* empty */
|
||||||
|
| input line
|
||||||
|
;
|
||||||
|
@end group
|
||||||
|
|
||||||
|
@group
|
||||||
|
line : '\n'
|
||||||
|
| exp '\n' @{ printf ("%d\n", $1); @}
|
||||||
|
;
|
||||||
|
@end group
|
||||||
|
|
||||||
|
@group
|
||||||
|
exp : NUM @{ $$ = $1; @}
|
||||||
|
| exp '+' exp @{ $$ = $1 + $3; @}
|
||||||
|
| exp '-' exp @{ $$ = $1 - $3; @}
|
||||||
|
| exp '*' exp @{ $$ = $1 * $3; @}
|
||||||
|
@end group
|
||||||
|
| exp '/' exp
|
||||||
|
@group
|
||||||
|
@{
|
||||||
|
if ($3)
|
||||||
|
$$ = $1 / $3;
|
||||||
|
else
|
||||||
|
@{
|
||||||
|
$$ = 1;
|
||||||
|
printf("Division by zero, l%d,c%d-l%d,c%d",
|
||||||
|
@@3.first_line, @@3.first_column,
|
||||||
|
@@3.last_line, @@3.last_column);
|
||||||
|
@}
|
||||||
|
@}
|
||||||
|
@end group
|
||||||
|
@group
|
||||||
|
| '-' exp %prec NEG @{ $$ = -$2; @}
|
||||||
|
| exp '^' exp @{ $$ = pow ($1, $3); @}
|
||||||
|
| '(' exp ')' @{ $$ = $2; @}
|
||||||
|
@end group
|
||||||
|
@end example
|
||||||
|
|
||||||
|
This code shows how to reach locations inside of semantic actions, by
|
||||||
|
using the pseudo-variables @code{@@@var{n}} for rule components, and the
|
||||||
|
pseudo-variable @code{@@$} for groupings.
|
||||||
|
|
||||||
|
In this example, we never assign a value to @code{@@$}, because the
|
||||||
|
output parser can do this automatically. By default, before executing
|
||||||
|
the C code of each action, @code{@@$} is set to range from the beginning
|
||||||
|
of @code{@@1} to the end of @code{@@@var{n}}, for a rule with @var{n}
|
||||||
|
components.
|
||||||
|
|
||||||
|
Of course, this behavior can be redefined (@pxref{Location Default
|
||||||
|
Action, , Default Action for Locations}), and for very specific rules,
|
||||||
|
@code{@@$} can be computed by hand.
|
||||||
|
|
||||||
|
@node Ltcalc Lexer
|
||||||
|
@subsection The @code{ltcalc} Lexical Analyzer.
|
||||||
|
|
||||||
|
Until now, we relied on Bison's defaults to enable location tracking. The next
|
||||||
|
step is to rewrite the lexical analyzer, and make it able to feed the parser
|
||||||
|
with locations of tokens, as it already does for semantic values.
|
||||||
|
|
||||||
|
To do so, we must take into account every single character of the input text,
|
||||||
|
to keep the computed locations from being fuzzy or wrong:
|
||||||
|
|
||||||
|
@example
|
||||||
|
@group
|
||||||
|
int
|
||||||
|
yylex (void)
|
||||||
|
@{
|
||||||
|
int c;
|
||||||
|
|
||||||
|
/* skip white space */
|
||||||
|
while ((c = getchar ()) == ' ' || c == '\t')
|
||||||
|
++yylloc.last_column;
|
||||||
|
|
||||||
|
/* step */
|
||||||
|
yylloc.first_line = yylloc.last_line;
|
||||||
|
yylloc.first_column = yylloc.last_column;
|
||||||
|
@end group
|
||||||
|
|
||||||
|
@group
|
||||||
|
/* process numbers */
|
||||||
|
if (isdigit (c))
|
||||||
|
@{
|
||||||
|
yylval = c - '0';
|
||||||
|
++yylloc.last_column;
|
||||||
|
while (isdigit (c = getchar ()))
|
||||||
|
@{
|
||||||
|
++yylloc.last_column;
|
||||||
|
yylval = yylval * 10 + c - '0';
|
||||||
|
@}
|
||||||
|
ungetc (c, stdin);
|
||||||
|
return NUM;
|
||||||
|
@}
|
||||||
|
@end group
|
||||||
|
|
||||||
|
/* return end-of-file */
|
||||||
|
if (c == EOF)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* return single chars and update location */
|
||||||
|
if (c == '\n')
|
||||||
|
@{
|
||||||
|
++yylloc.last_line;
|
||||||
|
yylloc.last_column = 0;
|
||||||
|
@}
|
||||||
|
else
|
||||||
|
++yylloc.last_column;
|
||||||
|
return c;
|
||||||
|
@}
|
||||||
|
@end example
|
||||||
|
|
||||||
|
Basically, the lexical analyzer does the same processing as before: it skips
|
||||||
|
blanks and tabs, and reads numbers or single-character tokens. In addition
|
||||||
|
to this, it updates the @code{yylloc} global variable (of type @code{YYLTYPE}),
|
||||||
|
where the location of tokens is stored.
|
||||||
|
|
||||||
|
Now, each time this function returns a token, the parser has its number as
|
||||||
|
well as its semantic value, and its position in the text. The last needed
|
||||||
|
change is to initialize @code{yylloc}, for example in the controlling
|
||||||
|
function:
|
||||||
|
|
||||||
|
@example
|
||||||
|
int
|
||||||
|
main (void)
|
||||||
|
@{
|
||||||
|
yylloc.first_line = yylloc.last_line = 1;
|
||||||
|
yylloc.first_column = yylloc.last_column = 0;
|
||||||
|
return yyparse ();
|
||||||
|
@}
|
||||||
|
@end example
|
||||||
|
|
||||||
|
Remember that computing locations is not a matter of syntax. Every character
|
||||||
|
must be associated with a location update, whether it is in valid input, in
|
||||||
|
comments, in literal strings, and so on...
|
||||||
|
|
||||||
@node Multi-function Calc
|
@node Multi-function Calc
|
||||||
@section Multi-Function Calculator: @code{mfcalc}
|
@section Multi-Function Calculator: @code{mfcalc}
|
||||||
@cindex multi-function calculator
|
@cindex multi-function calculator
|
||||||
|
|||||||
Reference in New Issue
Block a user