mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-10 04:43:03 +00:00
* doc/bison.texinfo (Location Tracking Calc): New node.
This commit is contained in:
@@ -1,3 +1,7 @@
|
||||
2001-08-29 Robert Anisko <anisko_r@epita.fr>
|
||||
|
||||
* doc/bison.texinfo (Location Tracking Calc): New node.
|
||||
|
||||
2001-08-29 Paul Eggert <eggert@twinsun.com>
|
||||
|
||||
* src/output.c (output): Do not define const, as this now
|
||||
|
||||
208
doc/bison.info
208
doc/bison.info
@@ -31,111 +31,115 @@ instead of in the original English.
|
||||
|
||||
Indirect:
|
||||
bison.info-1: 1313
|
||||
bison.info-2: 50345
|
||||
bison.info-3: 99970
|
||||
bison.info-4: 148168
|
||||
bison.info-5: 191130
|
||||
bison.info-2: 50688
|
||||
bison.info-3: 100578
|
||||
bison.info-4: 150128
|
||||
bison.info-5: 197515
|
||||
|
||||
Tag Table:
|
||||
(Indirect)
|
||||
Node: Top1313
|
||||
Node: Introduction8686
|
||||
Node: Conditions9961
|
||||
Node: Copying11425
|
||||
Node: Concepts30628
|
||||
Node: Language and Grammar31707
|
||||
Node: Grammar in Bison36723
|
||||
Node: Semantic Values38647
|
||||
Node: Semantic Actions40748
|
||||
Node: Locations Overview41937
|
||||
Node: Bison Parser43384
|
||||
Node: Stages45696
|
||||
Node: Grammar Layout46979
|
||||
Node: Examples48236
|
||||
Node: RPN Calc49371
|
||||
Node: Rpcalc Decls50345
|
||||
Node: Rpcalc Rules51932
|
||||
Node: Rpcalc Input53732
|
||||
Node: Rpcalc Line55193
|
||||
Node: Rpcalc Expr56308
|
||||
Node: Rpcalc Lexer58253
|
||||
Node: Rpcalc Main60825
|
||||
Node: Rpcalc Error61223
|
||||
Node: Rpcalc Gen62231
|
||||
Node: Rpcalc Compile63380
|
||||
Node: Infix Calc64255
|
||||
Node: Simple Error Recovery66962
|
||||
Node: Multi-function Calc68848
|
||||
Node: Mfcalc Decl70414
|
||||
Node: Mfcalc Rules72437
|
||||
Node: Mfcalc Symtab73817
|
||||
Node: Exercises80190
|
||||
Node: Grammar File80696
|
||||
Node: Grammar Outline81544
|
||||
Node: C Declarations82278
|
||||
Node: Bison Declarations82858
|
||||
Node: Grammar Rules83270
|
||||
Node: C Code83730
|
||||
Node: Symbols84660
|
||||
Node: Rules89741
|
||||
Node: Recursion91380
|
||||
Node: Semantics93099
|
||||
Node: Value Type94193
|
||||
Node: Multiple Types94865
|
||||
Node: Actions95882
|
||||
Node: Action Types98667
|
||||
Node: Mid-Rule Actions99970
|
||||
Node: Locations105540
|
||||
Node: Location Type106188
|
||||
Node: Actions and Locations106746
|
||||
Node: Location Default Action108902
|
||||
Node: Declarations110365
|
||||
Node: Token Decl111684
|
||||
Node: Precedence Decl113697
|
||||
Node: Union Decl115248
|
||||
Node: Type Decl116092
|
||||
Node: Expect Decl116998
|
||||
Node: Start Decl118544
|
||||
Node: Pure Decl118922
|
||||
Node: Decl Summary120599
|
||||
Node: Multiple Parsers125982
|
||||
Node: Interface127476
|
||||
Node: Parser Function128348
|
||||
Node: Lexical129183
|
||||
Node: Calling Convention130589
|
||||
Node: Token Values133360
|
||||
Node: Token Positions134509
|
||||
Node: Pure Calling135394
|
||||
Node: Error Reporting138326
|
||||
Node: Action Features140448
|
||||
Node: Algorithm143743
|
||||
Node: Look-Ahead146036
|
||||
Node: Shift/Reduce148168
|
||||
Node: Precedence151080
|
||||
Node: Why Precedence151731
|
||||
Node: Using Precedence153596
|
||||
Node: Precedence Examples154564
|
||||
Node: How Precedence155265
|
||||
Node: Contextual Precedence156414
|
||||
Node: Parser States158205
|
||||
Node: Reduce/Reduce159448
|
||||
Node: Mystery Conflicts163009
|
||||
Node: Stack Overflow166395
|
||||
Node: Error Recovery167768
|
||||
Node: Context Dependency172904
|
||||
Node: Semantic Tokens173752
|
||||
Node: Lexical Tie-ins176769
|
||||
Node: Tie-in Recovery178317
|
||||
Node: Debugging180489
|
||||
Node: Invocation183790
|
||||
Node: Bison Options185042
|
||||
Node: Environment Variables188654
|
||||
Node: Option Cross Key189502
|
||||
Node: VMS Invocation190346
|
||||
Node: Table of Symbols191130
|
||||
Node: Glossary198769
|
||||
Node: Copying This Manual205073
|
||||
Node: GNU Free Documentation License205282
|
||||
Node: Index225147
|
||||
Node: Introduction8966
|
||||
Node: Conditions10241
|
||||
Node: Copying11705
|
||||
Node: Concepts30908
|
||||
Node: Language and Grammar31987
|
||||
Node: Grammar in Bison37003
|
||||
Node: Semantic Values38927
|
||||
Node: Semantic Actions41028
|
||||
Node: Locations Overview42217
|
||||
Node: Bison Parser43664
|
||||
Node: Stages45976
|
||||
Node: Grammar Layout47259
|
||||
Node: Examples48516
|
||||
Node: RPN Calc49714
|
||||
Node: Rpcalc Decls50688
|
||||
Node: Rpcalc Rules52275
|
||||
Node: Rpcalc Input54075
|
||||
Node: Rpcalc Line55536
|
||||
Node: Rpcalc Expr56651
|
||||
Node: Rpcalc Lexer58596
|
||||
Node: Rpcalc Main61168
|
||||
Node: Rpcalc Error61566
|
||||
Node: Rpcalc Gen62574
|
||||
Node: Rpcalc Compile63723
|
||||
Node: Infix Calc64598
|
||||
Node: Simple Error Recovery67305
|
||||
Node: Location Tracking Calc69194
|
||||
Node: Ltcalc Decls69924
|
||||
Node: Ltcalc Rules70833
|
||||
Node: Ltcalc Lexer72894
|
||||
Node: Multi-function Calc75232
|
||||
Node: Mfcalc Decl76799
|
||||
Node: Mfcalc Rules78822
|
||||
Node: Mfcalc Symtab80202
|
||||
Node: Exercises86575
|
||||
Node: Grammar File87081
|
||||
Node: Grammar Outline87929
|
||||
Node: C Declarations88663
|
||||
Node: Bison Declarations89243
|
||||
Node: Grammar Rules89655
|
||||
Node: C Code90115
|
||||
Node: Symbols91045
|
||||
Node: Rules96126
|
||||
Node: Recursion97765
|
||||
Node: Semantics99484
|
||||
Node: Value Type100578
|
||||
Node: Multiple Types101250
|
||||
Node: Actions102267
|
||||
Node: Action Types105052
|
||||
Node: Mid-Rule Actions106355
|
||||
Node: Locations111925
|
||||
Node: Location Type112573
|
||||
Node: Actions and Locations113131
|
||||
Node: Location Default Action115287
|
||||
Node: Declarations116750
|
||||
Node: Token Decl118069
|
||||
Node: Precedence Decl120082
|
||||
Node: Union Decl121633
|
||||
Node: Type Decl122477
|
||||
Node: Expect Decl123383
|
||||
Node: Start Decl124929
|
||||
Node: Pure Decl125307
|
||||
Node: Decl Summary126984
|
||||
Node: Multiple Parsers132367
|
||||
Node: Interface133861
|
||||
Node: Parser Function134733
|
||||
Node: Lexical135568
|
||||
Node: Calling Convention136974
|
||||
Node: Token Values139745
|
||||
Node: Token Positions140894
|
||||
Node: Pure Calling141779
|
||||
Node: Error Reporting144711
|
||||
Node: Action Features146833
|
||||
Node: Algorithm150128
|
||||
Node: Look-Ahead152421
|
||||
Node: Shift/Reduce154553
|
||||
Node: Precedence157465
|
||||
Node: Why Precedence158116
|
||||
Node: Using Precedence159981
|
||||
Node: Precedence Examples160949
|
||||
Node: How Precedence161650
|
||||
Node: Contextual Precedence162799
|
||||
Node: Parser States164590
|
||||
Node: Reduce/Reduce165833
|
||||
Node: Mystery Conflicts169394
|
||||
Node: Stack Overflow172780
|
||||
Node: Error Recovery174153
|
||||
Node: Context Dependency179289
|
||||
Node: Semantic Tokens180137
|
||||
Node: Lexical Tie-ins183154
|
||||
Node: Tie-in Recovery184702
|
||||
Node: Debugging186874
|
||||
Node: Invocation190175
|
||||
Node: Bison Options191427
|
||||
Node: Environment Variables195039
|
||||
Node: Option Cross Key195887
|
||||
Node: VMS Invocation196731
|
||||
Node: Table of Symbols197515
|
||||
Node: Glossary205154
|
||||
Node: Copying This Manual211458
|
||||
Node: GNU Free Documentation License211667
|
||||
Node: Index231532
|
||||
|
||||
End Tag Table
|
||||
|
||||
@@ -83,6 +83,7 @@ Examples
|
||||
* Infix Calc:: Infix (algebraic) notation calculator.
|
||||
Operator precedence is introduced.
|
||||
* Simple Error Recovery:: Continuing after syntax errors.
|
||||
* Location Tracking Calc:: Demonstrating the use of @N and @$.
|
||||
* Multi-function Calc:: Calculator with memory and trig functions.
|
||||
It uses multiple data-types for semantic values.
|
||||
* Exercises:: Ideas for improving the multi-function calculator.
|
||||
@@ -103,6 +104,12 @@ Grammar Rules for `rpcalc'
|
||||
* Rpcalc Line::
|
||||
* Rpcalc Expr::
|
||||
|
||||
Location Tracking Calculator: `ltcalc'
|
||||
|
||||
* Decls: Ltcalc Decls. Bison and C declarations for ltcalc.
|
||||
* Rules: Ltcalc Rules. Grammar rules for ltcalc, with explanations.
|
||||
* Lexer: Ltcalc Lexer. The lexical analyzer.
|
||||
|
||||
Multi-Function Calculator: `mfcalc'
|
||||
|
||||
* Decl: Mfcalc Decl. Bison declarations for multi-function calculator.
|
||||
@@ -1036,6 +1043,7 @@ the Info file and into a source file to try them.
|
||||
* Infix Calc:: Infix (algebraic) notation calculator.
|
||||
Operator precedence is introduced.
|
||||
* Simple Error Recovery:: Continuing after syntax errors.
|
||||
* Location Tracking Calc:: Demonstrating the use of @N and @$.
|
||||
* Multi-function Calc:: Calculator with memory and trig functions.
|
||||
It uses multiple data-types for semantic values.
|
||||
* Exercises:: Ideas for improving the multi-function calculator.
|
||||
|
||||
333
doc/bison.info-2
333
doc/bison.info-2
@@ -494,7 +494,7 @@ Precedence.
|
||||
9
|
||||
|
||||
|
||||
File: bison.info, Node: Simple Error Recovery, Next: Multi-function Calc, Prev: Infix Calc, Up: Examples
|
||||
File: bison.info, Node: Simple Error Recovery, Next: Location Tracking Calc, Prev: Infix Calc, Up: Examples
|
||||
|
||||
Simple Error Recovery
|
||||
=====================
|
||||
@@ -533,7 +533,192 @@ the current line of input. We won't discuss this issue further because
|
||||
it is not specific to Bison programs.
|
||||
|
||||
|
||||
File: bison.info, Node: Multi-function Calc, Next: Exercises, Prev: Simple Error Recovery, Up: Examples
|
||||
File: bison.info, Node: Location Tracking Calc, Next: Multi-function Calc, Prev: Simple Error Recovery, Up: Examples
|
||||
|
||||
Location Tracking Calculator: `ltcalc'
|
||||
======================================
|
||||
|
||||
This example extends the infix notation calculator with location
|
||||
tracking. This feature will be used to improve error reporting, and
|
||||
provide better error messages.
|
||||
|
||||
For the sake of clarity, we will switch for this example to an
|
||||
integer calculator, since most of the work needed to use locations will
|
||||
be done in the lexical analyser.
|
||||
|
||||
* Menu:
|
||||
|
||||
* Decls: Ltcalc Decls. Bison and C declarations for ltcalc.
|
||||
* Rules: Ltcalc Rules. Grammar rules for ltcalc, with explanations.
|
||||
* Lexer: Ltcalc Lexer. The lexical analyzer.
|
||||
|
||||
|
||||
File: bison.info, Node: Ltcalc Decls, Next: Ltcalc Rules, Up: Location Tracking Calc
|
||||
|
||||
Declarations for `ltcalc'
|
||||
-------------------------
|
||||
|
||||
The C and Bison declarations for the location tracking calculator
|
||||
are the same as the declarations for the infix notation calculator.
|
||||
|
||||
/* Location tracking calculator. */
|
||||
|
||||
%{
|
||||
#define YYSTYPE int
|
||||
#include <math.h>
|
||||
%}
|
||||
|
||||
/* Bison declarations. */
|
||||
%token NUM
|
||||
|
||||
%left '-' '+'
|
||||
%left '*' '/'
|
||||
%left NEG
|
||||
%right '^'
|
||||
|
||||
%% /* Grammar follows */
|
||||
|
||||
In the code above, there are no declarations specific to locations.
|
||||
Defining a data type for storing locations is not needed: we will use
|
||||
the type provided by default (*note Data Types of Locations: Location
|
||||
Type.), which is a four member structure with the following integer
|
||||
fields: `first_line', `first_column', `last_line' and `last_column'.
|
||||
|
||||
|
||||
File: bison.info, Node: Ltcalc Rules, Next: Ltcalc Lexer, Prev: Ltcalc Decls, Up: Location Tracking Calc
|
||||
|
||||
Grammar Rules for `ltcalc'
|
||||
--------------------------
|
||||
|
||||
Whether you choose to handle locations or not has no effect on the
|
||||
syntax of your language. Therefore, grammar rules for this example
|
||||
will be very close to those of the previous example: we will only
|
||||
modify them to benefit from the new informations we will have.
|
||||
|
||||
Here, we will use locations to report divisions by zero, and locate
|
||||
the wrong expressions or subexpressions.
|
||||
|
||||
input : /* empty */
|
||||
| input line
|
||||
;
|
||||
|
||||
line : '\n'
|
||||
| exp '\n' { printf ("%d\n", $1); }
|
||||
;
|
||||
|
||||
exp : NUM { $$ = $1; }
|
||||
| exp '+' exp { $$ = $1 + $3; }
|
||||
| exp '-' exp { $$ = $1 - $3; }
|
||||
| exp '*' exp { $$ = $1 * $3; }
|
||||
| exp '/' exp
|
||||
{
|
||||
if ($3)
|
||||
$$ = $1 / $3;
|
||||
else
|
||||
{
|
||||
$$ = 1;
|
||||
printf("Division by zero, l%d,c%d-l%d,c%d",
|
||||
@3.first_line, @3.first_column,
|
||||
@3.last_line, @3.last_column);
|
||||
}
|
||||
}
|
||||
| '-' exp %preg NEG { $$ = -$2; }
|
||||
| exp '^' exp { $$ = pow ($1, $3); }
|
||||
| '(' exp ')' { $$ = $2; }
|
||||
|
||||
This code shows how to reach locations inside of semantic actions, by
|
||||
using the pseudo-variables `@N' for rule components, and the
|
||||
pseudo-variable `@$' for groupings.
|
||||
|
||||
In this example, we never assign a value to `@$', because the output
|
||||
parser can do this automatically. By default, before executing the C
|
||||
code of each action, `@$' is set to range from the beginning of `@1' to
|
||||
the end of `@N', for a rule with N components.
|
||||
|
||||
Of course, this behavior can be redefined (*note Default Action for
|
||||
Locations: Location Default Action.), and for very specific rules, `@$'
|
||||
can be computed by hand.
|
||||
|
||||
|
||||
File: bison.info, Node: Ltcalc Lexer, Prev: Ltcalc Rules, Up: Location Tracking Calc
|
||||
|
||||
The `ltcalc' Lexical Analyzer.
|
||||
------------------------------
|
||||
|
||||
Until now, we relied on Bison's defaults to enable location
|
||||
tracking. The next step is to rewrite the lexical analyser, and make it
|
||||
able to feed the parser with locations of tokens, as he already does
|
||||
for semantic values.
|
||||
|
||||
To do so, we must take into account every single character of the
|
||||
input text, to avoid the computed locations of being fuzzy or wrong:
|
||||
|
||||
int
|
||||
yylex (void)
|
||||
{
|
||||
int c;
|
||||
|
||||
/* skip white space */
|
||||
while ((c = getchar ()) == ' ' || c == '\t')
|
||||
++yylloc.last_column;
|
||||
|
||||
/* step */
|
||||
yylloc.first_line = yylloc.last_line;
|
||||
yylloc.first_column = yylloc.last_column;
|
||||
|
||||
/* process numbers */
|
||||
if (isdigit (c))
|
||||
{
|
||||
yylval = c - '0';
|
||||
++yylloc.last_column;
|
||||
while (isdigit (c = getchar ()))
|
||||
{
|
||||
++yylloc.last_column;
|
||||
yylval = yylval * 10 + c - '0';
|
||||
}
|
||||
ungetc (c, stdin);
|
||||
return NUM;
|
||||
}
|
||||
|
||||
/* return end-of-file */
|
||||
if (c == EOF)
|
||||
return 0;
|
||||
|
||||
/* return single chars and update location */
|
||||
if (c == '\n')
|
||||
{
|
||||
++yylloc.last_line;
|
||||
yylloc.last_column = 0;
|
||||
}
|
||||
else
|
||||
++yylloc.last_column;
|
||||
return c;
|
||||
}
|
||||
|
||||
Basically, the lexical analyzer does the same processing as before:
|
||||
it skips blanks and tabs, and reads numbers or single-character tokens.
|
||||
In addition to this, it updates the `yylloc' global variable (of type
|
||||
`YYLTYPE'), where the location of tokens is stored.
|
||||
|
||||
Now, each time this function returns a token, the parser has it's
|
||||
number as well as it's semantic value, and it's position in the text.
|
||||
The last needed change is to initialize `yylloc', for example in the
|
||||
controlling function:
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
yylloc.first_line = yylloc.last_line = 1;
|
||||
yylloc.first_column = yylloc.last_column = 0;
|
||||
return yyparse ();
|
||||
}
|
||||
|
||||
Remember that computing locations is not a matter of syntax. Every
|
||||
character must be associated to a location update, whether it is in
|
||||
valid input, in comments, in literal strings, and so on...
|
||||
|
||||
|
||||
File: bison.info, Node: Multi-function Calc, Next: Exercises, Prev: Location Tracking Calc, Up: Examples
|
||||
|
||||
Multi-Function Calculator: `mfcalc'
|
||||
===================================
|
||||
@@ -1240,147 +1425,3 @@ associated with X and Y.
|
||||
This says when, why and how to use the exceptional
|
||||
action in the middle of a rule.
|
||||
|
||||
|
||||
File: bison.info, Node: Value Type, Next: Multiple Types, Up: Semantics
|
||||
|
||||
Data Types of Semantic Values
|
||||
-----------------------------
|
||||
|
||||
In a simple program it may be sufficient to use the same data type
|
||||
for the semantic values of all language constructs. This was true in
|
||||
the RPN and infix calculator examples (*note Reverse Polish Notation
|
||||
Calculator: RPN Calc.).
|
||||
|
||||
Bison's default is to use type `int' for all semantic values. To
|
||||
specify some other type, define `YYSTYPE' as a macro, like this:
|
||||
|
||||
#define YYSTYPE double
|
||||
|
||||
This macro definition must go in the C declarations section of the
|
||||
grammar file (*note Outline of a Bison Grammar: Grammar Outline.).
|
||||
|
||||
|
||||
File: bison.info, Node: Multiple Types, Next: Actions, Prev: Value Type, Up: Semantics
|
||||
|
||||
More Than One Value Type
|
||||
------------------------
|
||||
|
||||
In most programs, you will need different data types for different
|
||||
kinds of tokens and groupings. For example, a numeric constant may
|
||||
need type `int' or `long', while a string constant needs type `char *',
|
||||
and an identifier might need a pointer to an entry in the symbol table.
|
||||
|
||||
To use more than one data type for semantic values in one parser,
|
||||
Bison requires you to do two things:
|
||||
|
||||
* Specify the entire collection of possible data types, with the
|
||||
`%union' Bison declaration (*note The Collection of Value Types:
|
||||
Union Decl.).
|
||||
|
||||
* Choose one of those types for each symbol (terminal or
|
||||
nonterminal) for which semantic values are used. This is done for
|
||||
tokens with the `%token' Bison declaration (*note Token Type
|
||||
Names: Token Decl.) and for groupings with the `%type' Bison
|
||||
declaration (*note Nonterminal Symbols: Type Decl.).
|
||||
|
||||
|
||||
File: bison.info, Node: Actions, Next: Action Types, Prev: Multiple Types, Up: Semantics
|
||||
|
||||
Actions
|
||||
-------
|
||||
|
||||
An action accompanies a syntactic rule and contains C code to be
|
||||
executed each time an instance of that rule is recognized. The task of
|
||||
most actions is to compute a semantic value for the grouping built by
|
||||
the rule from the semantic values associated with tokens or smaller
|
||||
groupings.
|
||||
|
||||
An action consists of C statements surrounded by braces, much like a
|
||||
compound statement in C. It can be placed at any position in the rule;
|
||||
it is executed at that position. Most rules have just one action at
|
||||
the end of the rule, following all the components. Actions in the
|
||||
middle of a rule are tricky and used only for special purposes (*note
|
||||
Actions in Mid-Rule: Mid-Rule Actions.).
|
||||
|
||||
The C code in an action can refer to the semantic values of the
|
||||
components matched by the rule with the construct `$N', which stands for
|
||||
the value of the Nth component. The semantic value for the grouping
|
||||
being constructed is `$$'. (Bison translates both of these constructs
|
||||
into array element references when it copies the actions into the parser
|
||||
file.)
|
||||
|
||||
Here is a typical example:
|
||||
|
||||
exp: ...
|
||||
| exp '+' exp
|
||||
{ $$ = $1 + $3; }
|
||||
|
||||
This rule constructs an `exp' from two smaller `exp' groupings
|
||||
connected by a plus-sign token. In the action, `$1' and `$3' refer to
|
||||
the semantic values of the two component `exp' groupings, which are the
|
||||
first and third symbols on the right hand side of the rule. The sum is
|
||||
stored into `$$' so that it becomes the semantic value of the
|
||||
addition-expression just recognized by the rule. If there were a
|
||||
useful semantic value associated with the `+' token, it could be
|
||||
referred to as `$2'.
|
||||
|
||||
If you don't specify an action for a rule, Bison supplies a default:
|
||||
`$$ = $1'. Thus, the value of the first symbol in the rule becomes the
|
||||
value of the whole rule. Of course, the default rule is valid only if
|
||||
the two data types match. There is no meaningful default action for an
|
||||
empty rule; every empty rule must have an explicit action unless the
|
||||
rule's value does not matter.
|
||||
|
||||
`$N' with N zero or negative is allowed for reference to tokens and
|
||||
groupings on the stack _before_ those that match the current rule.
|
||||
This is a very risky practice, and to use it reliably you must be
|
||||
certain of the context in which the rule is applied. Here is a case in
|
||||
which you can use this reliably:
|
||||
|
||||
foo: expr bar '+' expr { ... }
|
||||
| expr bar '-' expr { ... }
|
||||
;
|
||||
|
||||
bar: /* empty */
|
||||
{ previous_expr = $0; }
|
||||
;
|
||||
|
||||
As long as `bar' is used only in the fashion shown here, `$0' always
|
||||
refers to the `expr' which precedes `bar' in the definition of `foo'.
|
||||
|
||||
|
||||
File: bison.info, Node: Action Types, Next: Mid-Rule Actions, Prev: Actions, Up: Semantics
|
||||
|
||||
Data Types of Values in Actions
|
||||
-------------------------------
|
||||
|
||||
If you have chosen a single data type for semantic values, the `$$'
|
||||
and `$N' constructs always have that data type.
|
||||
|
||||
If you have used `%union' to specify a variety of data types, then
|
||||
you must declare a choice among these types for each terminal or
|
||||
nonterminal symbol that can have a semantic value. Then each time you
|
||||
use `$$' or `$N', its data type is determined by which symbol it refers
|
||||
to in the rule. In this example,
|
||||
|
||||
exp: ...
|
||||
| exp '+' exp
|
||||
{ $$ = $1 + $3; }
|
||||
|
||||
`$1' and `$3' refer to instances of `exp', so they all have the data
|
||||
type declared for the nonterminal symbol `exp'. If `$2' were used, it
|
||||
would have the data type declared for the terminal symbol `'+'',
|
||||
whatever that might be.
|
||||
|
||||
Alternatively, you can specify the data type when you refer to the
|
||||
value, by inserting `<TYPE>' after the `$' at the beginning of the
|
||||
reference. For example, if you have defined types as shown here:
|
||||
|
||||
%union {
|
||||
int itype;
|
||||
double dtype;
|
||||
}
|
||||
|
||||
then you can write `$<itype>1' to refer to the first subunit of the
|
||||
rule as an integer, or `$<dtype>1' to refer to it as a double.
|
||||
|
||||
|
||||
251
doc/bison.info-3
251
doc/bison.info-3
@@ -28,6 +28,150 @@ License", "Conditions for Using Bison" and this permission notice may be
|
||||
included in translations approved by the Free Software Foundation
|
||||
instead of in the original English.
|
||||
|
||||
|
||||
File: bison.info, Node: Value Type, Next: Multiple Types, Up: Semantics
|
||||
|
||||
Data Types of Semantic Values
|
||||
-----------------------------
|
||||
|
||||
In a simple program it may be sufficient to use the same data type
|
||||
for the semantic values of all language constructs. This was true in
|
||||
the RPN and infix calculator examples (*note Reverse Polish Notation
|
||||
Calculator: RPN Calc.).
|
||||
|
||||
Bison's default is to use type `int' for all semantic values. To
|
||||
specify some other type, define `YYSTYPE' as a macro, like this:
|
||||
|
||||
#define YYSTYPE double
|
||||
|
||||
This macro definition must go in the C declarations section of the
|
||||
grammar file (*note Outline of a Bison Grammar: Grammar Outline.).
|
||||
|
||||
|
||||
File: bison.info, Node: Multiple Types, Next: Actions, Prev: Value Type, Up: Semantics
|
||||
|
||||
More Than One Value Type
|
||||
------------------------
|
||||
|
||||
In most programs, you will need different data types for different
|
||||
kinds of tokens and groupings. For example, a numeric constant may
|
||||
need type `int' or `long', while a string constant needs type `char *',
|
||||
and an identifier might need a pointer to an entry in the symbol table.
|
||||
|
||||
To use more than one data type for semantic values in one parser,
|
||||
Bison requires you to do two things:
|
||||
|
||||
* Specify the entire collection of possible data types, with the
|
||||
`%union' Bison declaration (*note The Collection of Value Types:
|
||||
Union Decl.).
|
||||
|
||||
* Choose one of those types for each symbol (terminal or
|
||||
nonterminal) for which semantic values are used. This is done for
|
||||
tokens with the `%token' Bison declaration (*note Token Type
|
||||
Names: Token Decl.) and for groupings with the `%type' Bison
|
||||
declaration (*note Nonterminal Symbols: Type Decl.).
|
||||
|
||||
|
||||
File: bison.info, Node: Actions, Next: Action Types, Prev: Multiple Types, Up: Semantics
|
||||
|
||||
Actions
|
||||
-------
|
||||
|
||||
An action accompanies a syntactic rule and contains C code to be
|
||||
executed each time an instance of that rule is recognized. The task of
|
||||
most actions is to compute a semantic value for the grouping built by
|
||||
the rule from the semantic values associated with tokens or smaller
|
||||
groupings.
|
||||
|
||||
An action consists of C statements surrounded by braces, much like a
|
||||
compound statement in C. It can be placed at any position in the rule;
|
||||
it is executed at that position. Most rules have just one action at
|
||||
the end of the rule, following all the components. Actions in the
|
||||
middle of a rule are tricky and used only for special purposes (*note
|
||||
Actions in Mid-Rule: Mid-Rule Actions.).
|
||||
|
||||
The C code in an action can refer to the semantic values of the
|
||||
components matched by the rule with the construct `$N', which stands for
|
||||
the value of the Nth component. The semantic value for the grouping
|
||||
being constructed is `$$'. (Bison translates both of these constructs
|
||||
into array element references when it copies the actions into the parser
|
||||
file.)
|
||||
|
||||
Here is a typical example:
|
||||
|
||||
exp: ...
|
||||
| exp '+' exp
|
||||
{ $$ = $1 + $3; }
|
||||
|
||||
This rule constructs an `exp' from two smaller `exp' groupings
|
||||
connected by a plus-sign token. In the action, `$1' and `$3' refer to
|
||||
the semantic values of the two component `exp' groupings, which are the
|
||||
first and third symbols on the right hand side of the rule. The sum is
|
||||
stored into `$$' so that it becomes the semantic value of the
|
||||
addition-expression just recognized by the rule. If there were a
|
||||
useful semantic value associated with the `+' token, it could be
|
||||
referred to as `$2'.
|
||||
|
||||
If you don't specify an action for a rule, Bison supplies a default:
|
||||
`$$ = $1'. Thus, the value of the first symbol in the rule becomes the
|
||||
value of the whole rule. Of course, the default rule is valid only if
|
||||
the two data types match. There is no meaningful default action for an
|
||||
empty rule; every empty rule must have an explicit action unless the
|
||||
rule's value does not matter.
|
||||
|
||||
`$N' with N zero or negative is allowed for reference to tokens and
|
||||
groupings on the stack _before_ those that match the current rule.
|
||||
This is a very risky practice, and to use it reliably you must be
|
||||
certain of the context in which the rule is applied. Here is a case in
|
||||
which you can use this reliably:
|
||||
|
||||
foo: expr bar '+' expr { ... }
|
||||
| expr bar '-' expr { ... }
|
||||
;
|
||||
|
||||
bar: /* empty */
|
||||
{ previous_expr = $0; }
|
||||
;
|
||||
|
||||
As long as `bar' is used only in the fashion shown here, `$0' always
|
||||
refers to the `expr' which precedes `bar' in the definition of `foo'.
|
||||
|
||||
|
||||
File: bison.info, Node: Action Types, Next: Mid-Rule Actions, Prev: Actions, Up: Semantics
|
||||
|
||||
Data Types of Values in Actions
|
||||
-------------------------------
|
||||
|
||||
If you have chosen a single data type for semantic values, the `$$'
|
||||
and `$N' constructs always have that data type.
|
||||
|
||||
If you have used `%union' to specify a variety of data types, then
|
||||
you must declare a choice among these types for each terminal or
|
||||
nonterminal symbol that can have a semantic value. Then each time you
|
||||
use `$$' or `$N', its data type is determined by which symbol it refers
|
||||
to in the rule. In this example,
|
||||
|
||||
exp: ...
|
||||
| exp '+' exp
|
||||
{ $$ = $1 + $3; }
|
||||
|
||||
`$1' and `$3' refer to instances of `exp', so they all have the data
|
||||
type declared for the nonterminal symbol `exp'. If `$2' were used, it
|
||||
would have the data type declared for the terminal symbol `'+'',
|
||||
whatever that might be.
|
||||
|
||||
Alternatively, you can specify the data type when you refer to the
|
||||
value, by inserting `<TYPE>' after the `$' at the beginning of the
|
||||
reference. For example, if you have defined types as shown here:
|
||||
|
||||
%union {
|
||||
int itype;
|
||||
double dtype;
|
||||
}
|
||||
|
||||
then you can write `$<itype>1' to refer to the first subunit of the
|
||||
rule as an integer, or `$<dtype>1' to refer to it as a double.
|
||||
|
||||
|
||||
File: bison.info, Node: Mid-Rule Actions, Prev: Action Types, Up: Semantics
|
||||
|
||||
@@ -1171,110 +1315,3 @@ useful in actions.
|
||||
textual position of the Nth component of the current rule. *Note
|
||||
Tracking Locations: Locations.
|
||||
|
||||
|
||||
File: bison.info, Node: Algorithm, Next: Error Recovery, Prev: Interface, Up: Top
|
||||
|
||||
The Bison Parser Algorithm
|
||||
**************************
|
||||
|
||||
As Bison reads tokens, it pushes them onto a stack along with their
|
||||
semantic values. The stack is called the "parser stack". Pushing a
|
||||
token is traditionally called "shifting".
|
||||
|
||||
For example, suppose the infix calculator has read `1 + 5 *', with a
|
||||
`3' to come. The stack will have four elements, one for each token
|
||||
that was shifted.
|
||||
|
||||
But the stack does not always have an element for each token read.
|
||||
When the last N tokens and groupings shifted match the components of a
|
||||
grammar rule, they can be combined according to that rule. This is
|
||||
called "reduction". Those tokens and groupings are replaced on the
|
||||
stack by a single grouping whose symbol is the result (left hand side)
|
||||
of that rule. Running the rule's action is part of the process of
|
||||
reduction, because this is what computes the semantic value of the
|
||||
resulting grouping.
|
||||
|
||||
For example, if the infix calculator's parser stack contains this:
|
||||
|
||||
1 + 5 * 3
|
||||
|
||||
and the next input token is a newline character, then the last three
|
||||
elements can be reduced to 15 via the rule:
|
||||
|
||||
expr: expr '*' expr;
|
||||
|
||||
Then the stack contains just these three elements:
|
||||
|
||||
1 + 15
|
||||
|
||||
At this point, another reduction can be made, resulting in the single
|
||||
value 16. Then the newline token can be shifted.
|
||||
|
||||
The parser tries, by shifts and reductions, to reduce the entire
|
||||
input down to a single grouping whose symbol is the grammar's
|
||||
start-symbol (*note Languages and Context-Free Grammars: Language and
|
||||
Grammar.).
|
||||
|
||||
This kind of parser is known in the literature as a bottom-up parser.
|
||||
|
||||
* Menu:
|
||||
|
||||
* Look-Ahead:: Parser looks one token ahead when deciding what to do.
|
||||
* Shift/Reduce:: Conflicts: when either shifting or reduction is valid.
|
||||
* Precedence:: Operator precedence works by resolving conflicts.
|
||||
* Contextual Precedence:: When an operator's precedence depends on context.
|
||||
* Parser States:: The parser is a finite-state-machine with stack.
|
||||
* Reduce/Reduce:: When two rules are applicable in the same situation.
|
||||
* Mystery Conflicts:: Reduce/reduce conflicts that look unjustified.
|
||||
* Stack Overflow:: What happens when stack gets full. How to avoid it.
|
||||
|
||||
|
||||
File: bison.info, Node: Look-Ahead, Next: Shift/Reduce, Up: Algorithm
|
||||
|
||||
Look-Ahead Tokens
|
||||
=================
|
||||
|
||||
The Bison parser does _not_ always reduce immediately as soon as the
|
||||
last N tokens and groupings match a rule. This is because such a
|
||||
simple strategy is inadequate to handle most languages. Instead, when a
|
||||
reduction is possible, the parser sometimes "looks ahead" at the next
|
||||
token in order to decide what to do.
|
||||
|
||||
When a token is read, it is not immediately shifted; first it
|
||||
becomes the "look-ahead token", which is not on the stack. Now the
|
||||
parser can perform one or more reductions of tokens and groupings on
|
||||
the stack, while the look-ahead token remains off to the side. When no
|
||||
more reductions should take place, the look-ahead token is shifted onto
|
||||
the stack. This does not mean that all possible reductions have been
|
||||
done; depending on the token type of the look-ahead token, some rules
|
||||
may choose to delay their application.
|
||||
|
||||
Here is a simple case where look-ahead is needed. These three rules
|
||||
define expressions which contain binary addition operators and postfix
|
||||
unary factorial operators (`!'), and allow parentheses for grouping.
|
||||
|
||||
expr: term '+' expr
|
||||
| term
|
||||
;
|
||||
|
||||
term: '(' expr ')'
|
||||
| term '!'
|
||||
| NUMBER
|
||||
;
|
||||
|
||||
Suppose that the tokens `1 + 2' have been read and shifted; what
|
||||
should be done? If the following token is `)', then the first three
|
||||
tokens must be reduced to form an `expr'. This is the only valid
|
||||
course, because shifting the `)' would produce a sequence of symbols
|
||||
`term ')'', and no rule allows this.
|
||||
|
||||
If the following token is `!', then it must be shifted immediately so
|
||||
that `2 !' can be reduced to make a `term'. If instead the parser were
|
||||
to reduce before shifting, `1 + 2' would become an `expr'. It would
|
||||
then be impossible to shift the `!' because doing so would produce on
|
||||
the stack the sequence of symbols `expr '!''. No rule allows that
|
||||
sequence.
|
||||
|
||||
The current look-ahead token is stored in the variable `yychar'.
|
||||
*Note Special Features for Use in Actions: Action Features.
|
||||
|
||||
|
||||
107
doc/bison.info-4
107
doc/bison.info-4
@@ -28,6 +28,113 @@ License", "Conditions for Using Bison" and this permission notice may be
|
||||
included in translations approved by the Free Software Foundation
|
||||
instead of in the original English.
|
||||
|
||||
|
||||
File: bison.info, Node: Algorithm, Next: Error Recovery, Prev: Interface, Up: Top
|
||||
|
||||
The Bison Parser Algorithm
|
||||
**************************
|
||||
|
||||
As Bison reads tokens, it pushes them onto a stack along with their
|
||||
semantic values. The stack is called the "parser stack". Pushing a
|
||||
token is traditionally called "shifting".
|
||||
|
||||
For example, suppose the infix calculator has read `1 + 5 *', with a
|
||||
`3' to come. The stack will have four elements, one for each token
|
||||
that was shifted.
|
||||
|
||||
But the stack does not always have an element for each token read.
|
||||
When the last N tokens and groupings shifted match the components of a
|
||||
grammar rule, they can be combined according to that rule. This is
|
||||
called "reduction". Those tokens and groupings are replaced on the
|
||||
stack by a single grouping whose symbol is the result (left hand side)
|
||||
of that rule. Running the rule's action is part of the process of
|
||||
reduction, because this is what computes the semantic value of the
|
||||
resulting grouping.
|
||||
|
||||
For example, if the infix calculator's parser stack contains this:
|
||||
|
||||
1 + 5 * 3
|
||||
|
||||
and the next input token is a newline character, then the last three
|
||||
elements can be reduced to 15 via the rule:
|
||||
|
||||
expr: expr '*' expr;
|
||||
|
||||
Then the stack contains just these three elements:
|
||||
|
||||
1 + 15
|
||||
|
||||
At this point, another reduction can be made, resulting in the single
|
||||
value 16. Then the newline token can be shifted.
|
||||
|
||||
The parser tries, by shifts and reductions, to reduce the entire
|
||||
input down to a single grouping whose symbol is the grammar's
|
||||
start-symbol (*note Languages and Context-Free Grammars: Language and
|
||||
Grammar.).
|
||||
|
||||
This kind of parser is known in the literature as a bottom-up parser.
|
||||
|
||||
* Menu:
|
||||
|
||||
* Look-Ahead:: Parser looks one token ahead when deciding what to do.
|
||||
* Shift/Reduce:: Conflicts: when either shifting or reduction is valid.
|
||||
* Precedence:: Operator precedence works by resolving conflicts.
|
||||
* Contextual Precedence:: When an operator's precedence depends on context.
|
||||
* Parser States:: The parser is a finite-state-machine with stack.
|
||||
* Reduce/Reduce:: When two rules are applicable in the same situation.
|
||||
* Mystery Conflicts:: Reduce/reduce conflicts that look unjustified.
|
||||
* Stack Overflow:: What happens when stack gets full. How to avoid it.
|
||||
|
||||
|
||||
File: bison.info, Node: Look-Ahead, Next: Shift/Reduce, Up: Algorithm
|
||||
|
||||
Look-Ahead Tokens
|
||||
=================
|
||||
|
||||
The Bison parser does _not_ always reduce immediately as soon as the
|
||||
last N tokens and groupings match a rule. This is because such a
|
||||
simple strategy is inadequate to handle most languages. Instead, when a
|
||||
reduction is possible, the parser sometimes "looks ahead" at the next
|
||||
token in order to decide what to do.
|
||||
|
||||
When a token is read, it is not immediately shifted; first it
|
||||
becomes the "look-ahead token", which is not on the stack. Now the
|
||||
parser can perform one or more reductions of tokens and groupings on
|
||||
the stack, while the look-ahead token remains off to the side. When no
|
||||
more reductions should take place, the look-ahead token is shifted onto
|
||||
the stack. This does not mean that all possible reductions have been
|
||||
done; depending on the token type of the look-ahead token, some rules
|
||||
may choose to delay their application.
|
||||
|
||||
Here is a simple case where look-ahead is needed. These three rules
|
||||
define expressions which contain binary addition operators and postfix
|
||||
unary factorial operators (`!'), and allow parentheses for grouping.
|
||||
|
||||
expr: term '+' expr
|
||||
| term
|
||||
;
|
||||
|
||||
term: '(' expr ')'
|
||||
| term '!'
|
||||
| NUMBER
|
||||
;
|
||||
|
||||
Suppose that the tokens `1 + 2' have been read and shifted; what
|
||||
should be done? If the following token is `)', then the first three
|
||||
tokens must be reduced to form an `expr'. This is the only valid
|
||||
course, because shifting the `)' would produce a sequence of symbols
|
||||
`term ')'', and no rule allows this.
|
||||
|
||||
If the following token is `!', then it must be shifted immediately so
|
||||
that `2 !' can be reduced to make a `term'. If instead the parser were
|
||||
to reduce before shifting, `1 + 2' would become an `expr'. It would
|
||||
then be impossible to shift the `!' because doing so would produce on
|
||||
the stack the sequence of symbols `expr '!''. No rule allows that
|
||||
sequence.
|
||||
|
||||
The current look-ahead token is stored in the variable `yychar'.
|
||||
*Note Special Features for Use in Actions: Action Features.
|
||||
|
||||
|
||||
File: bison.info, Node: Shift/Reduce, Next: Precedence, Prev: Look-Ahead, Up: Algorithm
|
||||
|
||||
|
||||
@@ -855,6 +855,7 @@ Index
|
||||
* C-language interface: Interface.
|
||||
* calc: Infix Calc.
|
||||
* calculator, infix notation: Infix Calc.
|
||||
* calculator, location tracking: Location Tracking Calc.
|
||||
* calculator, multi-function: Multi-function Calc.
|
||||
* calculator, simple: RPN Calc.
|
||||
* character token: Symbols.
|
||||
@@ -925,8 +926,10 @@ Index
|
||||
* location <1>: Locations.
|
||||
* location: Locations Overview.
|
||||
* location actions: Actions and Locations.
|
||||
* location tracking calculator: Location Tracking Calc.
|
||||
* look-ahead token: Look-Ahead.
|
||||
* LR(1): Mystery Conflicts.
|
||||
* ltcalc: Location Tracking Calc.
|
||||
* main function in simple example: Rpcalc Main.
|
||||
* mfcalc: Multi-function Calc.
|
||||
* mid-rule actions: Mid-Rule Actions.
|
||||
|
||||
@@ -184,6 +184,7 @@ Examples
|
||||
* Infix Calc:: Infix (algebraic) notation calculator.
|
||||
Operator precedence is introduced.
|
||||
* Simple Error Recovery:: Continuing after syntax errors.
|
||||
* Location Tracking Calc:: Demonstrating the use of @@@var{n} and @@$.
|
||||
* Multi-function Calc:: Calculator with memory and trig functions.
|
||||
It uses multiple data-types for semantic values.
|
||||
* Exercises:: Ideas for improving the multi-function calculator.
|
||||
@@ -204,6 +205,12 @@ Grammar Rules for @code{rpcalc}
|
||||
* Rpcalc Line::
|
||||
* Rpcalc Expr::
|
||||
|
||||
Location Tracking Calculator: @code{ltcalc}
|
||||
|
||||
* Decls: Ltcalc Decls. Bison and C declarations for ltcalc.
|
||||
* Rules: Ltcalc Rules. Grammar rules for ltcalc, with explanations.
|
||||
* Lexer: Ltcalc Lexer. The lexical analyzer.
|
||||
|
||||
Multi-Function Calculator: @code{mfcalc}
|
||||
|
||||
* Decl: Mfcalc Decl. Bison declarations for multi-function calculator.
|
||||
@@ -794,6 +801,7 @@ to try them.
|
||||
* Infix Calc:: Infix (algebraic) notation calculator.
|
||||
Operator precedence is introduced.
|
||||
* Simple Error Recovery:: Continuing after syntax errors.
|
||||
* Location Tracking Calc:: Demonstrating the use of @@@var{n} and @@$.
|
||||
* Multi-function Calc:: Calculator with memory and trig functions.
|
||||
It uses multiple data-types for semantic values.
|
||||
* Exercises:: Ideas for improving the multi-function calculator.
|
||||
@@ -1358,6 +1366,204 @@ input lines; it would also have to discard the rest of the current line of
|
||||
input. We won't discuss this issue further because it is not specific to
|
||||
Bison programs.
|
||||
|
||||
@node Location Tracking Calc
|
||||
@section Location Tracking Calculator: @code{ltcalc}
|
||||
@cindex location tracking calculator
|
||||
@cindex @code{ltcalc}
|
||||
@cindex calculator, location tracking
|
||||
|
||||
This example extends the infix notation calculator with location tracking.
|
||||
This feature will be used to improve error reporting, and provide better
|
||||
error messages.
|
||||
|
||||
For the sake of clarity, we will switch for this example to an integer
|
||||
calculator, since most of the work needed to use locations will be done
|
||||
in the lexical analyser.
|
||||
|
||||
@menu
|
||||
* Decls: Ltcalc Decls. Bison and C declarations for ltcalc.
|
||||
* Rules: Ltcalc Rules. Grammar rules for ltcalc, with explanations.
|
||||
* Lexer: Ltcalc Lexer. The lexical analyzer.
|
||||
@end menu
|
||||
|
||||
@node Ltcalc Decls
|
||||
@subsection Declarations for @code{ltcalc}
|
||||
|
||||
The C and Bison declarations for the location tracking calculator are the same
|
||||
as the declarations for the infix notation calculator.
|
||||
|
||||
@example
|
||||
/* Location tracking calculator. */
|
||||
|
||||
%@{
|
||||
#define YYSTYPE int
|
||||
#include <math.h>
|
||||
%@}
|
||||
|
||||
/* Bison declarations. */
|
||||
%token NUM
|
||||
|
||||
%left '-' '+'
|
||||
%left '*' '/'
|
||||
%left NEG
|
||||
%right '^'
|
||||
|
||||
%% /* Grammar follows */
|
||||
@end example
|
||||
|
||||
In the code above, there are no declarations specific to locations. Defining
|
||||
a data type for storing locations is not needed: we will use the type provided
|
||||
by default (@pxref{Location Type, ,Data Types of Locations}), which is a four
|
||||
member structure with the following integer fields: @code{first_line},
|
||||
@code{first_column}, @code{last_line} and @code{last_column}.
|
||||
|
||||
@node Ltcalc Rules
|
||||
@subsection Grammar Rules for @code{ltcalc}
|
||||
|
||||
Whether you choose to handle locations or not has no effect on the syntax of
|
||||
your language. Therefore, grammar rules for this example will be very close to
|
||||
those of the previous example: we will only modify them to benefit from the new
|
||||
informations we will have.
|
||||
|
||||
Here, we will use locations to report divisions by zero, and locate the wrong
|
||||
expressions or subexpressions.
|
||||
|
||||
@example
|
||||
@group
|
||||
input : /* empty */
|
||||
| input line
|
||||
;
|
||||
@end group
|
||||
|
||||
@group
|
||||
line : '\n'
|
||||
| exp '\n' @{ printf ("%d\n", $1); @}
|
||||
;
|
||||
@end group
|
||||
|
||||
@group
|
||||
exp : NUM @{ $$ = $1; @}
|
||||
| exp '+' exp @{ $$ = $1 + $3; @}
|
||||
| exp '-' exp @{ $$ = $1 - $3; @}
|
||||
| exp '*' exp @{ $$ = $1 * $3; @}
|
||||
@end group
|
||||
| exp '/' exp
|
||||
@group
|
||||
@{
|
||||
if ($3)
|
||||
$$ = $1 / $3;
|
||||
else
|
||||
@{
|
||||
$$ = 1;
|
||||
printf("Division by zero, l%d,c%d-l%d,c%d",
|
||||
@@3.first_line, @@3.first_column,
|
||||
@@3.last_line, @@3.last_column);
|
||||
@}
|
||||
@}
|
||||
@end group
|
||||
@group
|
||||
| '-' exp %preg NEG @{ $$ = -$2; @}
|
||||
| exp '^' exp @{ $$ = pow ($1, $3); @}
|
||||
| '(' exp ')' @{ $$ = $2; @}
|
||||
@end group
|
||||
@end example
|
||||
|
||||
This code shows how to reach locations inside of semantic actions, by
|
||||
using the pseudo-variables @code{@@@var{n}} for rule components, and the
|
||||
pseudo-variable @code{@@$} for groupings.
|
||||
|
||||
In this example, we never assign a value to @code{@@$}, because the
|
||||
output parser can do this automatically. By default, before executing
|
||||
the C code of each action, @code{@@$} is set to range from the beginning
|
||||
of @code{@@1} to the end of @code{@@@var{n}}, for a rule with @var{n}
|
||||
components.
|
||||
|
||||
Of course, this behavior can be redefined (@pxref{Location Default
|
||||
Action, , Default Action for Locations}), and for very specific rules,
|
||||
@code{@@$} can be computed by hand.
|
||||
|
||||
@node Ltcalc Lexer
|
||||
@subsection The @code{ltcalc} Lexical Analyzer.
|
||||
|
||||
Until now, we relied on Bison's defaults to enable location tracking. The next
|
||||
step is to rewrite the lexical analyser, and make it able to feed the parser
|
||||
with locations of tokens, as he already does for semantic values.
|
||||
|
||||
To do so, we must take into account every single character of the input text,
|
||||
to avoid the computed locations of being fuzzy or wrong:
|
||||
|
||||
@example
|
||||
@group
|
||||
int
|
||||
yylex (void)
|
||||
@{
|
||||
int c;
|
||||
|
||||
/* skip white space */
|
||||
while ((c = getchar ()) == ' ' || c == '\t')
|
||||
++yylloc.last_column;
|
||||
|
||||
/* step */
|
||||
yylloc.first_line = yylloc.last_line;
|
||||
yylloc.first_column = yylloc.last_column;
|
||||
@end group
|
||||
|
||||
@group
|
||||
/* process numbers */
|
||||
if (isdigit (c))
|
||||
@{
|
||||
yylval = c - '0';
|
||||
++yylloc.last_column;
|
||||
while (isdigit (c = getchar ()))
|
||||
@{
|
||||
++yylloc.last_column;
|
||||
yylval = yylval * 10 + c - '0';
|
||||
@}
|
||||
ungetc (c, stdin);
|
||||
return NUM;
|
||||
@}
|
||||
@end group
|
||||
|
||||
/* return end-of-file */
|
||||
if (c == EOF)
|
||||
return 0;
|
||||
|
||||
/* return single chars and update location */
|
||||
if (c == '\n')
|
||||
@{
|
||||
++yylloc.last_line;
|
||||
yylloc.last_column = 0;
|
||||
@}
|
||||
else
|
||||
++yylloc.last_column;
|
||||
return c;
|
||||
@}
|
||||
@end example
|
||||
|
||||
Basically, the lexical analyzer does the same processing as before: it skips
|
||||
blanks and tabs, and reads numbers or single-character tokens. In addition
|
||||
to this, it updates the @code{yylloc} global variable (of type @code{YYLTYPE}),
|
||||
where the location of tokens is stored.
|
||||
|
||||
Now, each time this function returns a token, the parser has it's number as
|
||||
well as it's semantic value, and it's position in the text. The last needed
|
||||
change is to initialize @code{yylloc}, for example in the controlling
|
||||
function:
|
||||
|
||||
@example
|
||||
int
|
||||
main (void)
|
||||
@{
|
||||
yylloc.first_line = yylloc.last_line = 1;
|
||||
yylloc.first_column = yylloc.last_column = 0;
|
||||
return yyparse ();
|
||||
@}
|
||||
@end example
|
||||
|
||||
Remember that computing locations is not a matter of syntax. Every character
|
||||
must be associated to a location update, whether it is in valid input, in
|
||||
comments, in literal strings, and so on...
|
||||
|
||||
@node Multi-function Calc
|
||||
@section Multi-Function Calculator: @code{mfcalc}
|
||||
@cindex multi-function calculator
|
||||
|
||||
Reference in New Issue
Block a user