mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-18 00:33:03 +00:00
* doc/bison.texinfo (Location Tracking Calc): New node.
This commit is contained in:
333
doc/bison.info-2
333
doc/bison.info-2
@@ -494,7 +494,7 @@ Precedence.
|
||||
9
|
||||
|
||||
|
||||
File: bison.info, Node: Simple Error Recovery, Next: Multi-function Calc, Prev: Infix Calc, Up: Examples
|
||||
File: bison.info, Node: Simple Error Recovery, Next: Location Tracking Calc, Prev: Infix Calc, Up: Examples
|
||||
|
||||
Simple Error Recovery
|
||||
=====================
|
||||
@@ -533,7 +533,192 @@ the current line of input. We won't discuss this issue further because
|
||||
it is not specific to Bison programs.
|
||||
|
||||
|
||||
File: bison.info, Node: Multi-function Calc, Next: Exercises, Prev: Simple Error Recovery, Up: Examples
|
||||
File: bison.info, Node: Location Tracking Calc, Next: Multi-function Calc, Prev: Simple Error Recovery, Up: Examples
|
||||
|
||||
Location Tracking Calculator: `ltcalc'
|
||||
======================================
|
||||
|
||||
This example extends the infix notation calculator with location
|
||||
tracking. This feature will be used to improve error reporting, and
|
||||
provide better error messages.
|
||||
|
||||
For the sake of clarity, we will switch for this example to an
|
||||
integer calculator, since most of the work needed to use locations will
|
||||
be done in the lexical analyzer.
|
||||
|
||||
* Menu:
|
||||
|
||||
* Decls: Ltcalc Decls. Bison and C declarations for ltcalc.
|
||||
* Rules: Ltcalc Rules. Grammar rules for ltcalc, with explanations.
|
||||
* Lexer: Ltcalc Lexer. The lexical analyzer.
|
||||
|
||||
|
||||
File: bison.info, Node: Ltcalc Decls, Next: Ltcalc Rules, Up: Location Tracking Calc
|
||||
|
||||
Declarations for `ltcalc'
|
||||
-------------------------
|
||||
|
||||
The C and Bison declarations for the location tracking calculator
|
||||
are the same as the declarations for the infix notation calculator.
|
||||
|
||||
/* Location tracking calculator. */
|
||||
|
||||
%{
|
||||
#define YYSTYPE int
|
||||
#include <math.h>
|
||||
%}
|
||||
|
||||
/* Bison declarations. */
|
||||
%token NUM
|
||||
|
||||
%left '-' '+'
|
||||
%left '*' '/'
|
||||
%left NEG
|
||||
%right '^'
|
||||
|
||||
%% /* Grammar follows */
|
||||
|
||||
In the code above, there are no declarations specific to locations.
|
||||
Defining a data type for storing locations is not needed: we will use
|
||||
the type provided by default (*note Data Types of Locations: Location
|
||||
Type.), which is a four member structure with the following integer
|
||||
fields: `first_line', `first_column', `last_line' and `last_column'.
|
||||
|
||||
|
||||
File: bison.info, Node: Ltcalc Rules, Next: Ltcalc Lexer, Prev: Ltcalc Decls, Up: Location Tracking Calc
|
||||
|
||||
Grammar Rules for `ltcalc'
|
||||
--------------------------
|
||||
|
||||
Whether you choose to handle locations or not has no effect on the
|
||||
syntax of your language. Therefore, grammar rules for this example
|
||||
will be very close to those of the previous example: we will only
|
||||
modify them to benefit from the new information we will have.
|
||||
|
||||
Here, we will use locations to report divisions by zero, and locate
|
||||
the wrong expressions or subexpressions.
|
||||
|
||||
input : /* empty */
|
||||
| input line
|
||||
;
|
||||
|
||||
line : '\n'
|
||||
| exp '\n' { printf ("%d\n", $1); }
|
||||
;
|
||||
|
||||
exp : NUM { $$ = $1; }
|
||||
| exp '+' exp { $$ = $1 + $3; }
|
||||
| exp '-' exp { $$ = $1 - $3; }
|
||||
| exp '*' exp { $$ = $1 * $3; }
|
||||
| exp '/' exp
|
||||
{
|
||||
if ($3)
|
||||
$$ = $1 / $3;
|
||||
else
|
||||
{
|
||||
$$ = 1;
|
||||
printf("Division by zero, l%d,c%d-l%d,c%d",
|
||||
@3.first_line, @3.first_column,
|
||||
@3.last_line, @3.last_column);
|
||||
}
|
||||
}
|
||||
| '-' exp %prec NEG { $$ = -$2; }
|
||||
| exp '^' exp { $$ = pow ($1, $3); }
|
||||
| '(' exp ')' { $$ = $2; }
|
||||
|
||||
This code shows how to reach locations inside of semantic actions, by
|
||||
using the pseudo-variables `@N' for rule components, and the
|
||||
pseudo-variable `@$' for groupings.
|
||||
|
||||
In this example, we never assign a value to `@$', because the output
|
||||
parser can do this automatically. By default, before executing the C
|
||||
code of each action, `@$' is set to range from the beginning of `@1' to
|
||||
the end of `@N', for a rule with N components.
|
||||
|
||||
Of course, this behavior can be redefined (*note Default Action for
|
||||
Locations: Location Default Action.), and for very specific rules, `@$'
|
||||
can be computed by hand.
|
||||
|
||||
|
||||
File: bison.info, Node: Ltcalc Lexer, Prev: Ltcalc Rules, Up: Location Tracking Calc
|
||||
|
||||
The `ltcalc' Lexical Analyzer.
|
||||
------------------------------
|
||||
|
||||
Until now, we relied on Bison's defaults to enable location
|
||||
tracking. The next step is to rewrite the lexical analyzer, and make it
|
||||
able to feed the parser with locations of tokens, as it already does
|
||||
for semantic values.
|
||||
|
||||
To do so, we must take into account every single character of the
|
||||
input text, to prevent the computed locations from being fuzzy or wrong:
|
||||
|
||||
int
|
||||
yylex (void)
|
||||
{
|
||||
int c;
|
||||
|
||||
/* skip white space */
|
||||
while ((c = getchar ()) == ' ' || c == '\t')
|
||||
++yylloc.last_column;
|
||||
|
||||
/* step */
|
||||
yylloc.first_line = yylloc.last_line;
|
||||
yylloc.first_column = yylloc.last_column;
|
||||
|
||||
/* process numbers */
|
||||
if (isdigit (c))
|
||||
{
|
||||
yylval = c - '0';
|
||||
++yylloc.last_column;
|
||||
while (isdigit (c = getchar ()))
|
||||
{
|
||||
++yylloc.last_column;
|
||||
yylval = yylval * 10 + c - '0';
|
||||
}
|
||||
ungetc (c, stdin);
|
||||
return NUM;
|
||||
}
|
||||
|
||||
/* return end-of-file */
|
||||
if (c == EOF)
|
||||
return 0;
|
||||
|
||||
/* return single chars and update location */
|
||||
if (c == '\n')
|
||||
{
|
||||
++yylloc.last_line;
|
||||
yylloc.last_column = 0;
|
||||
}
|
||||
else
|
||||
++yylloc.last_column;
|
||||
return c;
|
||||
}
|
||||
|
||||
Basically, the lexical analyzer does the same processing as before:
|
||||
it skips blanks and tabs, and reads numbers or single-character tokens.
|
||||
In addition to this, it updates the `yylloc' global variable (of type
|
||||
`YYLTYPE'), where the location of tokens is stored.
|
||||
|
||||
Now, each time this function returns a token, the parser has its
|
||||
number as well as its semantic value, and its position in the text.
|
||||
The last needed change is to initialize `yylloc', for example in the
|
||||
controlling function:
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
yylloc.first_line = yylloc.last_line = 1;
|
||||
yylloc.first_column = yylloc.last_column = 0;
|
||||
return yyparse ();
|
||||
}
|
||||
|
||||
Remember that computing locations is not a matter of syntax. Every
|
||||
character must be associated with a location update, whether it is in
|
||||
valid input, in comments, in literal strings, and so on...
|
||||
|
||||
|
||||
File: bison.info, Node: Multi-function Calc, Next: Exercises, Prev: Location Tracking Calc, Up: Examples
|
||||
|
||||
Multi-Function Calculator: `mfcalc'
|
||||
===================================
|
||||
@@ -1240,147 +1425,3 @@ associated with X and Y.
|
||||
This says when, why and how to use the exceptional
|
||||
action in the middle of a rule.
|
||||
|
||||
|
||||
File: bison.info, Node: Value Type, Next: Multiple Types, Up: Semantics
|
||||
|
||||
Data Types of Semantic Values
|
||||
-----------------------------
|
||||
|
||||
In a simple program it may be sufficient to use the same data type
|
||||
for the semantic values of all language constructs. This was true in
|
||||
the RPN and infix calculator examples (*note Reverse Polish Notation
|
||||
Calculator: RPN Calc.).
|
||||
|
||||
Bison's default is to use type `int' for all semantic values. To
|
||||
specify some other type, define `YYSTYPE' as a macro, like this:
|
||||
|
||||
#define YYSTYPE double
|
||||
|
||||
This macro definition must go in the C declarations section of the
|
||||
grammar file (*note Outline of a Bison Grammar: Grammar Outline.).
|
||||
|
||||
|
||||
File: bison.info, Node: Multiple Types, Next: Actions, Prev: Value Type, Up: Semantics
|
||||
|
||||
More Than One Value Type
|
||||
------------------------
|
||||
|
||||
In most programs, you will need different data types for different
|
||||
kinds of tokens and groupings. For example, a numeric constant may
|
||||
need type `int' or `long', while a string constant needs type `char *',
|
||||
and an identifier might need a pointer to an entry in the symbol table.
|
||||
|
||||
To use more than one data type for semantic values in one parser,
|
||||
Bison requires you to do two things:
|
||||
|
||||
* Specify the entire collection of possible data types, with the
|
||||
`%union' Bison declaration (*note The Collection of Value Types:
|
||||
Union Decl.).
|
||||
|
||||
* Choose one of those types for each symbol (terminal or
|
||||
nonterminal) for which semantic values are used. This is done for
|
||||
tokens with the `%token' Bison declaration (*note Token Type
|
||||
Names: Token Decl.) and for groupings with the `%type' Bison
|
||||
declaration (*note Nonterminal Symbols: Type Decl.).
|
||||
|
||||
|
||||
File: bison.info, Node: Actions, Next: Action Types, Prev: Multiple Types, Up: Semantics
|
||||
|
||||
Actions
|
||||
-------
|
||||
|
||||
An action accompanies a syntactic rule and contains C code to be
|
||||
executed each time an instance of that rule is recognized. The task of
|
||||
most actions is to compute a semantic value for the grouping built by
|
||||
the rule from the semantic values associated with tokens or smaller
|
||||
groupings.
|
||||
|
||||
An action consists of C statements surrounded by braces, much like a
|
||||
compound statement in C. It can be placed at any position in the rule;
|
||||
it is executed at that position. Most rules have just one action at
|
||||
the end of the rule, following all the components. Actions in the
|
||||
middle of a rule are tricky and used only for special purposes (*note
|
||||
Actions in Mid-Rule: Mid-Rule Actions.).
|
||||
|
||||
The C code in an action can refer to the semantic values of the
|
||||
components matched by the rule with the construct `$N', which stands for
|
||||
the value of the Nth component. The semantic value for the grouping
|
||||
being constructed is `$$'. (Bison translates both of these constructs
|
||||
into array element references when it copies the actions into the parser
|
||||
file.)
|
||||
|
||||
Here is a typical example:
|
||||
|
||||
exp: ...
|
||||
| exp '+' exp
|
||||
{ $$ = $1 + $3; }
|
||||
|
||||
This rule constructs an `exp' from two smaller `exp' groupings
|
||||
connected by a plus-sign token. In the action, `$1' and `$3' refer to
|
||||
the semantic values of the two component `exp' groupings, which are the
|
||||
first and third symbols on the right hand side of the rule. The sum is
|
||||
stored into `$$' so that it becomes the semantic value of the
|
||||
addition-expression just recognized by the rule. If there were a
|
||||
useful semantic value associated with the `+' token, it could be
|
||||
referred to as `$2'.
|
||||
|
||||
If you don't specify an action for a rule, Bison supplies a default:
|
||||
`$$ = $1'. Thus, the value of the first symbol in the rule becomes the
|
||||
value of the whole rule. Of course, the default rule is valid only if
|
||||
the two data types match. There is no meaningful default action for an
|
||||
empty rule; every empty rule must have an explicit action unless the
|
||||
rule's value does not matter.
|
||||
|
||||
`$N' with N zero or negative is allowed for reference to tokens and
|
||||
groupings on the stack _before_ those that match the current rule.
|
||||
This is a very risky practice, and to use it reliably you must be
|
||||
certain of the context in which the rule is applied. Here is a case in
|
||||
which you can use this reliably:
|
||||
|
||||
foo: expr bar '+' expr { ... }
|
||||
| expr bar '-' expr { ... }
|
||||
;
|
||||
|
||||
bar: /* empty */
|
||||
{ previous_expr = $0; }
|
||||
;
|
||||
|
||||
As long as `bar' is used only in the fashion shown here, `$0' always
|
||||
refers to the `expr' which precedes `bar' in the definition of `foo'.
|
||||
|
||||
|
||||
File: bison.info, Node: Action Types, Next: Mid-Rule Actions, Prev: Actions, Up: Semantics
|
||||
|
||||
Data Types of Values in Actions
|
||||
-------------------------------
|
||||
|
||||
If you have chosen a single data type for semantic values, the `$$'
|
||||
and `$N' constructs always have that data type.
|
||||
|
||||
If you have used `%union' to specify a variety of data types, then
|
||||
you must declare a choice among these types for each terminal or
|
||||
nonterminal symbol that can have a semantic value. Then each time you
|
||||
use `$$' or `$N', its data type is determined by which symbol it refers
|
||||
to in the rule. In this example,
|
||||
|
||||
exp: ...
|
||||
| exp '+' exp
|
||||
{ $$ = $1 + $3; }
|
||||
|
||||
`$1' and `$3' refer to instances of `exp', so they all have the data
|
||||
type declared for the nonterminal symbol `exp'. If `$2' were used, it
|
||||
would have the data type declared for the terminal symbol `'+'',
|
||||
whatever that might be.
|
||||
|
||||
Alternatively, you can specify the data type when you refer to the
|
||||
value, by inserting `<TYPE>' after the `$' at the beginning of the
|
||||
reference. For example, if you have defined types as shown here:
|
||||
|
||||
%union {
|
||||
int itype;
|
||||
double dtype;
|
||||
}
|
||||
|
||||
then you can write `$<itype>1' to refer to the first subunit of the
|
||||
rule as an integer, or `$<dtype>1' to refer to it as a double.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user