mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-21 02:03:03 +00:00
Minor spelling, grammar, and white space fixes.
(Symbols): Mention that any negative value returned from yylex signifies end-of-input. Warn about negative chars. Mention the portable Standard C character set.
This commit is contained in:
@@ -1039,7 +1039,7 @@ calculator. As in C, comments are placed between @samp{/*@dots{}*/}.
|
|||||||
|
|
||||||
%token NUM
|
%token NUM
|
||||||
|
|
||||||
%% /* Grammar rules and actions follow */
|
%% /* Grammar rules and actions follow. */
|
||||||
@end example
|
@end example
|
||||||
|
|
||||||
The declarations section (@pxref{Prologue, , The prologue}) contains two
|
The declarations section (@pxref{Prologue, , The prologue}) contains two
|
||||||
@@ -1148,7 +1148,7 @@ more times.
|
|||||||
|
|
||||||
The parser function @code{yyparse} continues to process input until a
|
The parser function @code{yyparse} continues to process input until a
|
||||||
grammatical error is seen or the lexical analyzer says there are no more
|
grammatical error is seen or the lexical analyzer says there are no more
|
||||||
input tokens; we will arrange for the latter to happen at end of file.
|
input tokens; we will arrange for the latter to happen at end-of-input.
|
||||||
|
|
||||||
@node Rpcalc Line
|
@node Rpcalc Line
|
||||||
@subsubsection Explanation of @code{line}
|
@subsubsection Explanation of @code{line}
|
||||||
@@ -1266,18 +1266,17 @@ for it. (The C data type of @code{yylval} is @code{YYSTYPE}, which was
|
|||||||
defined at the beginning of the grammar; @pxref{Rpcalc Decls,
|
defined at the beginning of the grammar; @pxref{Rpcalc Decls,
|
||||||
,Declarations for @code{rpcalc}}.)
|
,Declarations for @code{rpcalc}}.)
|
||||||
|
|
||||||
A token type code of zero is returned if the end-of-file is encountered.
|
A token type code of zero is returned if end-of-input is encountered.
|
||||||
(Bison recognizes any nonpositive value as indicating the end of the
|
(Bison recognizes any nonpositive value as indicating end-of-input.)
|
||||||
input.)
|
|
||||||
|
|
||||||
Here is the code for the lexical analyzer:
|
Here is the code for the lexical analyzer:
|
||||||
|
|
||||||
@example
|
@example
|
||||||
@group
|
@group
|
||||||
/* Lexical analyzer returns a double floating point
|
/* The lexical analyzer returns a double floating point
|
||||||
number on the stack and the token NUM, or the numeric code
|
number on the stack and the token NUM, or the numeric code
|
||||||
of the character read if not a number. Skips all blanks
|
of the character read if not a number. It skips all blanks
|
||||||
and tabs, returns 0 for EOF. */
|
and tabs, and returns 0 for end-of-input. */
|
||||||
|
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
@end group
|
@end group
|
||||||
@@ -1288,12 +1287,12 @@ yylex (void)
|
|||||||
@{
|
@{
|
||||||
int c;
|
int c;
|
||||||
|
|
||||||
/* skip white space */
|
/* Skip white space. */
|
||||||
while ((c = getchar ()) == ' ' || c == '\t')
|
while ((c = getchar ()) == ' ' || c == '\t')
|
||||||
;
|
;
|
||||||
@end group
|
@end group
|
||||||
@group
|
@group
|
||||||
/* process numbers */
|
/* Process numbers. */
|
||||||
if (c == '.' || isdigit (c))
|
if (c == '.' || isdigit (c))
|
||||||
@{
|
@{
|
||||||
ungetc (c, stdin);
|
ungetc (c, stdin);
|
||||||
@@ -1302,10 +1301,10 @@ yylex (void)
|
|||||||
@}
|
@}
|
||||||
@end group
|
@end group
|
||||||
@group
|
@group
|
||||||
/* return end-of-file */
|
/* Return end-of-input. */
|
||||||
if (c == EOF)
|
if (c == EOF)
|
||||||
return 0;
|
return 0;
|
||||||
/* return single chars */
|
/* Return a single char. */
|
||||||
return c;
|
return c;
|
||||||
@}
|
@}
|
||||||
@end group
|
@end group
|
||||||
@@ -1345,7 +1344,7 @@ here is the definition we will use:
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
void
|
void
|
||||||
yyerror (const char *s) /* Called by yyparse on error */
|
yyerror (const char *s) /* called by yyparse on error */
|
||||||
@{
|
@{
|
||||||
printf ("%s\n", s);
|
printf ("%s\n", s);
|
||||||
@}
|
@}
|
||||||
@@ -1573,7 +1572,7 @@ This example extends the infix notation calculator with location
|
|||||||
tracking. This feature will be used to improve the error messages. For
|
tracking. This feature will be used to improve the error messages. For
|
||||||
the sake of clarity, this example is a simple integer calculator, since
|
the sake of clarity, this example is a simple integer calculator, since
|
||||||
most of the work needed to use locations will be done in the lexical
|
most of the work needed to use locations will be done in the lexical
|
||||||
analyser.
|
analyzer.
|
||||||
|
|
||||||
@menu
|
@menu
|
||||||
* Decls: Ltcalc Decls. Bison and C declarations for ltcalc.
|
* Decls: Ltcalc Decls. Bison and C declarations for ltcalc.
|
||||||
@@ -1681,7 +1680,7 @@ hand.
|
|||||||
@subsection The @code{ltcalc} Lexical Analyzer
|
@subsection The @code{ltcalc} Lexical Analyzer
|
||||||
|
|
||||||
Until now, we relied on Bison's defaults to enable location
|
Until now, we relied on Bison's defaults to enable location
|
||||||
tracking. The next step is to rewrite the lexical analyser, and make it
|
tracking. The next step is to rewrite the lexical analyzer, and make it
|
||||||
able to feed the parser with the token locations, as it already does for
|
able to feed the parser with the token locations, as it already does for
|
||||||
semantic values.
|
semantic values.
|
||||||
|
|
||||||
@@ -1695,17 +1694,17 @@ yylex (void)
|
|||||||
@{
|
@{
|
||||||
int c;
|
int c;
|
||||||
|
|
||||||
/* skip white space */
|
/* Skip white space. */
|
||||||
while ((c = getchar ()) == ' ' || c == '\t')
|
while ((c = getchar ()) == ' ' || c == '\t')
|
||||||
++yylloc.last_column;
|
++yylloc.last_column;
|
||||||
|
|
||||||
/* step */
|
/* Step. */
|
||||||
yylloc.first_line = yylloc.last_line;
|
yylloc.first_line = yylloc.last_line;
|
||||||
yylloc.first_column = yylloc.last_column;
|
yylloc.first_column = yylloc.last_column;
|
||||||
@end group
|
@end group
|
||||||
|
|
||||||
@group
|
@group
|
||||||
/* process numbers */
|
/* Process numbers. */
|
||||||
if (isdigit (c))
|
if (isdigit (c))
|
||||||
@{
|
@{
|
||||||
yylval = c - '0';
|
yylval = c - '0';
|
||||||
@@ -1720,11 +1719,11 @@ yylex (void)
|
|||||||
@}
|
@}
|
||||||
@end group
|
@end group
|
||||||
|
|
||||||
/* return end-of-file */
|
/* Return end-of-input. */
|
||||||
if (c == EOF)
|
if (c == EOF)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
/* return single chars and update location */
|
/* Return a single char, and update location. */
|
||||||
if (c == '\n')
|
if (c == '\n')
|
||||||
@{
|
@{
|
||||||
++yylloc.last_line;
|
++yylloc.last_line;
|
||||||
@@ -1915,7 +1914,7 @@ provides for either functions or variables to be placed in the table.
|
|||||||
|
|
||||||
@smallexample
|
@smallexample
|
||||||
@group
|
@group
|
||||||
/* Fonctions type. */
|
/* Function type. */
|
||||||
typedef double (*func_t) (double);
|
typedef double (*func_t) (double);
|
||||||
|
|
||||||
/* Data type for links in the chain of symbols. */
|
/* Data type for links in the chain of symbols. */
|
||||||
@@ -2024,7 +2023,7 @@ putsym (char *sym_name, int sym_type)
|
|||||||
ptr->name = (char *) malloc (strlen (sym_name) + 1);
|
ptr->name = (char *) malloc (strlen (sym_name) + 1);
|
||||||
strcpy (ptr->name,sym_name);
|
strcpy (ptr->name,sym_name);
|
||||||
ptr->type = sym_type;
|
ptr->type = sym_type;
|
||||||
ptr->value.var = 0; /* set value to 0 even if fctn. */
|
ptr->value.var = 0; /* Set value to 0 even if fctn. */
|
||||||
ptr->next = (struct symrec *)sym_table;
|
ptr->next = (struct symrec *)sym_table;
|
||||||
sym_table = ptr;
|
sym_table = ptr;
|
||||||
return ptr;
|
return ptr;
|
||||||
@@ -2117,7 +2116,7 @@ yylex (void)
|
|||||||
@}
|
@}
|
||||||
@end group
|
@end group
|
||||||
@group
|
@group
|
||||||
while (c != EOF && isalnum (c));
|
while (isalnum (c));
|
||||||
|
|
||||||
ungetc (c, stdin);
|
ungetc (c, stdin);
|
||||||
symbuf[i] = '\0';
|
symbuf[i] = '\0';
|
||||||
@@ -2346,8 +2345,8 @@ your program will confuse other readers.
|
|||||||
|
|
||||||
All the usual escape sequences used in character literals in C can be
|
All the usual escape sequences used in character literals in C can be
|
||||||
used in Bison as well, but you must not use the null character as a
|
used in Bison as well, but you must not use the null character as a
|
||||||
character literal because its numeric code, zero, is the code @code{yylex}
|
character literal because its numeric code, zero, signifies
|
||||||
returns for end-of-input (@pxref{Calling Convention, ,Calling Convention
|
end-of-input (@pxref{Calling Convention, ,Calling Convention
|
||||||
for @code{yylex}}).
|
for @code{yylex}}).
|
||||||
|
|
||||||
@item
|
@item
|
||||||
@@ -2384,12 +2383,15 @@ How you choose to write a terminal symbol has no effect on its
|
|||||||
grammatical meaning. That depends only on where it appears in rules and
|
grammatical meaning. That depends only on where it appears in rules and
|
||||||
on when the parser function returns that symbol.
|
on when the parser function returns that symbol.
|
||||||
|
|
||||||
The value returned by @code{yylex} is always one of the terminal symbols
|
The value returned by @code{yylex} is always one of the terminal
|
||||||
(or 0 for end-of-input). Whichever way you write the token type in the
|
symbols, except that a zero or negative value signifies end-of-input.
|
||||||
grammar rules, you write it the same way in the definition of @code{yylex}.
|
Whichever way you write the token type in the grammar rules, you write
|
||||||
The numeric code for a character token type is simply the numeric code of
|
it the same way in the definition of @code{yylex}. The numeric code
|
||||||
the character, so @code{yylex} can use the identical character constant to
|
for a character token type is simply the positive numeric code of the
|
||||||
generate the requisite code. Each named token type becomes a C macro in
|
character, so @code{yylex} can use the identical value to generate the
|
||||||
|
requisite code, though you may need to convert it to @code{unsigned
|
||||||
|
char} to avoid sign-extension on hosts where @code{char} is signed.
|
||||||
|
Each named token type becomes a C macro in
|
||||||
the parser file, so @code{yylex} can use the name to stand for the code.
|
the parser file, so @code{yylex} can use the name to stand for the code.
|
||||||
(This is why periods don't make sense in terminal symbols.)
|
(This is why periods don't make sense in terminal symbols.)
|
||||||
@xref{Calling Convention, ,Calling Convention for @code{yylex}}.
|
@xref{Calling Convention, ,Calling Convention for @code{yylex}}.
|
||||||
@@ -2400,15 +2402,23 @@ option when you run Bison, so that it will write these macro definitions
|
|||||||
into a separate header file @file{@var{name}.tab.h} which you can include
|
into a separate header file @file{@var{name}.tab.h} which you can include
|
||||||
in the other source files that need it. @xref{Invocation, ,Invoking Bison}.
|
in the other source files that need it. @xref{Invocation, ,Invoking Bison}.
|
||||||
|
|
||||||
The @code{yylex} function must use the same character set and encoding
|
If you want to write a grammar that is portable to any Standard C
|
||||||
that was used by Bison. For example, if you run Bison in an
|
host, you must use only non-null character tokens taken from the basic
|
||||||
|
execution character set of Standard C. This set consists of the ten
|
||||||
|
digits, the 52 lower- and upper-case English letters, and the
|
||||||
|
characters in the following C-language string:
|
||||||
|
|
||||||
|
@example
|
||||||
|
"\a\b\t\n\v\f\r !\"#%&'()*+,-./:;<=>?[\\]^_@{|@}~"
|
||||||
|
@end example
|
||||||
|
|
||||||
|
The @code{yylex} function and Bison must use a consistent character
|
||||||
|
set and encoding for character tokens. For example, if you run Bison in an
|
||||||
@sc{ascii} environment, but then compile and run the resulting program
|
@sc{ascii} environment, but then compile and run the resulting program
|
||||||
in an environment that uses an incompatible character set like
|
in an environment that uses an incompatible character set like
|
||||||
@sc{ebcdic}, the resulting program will probably not work because the
|
@sc{ebcdic}, the resulting program may not work because the
|
||||||
tables generated by Bison will assume @sc{ascii} numeric values for
|
tables generated by Bison will assume @sc{ascii} numeric values for
|
||||||
character tokens. Portable grammars should avoid non-@sc{ascii}
|
character tokens. It is standard
|
||||||
character tokens, as implementations in practice often use different
|
|
||||||
and incompatible extensions in this area. However, it is standard
|
|
||||||
practice for software distributions to contain C source files that
|
practice for software distributions to contain C source files that
|
||||||
were generated by Bison in an @sc{ascii} environment, so installers on
|
were generated by Bison in an @sc{ascii} environment, so installers on
|
||||||
platforms that are incompatible with @sc{ascii} must rebuild those
|
platforms that are incompatible with @sc{ascii} must rebuild those
|
||||||
@@ -2959,7 +2969,7 @@ actually does to implement mid-rule actions.
|
|||||||
@cindex position, textual
|
@cindex position, textual
|
||||||
|
|
||||||
Though grammar rules and semantic actions are enough to write a fully
|
Though grammar rules and semantic actions are enough to write a fully
|
||||||
functional parser, it can be useful to process some additionnal informations,
|
functional parser, it can be useful to process some additional information,
|
||||||
especially symbol locations.
|
especially symbol locations.
|
||||||
|
|
||||||
@c (terminal or not) ?
|
@c (terminal or not) ?
|
||||||
@@ -3722,8 +3732,9 @@ that need it. @xref{Invocation, ,Invoking Bison}.
|
|||||||
@node Calling Convention
|
@node Calling Convention
|
||||||
@subsection Calling Convention for @code{yylex}
|
@subsection Calling Convention for @code{yylex}
|
||||||
|
|
||||||
The value that @code{yylex} returns must be the numeric code for the type
|
The value that @code{yylex} returns must be the positive numeric code
|
||||||
of token it has just found, or 0 for end-of-input.
|
for the type of token it has just found; a zero or negative value
|
||||||
|
signifies end-of-input.
|
||||||
|
|
||||||
When a token is referred to in the grammar rules by a name, that name
|
When a token is referred to in the grammar rules by a name, that name
|
||||||
in the parser file becomes a C macro whose definition is the proper
|
in the parser file becomes a C macro whose definition is the proper
|
||||||
@@ -3732,8 +3743,9 @@ to indicate that type. @xref{Symbols}.
|
|||||||
|
|
||||||
When a token is referred to in the grammar rules by a character literal,
|
When a token is referred to in the grammar rules by a character literal,
|
||||||
the numeric code for that character is also the code for the token type.
|
the numeric code for that character is also the code for the token type.
|
||||||
So @code{yylex} can simply return that character code. The null character
|
So @code{yylex} can simply return that character code, possibly converted
|
||||||
must not be used this way, because its code is zero and that is what
|
to @code{unsigned char} to avoid sign-extension. The null character
|
||||||
|
must not be used this way, because its code is zero and that
|
||||||
signifies end-of-input.
|
signifies end-of-input.
|
||||||
|
|
||||||
Here is an example showing these things:
|
Here is an example showing these things:
|
||||||
@@ -3743,7 +3755,7 @@ int
|
|||||||
yylex (void)
|
yylex (void)
|
||||||
@{
|
@{
|
||||||
@dots{}
|
@dots{}
|
||||||
if (c == EOF) /* Detect end of file. */
|
if (c == EOF) /* Detect end-of-input. */
|
||||||
return 0;
|
return 0;
|
||||||
@dots{}
|
@dots{}
|
||||||
if (c == '+' || c == '-')
|
if (c == '+' || c == '-')
|
||||||
@@ -4989,7 +5001,7 @@ error recovery. A simple and useful strategy is simply to skip the rest of
|
|||||||
the current input line or current statement if an error is detected:
|
the current input line or current statement if an error is detected:
|
||||||
|
|
||||||
@example
|
@example
|
||||||
stmnt: error ';' /* on error, skip until ';' is read */
|
stmnt: error ';' /* On error, skip until ';' is read. */
|
||||||
@end example
|
@end example
|
||||||
|
|
||||||
It is also useful to recover to the matching close-delimiter of an
|
It is also useful to recover to the matching close-delimiter of an
|
||||||
@@ -5783,10 +5795,11 @@ Here @var{infile} is the grammar file name, which usually ends in
|
|||||||
@samp{.y}. The parser file's name is made by replacing the @samp{.y}
|
@samp{.y}. The parser file's name is made by replacing the @samp{.y}
|
||||||
with @samp{.tab.c}. Thus, the command @samp{bison foo.y} yields
|
with @samp{.tab.c}. Thus, the command @samp{bison foo.y} yields
|
||||||
@file{foo.tab.c}, and the command @samp{bison hack/foo.y} yields
|
@file{foo.tab.c}, and the command @samp{bison hack/foo.y} yields
|
||||||
@file{hack/foo.tab.c}. It's is also possible, in case you are writing
|
@file{hack/foo.tab.c}. It's also possible, in case you are writing
|
||||||
C++ code instead of C in your grammar file, to name it @file{foo.ypp}
|
C++ code instead of C in your grammar file, to name it @file{foo.ypp}
|
||||||
or @file{foo.y++}. Then, the output files will take an extention like
|
or @file{foo.y++}. Then, the output files will take an extension like
|
||||||
the given one as input (repectively @file{foo.tab.cpp} and @file{foo.tab.c++}).
|
the given one as input (respectively @file{foo.tab.cpp} and
|
||||||
|
@file{foo.tab.c++}).
|
||||||
This feature takes effect with all options that manipulate filenames like
|
This feature takes effect with all options that manipulate filenames like
|
||||||
@samp{-o} or @samp{-d}.
|
@samp{-o} or @samp{-d}.
|
||||||
|
|
||||||
@@ -5796,7 +5809,7 @@ For example :
|
|||||||
bison -d @var{infile.yxx}
|
bison -d @var{infile.yxx}
|
||||||
@end example
|
@end example
|
||||||
@noindent
|
@noindent
|
||||||
will produce @file{infile.tab.cxx} and @file{infile.tab.hxx}. and
|
will produce @file{infile.tab.cxx} and @file{infile.tab.hxx}, and
|
||||||
|
|
||||||
@example
|
@example
|
||||||
bison -d @var{infile.y} -o @var{output.c++}
|
bison -d @var{infile.y} -o @var{output.c++}
|
||||||
@@ -5950,8 +5963,8 @@ Bison. If the grammar file is @file{foo.y}, the VCG output file will
|
|||||||
be @file{foo.vcg}.
|
be @file{foo.vcg}.
|
||||||
|
|
||||||
@item --graph=@var{graph-file}
|
@item --graph=@var{graph-file}
|
||||||
The behaviour of @var{--graph} is the same than @samp{-g}. The only
|
The behavior of @var{--graph} is the same as @samp{-g}. The only
|
||||||
difference is that it has an optionnal argument which is the name of
|
difference is that it has an optional argument which is the name of
|
||||||
the output graph file.
|
the output graph file.
|
||||||
@end table
|
@end table
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user