mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-28 13:43:02 +00:00
Minor spelling, grammar, and white space fixes.
(Symbols): Mention that any negative value returned from yylex signifies end-of-input. Warn about negative chars. Mention the portable Standard C character set.
This commit is contained in:
@@ -1039,7 +1039,7 @@ calculator. As in C, comments are placed between @samp{/*@dots{}*/}.
|
||||
|
||||
%token NUM
|
||||
|
||||
%% /* Grammar rules and actions follow */
|
||||
%% /* Grammar rules and actions follow. */
|
||||
@end example
|
||||
|
||||
The declarations section (@pxref{Prologue, , The prologue}) contains two
|
||||
@@ -1148,7 +1148,7 @@ more times.
|
||||
|
||||
The parser function @code{yyparse} continues to process input until a
|
||||
grammatical error is seen or the lexical analyzer says there are no more
|
||||
input tokens; we will arrange for the latter to happen at end of file.
|
||||
input tokens; we will arrange for the latter to happen at end-of-input.
|
||||
|
||||
@node Rpcalc Line
|
||||
@subsubsection Explanation of @code{line}
|
||||
@@ -1266,18 +1266,17 @@ for it. (The C data type of @code{yylval} is @code{YYSTYPE}, which was
|
||||
defined at the beginning of the grammar; @pxref{Rpcalc Decls,
|
||||
,Declarations for @code{rpcalc}}.)
|
||||
|
||||
A token type code of zero is returned if the end-of-file is encountered.
|
||||
(Bison recognizes any nonpositive value as indicating the end of the
|
||||
input.)
|
||||
A token type code of zero is returned if the end-of-input is encountered.
|
||||
(Bison recognizes any nonpositive value as indicating end-of-input.)
|
||||
|
||||
Here is the code for the lexical analyzer:
|
||||
|
||||
@example
|
||||
@group
|
||||
/* Lexical analyzer returns a double floating point
|
||||
/* The lexical analyzer returns a double floating point
|
||||
number on the stack and the token NUM, or the numeric code
|
||||
of the character read if not a number. Skips all blanks
|
||||
and tabs, returns 0 for EOF. */
|
||||
of the character read if not a number. It skips all blanks
|
||||
and tabs, and returns 0 for end-of-input. */
|
||||
|
||||
#include <ctype.h>
|
||||
@end group
|
||||
@@ -1288,12 +1287,12 @@ yylex (void)
|
||||
@{
|
||||
int c;
|
||||
|
||||
/* skip white space */
|
||||
/* Skip white space. */
|
||||
while ((c = getchar ()) == ' ' || c == '\t')
|
||||
;
|
||||
@end group
|
||||
@group
|
||||
/* process numbers */
|
||||
/* Process numbers. */
|
||||
if (c == '.' || isdigit (c))
|
||||
@{
|
||||
ungetc (c, stdin);
|
||||
@@ -1302,10 +1301,10 @@ yylex (void)
|
||||
@}
|
||||
@end group
|
||||
@group
|
||||
/* return end-of-file */
|
||||
/* Return end-of-input. */
|
||||
if (c == EOF)
|
||||
return 0;
|
||||
/* return single chars */
|
||||
/* Return a single char. */
|
||||
return c;
|
||||
@}
|
||||
@end group
|
||||
@@ -1345,7 +1344,7 @@ here is the definition we will use:
|
||||
#include <stdio.h>
|
||||
|
||||
void
|
||||
yyerror (const char *s) /* Called by yyparse on error */
|
||||
yyerror (const char *s) /* called by yyparse on error */
|
||||
@{
|
||||
printf ("%s\n", s);
|
||||
@}
|
||||
@@ -1573,7 +1572,7 @@ This example extends the infix notation calculator with location
|
||||
tracking. This feature will be used to improve the error messages. For
|
||||
the sake of clarity, this example is a simple integer calculator, since
|
||||
most of the work needed to use locations will be done in the lexical
|
||||
analyser.
|
||||
analyzer.
|
||||
|
||||
@menu
|
||||
* Decls: Ltcalc Decls. Bison and C declarations for ltcalc.
|
||||
@@ -1681,7 +1680,7 @@ hand.
|
||||
@subsection The @code{ltcalc} Lexical Analyzer.
|
||||
|
||||
Until now, we relied on Bison's defaults to enable location
|
||||
tracking. The next step is to rewrite the lexical analyser, and make it
|
||||
tracking. The next step is to rewrite the lexical analyzer, and make it
|
||||
able to feed the parser with the token locations, as it already does for
|
||||
semantic values.
|
||||
|
||||
@@ -1695,17 +1694,17 @@ yylex (void)
|
||||
@{
|
||||
int c;
|
||||
|
||||
/* skip white space */
|
||||
/* Skip white space. */
|
||||
while ((c = getchar ()) == ' ' || c == '\t')
|
||||
++yylloc.last_column;
|
||||
|
||||
/* step */
|
||||
/* Step. */
|
||||
yylloc.first_line = yylloc.last_line;
|
||||
yylloc.first_column = yylloc.last_column;
|
||||
@end group
|
||||
|
||||
@group
|
||||
/* process numbers */
|
||||
/* Process numbers. */
|
||||
if (isdigit (c))
|
||||
@{
|
||||
yylval = c - '0';
|
||||
@@ -1720,11 +1719,11 @@ yylex (void)
|
||||
@}
|
||||
@end group
|
||||
|
||||
/* return end-of-file */
|
||||
/* Return end-of-input. */
|
||||
if (c == EOF)
|
||||
return 0;
|
||||
|
||||
/* return single chars and update location */
|
||||
/* Return a single char, and update location. */
|
||||
if (c == '\n')
|
||||
@{
|
||||
++yylloc.last_line;
|
||||
@@ -1915,7 +1914,7 @@ provides for either functions or variables to be placed in the table.
|
||||
|
||||
@smallexample
|
||||
@group
|
||||
/* Fonctions type. */
|
||||
/* Function type. */
|
||||
typedef double (*func_t) (double);
|
||||
|
||||
/* Data type for links in the chain of symbols. */
|
||||
@@ -2024,7 +2023,7 @@ putsym (char *sym_name, int sym_type)
|
||||
ptr->name = (char *) malloc (strlen (sym_name) + 1);
|
||||
strcpy (ptr->name,sym_name);
|
||||
ptr->type = sym_type;
|
||||
ptr->value.var = 0; /* set value to 0 even if fctn. */
|
||||
ptr->value.var = 0; /* Set value to 0 even if fctn. */
|
||||
ptr->next = (struct symrec *)sym_table;
|
||||
sym_table = ptr;
|
||||
return ptr;
|
||||
@@ -2117,7 +2116,7 @@ yylex (void)
|
||||
@}
|
||||
@end group
|
||||
@group
|
||||
while (c != EOF && isalnum (c));
|
||||
while (isalnum (c));
|
||||
|
||||
ungetc (c, stdin);
|
||||
symbuf[i] = '\0';
|
||||
@@ -2346,8 +2345,8 @@ your program will confuse other readers.
|
||||
|
||||
All the usual escape sequences used in character literals in C can be
|
||||
used in Bison as well, but you must not use the null character as a
|
||||
character literal because its numeric code, zero, is the code @code{yylex}
|
||||
returns for end-of-input (@pxref{Calling Convention, ,Calling Convention
|
||||
character literal because its numeric code, zero, signifies
|
||||
end-of-input (@pxref{Calling Convention, ,Calling Convention
|
||||
for @code{yylex}}).
|
||||
|
||||
@item
|
||||
@@ -2384,12 +2383,15 @@ How you choose to write a terminal symbol has no effect on its
|
||||
grammatical meaning. That depends only on where it appears in rules and
|
||||
on when the parser function returns that symbol.
|
||||
|
||||
The value returned by @code{yylex} is always one of the terminal symbols
|
||||
(or 0 for end-of-input). Whichever way you write the token type in the
|
||||
grammar rules, you write it the same way in the definition of @code{yylex}.
|
||||
The numeric code for a character token type is simply the numeric code of
|
||||
the character, so @code{yylex} can use the identical character constant to
|
||||
generate the requisite code. Each named token type becomes a C macro in
|
||||
The value returned by @code{yylex} is always one of the terminal
|
||||
symbols, except that a zero or negative value signifies end-of-input.
|
||||
Whichever way you write the token type in the grammar rules, you write
|
||||
it the same way in the definition of @code{yylex}. The numeric code
|
||||
for a character token type is simply the positive numeric code of the
|
||||
character, so @code{yylex} can use the identical value to generate the
|
||||
requisite code, though you may need to convert it to @code{unsigned
|
||||
char} to avoid sign-extension on hosts where @code{char} is signed.
|
||||
Each named token type becomes a C macro in
|
||||
the parser file, so @code{yylex} can use the name to stand for the code.
|
||||
(This is why periods don't make sense in terminal symbols.)
|
||||
@xref{Calling Convention, ,Calling Convention for @code{yylex}}.
|
||||
@@ -2400,15 +2402,23 @@ option when you run Bison, so that it will write these macro definitions
|
||||
into a separate header file @file{@var{name}.tab.h} which you can include
|
||||
in the other source files that need it. @xref{Invocation, ,Invoking Bison}.
|
||||
|
||||
The @code{yylex} function must use the same character set and encoding
|
||||
that was used by Bison. For example, if you run Bison in an
|
||||
If you want to write a grammar that is portable to any Standard C
|
||||
host, you must use only non-null character tokens taken from the basic
|
||||
execution character set of Standard C. This set consists of the ten
|
||||
digits, the 52 lower- and upper-case English letters, and the
|
||||
characters in the following C-language string:
|
||||
|
||||
@example
|
||||
"\a\b\t\n\v\f\r !\"#%&'()*+,-./:;<=>?[\\]^_@{|@}~"
|
||||
@end example
|
||||
|
||||
The @code{yylex} function and Bison must use a consistent character
|
||||
set and encoding for character tokens. For example, if you run Bison in an
|
||||
@sc{ascii} environment, but then compile and run the resulting program
|
||||
in an environment that uses an incompatible character set like
|
||||
@sc{ebcdic}, the resulting program will probably not work because the
|
||||
@sc{ebcdic}, the resulting program may not work because the
|
||||
tables generated by Bison will assume @sc{ascii} numeric values for
|
||||
character tokens. Portable grammars should avoid non-@sc{ascii}
|
||||
character tokens, as implementations in practice often use different
|
||||
and incompatible extensions in this area. However, it is standard
|
||||
character tokens. It is standard
|
||||
practice for software distributions to contain C source files that
|
||||
were generated by Bison in an @sc{ascii} environment, so installers on
|
||||
platforms that are incompatible with @sc{ascii} must rebuild those
|
||||
@@ -2959,7 +2969,7 @@ actually does to implement mid-rule actions.
|
||||
@cindex position, textual
|
||||
|
||||
Though grammar rules and semantic actions are enough to write a fully
|
||||
functional parser, it can be useful to process some additionnal informations,
|
||||
functional parser, it can be useful to process some additional information,
|
||||
especially symbol locations.
|
||||
|
||||
@c (terminal or not) ?
|
||||
@@ -3722,8 +3732,9 @@ that need it. @xref{Invocation, ,Invoking Bison}.
|
||||
@node Calling Convention
|
||||
@subsection Calling Convention for @code{yylex}
|
||||
|
||||
The value that @code{yylex} returns must be the numeric code for the type
|
||||
of token it has just found, or 0 for end-of-input.
|
||||
The value that @code{yylex} returns must be the positive numeric code
|
||||
for the type of token it has just found; a zero or negative value
|
||||
signifies end-of-input.
|
||||
|
||||
When a token is referred to in the grammar rules by a name, that name
|
||||
in the parser file becomes a C macro whose definition is the proper
|
||||
@@ -3732,8 +3743,9 @@ to indicate that type. @xref{Symbols}.
|
||||
|
||||
When a token is referred to in the grammar rules by a character literal,
|
||||
the numeric code for that character is also the code for the token type.
|
||||
So @code{yylex} can simply return that character code. The null character
|
||||
must not be used this way, because its code is zero and that is what
|
||||
So @code{yylex} can simply return that character code, possibly converted
|
||||
to @code{unsigned char} to avoid sign-extension. The null character
|
||||
must not be used this way, because its code is zero and that
|
||||
signifies end-of-input.
|
||||
|
||||
Here is an example showing these things:
|
||||
@@ -3743,7 +3755,7 @@ int
|
||||
yylex (void)
|
||||
@{
|
||||
@dots{}
|
||||
if (c == EOF) /* Detect end of file. */
|
||||
if (c == EOF) /* Detect end-of-input. */
|
||||
return 0;
|
||||
@dots{}
|
||||
if (c == '+' || c == '-')
|
||||
@@ -4989,7 +5001,7 @@ error recovery. A simple and useful strategy is simply to skip the rest of
|
||||
the current input line or current statement if an error is detected:
|
||||
|
||||
@example
|
||||
stmnt: error ';' /* on error, skip until ';' is read */
|
||||
stmnt: error ';' /* On error, skip until ';' is read. */
|
||||
@end example
|
||||
|
||||
It is also useful to recover to the matching close-delimiter of an
|
||||
@@ -5783,10 +5795,11 @@ Here @var{infile} is the grammar file name, which usually ends in
|
||||
@samp{.y}. The parser file's name is made by replacing the @samp{.y}
|
||||
with @samp{.tab.c}. Thus, the @samp{bison foo.y} filename yields
|
||||
@file{foo.tab.c}, and the @samp{bison hack/foo.y} filename yields
|
||||
@file{hack/foo.tab.c}. It's is also possible, in case you are writing
|
||||
@file{hack/foo.tab.c}. It's also possible, in case you are writing
|
||||
C++ code instead of C in your grammar file, to name it @file{foo.ypp}
|
||||
or @file{foo.y++}. Then, the output files will take an extention like
|
||||
the given one as input (repectively @file{foo.tab.cpp} and @file{foo.tab.c++}).
|
||||
or @file{foo.y++}. Then, the output files will take an extension like
|
||||
the given one as input (respectively @file{foo.tab.cpp} and
|
||||
@file{foo.tab.c++}).
|
||||
This feature takes effect with all options that manipulate filenames like
|
||||
@samp{-o} or @samp{-d}.
|
||||
|
||||
@@ -5796,7 +5809,7 @@ For example :
|
||||
bison -d @var{infile.yxx}
|
||||
@end example
|
||||
@noindent
|
||||
will produce @file{infile.tab.cxx} and @file{infile.tab.hxx}. and
|
||||
will produce @file{infile.tab.cxx} and @file{infile.tab.hxx}, and
|
||||
|
||||
@example
|
||||
bison -d @var{infile.y} -o @var{output.c++}
|
||||
@@ -5950,8 +5963,8 @@ Bison. If the grammar file is @file{foo.y}, the VCG output file will
|
||||
be @file{foo.vcg}.
|
||||
|
||||
@item --graph=@var{graph-file}
|
||||
The behaviour of @var{--graph} is the same than @samp{-g}. The only
|
||||
difference is that it has an optionnal argument which is the name of
|
||||
The behavior of @var{--graph} is the same as @samp{-g}. The only
|
||||
difference is that it has an optional argument which is the name of
|
||||
the output graph file.
|
||||
@end table
|
||||
|
||||
|
||||
Reference in New Issue
Block a user