doc: promote yytoken_kind_t, not yytokentype

* data/skeletons/c.m4 (yytoken_kind_t): New.
* data/skeletons/c++.m4, data/skeletons/lalr1.cc (token_kind_type):
New.
* examples/c/lexcalc/parse.y, examples/c/reccalc/parse.y,
* tests/regression.at:
Use them.
* doc/bison.texi: Replace "enum yytokentype" by "yytoken_kind_t".
(api.token.raw): Explain that it forces "yytoken_kind_t" to coincide
with "yysymbol_kind_t".
(Calling Convention): Mention YYEOF.
(Table of Symbols): Add entries for "yytoken_kind_t" and
"yysymbol_kind_t".
(Glossary): Add entries for "Kind", "Token kind" and "Symbol kind".
This commit is contained in:
Akim Demaille
2020-04-12 18:03:37 +02:00
parent c973361138
commit 7a226860ef
10 changed files with 110 additions and 76 deletions

12
TODO
View File

@@ -1,7 +1,6 @@
* Bison 3.6
** Documentation
- yyexpected_tokens/expected_tokens/expectedTokens in all the languages.
- YYENOMEM
- YYERRCODE, YYUNDEF, YYEOF
- i18n in Java
- symbol.type_get should be kind_get, and it's not documented.
@@ -9,20 +8,9 @@
- YYERRCODE and "end of file" and translation
** User token number, internal symbol number, external token number, etc.
There is some confusion over these terms, which is even a problem for
translators. We need something clear, especially if we provide access to
the symbol numbers (which would be useful for custom error messages).
We could use "number" and "code".
Update: the current best options would be "token kind" and "symbol kind",
instead of "token type" and "symbol type".
*** yytokentype
Make an alias so that it is about "kind", not "type".
*** The documentation
You can explicitly specify the numeric code for a token type...
The token numbered as 0.

View File

@@ -534,8 +534,8 @@ m4_define([b4_symbol_map],
# b4_token_visible_if(NUM, IF-TRUE, IF-FALSE)
# -------------------------------------------
# Whether NUM denotes a token that has an exported definition (i.e.,
# shows in enum yytokentype).
# Whether NUM denotes a token kind that has an exported definition
# (i.e., shows in enum yytokentype).
m4_define([b4_token_visible_if],
[b4_symbol_if([$1], [is_token],
[b4_symbol_if([$1], [has_id], [$2], [$3])],

View File

@@ -169,7 +169,7 @@ m4_bpatsubst(m4_dquote(m4_bpatsubst(m4_dquote(b4_namespace_ref[ ]),
# b4_token_enums
# --------------
# Output the definition of the tokens as enums.
# Output the definition of the token kinds.
m4_define([b4_token_enums],
[[enum yytokentype
{
@@ -260,8 +260,11 @@ m4_define([b4_public_types_declare],
]b4_token_enums[
};
/// (External) token kind, as returned by yylex.
typedef token::yytokentype token_type;
/// Token kind, as returned by yylex.
typedef token::yytokentype token_kind_type;
/// Backward compatibility alias.
typedef token_kind_type token_type;
/// Symbol kinds.
struct symbol_kind
@@ -385,7 +388,7 @@ m4_define([b4_symbol_type_define],
by_type (const by_type& that);
/// The symbol type as needed by the constructor.
typedef token_type kind_type;
typedef token_kind_type kind_type;
/// Constructor from (external) token numbers.
by_type (kind_type t);
@@ -493,7 +496,7 @@ m4_define([b4_public_types_define],
: type (that.type)
{}
]b4_inline([$1])b4_parser_class[::by_type::by_type (token_type t)
]b4_inline([$1])b4_parser_class[::by_type::by_type (token_kind_type t)
: type (yytranslate_ (t))
{}

View File

@@ -449,7 +449,7 @@ m4_define([b4_token_define],
# ----------------
# Output the definition of the tokens.
m4_define([b4_token_defines],
[b4_any_token_visible_if([/* Tokens. */
[b4_any_token_visible_if([/* Token kinds. */
m4_join([
], b4_symbol_map([b4_token_define]))
])])
@@ -470,15 +470,16 @@ m4_define([b4_token_enum],
# b4_token_enums
# --------------
# The definition of the tokens (if there are) as enums.
# The definition of the token kinds.
m4_define([b4_token_enums],
[b4_any_token_visible_if([[/* Token type. */
[b4_any_token_visible_if([[/* Token kinds. */
#ifndef ]b4_api_PREFIX[TOKENTYPE
# define ]b4_api_PREFIX[TOKENTYPE
enum ]b4_api_prefix[tokentype
{
]b4_symbol_foreach([b4_token_enum])dnl
[ };
typedef enum ]b4_api_prefix[tokentype ]b4_api_prefix[token_kind_t;
#endif
]])])

View File

@@ -302,7 +302,7 @@ m4_define([b4_shared_declarations],
static const ]b4_int_type(b4_table_ninf, b4_table_ninf)[ yytable_ninf_;
/// Convert a scanner token kind \a t to a symbol kind.
/// In theory \a t should be a token_type, but character literals
/// In theory \a t should be a token_kind_type, but character literals
/// are valid, yet not members of the token_kind_type enum.
static symbol_kind_type yytranslate_ (int t);
]b4_parse_error_bmatch([custom\|detailed], [[

View File

@@ -2940,7 +2940,7 @@ declaration.
@group
%@{
static void print_token (enum yytokentype token, YYSTYPE val);
static void print_token (yytoken_kind_t token, YYSTYPE val);
%@}
@end group
@@ -2989,7 +2989,7 @@ Look again at the example of the previous section:
@group
%@{
static void print_token (enum yytokentype token, YYSTYPE val);
static void print_token (yytoken_kind_t token, YYSTYPE val);
%@}
@end group
@@ -3004,16 +3004,16 @@ override Bison's default definition for @code{YYLTYPE}, in which
write it in the first since Bison will insert that code into the parser
implementation file @emph{before} the default @code{YYLTYPE} definition. In
which @var{Prologue} section should you prototype an internal function,
@code{trace_token}, that accepts @code{YYLTYPE} and @code{yytokentype} as
@code{trace_token}, that accepts @code{YYLTYPE} and @code{yytoken_kind_t} as
arguments? You should prototype it in the second since Bison will insert
that code @emph{after} the @code{YYLTYPE} and @code{yytokentype}
that code @emph{after} the @code{YYLTYPE} and @code{yytoken_kind_t}
definitions.
This distinction in functionality between the two @var{Prologue} sections is
established by the appearance of the @code{%union} between them. This
behavior raises a few questions. First, why should the position of a
@code{%union} affect definitions related to @code{YYLTYPE} and
@code{yytokentype}? Second, what if there is no @code{%union}? In that
@code{yytoken_kind_t}? Second, what if there is no @code{%union}? In that
case, the second kind of @var{Prologue} section is not available. This
behavior is not intuitive.
@@ -3051,8 +3051,8 @@ the same time:
@group
%code @{
static void print_token (enum yytokentype token, YYSTYPE val);
static void trace_token (enum yytokentype token, YYLTYPE loc);
static void print_token (yytoken_kind_t token, YYSTYPE val);
static void trace_token (yytoken_kind_t token, YYLTYPE loc);
@}
@end group
@@ -3116,8 +3116,8 @@ Thus, they belong in one or more @code{%code requires}:
@group
%code @{
static void print_token (enum yytokentype token, YYSTYPE val);
static void trace_token (enum yytokentype token, YYLTYPE loc);
static void print_token (yytoken_kind_t token, YYSTYPE val);
static void trace_token (yytoken_kind_t token, YYLTYPE loc);
@}
@end group
@@ -3149,7 +3149,7 @@ might wish for Bison to insert the prototype into both the parser header
file and the parser implementation file. Since this function is not a
dependency required by @code{YYSTYPE} or @code{YYLTYPE}, it doesn't make
sense to move its prototype to a @code{%code requires}. More importantly,
since it depends upon @code{YYLTYPE} and @code{yytokentype}, @code{%code
since it depends upon @code{YYLTYPE} and @code{yytoken_kind_t}, @code{%code
requires} is not sufficient. Instead, move its prototype from the
unqualified @code{%code} to a @code{%code provides}:
@@ -3189,7 +3189,7 @@ unqualified @code{%code} to a @code{%code provides}:
@group
%code provides @{
void trace_token (enum yytokentype token, YYLTYPE loc);
void trace_token (yytoken_kind_t token, YYLTYPE loc);
@}
@end group
@@ -3205,7 +3205,7 @@ unqualified @code{%code} to a @code{%code provides}:
@noindent
Bison will insert the @code{trace_token} prototype into both the parser
header file and the parser implementation file after the definitions for
@code{yytokentype}, @code{YYLTYPE}, and @code{YYSTYPE}.
@code{yytoken_kind_t}, @code{YYLTYPE}, and @code{YYSTYPE}.
The above examples are careful to write directives in an order that reflects
the layout of the generated parser implementation and header files:
@@ -5755,7 +5755,7 @@ so on.
Contrary to defining @code{api.prefix}, some symbols are @emph{not} renamed
by @code{%name-prefix}, for instance @code{YYDEBUG}, @code{YYTOKENTYPE},
@code{yytokentype}, @code{YYSTYPE}, @code{YYLTYPE}.
@code{yytoken_kind_t}, @code{YYSTYPE}, @code{YYLTYPE}.
@end deffn
@ifset defaultprec
@@ -6296,18 +6296,19 @@ introduced in Bison 3.0.
all
@item Purpose:
The output files normally define the tokens with Yacc-compatible token
numbers: sequential numbers starting at 257 except for single character
tokens which stand for themselves (e.g., in ASCII, @samp{'a'} is numbered
65). The parser however uses symbol numbers assigned sequentially starting
at 3. Therefore each time the scanner returns an (external) token number,
it must be mapped to the (internal) symbol number.
The output files normally define the enumeration of the @emph{token kinds}
with Yacc-compatible token codes: sequential numbers starting at 257 except
for single character tokens which stand for themselves (e.g., in ASCII,
@samp{'a'} is numbered 65). The parser however uses @emph{symbol kinds}
which are assigned numbers sequentially starting at 0. Therefore each time
the scanner returns an (external) token kind, it must be mapped to the
(internal) symbol kind.
When @code{api.token.raw} is set, tokens are assigned their internal number,
which saves one table lookup per token to map them from the external to the
internal number, and also saves the generation of the mapping table. The
gain is typically moderate, but in extreme cases (very simple user actions),
a 10% improvement can be observed.
When @code{api.token.raw} is set, the codes of the token kinds are forced to
coincide with the symbol kinds. This saves one table lookup per token to map
them from the token kind to the symbol kind, and also saves the generation
of the mapping table. The gain is typically moderate, but in extreme cases
(very simple user actions), a 10% improvement can be observed.
When @code{api.token.raw} is set, the grammar cannot use character literals
(such as @samp{'a'}).
@@ -7138,13 +7139,14 @@ that need it. @xref{Invocation}.
@subsection Calling Convention for @code{yylex}
The value that @code{yylex} returns must be the positive numeric code for
the type of token it has just found; a zero or negative value signifies
the kind of token it has just found; a zero or negative value signifies
end-of-input.
When a token is referred to in the grammar rules by a name, that name in the
parser implementation file becomes a C macro whose definition is the proper
numeric code for that token kind. So @code{yylex} can use the name to
indicate that type. @xref{Symbols}.
When a token kind is referred to in the grammar rules by a name, that name
in the parser implementation file becomes an enumerator of the enum
@code{yytoken_kind_t} whose definition is the proper numeric code for that
token kind. So @code{yylex} should use the name to indicate that kind.
@xref{Symbols}.
When a token is referred to in the grammar rules by a character literal, the
numeric code for that character is also the code for the token kind. So
@@ -7160,12 +7162,13 @@ yylex (void)
@{
@dots{}
if (c == EOF) /* Detect end-of-input. */
return 0;
return YYEOF;
@dots{}
if (c == '+' || c == '-')
else if (c == '+' || c == '-')
return c; /* Assume token kind for '+' is '+'. */
@dots{}
return INT; /* Return the type of the token. */
else
return INT; /* Return the kind of the token. */
@dots{}
@}
@end example
@@ -7207,10 +7210,9 @@ The @code{yytname} table is generated only if you use the
@vindex yylval
In an ordinary (nonreentrant) parser, the semantic value of the token must
be stored into the global variable @code{yylval}. When you are using
just one data type for semantic values, @code{yylval} has that type.
Thus, if the type is @code{int} (the default), you might write this in
@code{yylex}:
be stored into the global variable @code{yylval}. When you are using just
one data type for semantic values, @code{yylval} has that type. Thus, if
the type is @code{int} (the default), you might write this in @code{yylex}:
@example
@group
@@ -10503,17 +10505,16 @@ calculator (@pxref{Mfcalc Declarations}):
@dots{} %% @dots{} %% @dots{}
static void
print_token_value (FILE *file, int type, YYSTYPE value)
print_token_value (FILE *file, yytoken_kind_t kind, YYSTYPE value)
@{
if (type == VAR)
if (kind == VAR)
fprintf (file, "%s", value.tptr->name);
else if (type == NUM)
else if (kind == NUM)
fprintf (file, "%d", value.val);
@}
@end example
@xref{Mfcalc Traces}, for the
proper use of @code{%printer}.
@xref{Mfcalc Traces}, for the proper use of @code{%printer}.
@c ================================================= Invoking Bison
@@ -11545,8 +11546,8 @@ Values}.
@end defcv
@defcv {Type} {parser} {token}
A structure that contains (only) the @code{yytokentype} enumeration, which
defines the tokens. To refer to the token @code{FOO}, use
A structure that contains (only) the @code{yytoken_kind_t} enumeration,
which defines the tokens. To refer to the token @code{FOO}, use
@code{yy::parser::token::FOO}. The scanner can use @samp{typedef
yy::parser::token token;} to ``import'' the token enumeration (@pxref{Calc++
Scanner}).
@@ -12005,7 +12006,7 @@ The generated parser expects @code{yylex} to have the following prototype.
@deftypefun {int} yylex (@code{semantic_type*} @var{yylval}, @code{location_type*} @var{yylloc}, @var{type1} @var{arg1}, @dots{})
@deftypefunx {int} yylex (@code{semantic_type*} @var{yylval}, @var{type1} @var{arg1}, @dots{})
Return the next token. Its type is the return value, its semantic value and
Return the next token. Its kind is the return value, its semantic value and
location (if enabled) being @var{yylval} and @var{yylloc}. Invocations of
@samp{%lex-param @{@var{type1} @var{arg1}@}} yield additional arguments.
@end deftypefun
@@ -14580,6 +14581,22 @@ Data type of semantic values; @code{int} by default.
@xref{Value Type}.
@end deffn
@deffn {Type} yysymbol_kind_t
An enum that includes all the symbols, tokens and nonterminals, of the
grammar. @xref{Syntax Error Reporting Function}. The symbol kinds are used
internally by the parser, and should not be confused with the token kinds:
the symbol kind of a terminal symbol is not equal to its token kind! (Unless
@samp{%define api.token.raw} was used).
@end deffn
@deffn {Type} yytoken_kind_t
An enum that includes all the @dfn{token kinds} declared with
@code{%token} (@pxref{Token Decl}). These are the return values for
@code{yylex}. They should not be confused with the @emph{symbol kinds},
used internally by the parser.
@end deffn
@node Glossary
@appendix Glossary
@cindex glossary
@@ -14662,6 +14679,21 @@ performs some operation.
@item Input stream
A continuous flow of data between devices or programs.
@item Kind
``Token'' and ``symbol'' are each overloaded to mean either a grammar symbol
(kind) or all parse info (kind, value, location) associated with occurrences
of that grammar symbol from the input. To disambiguate, we use ``token
kind'' and ``symbol kind'' to mean both grammar symbols and the types that
represent them in a base programming language (C, C++, etc.). However, we
use ``token'' and ``symbol'' without the word ``kind'' to mean parsed
occurrences, and we append the word ``type'' to refer to the types that
represent them in a base programming language.
In summary: When you see ``kind'', interpret ``symbol'' or ``token'' to mean
a @emph{grammar symbol}. When you don't see ``kind'' (including when you
see ``type''), interpret ``symbol'' or ``token'' to mean a @emph{parsed
symbol}.
@item LAC (Lookahead Correction)
A parsing mechanism that fixes the problem of delayed syntax error
detection, which is caused by LR state merging, default reductions, and the
@@ -14761,6 +14793,10 @@ the language being parsed. The start symbol is usually listed as the
first nonterminal symbol in a language specification.
@xref{Start Decl}.
@item Symbol kind
A finite enumeration of all the possible grammar symbols, as processed by
the parser. @xref{Symbols}.
@item Symbol table
A data structure where symbol names and associated data are stored
during parsing to allow for recognition and use of existing
@@ -14770,16 +14806,20 @@ information in repeated uses of a symbol. @xref{Multi-function Calc}.
An error encountered during parsing of an input stream due to invalid
syntax. @xref{Error Recovery}.
@item Terminal symbol
A grammar symbol that has no rules in the grammar and therefore is
grammatically indivisible. The piece of text it represents is a token.
@xref{Language and Grammar}.
@item Token
A basic, grammatically indivisible unit of a language. The symbol
that describes a token in the grammar is a terminal symbol.
The input of the Bison parser is a stream of tokens which comes from
the lexical analyzer. @xref{Symbols}.
@item Terminal symbol
A grammar symbol that has no rules in the grammar and therefore is
grammatically indivisible. The piece of text it represents is a token.
@xref{Language and Grammar}.
@item Token kind
A finite enumeration of all the possible grammar terminals, as discriminated
by the scanner. @xref{Symbols}.
@item Unreachable state
A parser state to which there does not exist a sequence of transitions from

View File

@@ -6,7 +6,7 @@
{
// Tell Flex the expected prototype of yylex.
#define YY_DECL \
enum yytokentype yylex (YYSTYPE* yylval, YYLTYPE *yylloc, int *nerrs)
yytoken_kind_t yylex (YYSTYPE* yylval, YYLTYPE *yylloc, int *nerrs)
YY_DECL;
void yyerror (YYLTYPE *loc, int *nerrs, const char *msg);

View File

@@ -26,7 +26,7 @@
// Tell Flex the expected prototype of yylex.
// The scanner argument must be named yyscanner.
#define YY_DECL \
enum yytokentype yylex (YYSTYPE* yylval, yyscan_t yyscanner, result *res)
yytoken_kind_t yylex (YYSTYPE* yylval, yyscan_t yyscanner, result *res)
YY_DECL;
void yyerror (yyscan_t scanner, result *res, const char *msg, ...);

View File

@@ -72,7 +72,7 @@ extern int gram_debug;
} value_type;
/* Token type. */
/* Token kinds. */
#ifndef GRAM_TOKENTYPE
# define GRAM_TOKENTYPE
enum gram_tokentype
@@ -139,6 +139,7 @@ extern int gram_debug;
PERCENT_UNION = 59, /* "%union" */
PERCENT_EMPTY = 60 /* "%empty" */
};
typedef enum gram_tokentype gram_token_kind_t;
#endif
/* Value type. */

View File

@@ -146,8 +146,9 @@ void print_my_token (void);
void
print_my_token (void)
{
enum yytokentype my_token = MY_TOKEN;
printf ("%d\n", my_token);
enum yytokentype tok1 = MY_TOKEN;
yytoken_kind_t tok2 = MY_TOKEN;
printf ("%d, %d\n", tok1, tok2);
}
%}
%token MY_TOKEN