doc: promote yytoken_kind_t, not yytokentype

* data/skeletons/c.m4 (yytoken_kind_t): New. * data/skeletons/c++.m4, data/skeletons/lalr1.cc (yysymbol_kind_type): New. * examples/c/lexcalc/parse.y, examples/c/reccalc/parse.y, * tests/regression.at: Use them. * doc/bison.texi: Replace "enum yytokentype" by "yytoken_kind_t". (api.token.raw): Explain that it forces "yytoken_kind_t" to coincide with "yysymbol_kind_t". (Calling Convention): Mention YYEOF. (Table of Symbols): Add entries for "yytoken_kind_t" and "yysymbol_kind_t". (Glossary): Add entries for "Kind", "Token kind" and "Symbol kind".
2026-03-20 09:43:03 +00:00 · 2020-04-12 18:03:37 +02:00
parent c973361138
commit 7a226860ef
10 changed files with 110 additions and 76 deletions
--- a/12
+++ b/12
@@ -1,7 +1,6 @@
 * Bison 3.6
 ** Documentation
 - yyexpected_tokens/expected_tokens/expectedTokens in all the languages.
 - YYENOMEM
 - YYERRCODE, YYUNDEF, YYEOF
 - i18n in Java
 - symbol.type_get should be kind_get, and it's not documented.
@@ -9,20 +8,9 @@
 - YYERRCODE and "end of file" and translation
 ** User token number, internal symbol number, external token number, etc.
 There is some confusion over these terms, which is even a problem for
 translators.  We need something clear, especially if we provide access to
 the symbol numbers (which would be useful for custom error messages).
 We could use "number" and "code".
 Update: the current best options would be "token kind" and "symbol kind",
 instead of "token type" and "symbol type".
 *** yytokentype
 Make an alias so that it is about "kind", not "type".
 *** The documentation
 You can explicitly specify the numeric code for a token type...
 The token numbered as 0.
--- a/data/skeletons/bison.m4
+++ b/data/skeletons/bison.m4
@@ -534,8 +534,8 @@ m4_define([b4_symbol_map],
 # b4_token_visible_if(NUM, IF-TRUE, IF-FALSE)
 # -------------------------------------------
-# Whether NUM denotes a token that has an exported definition (i.e.,
+# Whether NUM denotes a token kind that has an exported definition
-# shows in enum yytokentype).
+# (i.e., shows in enum yytokentype).
 m4_define([b4_token_visible_if],
 [b4_symbol_if([$1], [is_token],
              [b4_symbol_if([$1], [has_id], [$2], [$3])],
--- a/data/skeletons/c++.m4
+++ b/data/skeletons/c++.m4
@@ -169,7 +169,7 @@ m4_bpatsubst(m4_dquote(m4_bpatsubst(m4_dquote(b4_namespace_ref[ ]),
 # b4_token_enums
 # --------------
-# Output the definition of the tokens as enums.
+# Output the definition of the token kinds.
 m4_define([b4_token_enums],
 [[enum yytokentype
      {
@@ -260,8 +260,11 @@ m4_define([b4_public_types_declare],
      ]b4_token_enums[
    };
-    /// (External) token kind, as returned by yylex.
+    /// Token kind, as returned by yylex.
-    typedef token::yytokentype token_type;
+    typedef token::yytokentype token_kind_type;
    /// Backward compatibility alias.
    typedef token_kind_type token_type;
    /// Symbol kinds.
    struct symbol_kind
@@ -385,7 +388,7 @@ m4_define([b4_symbol_type_define],
      by_type (const by_type& that);
      /// The symbol type as needed by the constructor.
-      typedef token_type kind_type;
+      typedef token_kind_type kind_type;
      /// Constructor from (external) token numbers.
      by_type (kind_type t);
@@ -493,7 +496,7 @@ m4_define([b4_public_types_define],
    : type (that.type)
  {}
-  ]b4_inline([$1])b4_parser_class[::by_type::by_type (token_type t)
+  ]b4_inline([$1])b4_parser_class[::by_type::by_type (token_kind_type t)
    : type (yytranslate_ (t))
  {}
--- a/data/skeletons/c.m4
+++ b/data/skeletons/c.m4
@@ -449,7 +449,7 @@ m4_define([b4_token_define],
 # ----------------
 # Output the definition of the tokens.
 m4_define([b4_token_defines],
-[b4_any_token_visible_if([/* Tokens.  */
+[b4_any_token_visible_if([/* Token kinds.  */
 m4_join([
 ], b4_symbol_map([b4_token_define]))
 ])])
@@ -470,15 +470,16 @@ m4_define([b4_token_enum],
 # b4_token_enums
 # --------------
-# The definition of the tokens (if there are) as enums.
+# The definition of the token kinds.
 m4_define([b4_token_enums],
-[b4_any_token_visible_if([[/* Token type.  */
+[b4_any_token_visible_if([[/* Token kinds.  */
 #ifndef ]b4_api_PREFIX[TOKENTYPE
 # define ]b4_api_PREFIX[TOKENTYPE
  enum ]b4_api_prefix[tokentype
  {
 ]b4_symbol_foreach([b4_token_enum])dnl
 [  };
  typedef enum ]b4_api_prefix[tokentype ]b4_api_prefix[token_kind_t;
 #endif
 ]])])
--- a/data/skeletons/lalr1.cc
+++ b/data/skeletons/lalr1.cc
@@ -302,7 +302,7 @@ m4_define([b4_shared_declarations],
    static const ]b4_int_type(b4_table_ninf, b4_table_ninf)[ yytable_ninf_;
    /// Convert a scanner token kind \a t to a symbol kind.
-    /// In theory \a t should be a token_type, but character literals
+    /// In theory \a t should be a token_kind_type, but character literals
    /// are valid, yet not members of the token_type enum.
    static symbol_kind_type yytranslate_ (int t);
 ]b4_parse_error_bmatch([custom\|detailed], [[
--- a/doc/bison.texi
+++ b/doc/bison.texi
@@ -2940,7 +2940,7 @@ declaration.
@group
 %@{
-  static void print_token (enum yytokentype token, YYSTYPE val);
+  static void print_token (yytoken_kind_t token, YYSTYPE val);
 %@}
@end group
@@ -2989,7 +2989,7 @@ Look again at the example of the previous section:
@group
 %@{
-  static void print_token (enum yytokentype token, YYSTYPE val);
+  static void print_token (yytoken_kind_t token, YYSTYPE val);
 %@}
@end group
@@ -3004,16 +3004,16 @@ override Bison's default definition for @code{YYLTYPE}, in which
 write it in the first since Bison will insert that code into the parser
 implementation file @emph{before} the default @code{YYLTYPE} definition.  In
 which @var{Prologue} section should you prototype an internal function,
-@code{trace_token}, that accepts @code{YYLTYPE} and @code{yytokentype} as
+@code{trace_token}, that accepts @code{YYLTYPE} and @code{yytoken_kind_t} as
 arguments?  You should prototype it in the second since Bison will insert
-that code @emph{after} the @code{YYLTYPE} and @code{yytokentype}
+that code @emph{after} the @code{YYLTYPE} and @code{yytoken_kind_t}
 definitions.
 This distinction in functionality between the two @var{Prologue} sections is
 established by the appearance of the @code{%union} between them.  This
 behavior raises a few questions.  First, why should the position of a
@code{%union} affect definitions related to @code{YYLTYPE} and
-@code{yytokentype}?  Second, what if there is no @code{%union}?  In that
+@code{yytoken_kind_t}?  Second, what if there is no @code{%union}?  In that
 case, the second kind of @var{Prologue} section is not available.  This
 behavior is not intuitive.
@@ -3051,8 +3051,8 @@ the same time:
@group
 %code @{
-  static void print_token (enum yytokentype token, YYSTYPE val);
+  static void print_token (yytoken_kind_t token, YYSTYPE val);
-  static void trace_token (enum yytokentype token, YYLTYPE loc);
+  static void trace_token (yytoken_kind_t token, YYLTYPE loc);
@}
@end group
@@ -3116,8 +3116,8 @@ Thus, they belong in one or more @code{%code requires}:
@group
 %code @{
-  static void print_token (enum yytokentype token, YYSTYPE val);
+  static void print_token (yytoken_kind_t token, YYSTYPE val);
-  static void trace_token (enum yytokentype token, YYLTYPE loc);
+  static void trace_token (yytoken_kind_t token, YYLTYPE loc);
@}
@end group
@@ -3149,7 +3149,7 @@ might wish for Bison to insert the prototype into both the parser header
 file and the parser implementation file.  Since this function is not a
 dependency required by @code{YYSTYPE} or @code{YYLTYPE}, it doesn't make
 sense to move its prototype to a @code{%code requires}.  More importantly,
-since it depends upon @code{YYLTYPE} and @code{yytokentype}, @code{%code
+since it depends upon @code{YYLTYPE} and @code{yytoken_kind_t}, @code{%code
 requires} is not sufficient.  Instead, move its prototype from the
 unqualified @code{%code} to a @code{%code provides}:
@@ -3189,7 +3189,7 @@ unqualified @code{%code} to a @code{%code provides}:
@group
 %code provides @{
-  void trace_token (enum yytokentype token, YYLTYPE loc);
+  void trace_token (yytoken_kind_t token, YYLTYPE loc);
@}
@end group
@@ -3205,7 +3205,7 @@ unqualified @code{%code} to a @code{%code provides}:
@noindent
 Bison will insert the @code{trace_token} prototype into both the parser
 header file and the parser implementation file after the definitions for
-@code{yytokentype}, @code{YYLTYPE}, and @code{YYSTYPE}.
+@code{yytoken_kind_t}, @code{YYLTYPE}, and @code{YYSTYPE}.
 The above examples are careful to write directives in an order that reflects
 the layout of the generated parser implementation and header files:
@@ -5755,7 +5755,7 @@ so on.
 Contrary to defining @code{api.prefix}, some symbols are @emph{not} renamed
 by @code{%name-prefix}, for instance @code{YYDEBUG}, @code{YYTOKENTYPE},
-@code{yytokentype}, @code{YYSTYPE}, @code{YYLTYPE}.
+@code{yytoken_kind_t}, @code{YYSTYPE}, @code{YYLTYPE}.
@end deffn
@ifset defaultprec
@@ -6296,18 +6296,19 @@ introduced in Bison 3.0.
 all
@item Purpose:
-The output files normally define the tokens with Yacc-compatible token
+The output files normally define the enumeration of the @emph{token kinds}
-numbers: sequential numbers starting at 257 except for single character
+with Yacc-compatible token codes: sequential numbers starting at 257 except
-tokens which stand for themselves (e.g., in ASCII, @samp{'a'} is numbered
+for single character tokens which stand for themselves (e.g., in ASCII,
-65).  The parser however uses symbol numbers assigned sequentially starting
+@samp{'a'} is numbered 65).  The parser however uses @emph{symbol kinds}
-at 3.  Therefore each time the scanner returns an (external) token number,
+which are assigned numbers sequentially starting at 0.  Therefore each time
-it must be mapped to the (internal) symbol number.
+the scanner returns an (external) token kind, it must be mapped to the
 (internal) symbol kind.
-When @code{api.token.raw} is set, tokens are assigned their internal number,
+When @code{api.token.raw} is set, the codes of the token kinds are forced to
-which saves one table lookup per token to map them from the external to the
+coincide with the symbol kind.  This saves one table lookup per token to map
-internal number, and also saves the generation of the mapping table.  The
+them from the token kind to the symbol kind, and also saves the generation
-gain is typically moderate, but in extreme cases (very simple user actions),
+of the mapping table.  The gain is typically moderate, but in extreme cases
-a 10% improvement can be observed.
+(very simple user actions), a 10% improvement can be observed.
 When @code{api.token.raw} is set, the grammar cannot use character literals
 (such as @samp{'a'}).
@@ -7138,13 +7139,14 @@ that need it.  @xref{Invocation}.
@subsection Calling Convention for @code{yylex}
 The value that @code{yylex} returns must be the positive numeric code for
-the type of token it has just found; a zero or negative value signifies
+the kind of token it has just found; a zero or negative value signifies
 end-of-input.
-When a token is referred to in the grammar rules by a name, that name in the
+When a token kind is referred to in the grammar rules by a name, that name
-parser implementation file becomes a C macro whose definition is the proper
+in the parser implementation file becomes an enumerator of the enum
-numeric code for that token kind.  So @code{yylex} can use the name to
+@code{yytoken_kind_t} whose definition is the proper numeric code for that
-indicate that type.  @xref{Symbols}.
+token kind.  So @code{yylex} should use the name to indicate that kind.
@xref{Symbols}.
 When a token is referred to in the grammar rules by a character literal, the
 numeric code for that character is also the code for the token kind.  So
@@ -7160,12 +7162,13 @@ yylex (void)
@{
  @dots{}
  if (c == EOF)    /* Detect end-of-input. */
-    return 0;
+    return YYEOF;
  @dots{}
-  if (c == '+' || c == '-')
+  else if (c == '+' || c == '-')
    return c;      /* Assume token kind for '+' is '+'. */
  @dots{}
-  return INT;      /* Return the type of the token. */
+  else
    return INT;    /* Return the type of the token. */
  @dots{}
@}
@end example
@@ -7207,10 +7210,9 @@ The @code{yytname} table is generated only if you use the
@vindex yylval
 In an ordinary (nonreentrant) parser, the semantic value of the token must
-be stored into the global variable @code{yylval}.  When you are using
+be stored into the global variable @code{yylval}.  When you are using just
-just one data type for semantic values, @code{yylval} has that type.
+one data type for semantic values, @code{yylval} has that type.  Thus, if
-Thus, if the type is @code{int} (the default), you might write this in
+the type is @code{int} (the default), you might write this in @code{yylex}:
@code{yylex}:
@example
@group
@@ -10503,17 +10505,16 @@ calculator (@pxref{Mfcalc Declarations}):
@dots{} %% @dots{} %% @dots{}
 static void
-print_token_value (FILE *file, int type, YYSTYPE value)
+print_token_value (FILE *file, yytoken_kind_t kind, YYSTYPE value)
@{
-  if (type == VAR)
+  if (kind == VAR)
    fprintf (file, "%s", value.tptr->name);
-  else if (type == NUM)
+  else if (kind == NUM)
    fprintf (file, "%d", value.val);
@}
@end example
-@xref{Mfcalc Traces}, for the
+@xref{Mfcalc Traces}, for the proper use of @code{%printer}.
 proper use of @code{%printer}.
@c ================================================= Invoking Bison
@@ -11545,8 +11546,8 @@ Values}.
@end defcv
@defcv {Type} {parser} {token}
-A structure that contains (only) the @code{yytokentype} enumeration, which
+A structure that contains (only) the @code{yytoken_kind_t} enumeration,
-defines the tokens.  To refer to the token @code{FOO}, use
+which defines the tokens.  To refer to the token @code{FOO}, use
@code{yy::parser::token::FOO}.  The scanner can use @samp{typedef
 yy::parser::token token;} to ``import'' the token enumeration (@pxref{Calc++
 Scanner}).
@@ -12005,7 +12006,7 @@ The generated parser expects @code{yylex} to have the following prototype.
@deftypefun {int} yylex (@code{semantic_type*} @var{yylval}, @code{location_type*} @var{yylloc}, @var{type1} @var{arg1}, @dots{})
@deftypefunx {int} yylex (@code{semantic_type*} @var{yylval}, @var{type1} @var{arg1}, @dots{})
-Return the next token.  Its type is the return value, its semantic value and
+Return the next token.  Its kind is the return value, its semantic value and
 location (if enabled) being @var{yylval} and @var{yylloc}.  Invocations of
@samp{%lex-param @{@var{type1} @var{arg1}@}} yield additional arguments.
@end deftypefun
@@ -14580,6 +14581,22 @@ Data type of semantic values; @code{int} by default.
@xref{Value Type}.
@end deffn
@deffn {Type} yysymbol_kind_t
 An enum that includes all the symbols, tokens and nonterminals, of the
 grammar.  @xref{Syntax Error Reporting Function}.  The symbol kinds are used
 internally by the parser, and should not be confused with the token kinds:
 the symbol kind of a terminal symbol is not equal to its token kind! (Unless
@samp{%define api.token.raw} was used).
@end deffn
@deffn {Type} yytoken_kind_t
 An enum that includes all the @dfn{token kinds} declared with
@code{%token} (@pxref{Token Decl}).  These are the return values for
@code{yylex}.  They should not be confused with the @emph{symbol kinds},
 used internally by the parser.
@end deffn
@node Glossary
@appendix Glossary
@cindex glossary
@@ -14662,6 +14679,21 @@ performs some operation.
@item Input stream
 A continuous flow of data between devices or programs.
@item Kind
 ``Token'' and ``symbol'' are each overloaded to mean either a grammar symbol
 (kind) or all parse info (kind, value, location) associated with occurrences
 of that grammar symbol from the input.  To disambiguate, we use ``token
 kind'' and ``symbol kind'' to mean both grammar symbols and the types that
 represent them in a base programming language (C, C++, etc.). However, we
 use ``token'' and ``symbol'' without the word ``kind'' to mean parsed
 occurrences, and we append the word ``type'' to refer to the types that
 represent them in a base programming language.
 In summary: When you see ``kind'', interpret ``symbol'' or ``token'' to mean
 a @emph{grammar symbol}.  When you don't see ``kind'' (including when you
 see ``type''), interpret ``symbol'' or ``token'' to mean a @emph{parsed
 symbol}.
@item LAC (Lookahead Correction)
 A parsing mechanism that fixes the problem of delayed syntax error
 detection, which is caused by LR state merging, default reductions, and the
@@ -14761,6 +14793,10 @@ the language being parsed.  The start symbol is usually listed as the
 first nonterminal symbol in a language specification.
@xref{Start Decl}.
@item Symbol kind
 A finite enumeration of all the possible grammar symbols, as processed by
 the parser.  @xref{Symbols}.
@item Symbol table
 A data structure where symbol names and associated data are stored
 during parsing to allow for recognition and use of existing
@@ -14770,16 +14806,20 @@ information in repeated uses of a symbol.  @xref{Multi-function Calc}.
 An error encountered during parsing of an input stream due to invalid
 syntax.  @xref{Error Recovery}.
@item Terminal symbol
 A grammar symbol that has no rules in the grammar and therefore is
 grammatically indivisible.  The piece of text it represents is a token.
@xref{Language and Grammar}.
@item Token
 A basic, grammatically indivisible unit of a language.  The symbol
 that describes a token in the grammar is a terminal symbol.
 The input of the Bison parser is a stream of tokens which comes from
 the lexical analyzer.  @xref{Symbols}.
-@item Terminal symbol
+@item Token kind
-A grammar symbol that has no rules in the grammar and therefore is
+A finite enumeration of all the possible grammar terminals, as discriminated
-grammatically indivisible.  The piece of text it represents is a token.
+by the scanner.  @xref{Symbols}.
@xref{Language and Grammar}.
@item Unreachable state
 A parser state to which there does not exist a sequence of transitions from
--- a/examples/c/lexcalc/parse.y
+++ b/examples/c/lexcalc/parse.y
@@ -6,7 +6,7 @@
 {
  // Tell Flex the expected prototype of yylex.
 #define YY_DECL                                 \
-  enum yytokentype yylex (YYSTYPE* yylval, YYLTYPE *yylloc, int *nerrs)
+  yytoken_kind_t yylex (YYSTYPE* yylval, YYLTYPE *yylloc, int *nerrs)
  YY_DECL;
  void yyerror (YYLTYPE *loc, int *nerrs, const char *msg);
--- a/examples/c/reccalc/parse.y
+++ b/examples/c/reccalc/parse.y
@@ -26,7 +26,7 @@
  // Tell Flex the expected prototype of yylex.
  // The scanner argument must be named yyscanner.
 #define YY_DECL                                                         \
-  enum yytokentype yylex (YYSTYPE* yylval, yyscan_t yyscanner, result *res)
+  yytoken_kind_t yylex (YYSTYPE* yylval, yyscan_t yyscanner, result *res)
  YY_DECL;
  void yyerror (yyscan_t scanner, result *res, const char *msg, ...);
--- a/src/parse-gram.h
+++ b/src/parse-gram.h
@@ -72,7 +72,7 @@ extern int gram_debug;
  } value_type;
-/* Token type.  */
+/* Token kinds.  */
 #ifndef GRAM_TOKENTYPE
 # define GRAM_TOKENTYPE
  enum gram_tokentype
@@ -139,6 +139,7 @@ extern int gram_debug;
    PERCENT_UNION = 59,            /* "%union"  */
    PERCENT_EMPTY = 60             /* "%empty"  */
  };
  typedef enum gram_tokentype gram_token_kind_t;
 #endif
 /* Value type.  */
--- a/tests/regression.at
+++ b/tests/regression.at
@@ -146,8 +146,9 @@ void print_my_token (void);
 void
 print_my_token (void)
 {
-  enum yytokentype my_token = MY_TOKEN;
+  enum yytokentype tok1 = MY_TOKEN;
-  printf ("%d\n", my_token);
+  yytoken_kind_t   tok2 = MY_TOKEN;
  printf ("%d, %d\n", tok1, tok2);
 }
 %}
 %token MY_TOKEN