tokens: properly define the YYEOF token kind

Currently EOF is handled in an ad hoc way, with a #define YYEOF 0 in the implementation file. As a result, the user has to define her own EOF token if she wants to use it, which is a pity. Give the $end token a visible kind name, YYEOF. Except that in C, where enums are not scoped, we would have collisions between all the definitions of YYEOF in the header files, so in C, make it <api.PREFIX>EOF. * data/skeletons/c.m4 (YYEOF): Override its name to avoid collisions, unless the user already gave it a different name. * data/skeletons/glr.c (YYEOF): Remove. Use ]b4_symbol(0, [id])[ instead. Add support for "pre_epilogue", for glr.cc. * data/skeletons/glr.cc: Remove dead code (never emitted #undefs). * data/skeletons/yacc.c * src/parse-gram.c * src/reader.c * src/symtab.c * tests/actions.at * tests/input.at
2026-06-13 19:22:12 +00:00 · 2020-04-10 18:31:07 +02:00
parent 95421df67b
commit e50de09886
11 changed files with 70 additions and 28 deletions
@@ -6,7 +6,7 @@ should not have to dispatch to several APIs.
 ** Documentation
 - yyexpected_tokens in all the languages.
 - YYENOMEM
- YYERRCODE?
+- YYERRCODE, YYUNDEF, YYEOF
 - i18n in Java
 - symbol.type_get should be kind_get, and it's not documented.

@@ -85,6 +85,20 @@ push parsers on top of pull parser.  Which is currently not relevant, since
 push parsers are measurably slower.

 * Bison 3.7
+** Counter example generation
+See https://github.com/akimd/bison/pull/15.
+
+** Clean up
+Rename user_token_number for tokens as "code".  It's not a "user number",
+it's the token code, and the user can control it, but this code always
+exists.
+
+Rename endtoken as eoftoken.
+
+Don't rename in Bison 3.6 (it would be logical to do so) because that
+would probably create many conflicts in Vincent's work (see previous point).
+
+* Bison 3.8
 ** Unit rules / Injection rules (Akim Demaille)
 Maybe we could expand unit rules (or "injections", see
 https://homepages.cwi.nl/~daybuild/daily-books/syntax/2-sdf/sdf.html), i.e.,
@@ -537,11 +537,9 @@ m4_define([b4_symbol_map],
 # Whether NUM denotes a token that has an exported definition (i.e.,
 # shows in enum yytokentype).
 m4_define([b4_token_visible_if],
-[m4_case(b4_symbol([$1], [tag]),
-         [$undefined], [$2],
-         [b4_symbol_if([$1], [is_token],
-                       [b4_symbol_if([$1], [has_id], [$2], [$3])],
-                       [$3])])])
+[b4_symbol_if([$1], [is_token],
+              [b4_symbol_if([$1], [has_id], [$2], [$3])],
+              [$3])])


 # b4_token_has_definition(NUM)
@@ -431,8 +431,11 @@ static const b4_int_type_for([$2]) yy$1[[]] =
 ## ------------- ##

 # Because C enums are not scoped, because tokens are exposed in the
-# header, and because these tokens are common to all the parser, we
+# header, and because these tokens are common to all the parsers, we
 # need to make sure their names don't collide: use the api.prefix.
+# YYEOF is special, since the user may give it a different name.
+m4_if(b4_symbol(0, id), [YYEOF],
+     [m4_define([b4_symbol(0, id)],  [b4_api_PREFIX[][EOF]])])
 m4_define([b4_symbol(1, id)],  [b4_api_PREFIX[][ERRCODE]])
 m4_define([b4_symbol(2, id)],  [b4_api_PREFIX[][UNDEF]])

@@ -429,7 +429,6 @@ int yychar;])[

 enum { YYENOMEM = -2 };

-static const int YYEOF = 0;
 static const int YYEMPTY = -2;

 typedef enum { yyok, yyaccept, yyabort, yyerr } YYRESULTTAG;
@@ -833,9 +832,9 @@ yygetToken (int *yycharp][]b4_pure_if([, yyGLRStack* yystackp])[]b4_user_formals
 #endif // YY_EXCEPTIONS]], [[
      *yycharp = ]b4_lex[;]])[
    }
-  if (*yycharp <= YYEOF)
+  if (*yycharp <= ]b4_symbol(0, [id])[)
    {
-      *yycharp = YYEOF;
+      *yycharp = ]b4_symbol(0, [id])[;
      yytoken = ]b4_symbol_prefix[YYEOF;
      YY_DPRINTF ((stderr, "Now at end of input.\n"));
    }
@@ -2311,7 +2310,7 @@ yyrecoverSyntaxError (yyGLRStack* yystackp]b4_user_formals[)
      {
        yysymbol_kind_t yytoken;
        int yyj;
-        if (yychar == YYEOF)
+        if (yychar == ]b4_symbol(0, [id])[)
          yyFail (yystackp][]b4_lpure_args[, YY_NULLPTR);
        if (yychar != YYEMPTY)
          {]b4_locations_if([[
@@ -2724,6 +2723,7 @@ m4_if(b4_prefix, [yy], [],
 #define yynerrs ]b4_prefix[nerrs]b4_locations_if([[
 #define yylloc  ]b4_prefix[lloc]])])[

-]b4_percent_code_get([[epilogue]])[]dnl
+]m4_ifdef([b4_pre_epilogue], [b4_pre_epilogue])[]dnl This is a hack for glr.cc.  To remove when we have a better glr.cc.
+b4_percent_code_get([[epilogue]])[]dnl
 b4_epilogue[]dnl
 b4_output_end
@@ -105,6 +105,12 @@ yyerror (]b4_locations_if([[const ]b4_namespace_ref::b4_parser_class[::location_
         ]])[]m4_ifset([b4_parse_param], [b4_formals(b4_parse_param),
         ])[const char* msg);]])[

+]b4_percent_define_flag_if([[global_tokens_and_yystype]], [],
+[m4_define([b4_pre_epilogue],
+[[/* The user is using the C++ token type, not the C one. */
+#undef ]b4_symbol(0, [id])
+])])[
+
 # Hijack the epilogue to define implementations (yyerror, parser member
 # functions etc.).
 ]m4_append([b4_epilogue],
@@ -329,8 +335,14 @@ b4_percent_code_get([[requires]])[

 ]dnl Redirections for glr.c.
 b4_percent_define_flag_if([[global_tokens_and_yystype]],
-[b4_token_defines])
-[
+[b4_token_defines
+])[
+]b4_namespace_close[
+
+]dnl Map the name used in c.m4 to the one used in c++.m4.
+[#undef ]b4_symbol(0, [id])[
+#define ]b4_symbol(0, [id])[ ]b4_namespace_ref[::]b4_parser_class[::token::]b4_symbol(0, [id])[
+
 #ifndef ]b4_api_PREFIX[STYPE
 # define ]b4_api_PREFIX[STYPE ]b4_namespace_ref[::]b4_parser_class[::semantic_type
 #endif
@@ -338,7 +350,6 @@ b4_percent_define_flag_if([[global_tokens_and_yystype]],
 # define ]b4_api_PREFIX[LTYPE ]b4_namespace_ref[::]b4_parser_class[::location_type
 #endif

-]b4_namespace_close[
 ]m4_define([b4_declare_symbol_enum],
 [[typedef ]b4_namespace_ref[::]b4_parser_class[::symbol_kind_type yysymbol_kind_t;
 #define ]b4_symbol_prefix[YYEMPTY ]b4_namespace_ref[::]b4_parser_class[::symbol_kind::]b4_symbol_prefix[YYEMPTY
@@ -716,7 +716,6 @@ enum { YYENOMEM = -2 };
 #define yyerrok         (yyerrstatus = 0)
 #define yyclearin       (yychar = YYEMPTY)
 #define YYEMPTY         (-2)
-#define YYEOF           0

 #define YYACCEPT        goto yyacceptlab
 #define YYABORT         goto yyabortlab
@@ -1760,9 +1759,9 @@ yyread_pushed_token:]])[
      yychar = ]b4_lex[;]])[
    }

-  if (yychar <= YYEOF)
+  if (yychar <= ]b4_symbol(0, [id])[)
    {
-      yychar = YYEOF;
+      yychar = ]b4_symbol(0, [id])[;
      yytoken = ]b4_symbol_prefix[YYEOF;
      YYDPRINTF ((stderr, "Now at end of input.\n"));
    }
@@ -1957,10 +1956,10 @@ yyerrlab:
      /* If just tried and failed to reuse lookahead token after an
         error, discard it.  */

-      if (yychar <= YYEOF)
+      if (yychar <= ]b4_symbol(0, [id])[)
        {
          /* Return failure if at end of input.  */
-          if (yychar == YYEOF)
+          if (yychar == ]b4_symbol(0, [id])[)
            YYABORT;
        }
      else
@@ -900,7 +900,6 @@ enum { YYENOMEM = -2 };
 #define yyerrok         (yyerrstatus = 0)
 #define yyclearin       (yychar = YYEMPTY)
 #define YYEMPTY         (-2)
-#define YYEOF           0

 #define YYACCEPT        goto yyacceptlab
 #define YYABORT         goto yyabortlab
@@ -1950,9 +1949,9 @@ yybackup:
      yychar = yylex (&yylval, &yylloc);
    }

-  if (yychar <= YYEOF)
+  if (yychar <= GRAM_EOF)
    {
-      yychar = YYEOF;
+      yychar = GRAM_EOF;
      yytoken = YYSYMBOL_YYEOF;
      YYDPRINTF ((stderr, "Now at end of input.\n"));
    }
@@ -2643,10 +2642,10 @@ yyerrlab:
      /* If just tried and failed to reuse lookahead token after an
         error, discard it.  */

-      if (yychar <= YYEOF)
+      if (yychar <= GRAM_EOF)
        {
          /* Return failure if at end of input.  */
-          if (yychar == YYEOF)
+          if (yychar == GRAM_EOF)
            YYABORT;
        }
      else
@@ -778,11 +778,16 @@ check_and_convert_grammar (void)
  /* If the user did not define her ENDTOKEN, do it now. */
  if (!endtoken)
    {
-      endtoken = symbol_get ("$end", empty_loc);
+      endtoken = symbol_get ("YYEOF", empty_loc);
      endtoken->content->class = token_sym;
      endtoken->content->number = 0;
      /* Value specified by POSIX.  */
      endtoken->content->user_token_number = 0;
+      {
+        symbol *alias = symbol_get ("$end", empty_loc);
+        symbol_class_set (alias, token_sym, empty_loc, false);
+        symbol_make_alias (endtoken, alias, empty_loc);
+      }
    }

  /* Report any undefined symbols and consider them nonterminals.  */
@@ -70,9 +70,12 @@ bool tag_seen = false;
 static bool
 symbol_is_user_defined (symbol *sym)
 {
+  const bool eof_is_user_defined
+    = !endtoken->alias || STRNEQ (endtoken->alias->tag, "$end");
  return sym->tag[0] != '$'
+    && (eof_is_user_defined || (sym != endtoken && sym->alias != errtoken))
    && sym != errtoken && sym->alias != errtoken
-    && sym != undeftoken  && sym->alias != undeftoken;
+    && sym != undeftoken && sym->alias != undeftoken;
 }


@@ -1303,6 +1303,15 @@ AT_CLEANUP

 AT_SETUP([Default %printer and %destructor for user-defined end token])

+# Enable declaration of default %printer/%destructor.  Make the parser
+# use these for all user-declared grammar symbols for which the user
+# does not declare a specific %printer/%destructor.  Thus, the parser
+# uses it for token 0 if the user declares it but not if Bison
+# generates it as $end.  Discussed starting at
+# <http://lists.gnu.org/r/bison-patches/2006-02/msg00064.html>,
+# <http://lists.gnu.org/r/bison-patches/2006-06/msg00091.html>, and
+# <http://lists.gnu.org/r/bison-patches/2006-07/msg00019.html>.
+
 # AT_TEST(TYPED)
 # --------------
 m4_pushdef([AT_TEST],
@@ -344,6 +344,7 @@ exp:
 int main (void)
 {
  assert (YYERRCODE == 123);
+  assert (YYTRANSLATE (YYEOF) == YYSYMBOL_YYEOF);
  assert (YYTRANSLATE (YYERRCODE) == YYSYMBOL_YYERROR);
  assert (YYTRANSLATE (YYUNDEF) == YYSYMBOL_YYUNDEF);
  return 0;
@@ -430,7 +431,7 @@ exp:;
 AT_BISON_CHECK([-Wno-other -S./dump-symbols.m4 input.y])
 AT_CHECK([cat symbols.csv], [],
 [[number, class, tag, id, user_number, type,
-0, Token, $end, , 0, ,
+0, Token, $end, YYEOF, 0, ,
 1, Token, error, YYERRCODE, 256, ,
 2, Token, $undefined, YYUNDEF, 257, ,
 3, Token, 'a', , 97, ,