tokens: properly define the YYEOF token kind

Currently EOF is handled in an ad hoc way, with a #define YYEOF 0 in
the implementation file.  As a result, the user has to define her own
EOF token if she wants to use it, which is a pity.

Give the $end token a visible kind name, YYEOF.  In C, however, enums
are not scoped, so the definitions of YYEOF in the various header
files would collide; in C, name it <api.PREFIX>EOF instead.

* data/skeletons/c.m4 (YYEOF): Override its name to avoid collisions,
unless the user already gave it a different name.
* data/skeletons/glr.c (YYEOF): Remove.
Use ]b4_symbol(0, [id])[ instead.
Add support for "pre_epilogue", for glr.cc.
* data/skeletons/glr.cc: Remove dead code (never emitted #undefs).
* data/skeletons/yacc.c
* src/parse-gram.c
* src/reader.c
* src/symtab.c
* tests/actions.at
* tests/input.at
This commit is contained in:
Akim Demaille
2020-04-10 18:31:07 +02:00
parent 95421df67b
commit e50de09886
11 changed files with 70 additions and 28 deletions

16
TODO
View File

@@ -6,7 +6,7 @@ should not have to dispatch to several APIs.
** Documentation
- yyexpected_tokens in all the languages.
- YYENOMEM
- YYERRCODE?
- YYERRCODE, YYUNDEF, YYEOF
- i18n in Java
- symbol.type_get should be kind_get, and it's not documented.
@@ -85,6 +85,20 @@ push parsers on top of pull parser. Which is currently not relevant, since
push parsers are measurably slower.
* Bison 3.7
** Counter example generation
See https://github.com/akimd/bison/pull/15.
** Clean up
Rename user_token_number for tokens as "code". It's not a "user number",
it's the token code, and the user can control it, but this code always
exists.
Rename endtoken as eoftoken.
Don't rename in Bison 3.6 (it would be logical to do so) because that
would probably create many conflicts in Vincent's work (see previous point).
* Bison 3.8
** Unit rules / Injection rules (Akim Demaille)
Maybe we could expand unit rules (or "injections", see
https://homepages.cwi.nl/~daybuild/daily-books/syntax/2-sdf/sdf.html), i.e.,

View File

@@ -537,11 +537,9 @@ m4_define([b4_symbol_map],
# Whether NUM denotes a token that has an exported definition (i.e.,
# shows in enum yytokentype).
m4_define([b4_token_visible_if],
[m4_case(b4_symbol([$1], [tag]),
[$undefined], [$2],
[b4_symbol_if([$1], [is_token],
[b4_symbol_if([$1], [has_id], [$2], [$3])],
[$3])])])
[b4_symbol_if([$1], [is_token],
[b4_symbol_if([$1], [has_id], [$2], [$3])],
[$3])])
# b4_token_has_definition(NUM)

View File

@@ -431,8 +431,11 @@ static const b4_int_type_for([$2]) yy$1[[]] =
## ------------- ##
# Because C enums are not scoped, because tokens are exposed in the
# header, and because these tokens are common to all the parser, we
# header, and because these tokens are common to all the parsers, we
# need to make sure their names don't collide: use the api.prefix.
# YYEOF is special, since the user may give it a different name.
m4_if(b4_symbol(0, id), [YYEOF],
[m4_define([b4_symbol(0, id)], [b4_api_PREFIX[][EOF]])])
m4_define([b4_symbol(1, id)], [b4_api_PREFIX[][ERRCODE]])
m4_define([b4_symbol(2, id)], [b4_api_PREFIX[][UNDEF]])

View File

@@ -429,7 +429,6 @@ int yychar;])[
enum { YYENOMEM = -2 };
static const int YYEOF = 0;
static const int YYEMPTY = -2;
typedef enum { yyok, yyaccept, yyabort, yyerr } YYRESULTTAG;
@@ -833,9 +832,9 @@ yygetToken (int *yycharp][]b4_pure_if([, yyGLRStack* yystackp])[]b4_user_formals
#endif // YY_EXCEPTIONS]], [[
*yycharp = ]b4_lex[;]])[
}
if (*yycharp <= YYEOF)
if (*yycharp <= ]b4_symbol(0, [id])[)
{
*yycharp = YYEOF;
*yycharp = ]b4_symbol(0, [id])[;
yytoken = ]b4_symbol_prefix[YYEOF;
YY_DPRINTF ((stderr, "Now at end of input.\n"));
}
@@ -2311,7 +2310,7 @@ yyrecoverSyntaxError (yyGLRStack* yystackp]b4_user_formals[)
{
yysymbol_kind_t yytoken;
int yyj;
if (yychar == YYEOF)
if (yychar == ]b4_symbol(0, [id])[)
yyFail (yystackp][]b4_lpure_args[, YY_NULLPTR);
if (yychar != YYEMPTY)
{]b4_locations_if([[
@@ -2724,6 +2723,7 @@ m4_if(b4_prefix, [yy], [],
#define yynerrs ]b4_prefix[nerrs]b4_locations_if([[
#define yylloc ]b4_prefix[lloc]])])[
]b4_percent_code_get([[epilogue]])[]dnl
]m4_ifdef([b4_pre_epilogue], [b4_pre_epilogue])[]dnl This is a hack for glr.cc. To remove when we have a better glr.cc.
b4_percent_code_get([[epilogue]])[]dnl
b4_epilogue[]dnl
b4_output_end

View File

@@ -105,6 +105,12 @@ yyerror (]b4_locations_if([[const ]b4_namespace_ref::b4_parser_class[::location_
]])[]m4_ifset([b4_parse_param], [b4_formals(b4_parse_param),
])[const char* msg);]])[
]b4_percent_define_flag_if([[global_tokens_and_yystype]], [],
[m4_define([b4_pre_epilogue],
[[/* The user is using the C++ token type, not the C one. */
#undef ]b4_symbol(0, [id])
])])[
# Hijack the epilogue to define implementations (yyerror, parser member
# functions etc.).
]m4_append([b4_epilogue],
@@ -329,8 +335,14 @@ b4_percent_code_get([[requires]])[
]dnl Redirections for glr.c.
b4_percent_define_flag_if([[global_tokens_and_yystype]],
[b4_token_defines])
[
[b4_token_defines
])[
]b4_namespace_close[
]dnl Map the name used in c.m4 to the one used in c++.m4.
[#undef ]b4_symbol(0, [id])[
#define ]b4_symbol(0, [id])[ ]b4_namespace_ref[::]b4_parser_class[::token::]b4_symbol(0, [id])[
#ifndef ]b4_api_PREFIX[STYPE
# define ]b4_api_PREFIX[STYPE ]b4_namespace_ref[::]b4_parser_class[::semantic_type
#endif
@@ -338,7 +350,6 @@ b4_percent_define_flag_if([[global_tokens_and_yystype]],
# define ]b4_api_PREFIX[LTYPE ]b4_namespace_ref[::]b4_parser_class[::location_type
#endif
]b4_namespace_close[
]m4_define([b4_declare_symbol_enum],
[[typedef ]b4_namespace_ref[::]b4_parser_class[::symbol_kind_type yysymbol_kind_t;
#define ]b4_symbol_prefix[YYEMPTY ]b4_namespace_ref[::]b4_parser_class[::symbol_kind::]b4_symbol_prefix[YYEMPTY

View File

@@ -716,7 +716,6 @@ enum { YYENOMEM = -2 };
#define yyerrok (yyerrstatus = 0)
#define yyclearin (yychar = YYEMPTY)
#define YYEMPTY (-2)
#define YYEOF 0
#define YYACCEPT goto yyacceptlab
#define YYABORT goto yyabortlab
@@ -1760,9 +1759,9 @@ yyread_pushed_token:]])[
yychar = ]b4_lex[;]])[
}
if (yychar <= YYEOF)
if (yychar <= ]b4_symbol(0, [id])[)
{
yychar = YYEOF;
yychar = ]b4_symbol(0, [id])[;
yytoken = ]b4_symbol_prefix[YYEOF;
YYDPRINTF ((stderr, "Now at end of input.\n"));
}
@@ -1957,10 +1956,10 @@ yyerrlab:
/* If just tried and failed to reuse lookahead token after an
error, discard it. */
if (yychar <= YYEOF)
if (yychar <= ]b4_symbol(0, [id])[)
{
/* Return failure if at end of input. */
if (yychar == YYEOF)
if (yychar == ]b4_symbol(0, [id])[)
YYABORT;
}
else

View File

@@ -900,7 +900,6 @@ enum { YYENOMEM = -2 };
#define yyerrok (yyerrstatus = 0)
#define yyclearin (yychar = YYEMPTY)
#define YYEMPTY (-2)
#define YYEOF 0
#define YYACCEPT goto yyacceptlab
#define YYABORT goto yyabortlab
@@ -1950,9 +1949,9 @@ yybackup:
yychar = yylex (&yylval, &yylloc);
}
if (yychar <= YYEOF)
if (yychar <= GRAM_EOF)
{
yychar = YYEOF;
yychar = GRAM_EOF;
yytoken = YYSYMBOL_YYEOF;
YYDPRINTF ((stderr, "Now at end of input.\n"));
}
@@ -2643,10 +2642,10 @@ yyerrlab:
/* If just tried and failed to reuse lookahead token after an
error, discard it. */
if (yychar <= YYEOF)
if (yychar <= GRAM_EOF)
{
/* Return failure if at end of input. */
if (yychar == YYEOF)
if (yychar == GRAM_EOF)
YYABORT;
}
else

View File

@@ -778,11 +778,16 @@ check_and_convert_grammar (void)
/* If the user did not define her ENDTOKEN, do it now. */
if (!endtoken)
{
endtoken = symbol_get ("$end", empty_loc);
endtoken = symbol_get ("YYEOF", empty_loc);
endtoken->content->class = token_sym;
endtoken->content->number = 0;
/* Value specified by POSIX. */
endtoken->content->user_token_number = 0;
{
symbol *alias = symbol_get ("$end", empty_loc);
symbol_class_set (alias, token_sym, empty_loc, false);
symbol_make_alias (endtoken, alias, empty_loc);
}
}
/* Report any undefined symbols and consider them nonterminals. */

View File

@@ -70,9 +70,12 @@ bool tag_seen = false;
static bool
symbol_is_user_defined (symbol *sym)
{
const bool eof_is_user_defined
= !endtoken->alias || STRNEQ (endtoken->alias->tag, "$end");
return sym->tag[0] != '$'
&& (eof_is_user_defined || (sym != endtoken && sym->alias != errtoken))
&& sym != errtoken && sym->alias != errtoken
&& sym != undeftoken && sym->alias != undeftoken;
&& sym != undeftoken && sym->alias != undeftoken;
}

View File

@@ -1303,6 +1303,15 @@ AT_CLEANUP
AT_SETUP([Default %printer and %destructor for user-defined end token])
# Enable declaration of default %printer/%destructor. Make the parser
# use these for all user-declared grammar symbols for which the user
# does not declare a specific %printer/%destructor. Thus, the parser
# uses it for token 0 if the user declares it but not if Bison
# generates it as $end. Discussed starting at
# <http://lists.gnu.org/r/bison-patches/2006-02/msg00064.html>,
# <http://lists.gnu.org/r/bison-patches/2006-06/msg00091.html>, and
# <http://lists.gnu.org/r/bison-patches/2006-07/msg00019.html>.
# AT_TEST(TYPED)
# --------------
m4_pushdef([AT_TEST],

View File

@@ -344,6 +344,7 @@ exp:
int main (void)
{
assert (YYERRCODE == 123);
assert (YYTRANSLATE (YYEOF) == YYSYMBOL_YYEOF);
assert (YYTRANSLATE (YYERRCODE) == YYSYMBOL_YYERROR);
assert (YYTRANSLATE (YYUNDEF) == YYSYMBOL_YYUNDEF);
return 0;
@@ -430,7 +431,7 @@ exp:;
AT_BISON_CHECK([-Wno-other -S./dump-symbols.m4 input.y])
AT_CHECK([cat symbols.csv], [],
[[number, class, tag, id, user_number, type,
0, Token, $end, , 0, ,
0, Token, $end, YYEOF, 0, ,
1, Token, error, YYERRCODE, 256, ,
2, Token, $undefined, YYUNDEF, 257, ,
3, Token, 'a', , 97, ,