diff --git a/TODO b/TODO index 37a80460..4da4100d 100644 --- a/TODO +++ b/TODO @@ -6,7 +6,7 @@ should not have to dispatch to several APIs. ** Documentation - yyexpected_tokens in all the languages. - YYENOMEM -- YYERRCODE? +- YYERRCODE, YYUNDEF, YYEOF - i18n in Java - symbol.type_get should be kind_get, and it's not documented. @@ -85,6 +85,20 @@ push parsers on top of pull parser. Which is currently not relevant, since push parsers are measurably slower. * Bison 3.7 +** Counter example generation +See https://github.com/akimd/bison/pull/15. + +** Clean up +Rename user_token_number for tokens as "code". It's not a "user number", +it's the token code, and the user can control it, but this code always +exists. + +Rename endtoken as eoftoken. + +Don't rename in Bison 3.6 (it would be logical to do so) because that +would probably create many conflicts in Vincent's work (see previous point). + +* Bison 3.8 ** Unit rules / Injection rules (Akim Demaille) Maybe we could expand unit rules (or "injections", see https://homepages.cwi.nl/~daybuild/daily-books/syntax/2-sdf/sdf.html), i.e., diff --git a/data/skeletons/bison.m4 b/data/skeletons/bison.m4 index 4739f0fb..c1b04077 100644 --- a/data/skeletons/bison.m4 +++ b/data/skeletons/bison.m4 @@ -537,11 +537,9 @@ m4_define([b4_symbol_map], # Whether NUM denotes a token that has an exported definition (i.e., # shows in enum yytokentype). 
m4_define([b4_token_visible_if], -[m4_case(b4_symbol([$1], [tag]), - [$undefined], [$2], - [b4_symbol_if([$1], [is_token], - [b4_symbol_if([$1], [has_id], [$2], [$3])], - [$3])])]) +[b4_symbol_if([$1], [is_token], + [b4_symbol_if([$1], [has_id], [$2], [$3])], + [$3])]) # b4_token_has_definition(NUM) diff --git a/data/skeletons/c.m4 b/data/skeletons/c.m4 index 8013141d..e517259d 100644 --- a/data/skeletons/c.m4 +++ b/data/skeletons/c.m4 @@ -431,8 +431,11 @@ static const b4_int_type_for([$2]) yy$1[[]] = ## ------------- ## # Because C enums are not scoped, because tokens are exposed in the -# header, and because these tokens are common to all the parser, we +# header, and because these tokens are common to all the parsers, we # need to make sure their names don't collide: use the api.prefix. +# YYEOF is special, since the user may give it a different name. +m4_if(b4_symbol(0, id), [YYEOF], + [m4_define([b4_symbol(0, id)], [b4_api_PREFIX[][EOF]])]) m4_define([b4_symbol(1, id)], [b4_api_PREFIX[][ERRCODE]]) m4_define([b4_symbol(2, id)], [b4_api_PREFIX[][UNDEF]]) diff --git a/data/skeletons/glr.c b/data/skeletons/glr.c index 10ee46c4..1fd042fb 100644 --- a/data/skeletons/glr.c +++ b/data/skeletons/glr.c @@ -429,7 +429,6 @@ int yychar;])[ enum { YYENOMEM = -2 }; -static const int YYEOF = 0; static const int YYEMPTY = -2; typedef enum { yyok, yyaccept, yyabort, yyerr } YYRESULTTAG; @@ -833,9 +832,9 @@ yygetToken (int *yycharp][]b4_pure_if([, yyGLRStack* yystackp])[]b4_user_formals #endif // YY_EXCEPTIONS]], [[ *yycharp = ]b4_lex[;]])[ } - if (*yycharp <= YYEOF) + if (*yycharp <= ]b4_symbol(0, [id])[) { - *yycharp = YYEOF; + *yycharp = ]b4_symbol(0, [id])[; yytoken = ]b4_symbol_prefix[YYEOF; YY_DPRINTF ((stderr, "Now at end of input.\n")); } @@ -2311,7 +2310,7 @@ yyrecoverSyntaxError (yyGLRStack* yystackp]b4_user_formals[) { yysymbol_kind_t yytoken; int yyj; - if (yychar == YYEOF) + if (yychar == ]b4_symbol(0, [id])[) yyFail (yystackp][]b4_lpure_args[, YY_NULLPTR); if 
(yychar != YYEMPTY) {]b4_locations_if([[ @@ -2724,6 +2723,7 @@ m4_if(b4_prefix, [yy], [], #define yynerrs ]b4_prefix[nerrs]b4_locations_if([[ #define yylloc ]b4_prefix[lloc]])])[ -]b4_percent_code_get([[epilogue]])[]dnl +]m4_ifdef([b4_pre_epilogue], [b4_pre_epilogue])[]dnl This is a hack for glr.cc. To remove when we have a better glr.cc. +b4_percent_code_get([[epilogue]])[]dnl b4_epilogue[]dnl b4_output_end diff --git a/data/skeletons/glr.cc b/data/skeletons/glr.cc index 9d3b07a5..c6159c3a 100644 --- a/data/skeletons/glr.cc +++ b/data/skeletons/glr.cc @@ -105,6 +105,12 @@ yyerror (]b4_locations_if([[const ]b4_namespace_ref::b4_parser_class[::location_ ]])[]m4_ifset([b4_parse_param], [b4_formals(b4_parse_param), ])[const char* msg);]])[ +]b4_percent_define_flag_if([[global_tokens_and_yystype]], [], +[m4_define([b4_pre_epilogue], +[[/* The user is using the C++ token type, not the C one. */ +#undef ]b4_symbol(0, [id]) +])])[ + # Hijack the epilogue to define implementations (yyerror, parser member # functions etc.). ]m4_append([b4_epilogue], @@ -329,8 +335,14 @@ b4_percent_code_get([[requires]])[ ]dnl Redirections for glr.c. b4_percent_define_flag_if([[global_tokens_and_yystype]], -[b4_token_defines]) -[ +[b4_token_defines +])[ +]b4_namespace_close[ + +]dnl Map the name used in c.m4 to the one used in c++.m4. 
+[#undef ]b4_symbol(0, [id])[ +#define ]b4_symbol(0, [id])[ ]b4_namespace_ref[::]b4_parser_class[::token::]b4_symbol(0, [id])[ + #ifndef ]b4_api_PREFIX[STYPE # define ]b4_api_PREFIX[STYPE ]b4_namespace_ref[::]b4_parser_class[::semantic_type #endif @@ -338,7 +350,6 @@ b4_percent_define_flag_if([[global_tokens_and_yystype]], # define ]b4_api_PREFIX[LTYPE ]b4_namespace_ref[::]b4_parser_class[::location_type #endif -]b4_namespace_close[ ]m4_define([b4_declare_symbol_enum], [[typedef ]b4_namespace_ref[::]b4_parser_class[::symbol_kind_type yysymbol_kind_t; #define ]b4_symbol_prefix[YYEMPTY ]b4_namespace_ref[::]b4_parser_class[::symbol_kind::]b4_symbol_prefix[YYEMPTY diff --git a/data/skeletons/yacc.c b/data/skeletons/yacc.c index 12bb7fe1..73679084 100644 --- a/data/skeletons/yacc.c +++ b/data/skeletons/yacc.c @@ -716,7 +716,6 @@ enum { YYENOMEM = -2 }; #define yyerrok (yyerrstatus = 0) #define yyclearin (yychar = YYEMPTY) #define YYEMPTY (-2) -#define YYEOF 0 #define YYACCEPT goto yyacceptlab #define YYABORT goto yyabortlab @@ -1760,9 +1759,9 @@ yyread_pushed_token:]])[ yychar = ]b4_lex[;]])[ } - if (yychar <= YYEOF) + if (yychar <= ]b4_symbol(0, [id])[) { - yychar = YYEOF; + yychar = ]b4_symbol(0, [id])[; yytoken = ]b4_symbol_prefix[YYEOF; YYDPRINTF ((stderr, "Now at end of input.\n")); } @@ -1957,10 +1956,10 @@ yyerrlab: /* If just tried and failed to reuse lookahead token after an error, discard it. */ - if (yychar <= YYEOF) + if (yychar <= ]b4_symbol(0, [id])[) { /* Return failure if at end of input. 
*/ - if (yychar == YYEOF) + if (yychar == ]b4_symbol(0, [id])[) YYABORT; } else diff --git a/src/parse-gram.c b/src/parse-gram.c index ed6794b2..d3bb2ea4 100644 --- a/src/parse-gram.c +++ b/src/parse-gram.c @@ -900,7 +900,6 @@ enum { YYENOMEM = -2 }; #define yyerrok (yyerrstatus = 0) #define yyclearin (yychar = YYEMPTY) #define YYEMPTY (-2) -#define YYEOF 0 #define YYACCEPT goto yyacceptlab #define YYABORT goto yyabortlab @@ -1950,9 +1949,9 @@ yybackup: yychar = yylex (&yylval, &yylloc); } - if (yychar <= YYEOF) + if (yychar <= GRAM_EOF) { - yychar = YYEOF; + yychar = GRAM_EOF; yytoken = YYSYMBOL_YYEOF; YYDPRINTF ((stderr, "Now at end of input.\n")); } @@ -2643,10 +2642,10 @@ yyerrlab: /* If just tried and failed to reuse lookahead token after an error, discard it. */ - if (yychar <= YYEOF) + if (yychar <= GRAM_EOF) { /* Return failure if at end of input. */ - if (yychar == YYEOF) + if (yychar == GRAM_EOF) YYABORT; } else diff --git a/src/reader.c b/src/reader.c index c386f433..bffca16d 100644 --- a/src/reader.c +++ b/src/reader.c @@ -778,11 +778,16 @@ check_and_convert_grammar (void) /* If the user did not define her ENDTOKEN, do it now. */ if (!endtoken) { - endtoken = symbol_get ("$end", empty_loc); + endtoken = symbol_get ("YYEOF", empty_loc); endtoken->content->class = token_sym; endtoken->content->number = 0; /* Value specified by POSIX. */ endtoken->content->user_token_number = 0; + { + symbol *alias = symbol_get ("$end", empty_loc); + symbol_class_set (alias, token_sym, empty_loc, false); + symbol_make_alias (endtoken, alias, empty_loc); + } } /* Report any undefined symbols and consider them nonterminals. 
*/ diff --git a/src/symtab.c b/src/symtab.c index fd96b827..b236fc36 100644 --- a/src/symtab.c +++ b/src/symtab.c @@ -70,9 +70,13 @@ bool tag_seen = false; static bool symbol_is_user_defined (symbol *sym) { + /* Token 0 counts as user-defined unless Bison created it itself, in which case its alias is "$end". */ + const bool eof_is_user_defined + = !endtoken->alias || STRNEQ (endtoken->alias->tag, "$end"); return sym->tag[0] != '$' + && (eof_is_user_defined || (sym != endtoken && sym->alias != endtoken)) && sym != errtoken && sym->alias != errtoken - && sym != undeftoken && sym->alias != undeftoken; + && sym != undeftoken && sym->alias != undeftoken; } diff --git a/tests/actions.at b/tests/actions.at index 9fa630df..223d00d2 100644 --- a/tests/actions.at +++ b/tests/actions.at @@ -1303,6 +1303,15 @@ AT_CLEANUP AT_SETUP([Default %printer and %destructor for user-defined end token]) +# Enable declaration of default %printer/%destructor. Make the parser +# use these for all user-declared grammar symbols for which the user +# does not declare a specific %printer/%destructor. Thus, the parser +# uses it for token 0 if the user declares it but not if Bison +# generates it as $end. Discussed starting at +# , +# , and +# . + # AT_TEST(TYPED) # -------------- m4_pushdef([AT_TEST], diff --git a/tests/input.at b/tests/input.at index 6d434350..2250406d 100644 --- a/tests/input.at +++ b/tests/input.at @@ -344,6 +344,7 @@ exp: int main (void) { assert (YYERRCODE == 123); + assert (YYTRANSLATE (YYEOF) == YYSYMBOL_YYEOF); assert (YYTRANSLATE (YYERRCODE) == YYSYMBOL_YYERROR); assert (YYTRANSLATE (YYUNDEF) == YYSYMBOL_YYUNDEF); return 0; @@ -430,7 +431,7 @@ exp:; AT_BISON_CHECK([-Wno-other -S./dump-symbols.m4 input.y]) AT_CHECK([cat symbols.csv], [], [[number, class, tag, id, user_number, type, -0, Token, $end, , 0, , +0, Token, $end, YYEOF, 0, , 1, Token, error, YYERRCODE, 256, , 2, Token, $undefined, YYUNDEF, 257, , 3, Token, 'a', , 97, ,