From 7e28dbea112a6467eaf69aa72ca9b1881c90ca6d Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Sun, 29 Mar 2020 17:06:16 +0200 Subject: [PATCH] c++: also use symbol_type_type Because of the insane current implementation of glr.cc, things are a bit nasty. We will rename symbol_number_type as symbol_type_type later, to keep this commit small. * data/skeletons/c++.m4 (b4_declare_symbol_enum): New. Also define YYNTOKENS to avoid type clashes when yyntokens_ was actually defined in another enum. Use it. (symbol_number_type): Be an alias of symbol_type_type. Use YYSYMBOL_YYEMPTY and the like. Use symbol_number_type where appropriate. (empty_symbol): Remove. (yytranslate_): Use symbol_number_type, not token_number_type. * data/skeletons/lalr1.cc: Use symbol_number_type where appropriate. Adjust to the replacement of empty_symbol by YYSYMBOL_YYEMPTY. (yy_error_token_, yy_undef_token_, yyeof_, yyntokens_): Remove. Adjust dependencies. * data/skeletons/glr.cc: Use symbol_number_type where appropriate. Forward definitions of YYSYMBOL_YYEMPTY, etc. to glr.c. * tests/headers.at: Accept YYNTOKENS and other YYSYMBOL_*. * tests/local.at (AT_YYERROR_DEFINE(c++)): Use symbol_number_type. --- data/skeletons/c++.m4 | 63 +++++++++++++++++++---------- data/skeletons/glr.cc | 26 ++++++++---- data/skeletons/lalr1.cc | 89 ++++++++++++++++++++--------------------- tests/headers.at | 2 + tests/local.at | 6 +-- 5 files changed, 109 insertions(+), 77 deletions(-) diff --git a/data/skeletons/c++.m4 b/data/skeletons/c++.m4 index c52f034c..625cb8c5 100644 --- a/data/skeletons/c++.m4 +++ b/data/skeletons/c++.m4 @@ -160,6 +160,10 @@ m4_bpatsubst(m4_dquote(m4_bpatsubst(m4_dquote(b4_namespace_ref[ ]), [::\([^][:]\|:[^:]\)*], [} ])[} // ]b4_namespace_ref])]) +## -------------------------- ## +## (External) token numbers. ## +## -------------------------- ## + # b4_token_enums # -------------- # Output the definition of the tokens as enums. 
@@ -174,6 +178,27 @@ m4_define([b4_token_enums], +## --------------------------- ## +## (Internal) symbol numbers. ## +## --------------------------- ## + +# b4_declare_symbol_enum +# ---------------------- +# The definition of the symbol internal numbers as an enum. +# Defining YYEMPTY here is important: it forces the compiler +# to use a signed type, which matters for yytoken. +m4_define([b4_declare_symbol_enum], +[[enum symbol_type_type + { + ]m4_join([, + ], + ]b4_symbol_sid([-2])[ = -2, + b4_symbol_map([b4_symbol_enum]), + [YYNTOKENS = ]b4_tokens_number[ ///< Number of tokens.])[ + }; +]])]) + + ## ----------------- ## ## Semantic Values. ## @@ -231,7 +256,7 @@ m4_define([b4_public_types_declare], location_type location;])[ }; - /// Tokens. + /// Token numbers. struct token { ]b4_token_enums[ @@ -240,14 +265,11 @@ m4_define([b4_public_types_declare], /// (External) token type, as returned by yylex. typedef token::yytokentype token_type; + /// Symbol codes. + ]b4_declare_symbol_enum[ + /// Symbol type: an internal symbol number. - typedef int symbol_number_type; - - /// The symbol type number to denote an empty symbol. - enum { empty_symbol = -2 }; - - /// Internal symbol number for tokens (subsumed by symbol_number_type). - typedef ]b4_int_type_for([b4_translate])[ token_number_type; + typedef symbol_type_type symbol_number_type; ]]) @@ -368,9 +390,8 @@ m4_define([b4_symbol_type_define], symbol_number_type type_get () const YY_NOEXCEPT; /// The symbol type. - /// \a empty_symbol when empty. - /// An int, not token_number_type, to be able to store empty_symbol. - int type; + /// \a YYSYMBOL_YYEMPTY when empty. + symbol_number_type type; }; /// "External" symbols: returned by the scanner. 
@@ -442,7 +463,7 @@ m4_define([b4_public_types_define], bool ]b4_parser_class[::basic_symbol::empty () const YY_NOEXCEPT { - return Base::type_get () == empty_symbol; + return Base::type_get () == YYSYMBOL_YYEMPTY; } template @@ -458,7 +479,7 @@ m4_define([b4_public_types_define], // by_type. ]b4_inline([$1])b4_parser_class[::by_type::by_type () - : type (empty_symbol) + : type (YYSYMBOL_YYEMPTY) {} #if 201103L <= YY_CPLUSPLUS @@ -480,7 +501,7 @@ m4_define([b4_public_types_define], ]b4_inline([$1])[void ]b4_parser_class[::by_type::clear () { - type = empty_symbol; + type = YYSYMBOL_YYEMPTY; } ]b4_inline([$1])[void @@ -490,7 +511,7 @@ m4_define([b4_public_types_define], that.clear (); } - ]b4_inline([$1])[int + ]b4_inline([$1])[]b4_parser_class[::symbol_number_type ]b4_parser_class[::by_type::type_get () const YY_NOEXCEPT { return type; @@ -510,15 +531,15 @@ m4_define([b4_token_constructor_define], []) # Define yytranslate_. Sometimes used in the header file ($1=hh), # sometimes in the cc file. m4_define([b4_yytranslate_define], -[ b4_inline([$1])b4_parser_class[::token_number_type +[ b4_inline([$1])b4_parser_class[::symbol_number_type ]b4_parser_class[::yytranslate_ (int t) { ]b4_api_token_raw_if( -[[ return static_cast (t);]], +[[ return static_cast (t);]], [[ // YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to // TOKEN-NUM as returned by yylex. 
static - const token_number_type + const ]b4_int_type_for([b4_translate])[ translate_table[] = { ]b4_translate[ @@ -526,11 +547,11 @@ m4_define([b4_yytranslate_define], const int user_token_number_max_ = ]b4_user_token_number_max[; if (t <= 0) - return yyeof_; + return YYSYMBOL_YYEOF; else if (t <= user_token_number_max_) - return translate_table[t]; + return YY_CAST (symbol_number_type, translate_table[t]); else - return yy_undef_token_;]])[ + return YYSYMBOL_YYUNDEF;]])[ } ]]) diff --git a/data/skeletons/glr.cc b/data/skeletons/glr.cc index a6b26cc3..2432570f 100644 --- a/data/skeletons/glr.cc +++ b/data/skeletons/glr.cc @@ -66,19 +66,24 @@ m4_defn([b4_parse_param]))], [[b4_namespace_ref::b4_parser_class[& yyparser], [[yyparser]]]]) ]) +# b4_declare_symbol_enum +# ---------------------- +m4_append([b4_declare_symbol_enum], +[[typedef symbol_type_type yysymbol_type_t; +]]) + # b4_yy_symbol_print_define # ------------------------- # Bypass the default implementation to generate the "yy_symbol_print" # and "yy_symbol_value_print" functions. m4_define([b4_yy_symbol_print_define], -[[ -/*--------------------. +[[/*--------------------. | Print this symbol. 
| `--------------------*/ static void -yy_symbol_print (FILE *, int yytype, +yy_symbol_print (FILE *, ]b4_namespace_ref::b4_parser_class[::symbol_number_type yytype, const ]b4_namespace_ref::b4_parser_class[::semantic_type *yyvaluep]b4_locations_if([[, const ]b4_namespace_ref::b4_parser_class[::location_type *yylocationp]])[]b4_user_formals[) { @@ -170,7 +175,7 @@ m4_pushdef([b4_parse_param], m4_defn([b4_parse_param_orig]))dnl `--------------------*/ void - ]b4_parser_class[::yy_symbol_value_print_ (int yytype, + ]b4_parser_class[::yy_symbol_value_print_ (symbol_number_type yytype, const semantic_type* yyvaluep]b4_locations_if([[, const location_type* yylocationp]])[) {]b4_locations_if([[ @@ -184,7 +189,7 @@ m4_pushdef([b4_parse_param], m4_defn([b4_parse_param_orig]))dnl void - ]b4_parser_class[::yy_symbol_print_ (int yytype, + ]b4_parser_class[::yy_symbol_print_ (symbol_number_type yytype, const semantic_type* yyvaluep]b4_locations_if([[, const location_type* yylocationp]])[) { @@ -320,14 +325,14 @@ b4_percent_code_get([[requires]])[ /// \param yytype The token type. /// \param yyvaluep Its semantic value.]b4_locations_if([[ /// \param yylocationp Its location.]])[ - virtual void yy_symbol_value_print_ (int yytype, + virtual void yy_symbol_value_print_ (symbol_number_type yytype, const semantic_type* yyvaluep]b4_locations_if([[, const location_type* yylocationp]])[); /// \brief Report a symbol on the debug stream. /// \param yytype The token type. 
/// \param yyvaluep Its semantic value.]b4_locations_if([[ /// \param yylocationp Its location.]])[ - virtual void yy_symbol_print_ (int yytype, + virtual void yy_symbol_print_ (symbol_number_type yytype, const semantic_type* yyvaluep]b4_locations_if([[, const location_type* yylocationp]])[); private: @@ -350,6 +355,13 @@ b4_percent_define_flag_if([[global_tokens_and_yystype]], #endif ]b4_namespace_close[ +]m4_define([b4_declare_symbol_enum], +[[typedef ]b4_namespace_ref[::]b4_parser_class[::symbol_number_type yysymbol_type_t; +#define YYSYMBOL_YYEMPTY ]b4_namespace_ref[::]b4_parser_class[::YYSYMBOL_YYEMPTY +#define YYSYMBOL_YYERROR ]b4_namespace_ref[::]b4_parser_class[::YYSYMBOL_YYERROR +#define YYSYMBOL_YYEOF ]b4_namespace_ref[::]b4_parser_class[::YYSYMBOL_YYEOF +#define YYSYMBOL_YYUNDEF ]b4_namespace_ref[::]b4_parser_class[::YYSYMBOL_YYUNDEF +]])[ ]b4_percent_code_get([[provides]])[ ]m4_popdef([b4_parse_param])dnl ]) diff --git a/data/skeletons/lalr1.cc b/data/skeletons/lalr1.cc index 09418fd7..0e489744 100644 --- a/data/skeletons/lalr1.cc +++ b/data/skeletons/lalr1.cc @@ -241,13 +241,13 @@ m4_define([b4_shared_declarations], public: context (const ]b4_parser_class[& yyparser, const symbol_type& yyla); const symbol_type& lookahead () const { return yyla_; } - int token () const { return yyla_.type_get (); }]b4_locations_if([[ + symbol_number_type token () const { return yyla_.type_get (); }]b4_locations_if([[ const location_type& location () const { return yyla_.location; } ]])[ /// Put in YYARG at most YYARGN of the expected tokens, and return the /// number of tokens stored in YYARG. If YYARG is null, return the /// number of expected tokens (guaranteed to be less than YYNTOKENS). - int yyexpected_tokens (int yyarg[], int yyargn) const; + int yyexpected_tokens (symbol_number_type yyarg[], int yyargn) const; private: const ]b4_parser_class[& yyparser_; @@ -261,10 +261,10 @@ m4_define([b4_shared_declarations], /// Check the lookahead yytoken. 
/// \returns true iff the token will be eventually shifted. - bool yy_lac_check_ (int yytoken) const; + bool yy_lac_check_ (symbol_number_type yytoken) const; /// Establish the initial context if no initial context currently exists. /// \returns true iff the token will be eventually shifted. - bool yy_lac_establish_ (int yytoken); + bool yy_lac_establish_ (symbol_number_type yytoken); /// Discard any previous initial lookahead context because of event. /// \param event the event which caused the lookahead to be discarded. /// Only used for debbuging output. @@ -280,7 +280,7 @@ m4_define([b4_shared_declarations], [detailed\|verbose], [[ /// The arguments of the error message. int yysyntax_error_arguments_ (const context& yyctx, - int yyarg[], int yyargn) const; + symbol_number_type yyarg[], int yyargn) const; /// Generate an error message. /// \param yyctx the context in which the error occurred. @@ -304,11 +304,11 @@ m4_define([b4_shared_declarations], /// Convert a scanner token number \a t to a symbol number. /// In theory \a t should be a token_type, but character literals /// are valid, yet not members of the token_type enum. - static token_number_type yytranslate_ (int t); + static symbol_number_type yytranslate_ (int t); ]b4_parse_error_bmatch([custom\|detailed], [[ /// The user-facing name of the symbol whose (internal) number is /// YYSYMBOL. No bounds checking. - static const char *yysymbol_name (int yysymbol); + static const char *yysymbol_name (symbol_number_type yysymbol); ]])[ // Tables. @@ -377,7 +377,7 @@ m4_define([b4_shared_declarations], void move (by_state& that); /// The (internal) type number (corresponding to \a state). - /// \a empty_symbol when empty. + /// \a YYSYMBOL_YYEMPTY when empty. symbol_number_type type_get () const YY_NOEXCEPT; /// The state number used to denote an empty symbol. @@ -446,18 +446,12 @@ m4_define([b4_shared_declarations], /// Pop \a n symbols from the stack. void yypop_ (int n = 1); - /// Some specific tokens. 
- static const token_number_type yy_error_token_ = 1; - static const token_number_type yy_undef_token_ = ]b4_undef_token_number[; - /// Constants. enum { - yyeof_ = 0, yylast_ = ]b4_last[, ///< Last index in yytable_. yynnts_ = ]b4_nterms_number[, ///< Number of nonterminal symbols. - yyfinal_ = ]b4_final_state_number[, ///< Termination state number. - yyntokens_ = ]b4_tokens_number[ ///< Number of tokens. + yyfinal_ = ]b4_final_state_number[ ///< Termination state number. }; ]b4_parse_param_vars[ @@ -592,7 +586,7 @@ m4_if(b4_prefix, [yy], [], /* The user-facing name of the symbol whose (internal) number is YYSYMBOL. No bounds checking. */ const char * - ]b4_parser_class[::yysymbol_name (int yysymbol) + ]b4_parser_class[::yysymbol_name (symbol_number_type yysymbol) { static const char *const yy_sname[] = { @@ -604,7 +598,7 @@ m4_if(b4_prefix, [yy], [], { ]b4_translatable[ }; - return (yysymbol < yyntokens_ && yytranslatable[yysymbol] + return (yysymbol < YYNTOKENS && yytranslatable[yysymbol] ? _(yy_sname[yysymbol]) : yy_sname[yysymbol]);]], [[ return yy_sname[yysymbol];]])[ @@ -706,9 +700,9 @@ b4_parse_error_case([verbose], [[ ]b4_parser_class[::by_state::type_get () const YY_NOEXCEPT { if (state == empty_state) - return empty_symbol; + return YYSYMBOL_YYEMPTY; else - return yystos_[+state]; + return YY_CAST (symbol_number_type, yystos_[+state]); } ]b4_parser_class[::stack_symbol_type::stack_symbol_type () @@ -731,7 +725,7 @@ b4_parse_error_case([verbose], [[ b4_symbol_variant([that.type_get ()], [value], [move], [YY_MOVE (that.value)])])[ // that is emptied. - that.type = empty_symbol; + that.type = YYSYMBOL_YYEMPTY; } #if YY_CPLUSPLUS < 201103L @@ -786,7 +780,7 @@ b4_parse_error_case([verbose], [[ if (yysym.empty ()) std::abort (); #endif - yyo << (yytype < yyntokens_ ? "token" : "nterm") + yyo << (yytype < YYNTOKENS ? 
"token" : "nterm") << ' ' << yytname_[yytype] << " ("]b4_locations_if([ << yysym.location << ": "])[; ]b4_symbol_actions([printer])[ @@ -849,11 +843,11 @@ b4_parse_error_case([verbose], [[ ]b4_parser_class[::state_type ]b4_parser_class[::yy_lr_goto_state_ (state_type yystate, int yysym) { - int yyr = yypgoto_[yysym - yyntokens_] + yystate; + int yyr = yypgoto_[yysym - YYNTOKENS] + yystate; if (0 <= yyr && yyr <= yylast_ && yycheck_[yyr] == yystate) return yytable_[yyr]; else - return yydefgoto_[yysym - yyntokens_]; + return yydefgoto_[yysym - YYNTOKENS]; } bool @@ -1095,7 +1089,7 @@ b4_dollar_popdef])[]dnl error, discard it. */ // Return failure if at end of input. - if (yyla.type_get () == yyeof_) + if (yyla.type_get () == YYSYMBOL_YYEOF) YYABORT; else if (!yyla.empty ()) { @@ -1137,8 +1131,8 @@ b4_dollar_popdef])[]dnl yyn = yypact_[+yystack_[0].state]; if (!yy_pact_value_is_default_ (yyn)) { - yyn += yy_error_token_; - if (0 <= yyn && yyn <= yylast_ && yycheck_[yyn] == yy_error_token_) + yyn += YYSYMBOL_YYERROR; + if (0 <= yyn && yyn <= yylast_ && yycheck_[yyn] == YYSYMBOL_YYERROR) { yyn = yytable_[yyn]; if (0 < yyn) @@ -1235,7 +1229,7 @@ b4_dollar_popdef])[]dnl {} int - ]b4_parser_class[::context::yyexpected_tokens (int yyarg[], int yyargn) const + ]b4_parser_class[::context::yyexpected_tokens (symbol_number_type yyarg[], int yyargn) const { // Actual number of expected tokens int yycount = 0; @@ -1247,17 +1241,20 @@ b4_dollar_popdef])[]dnl yyparser_.yy_lac_check_ (yyla_.type_get ()); #endif - for (int yyx = 0; yyx < yyntokens_; ++yyx) - if (yyx != yy_error_token_ && yyx != yy_undef_token_ && yyparser_.yy_lac_check_ (yyx)) - { - if (!yyarg) - ++yycount; - else if (yycount == yyargn) - return 0; - else - yyarg[yycount++] = yyx; - } -]], [[ + for (int yyx = 0; yyx < YYNTOKENS; ++yyx) + { + symbol_number_type yysym = YY_CAST (symbol_number_type, yyx); + if (yysym != YYSYMBOL_YYERROR && yysym != YYSYMBOL_YYUNDEF + && yyparser_.yy_lac_check_ (yysym)) + { + if 
(!yyarg) + ++yycount; + else if (yycount == yyargn) + return 0; + else + yyarg[yycount++] = yysym; + } + }]], [[ int yyn = yypact_[+yyparser_.yystack_[0].state]; if (!yy_pact_value_is_default_ (yyn)) { @@ -1267,9 +1264,9 @@ b4_dollar_popdef])[]dnl int yyxbegin = yyn < 0 ? -yyn : 0; // Stay within bounds of both yycheck and yytname. int yychecklim = yylast_ - yyn + 1; - int yyxend = yychecklim < yyntokens_ ? yychecklim : yyntokens_; + int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS; for (int yyx = yyxbegin; yyx < yyxend; ++yyx) - if (yycheck_[yyx + yyn] == yyx && yyx != yy_error_token_ + if (yycheck_[yyx + yyn] == yyx && yyx != YYSYMBOL_YYERROR && !yy_table_value_is_error_ (yytable_[yyx + yyn])) { if (!yyarg) @@ -1277,7 +1274,7 @@ b4_dollar_popdef])[]dnl else if (yycount == yyargn) return 0; else - yyarg[yycount++] = yyx; + yyarg[yycount++] = YY_CAST (symbol_number_type, yyx); } } ]])[ @@ -1286,7 +1283,7 @@ b4_dollar_popdef])[]dnl ]])b4_lac_if([[ bool - ]b4_parser_class[::yy_lac_check_ (int yytoken) const + ]b4_parser_class[::yy_lac_check_ (symbol_number_type yytoken) const { // Logically, the yylac_stack's lifetime is confined to this function. // Clear it, to get rid of potential left-overs from previous call. @@ -1364,7 +1361,7 @@ b4_dollar_popdef])[]dnl // Establish the initial context if no initial context currently exists. bool - ]b4_parser_class[::yy_lac_establish_ (int yytoken) + ]b4_parser_class[::yy_lac_establish_ (symbol_number_type yytoken) { /* Establish the initial context for the current lookahead if no initial context is currently established. @@ -1426,7 +1423,7 @@ b4_dollar_popdef])[]dnl int ]b4_parser_class[::yysyntax_error_arguments_ (const context& yyctx, - int yyarg[], int yyargn) const + symbol_number_type yyarg[], int yyargn) const { /* There are many possibilities here to consider: - If this state is a consistent state with a default action, then @@ -1475,7 +1472,7 @@ b4_dollar_popdef])[]dnl // Its maximum. 
enum { YYARGS_MAX = 5 }; // Arguments of yyformat. - int yyarg[YYARGS_MAX]; + symbol_number_type yyarg[YYARGS_MAX]; int yycount = yysyntax_error_arguments_ (yyctx, yyarg, YYARGS_MAX); char const* yyformat = YY_NULLPTR; @@ -1520,7 +1517,7 @@ b4_dollar_popdef])[]dnl ]b4_tname_if([], [[#if ]b4_api_PREFIX[DEBUG]])[ // YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. - // First, the terminals, then, starting at \a yyntokens_, nonterminals. + // First, the terminals, then, starting at \a YYNTOKENS, nonterminals. const char* const ]b4_parser_class[::yytname_[] = { diff --git a/tests/headers.at b/tests/headers.at index 8ab8de4c..d746b8d7 100644 --- a/tests/headers.at +++ b/tests/headers.at @@ -319,7 +319,9 @@ AT_PERL_CHECK([[-n -0777 -e ' s{//.*}{}g; s{\b((defined|if)\ YYDEBUG |YYChar + |YYNTOKENS # This is actually scoped in a C++ class. |YYPUSH_MORE(?:_DEFINED)? + |YYSYMBOL_(\w+) # These guys are scoped. |YYUSE |YY_ATTRIBUTE(?:_PURE|_UNUSED) |YY(?:_REINTERPRET)?_CAST diff --git a/tests/local.at b/tests/local.at index 43a51f8b..770480da 100644 --- a/tests/local.at +++ b/tests/local.at @@ -749,13 +749,13 @@ void std::cerr << ctx.location () << ": ";]])[ std::cerr << "syntax error"; { - int la = ctx.token (); - if (la != empty_symbol) + symbol_type_type la = ctx.token (); + if (la != YYSYMBOL_YYEMPTY) fprintf (stderr, " on token [%s]", yysymbol_name (la)); } { enum { TOKENMAX = 10 }; - int expected[TOKENMAX]; + symbol_type_type expected[TOKENMAX]; int n = ctx.yyexpected_tokens (expected, TOKENMAX); if (0 < n) {