diff --git a/ChangeLog b/ChangeLog index 83ae569c..a7e2b729 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,29 @@ +2008-11-03 Akim Demaille + + Use variants to support objects as semantic values. + This patch was inspired by work by Michiel De Wilde. But he used Boost + variants, which (i) require Boost on the user side, (ii) are slow, and + (iii) have useless overhead (the parser knows the type of the semantic value; + there is no reason to duplicate this information as Boost.Variants do). + + This implementation reserves a buffer large enough to store the largest + objects. yy::variant implements this buffer. It was implemented with + Quentin Hocquet. + + * src/output.c (type_names_output): New. + (output_skeleton): Invoke it. + * data/c++.m4 (b4_variant_if): New. + (b4_symbol_value): If needed, provide a definition for variants. + * data/lalr1.cc (b4_symbol_value, b4_symbol_action_) + (b4_symbol_variant, _b4_char_sizeof_counter, _b4_char_sizeof_dummy) + (b4_char_sizeof, yy::variant): New. + (parser::parse): If variants are requested, define + parser::union_type, parser::variant, change the definition of + semantic_type, construct $$ before running the user action instead + of performing a default $$ = $1. + * examples/variant.yy: New. + Based on an example by Michiel De Wilde. + 2008-11-03 Akim Demaille Parameterize the extraction of semantic values. 
diff --git a/THANKS b/THANKS index 6785e2a8..91459166 100644 --- a/THANKS +++ b/THANKS @@ -55,6 +55,7 @@ Martin Nylin martin.nylin@linuxmail.org Matt Kraai kraai@alumni.cmu.edu Matt Rosing rosing@peakfive.com Michael Hayes m.hayes@elec.canterbury.ac.nz +Michiel De Wilde mdewilde.agilent@gmail.com Mickael Labau labau_m@epita.fr Mike Castle dalgoda@ix.netcom.com Neil Booth NeilB@earthling.net @@ -71,6 +72,7 @@ Per Allansson per@appgate.com Peter Fales psfales@lucent.com Peter Hamorsky hamo@upjs.sk Piotr Gackiewicz gacek@intertel.com.pl +Quentin Hocquet hocquet@gostai.com Quoc Peyrot chojin@lrde.epita.fr R Blake blakers@mac.com Raja R Harinath harinath@cs.umn.edu diff --git a/data/c++.m4 b/data/c++.m4 index a896b778..ba17dff1 100644 --- a/data/c++.m4 +++ b/data/c++.m4 @@ -97,6 +97,11 @@ m4_map_sep([ b4_token_enum], [, ## Semantic Values. ## ## ----------------- ## +# b4_variant_if([IF-VARIANT-ARE-USED], [IF-NOT]) +# ---------------------------------------------- +m4_define([b4_variant_if], +[b4_percent_define_ifdef([[variant]], [$1], [$2])]) + # b4_lhs_value([TYPE]) # -------------------- diff --git a/data/lalr1.cc b/data/lalr1.cc index 07a301ff..de2c1def 100644 --- a/data/lalr1.cc +++ b/data/lalr1.cc @@ -18,6 +18,75 @@ m4_include(b4_pkgdatadir/[c++.m4]) +# How the semantic value is extracted when using variants. +b4_variant_if([ + # b4_symbol_value(VAL, [TYPE]) + # ---------------------------- + m4_define([b4_symbol_value], + [m4_ifval([$2], + [$1.as<$2>()], + [$1])]) +]) # b4_variant_if + + +# b4_symbol_action_(SYMBOL-TAG, SYMBOL-NUM, SYMBOL-TYPENAME) +# ---------------------------------------------------------- +# Invoke b4_dollar_dollar(SYMBOL_TYPENAME) for each symbol. +m4_define([b4_symbol_action_], +[m4_ifval($3, +[ case $2: // $1 + b4_dollar_dollar($@); + break; +])]) + + +# b4_symbol_variant(YYTYPE, YYVAL, ACTION) +# ---------------------------------------- +# Run some ACTION ("build", or "destroy") on YYVAL of symbol type +# YYTYPE. 
+m4_define([b4_symbol_variant], +[m4_pushdef([b4_dollar_dollar], + [$2.$3<$][3>()])dnl + switch ($1) + { +m4_map([b4_symbol_action_], m4_defn([b4_type_names])) + default: + break; + } +m4_popdef([b4_dollar_dollar])dnl +]) + + +# _b4_char_sizeof_counter +# ----------------------- +# A counter used by _b4_char_sizeof_dummy to create fresh symbols. +m4_define([_b4_char_sizeof_counter], +[0]) + +# _b4_char_sizeof_dummy +# --------------------- +# At each call return a new C++ identifier. +m4_define([_b4_char_sizeof_dummy], +[m4_define([_b4_char_sizeof_counter], m4_incr(_b4_char_sizeof_counter))dnl +dummy[]_b4_char_sizeof_counter]) + + +# b4_char_sizeof(SYMBOL-TAG, SYMBOL-NUM, SYMBOL-TYPENAME) +# ------------------------------------------------------- +# To be mapped on the list of type names to produce: +# +# char dummy1[sizeof(type_name_1)]; +# char dummy2[sizeof(type_name_2)]; +# +# for defined type names. +# $3 is doubly-quoted, do not quote it again. +m4_define([b4_char_sizeof], +[m4_ifval($3, +[ + char _b4_char_sizeof_dummy@{sizeof($3)@}; // $1])dnl +]) + + m4_define([b4_parser_class_name], [b4_percent_define_get([[parser_class_name]])]) @@ -52,6 +121,51 @@ dnl FIXME: This is wrong, we want computed header guards. ]b4_namespace_open[ class position; class location; +]b4_variant_if([[ + /// A char[S] buffer to store and retrieve objects. + /// + /// Sort of a variant, but does not keep track of the nature + /// of the stored data, since that knowledge is available + /// via the current state. + template + struct variant + { + /// Instantiate a \a T in here. + template + inline void + build() + { + new (buffer) T; + } + + /// Destroy the stored \a T. + template + inline void + destroy() + { + reinterpret_cast(buffer).~T(); + } + + /// Accessor to a built \a T. + template + inline T& + as() + { + return reinterpret_cast(buffer); + } + + /// Const accessor to a built \a T (for %printer). 
+ template + inline const T& + as() const + { + return reinterpret_cast(buffer); + } + + /// A buffer large enough to store any of the semantic values. + char buffer[S]; + }; +]])[ ]b4_namespace_close[ #include "location.hh" @@ -99,16 +213,23 @@ do { \ class ]b4_parser_class_name[ { public: - /// Symbol semantic values. #ifndef YYSTYPE -]m4_ifdef([b4_stype], +]b4_variant_if( +[ /// An auxiliary type to compute the largest semantic type. + union union_type + {]m4_map([b4_char_sizeof], m4_defn([b4_type_names]))[ + }; + + /// Symbol semantic values. + typedef variant semantic_type;], +[ /// Symbol semantic values. +m4_ifdef([b4_stype], [ union semantic_type - { -b4_user_stype + {b4_user_stype };], [m4_if(b4_tag_seen_flag, 0, [[ typedef int semantic_type;]], -[[ typedef YYSTYPE semantic_type;]])])[ +[[ typedef YYSTYPE semantic_type;]])])])[ #else typedef YYSTYPE semantic_type; #endif @@ -631,7 +752,6 @@ m4_ifdef([b4_lex_param], [, ]b4_lex_param))[; /* Discard the token being shifted. */ yychar = yyempty_; - yysemantic_stack_.push (yylval); yylocation_stack_.push (yylloc); @@ -656,7 +776,11 @@ m4_ifdef([b4_lex_param], [, ]b4_lex_param))[; | yyreduce -- Do a reduction. | `-----------------------------*/ yyreduce: - yylen = yyr2_[yyn]; + yylen = yyr2_[yyn];]b4_variant_if([ + /* Variants are always initialized to an empty instance of the + correct type. The default $$=$1 rule is NOT applied when using + variants */ + ]b4_symbol_variant([[yyr1_@{yyn@}]], [yyval], [build])[],[ /* If YYLEN is nonzero, implement the default value of the action: `$$ = $1'. Otherwise, use the top of the stack. 
@@ -666,7 +790,7 @@ m4_ifdef([b4_lex_param], [, ]b4_lex_param))[; if (yylen) yyval = yysemantic_stack_@{yylen - 1@}; else - yyval = yysemantic_stack_@{0@}; + yyval = yysemantic_stack_@{0@};])[ { slice slice (yylocation_stack_, yylen); @@ -684,7 +808,6 @@ m4_ifdef([b4_lex_param], [, ]b4_lex_param))[; yypop_ (yylen); yylen = 0; YY_STACK_PRINT (); - yysemantic_stack_.push (yyval); yylocation_stack_.push (yyloc); diff --git a/examples/variant.yy b/examples/variant.yy new file mode 100644 index 00000000..fafb6556 --- /dev/null +++ b/examples/variant.yy @@ -0,0 +1,108 @@ +/* Test file for C++ parsers using variants. + Based on an example by Michiel De Wilde . */ +%language "C++" +%debug +%defines +%define variant + +%code requires // *.hh +{ +#include +} + +%code // *.cc +{ +#include +#include +#include + +static yy::parser::token_type yylex(yy::parser::semantic_type* yylval); +} + +%token TEXT +%token NUMBER +%printer { debug_stream() << $$; } +%token END_OF_FILE 0 + +%type text result + +%% + +result: + text { std::cout << $1 << std::endl; } +; + +text: + /* nothing */ { /* This will generate an empty string */ } +| text TEXT { std::swap($$,$1); $$.append($2); } +| text NUMBER { + std::swap($$,$1); + std::ostringstream o; + o << ' ' << $2; + $$.append(o.str()); + } +; +%% + +// The yylex function providing subsequent tokens: +// TEXT "I have three numbers for you:" +// NUMBER 1 +// NUMBER 2 +// NUMBER 3 +// TEXT " and that's all!" 
+// END_OF_FILE + +static +yy::parser::token_type +yylex(yy::parser::semantic_type* yylval) +{ + static int stage = 0; + yy::parser::token_type result; + + switch (stage) + { + case 0: + yylval->build(); + yylval->as() = std::string("I have three numbers for you:"); + result = yy::parser::token::TEXT; + break; + case 1: + case 2: + case 3: + yylval->build(); + yylval->as() = stage; + result = yy::parser::token::NUMBER; + break; + case 4: + yylval->build(); + yylval->as() = std::string(" and that's all!"); + result = yy::parser::token::TEXT; + break; + default: + result = yy::parser::token::END_OF_FILE; + break; + } + + stage++; + return result; +} + +// Mandatory error function +void +yy::parser::error(const yy::parser::location_type& yylloc, + const std::string& message) +{ + std::cerr << yylloc << ": " << message << std::endl; +} + +int +main(int argc, char *argv[]) +{ + yy::parser p; + p.set_debug_level(!!getenv("YYDEBUG")); + return p.parse(); +} + +// Local Variables: +// mode: C++ +// End: diff --git a/src/output.c b/src/output.c index 585b90e0..8457ba19 100644 --- a/src/output.c +++ b/src/output.c @@ -283,6 +283,31 @@ prepare_states (void) +/*-----------------------------------------------. +| For each symbol type, its tags and type name. | +`-----------------------------------------------*/ + +static void +type_names_output (FILE *out) +{ + int i; + + /* Define b4_type_names as a list of [TAG, NUMBER, [[TYPE-NAME]]] entries. */ + fputs ("m4_define([b4_type_names],\n[", out); + for (i = 0; i < nsyms; ++i) + { + symbol *sym = symbols[i]; + /* Symbol-name, Symbol-number, optional typename. */ + fprintf (out, "%s[", i ? ",\n" : ""); + escaped_output (out, sym->tag); + fprintf (out, ", %d, [[%s]]]", + sym->number, + sym->type_name ? sym->type_name : ""); + } + fputs ("])\n\n", out); +} + + /*---------------------------------. | Output the user actions to OUT. 
| `---------------------------------*/ @@ -461,7 +486,6 @@ prepare_actions (void) muscle_insert_unsigned_int_table ("conflicting_rules", conflict_list, 0, 1, conflict_list_cnt); } - /*---------------------------. | Call the skeleton parser. | @@ -567,6 +591,7 @@ output_skeleton (void) /* Output the definitions of all the muscles. */ fputs ("m4_init()\n", out); + type_names_output (out); user_actions_output (out); merger_output (out); token_definitions_output (out);