Use variants to support objects as semantic values.

This patch was inspired by work by Michiel De Wilde.  But he used Boost
variants, which (i) require Boost on the user side, (ii) are slow, and
(iii) have useless overhead (the parser knows the type of the semantic
value, so there is no reason to duplicate this information as
Boost.Variants do).

This implementation reserves a buffer large enough to store the largest
objects.  yy::variant implements this buffer.  It was implemented with
Quentin Hocquet.

	* src/output.c (type_names_output): New.
	(output_skeleton): Invoke it.
	* data/c++.m4 (b4_variant_if): New.
	(b4_symbol_value): If needed, provide a definition for variants.
	* data/lalr1.cc (b4_symbol_value, b4_symbol_action_)
	(b4_symbol_variant, _b4_char_sizeof_counter, _b4_char_sizeof_dummy)
	(b4_char_sizeof, yy::variant): New.
	(parser::parse): If variants are requested, define
	parser::union_type, parser::variant, change the definition of
	semantic_type, construct $$ before running the user action instead
	of performing a default $$ = $1.
	* examples/variant.yy: New.
	Based on an example by Michiel De Wilde.
This commit is contained in:
Akim Demaille
2008-10-21 18:00:29 -05:00
parent 1fa5d8bbf9
commit 5ab8c47bcf
6 changed files with 299 additions and 10 deletions

View File

@@ -1,3 +1,29 @@
2008-11-03 Akim Demaille <demaille@gostai.com>
Use variants to support objects as semantic values.
This patch was inspired by work by Michiel De Wilde. But he used Boost
variants, which (i) require Boost on the user side, (ii) are slow, and
(iii) have useless overhead (the parser knows the type of the semantic
value, so there is no reason to duplicate this information as
Boost.Variants do).
This implementation reserves a buffer large enough to store the largest
objects. yy::variant implements this buffer. It was implemented with
Quentin Hocquet.
* src/output.c (type_names_output): New.
(output_skeleton): Invoke it.
* data/c++.m4 (b4_variant_if): New.
(b4_symbol_value): If needed, provide a definition for variants.
* data/lalr1.cc (b4_symbol_value, b4_symbol_action_)
(b4_symbol_variant, _b4_char_sizeof_counter, _b4_char_sizeof_dummy)
(b4_char_sizeof, yy::variant): New.
(parser::parse): If variants are requested, define
parser::union_type, parser::variant, change the definition of
semantic_type, construct $$ before running the user action instead
of performing a default $$ = $1.
* examples/variant.yy: New.
Based on an example by Michiel De Wilde.
2008-11-03 Akim Demaille <demaille@gostai.com>
Parameterize the extraction of semantic values.

2
THANKS
View File

@@ -55,6 +55,7 @@ Martin Nylin martin.nylin@linuxmail.org
Matt Kraai kraai@alumni.cmu.edu
Matt Rosing rosing@peakfive.com
Michael Hayes m.hayes@elec.canterbury.ac.nz
Michiel De Wilde mdewilde.agilent@gmail.com
Mickael Labau labau_m@epita.fr
Mike Castle dalgoda@ix.netcom.com
Neil Booth NeilB@earthling.net
@@ -71,6 +72,7 @@ Per Allansson per@appgate.com
Peter Fales psfales@lucent.com
Peter Hamorsky hamo@upjs.sk
Piotr Gackiewicz gacek@intertel.com.pl
Quentin Hocquet hocquet@gostai.com
Quoc Peyrot chojin@lrde.epita.fr
R Blake blakers@mac.com
Raja R Harinath harinath@cs.umn.edu

View File

@@ -97,6 +97,11 @@ m4_map_sep([ b4_token_enum], [,
## Semantic Values. ##
## ----------------- ##
# b4_variant_if([IF-VARIANT-ARE-USED], [IF-NOT])
# ----------------------------------------------
# Select between two expansions depending on the %define variable
# "variant": the choice is delegated to b4_percent_define_ifdef, which
# receives IF-VARIANT-ARE-USED as its "defined" branch and IF-NOT as
# its "otherwise" branch.
m4_define([b4_variant_if],
[b4_percent_define_ifdef([[variant]], [$1], [$2])])
# b4_lhs_value([TYPE])
# --------------------

View File

@@ -18,6 +18,75 @@
m4_include(b4_pkgdatadir/[c++.m4])
# How the semantic value is extracted when using variants.
# The definition below is only installed through b4_variant_if; no
# alternative is given here for the case where variants are not used.
b4_variant_if([
# b4_symbol_value(VAL, [TYPE])
# ----------------------------
# Expand to VAL.as<TYPE>() when TYPE is given and to plain VAL when
# TYPE is empty.
m4_define([b4_symbol_value],
[m4_ifval([$2],
[$1.as<$2>()],
[$1])])
]) # b4_variant_if
# b4_symbol_action_(SYMBOL-TAG, SYMBOL-NUM, SYMBOL-TYPENAME)
# ----------------------------------------------------------
# Invoke b4_dollar_dollar(SYMBOL_TYPENAME) for each symbol.
#
# More precisely: when SYMBOL-TYPENAME is non-empty, emit a
# "case SYMBOL-NUM:" label (annotated with SYMBOL-TAG as a C++ comment),
# followed by the expansion of b4_dollar_dollar called with all three
# arguments, followed by "break;".  Symbols without a type name expand
# to nothing.
m4_define([b4_symbol_action_],
[m4_ifval($3,
[ case $2: // $1
b4_dollar_dollar($@);
break;
])])
# b4_symbol_variant(YYTYPE, YYVAL, ACTION)
# ----------------------------------------
# Run some ACTION ("build", or "destroy") on YYVAL of symbol type
# YYTYPE.
#
# Emits a C++ switch on YYTYPE.  b4_symbol_action_ is mapped over
# b4_type_names, so every symbol that has a type name contributes one
# case that expands to YYVAL.ACTION<TYPENAME>(); all other values fall
# into the empty default case.
#
# b4_dollar_dollar is redefined with m4_pushdef for the duration of the
# map and restored with m4_popdef afterwards.  Its body is written as
# two adjacent quoted strings so that the reference to its own third
# argument (the type name) is not substituted while b4_symbol_variant
# itself is being expanded.
m4_define([b4_symbol_variant],
[m4_pushdef([b4_dollar_dollar],
[$2.$3<$][3>()])dnl
switch ($1)
{
m4_map([b4_symbol_action_], m4_defn([b4_type_names]))
default:
break;
}
m4_popdef([b4_dollar_dollar])dnl
])
# _b4_char_sizeof_counter
# -----------------------
# A counter used by _b4_char_sizeof_dummy to create fresh symbols.
# It starts at 0 and is incremented by _b4_char_sizeof_dummy before
# each use, so the first identifier generated is numbered 1.
m4_define([_b4_char_sizeof_counter],
[0])
# _b4_char_sizeof_dummy
# ---------------------
# At each call return a new C++ identifier.
#
# The call first redefines _b4_char_sizeof_counter to its successor,
# then expands to the word "dummy" immediately followed by the new
# counter value (dummy1, dummy2, ...).
m4_define([_b4_char_sizeof_dummy],
[m4_define([_b4_char_sizeof_counter], m4_incr(_b4_char_sizeof_counter))dnl
dummy[]_b4_char_sizeof_counter])
# b4_char_sizeof(SYMBOL-TAG, SYMBOL-NUM, SYMBOL-TYPENAME)
# -------------------------------------------------------
# To be mapped on the list of type names to produce:
#
# char dummy1[sizeof(type_name_1)];
# char dummy2[sizeof(type_name_2)];
#
# for defined type names.
# $3 is doubly-quoted, do not quote it again.
#
# Symbols whose SYMBOL-TYPENAME is empty expand to nothing.  Each
# generated member gets a fresh name from _b4_char_sizeof_dummy and is
# followed by SYMBOL-TAG as a C++ comment.  SYMBOL-NUM is not used.
# NOTE(review): the at-sign/brace pairs presumably stand for literal
# square brackets in the generated code, as the sample output above
# suggests -- confirm against the skeleton post-processing.
m4_define([b4_char_sizeof],
[m4_ifval($3,
[
char _b4_char_sizeof_dummy@{sizeof($3)@}; // $1])dnl
])
# b4_parser_class_name
# --------------------
# The name of the generated parser class: expands to the result of
# b4_percent_define_get on the %define variable "parser_class_name".
m4_define([b4_parser_class_name],
[b4_percent_define_get([[parser_class_name]])])
@@ -52,6 +121,51 @@ dnl FIXME: This is wrong, we want computed header guards.
]b4_namespace_open[
class position;
class location;
]b4_variant_if([[
/// A char[S] buffer to store and retrieve objects.
///
/// Sort of a variant, but does not keep track of the nature
/// of the stored data, since that knowledge is available
/// via the current state.
///
/// The caller must pair every build<T>() with a destroy<T>() of the
/// same \a T, and may call as<T>() only while a \a T is built.
/// Copying a variant copies its raw bytes only: no constructor of
/// the stored object is run.
template <size_t S>
struct variant
{
  /// Instantiate a value-initialized \a T in here.
  ///
  /// The explicit "T ()" form guarantees that scalar types (e.g. int)
  /// start at zero instead of holding an indeterminate value.
  template <typename T>
  inline void
  build()
  {
    new (buffer) T ();
  }

  /// Destroy the stored \a T.
  template <typename T>
  inline void
  destroy()
  {
    as<T>().~T();
  }

  /// Accessor to a built \a T.
  template <typename T>
  inline T&
  as()
  {
    return reinterpret_cast<T&>(buffer);
  }

  /// Const accessor to a built \a T (for %printer).
  template <typename T>
  inline const T&
  as() const
  {
    return reinterpret_cast<const T&>(buffer);
  }

  union
  {
    /// Never read nor written: its only purpose is to give the storage
    /// the alignment of a long double, since a bare char array carries
    /// no alignment guarantee for the objects built in it.
    long double align_me;

    /// A buffer large enough to store any of the semantic values.
    char buffer[S];
  };
};
]])[
]b4_namespace_close[
#include "location.hh"
@@ -99,16 +213,23 @@ do { \
class ]b4_parser_class_name[
{
public:
/// Symbol semantic values.
#ifndef YYSTYPE
]m4_ifdef([b4_stype],
]b4_variant_if(
[ /// An auxiliary type to compute the largest semantic type.
union union_type
{]m4_map([b4_char_sizeof], m4_defn([b4_type_names]))[
};
/// Symbol semantic values.
typedef variant<sizeof(union_type)> semantic_type;],
[ /// Symbol semantic values.
m4_ifdef([b4_stype],
[ union semantic_type
{
b4_user_stype
{b4_user_stype
};],
[m4_if(b4_tag_seen_flag, 0,
[[ typedef int semantic_type;]],
[[ typedef YYSTYPE semantic_type;]])])[
[[ typedef YYSTYPE semantic_type;]])])])[
#else
typedef YYSTYPE semantic_type;
#endif
@@ -631,7 +752,6 @@ m4_ifdef([b4_lex_param], [, ]b4_lex_param))[;
/* Discard the token being shifted. */
yychar = yyempty_;
yysemantic_stack_.push (yylval);
yylocation_stack_.push (yylloc);
@@ -656,7 +776,11 @@ m4_ifdef([b4_lex_param], [, ]b4_lex_param))[;
| yyreduce -- Do a reduction. |
`-----------------------------*/
yyreduce:
yylen = yyr2_[yyn];
yylen = yyr2_[yyn];]b4_variant_if([
/* Variants are always initialized to an empty instance of the
correct type. The default $$=$1 rule is NOT applied when using
variants */
]b4_symbol_variant([[yyr1_@{yyn@}]], [yyval], [build])[],[
/* If YYLEN is nonzero, implement the default value of the action:
`$$ = $1'. Otherwise, use the top of the stack.
@@ -666,7 +790,7 @@ m4_ifdef([b4_lex_param], [, ]b4_lex_param))[;
if (yylen)
yyval = yysemantic_stack_@{yylen - 1@};
else
yyval = yysemantic_stack_@{0@};
yyval = yysemantic_stack_@{0@};])[
{
slice<location_type, location_stack_type> slice (yylocation_stack_, yylen);
@@ -684,7 +808,6 @@ m4_ifdef([b4_lex_param], [, ]b4_lex_param))[;
yypop_ (yylen);
yylen = 0;
YY_STACK_PRINT ();
yysemantic_stack_.push (yyval);
yylocation_stack_.push (yyloc);

108
examples/variant.yy Normal file
View File

@@ -0,0 +1,108 @@
/* Test file for C++ parsers using variants.
Based on an example by Michiel De Wilde <mdewilde.agilent@gmail.com>. */
%language "C++"
%debug
%defines
%define variant
%code requires // *.hh
{
#include <string>
}
%code // *.cc
{
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <sstream>
static yy::parser::token_type yylex(yy::parser::semantic_type* yylval);
}
%token <std::string> TEXT
%token <int> NUMBER
%printer { debug_stream() << $$; } <int> <std::string>
%token END_OF_FILE 0
%type <std::string> text result
%%
/* The start symbol: print the accumulated text, followed by a newline. */
result:
text { std::cout << $1 << std::endl; }
;
/* A possibly empty sequence of TEXT and NUMBER tokens, concatenated
   into a single std::string.  No action relies on a default
   "$$ = $1": each one fills $$ explicitly, by swapping the text
   gathered so far out of $1 and then appending the new token. */
text:
/* nothing */ { /* This will generate an empty string */ }
| text TEXT { std::swap($$,$1); $$.append($2); }
| text NUMBER {
std::swap($$,$1);
/* Numbers are formatted with a leading space before being appended. */
std::ostringstream o;
o << ' ' << $2;
$$.append(o.str());
}
;
%%
// A canned scanner.  Successive calls return, in order:
//   TEXT    "I have three numbers for you:"
//   NUMBER  1
//   NUMBER  2
//   NUMBER  3
//   TEXT    " and that's all!"
// and END_OF_FILE on every call after that.
//
// For TEXT and NUMBER the semantic value is first built in *yylval
// with the matching type and then assigned.
static
yy::parser::token_type
yylex(yy::parser::semantic_type* yylval)
{
  // Number of calls made so far; selects the token to deliver.
  static int stage = 0;
  const int step = stage;
  ++stage;

  if (step == 0 || step == 4)
    {
      yylval->build<std::string>();
      yylval->as<std::string>() =
        std::string(step == 0
                    ? "I have three numbers for you:"
                    : " and that's all!");
      return yy::parser::token::TEXT;
    }

  if (1 <= step && step <= 3)
    {
      // The number delivered is the call index itself.
      yylval->build<int>();
      yylval->as<int>() = step;
      return yy::parser::token::NUMBER;
    }

  return yy::parser::token::END_OF_FILE;
}
// Mandatory error function: writes "LOCATION: MESSAGE" and a newline
// to the standard error stream.
void
yy::parser::error(const yy::parser::location_type& yylloc,
                  const std::string& message)
{
  std::ostream& err = std::cerr;
  err << yylloc;
  err << ": " << message << std::endl;
}
// Run the parser once.  Parser traces are enabled when the YYDEBUG
// environment variable is set (to any value).  The exit status is the
// value returned by parse(), so a failed parse is visible to the
// caller instead of always exiting with 0.
int
main(int argc, char *argv[])
{
  // The command line is not used.
  (void) argc;
  (void) argv;

  yy::parser p;
  p.set_debug_level(!!std::getenv("YYDEBUG"));
  return p.parse();
}
// Local Variables:
// mode: C++
// End:

View File

@@ -283,6 +283,31 @@ prepare_states (void)
/*-----------------------------------------------.
| For each symbol type, its tags and type name.  |
`-----------------------------------------------*/

/* Write to OUT the definition of the m4 macro b4_type_names: a list
   with one entry per symbol, entries being separated by a comma and
   a newline.  Each entry has the form

     [TAG, NUMBER, [[TYPE-NAME]]]

   where TAG is written through escaped_output, and TYPE-NAME is left
   empty for symbols that have no type name.  */

static void
type_names_output (FILE *out)
{
  int i;

  fputs ("m4_define([b4_type_names],\n[", out);
  for (i = 0; i < nsyms; ++i)
    {
      symbol *sym = symbols[i];
      /* Symbol-name, Symbol-number, optional typename.  The separator
         is emitted before every entry but the first.  */
      fprintf (out, "%s[", i ? ",\n" : "");
      escaped_output (out, sym->tag);
      fprintf (out, ", %d, [[%s]]]",
               sym->number,
               sym->type_name ? sym->type_name : "");
    }
  fputs ("])\n\n", out);
}
/*---------------------------------.
| Output the user actions to OUT. |
`---------------------------------*/
@@ -461,7 +486,6 @@ prepare_actions (void)
muscle_insert_unsigned_int_table ("conflicting_rules", conflict_list,
0, 1, conflict_list_cnt);
}
/*---------------------------.
| Call the skeleton parser. |
@@ -567,6 +591,7 @@ output_skeleton (void)
/* Output the definitions of all the muscles. */
fputs ("m4_init()\n", out);
type_names_output (out);
user_actions_output (out);
merger_output (out);
token_definitions_output (out);