c++: exhibit a safe symbol_type

Instead of introducing make_symbol (whose name, btw, somewhat
infringes on the user's "name space", if she defines a token named
"symbol"), let's make the construction of symbol_type safer, using
assertions.

For instance with:

    %token ':' <std::string> ID <int> INT;

generate:

    symbol_type (int token, const std::string&);
    symbol_type (int token, const int&);
    symbol_type (int token);

It does mean that now named token constructors (make_ID, make_INT,
etc.) go through a useless assert, but I think we can ignore this: I
assume any decent compiler will inline the symbol_type ctor inside the
make_TOKEN functions, which will show that the assert is trivially
verified, hence I expect no code will be emitted for it.  And anyway,
that's an assert, NDEBUG controls it.

* data/c++.m4 (symbol_type): Turn into a subclass of
basic_symbol<by_type>.
Declare symbol constructors when variants are enabled.
* data/variant.hh (_b4_type_constructor_declare)
(_b4_type_constructor_define): Replace with...
(_b4_symbol_constructor_declare, _b4_symbol_constructor_def): these.
Generate symbol_type constructors.
* doc/bison.texi (Complete Symbols): Document.
* tests/types.at: Check.
This commit is contained in:
Akim Demaille
2018-12-19 17:51:10 +01:00
parent 1f4dd2671a
commit e5780041b9
5 changed files with 142 additions and 43 deletions

30
NEWS
View File

@@ -96,10 +96,36 @@ GNU Bison NEWS
until it sees the '='. So we notate the two possible reductions to
indicate that each conflicts in one rule.
*** C++: Actual token constructors
When variants and token constructors are enabled, in addition to the
type-safe named token constructors (make_ID, make_INT, etc.), we now
generate genuine constructors for symbol_type.
For instance with these declarations
%token ':'
<std::string> ID
<int> INT;
you may use these constructors:
symbol_type (int token, const std::string&);
symbol_type (int token, const int&);
symbol_type (int token);
which should be used in a Flex-scanner as follows.
%%
[a-z]+ return yy::parser::symbol_type (ID, yytext);
[0-9]+ return yy::parser::symbol_type (INT, text_to_int (yytext));
":" return yy::parser::symbol_type (':');
<<EOF>> return yy::parser::symbol_type (0);
*** C++: Variadic emplace
If your application requires C++11, you may now use a variadic emplace for
semantic values:
If your application requires C++11 and you don't use symbol constructors,
you may now use a variadic emplace for semantic values:
%define api.value.type variant
%token <std::pair<int, int>> PAIR

View File

@@ -332,7 +332,17 @@ m4_define([b4_symbol_type_declare],
};
/// "External" symbols: returned by the scanner.
typedef basic_symbol<by_type> symbol_type;
struct symbol_type : basic_symbol<by_type>
{]b4_variant_if([[
/// Superclass.
typedef basic_symbol<by_type> super_type;
/// Empty symbol.
symbol_type () {};
/// Constructor for valueless symbols, and symbols from each type.
]b4_type_foreach([_b4_symbol_constructor_declare])[
]])[};
]])

View File

@@ -368,25 +368,21 @@ m4_define([_b4_token_maker_declare],
])])
# _b4_type_constructor_declare(SYMBOL-NUM...)
# -------------------------------------------
# _b4_symbol_constructor_declare(SYMBOL-NUM...)
# ---------------------------------------------
# Declare a unique symbol_type constructor for all the SYMBOL-NUM (they
# have the same type). Use at class-level.
m4_define([_b4_type_constructor_declare],
m4_define([_b4_symbol_constructor_declare],
[m4_ifval(_b4_includes_tokens($@),
[#if 201103L <= YY_CPLUSPLUS
static
symbol_type
make_symbol (dnl
b4_join([int tok],
symbol_type (b4_join(
[int tok],
b4_symbol_if([$1], [has_type],
[b4_symbol([$1], [type]) v]),
b4_locations_if([location_type l])));
#else
static
symbol_type
make_symbol (dnl
b4_join([int tok],
symbol_type (b4_join(
[int tok],
b4_symbol_if([$1], [has_type],
[const b4_symbol([$1], [type])& v]),
b4_locations_if([const location_type& l])));
@@ -399,7 +395,6 @@ b4_join([int tok],
# Declare symbol constructors. Use at class-level.
m4_define([b4_symbol_constructor_declare],
[ // Symbol constructors declarations.
b4_type_foreach([_b4_type_constructor_declare])
b4_symbol_foreach([_b4_token_maker_declare])])
@@ -437,8 +432,8 @@ m4_define([_b4_token_maker_define],
])])
# _b4_type_constructor_define(SYMBOL-NUM...)
# ------------------------------------------
# _b4_symbol_constructor_define(SYMBOL-NUM...)
# --------------------------------------------
# Define a unique symbol_type constructor for all the SYMBOL-NUM (they
# have the same type). Use outside the class definition.
m4_define([_b4_type_clause],
@@ -447,38 +442,36 @@ m4_define([_b4_type_clause],
[tok == token::b4_symbol([$1], [id])],
[tok == b4_symbol([$1], [user_number])])])])
m4_define([_b4_type_constructor_define],
m4_define([_b4_symbol_constructor_define],
[m4_ifval(_b4_includes_tokens($@),
[#if 201103L <= YY_CPLUSPLUS
[[#if 201103L <= YY_CPLUSPLUS
inline
b4_parser_class_name::symbol_type
b4_parser_class_name::make_symbol (dnl
b4_join([int tok],
]b4_parser_class_name[::symbol_type::symbol_type (]b4_join(
[int tok],
b4_symbol_if([$1], [has_type],
[b4_symbol([$1], [type]) v]),
b4_locations_if([location_type l])))
{b4_parse_assert_if([
assert (m4_join([ || ], m4_map_sep([_b4_type_clause], [, ], [$@])));])[
return symbol_type (]b4_join([token_type (tok)],
b4_symbol_if([$1], [has_type], [std::move (v)]),
b4_locations_if([std::move (l)])));
b4_locations_if([location_type l]))[)
: super_type(]b4_join([token_type (tok)],
b4_symbol_if([$1], [has_type], [std::move (v)]),
b4_locations_if([std::move (l)]))[)
{
YYASSERT (]m4_join([ || ], m4_map_sep([_b4_type_clause], [, ], [$@]))[);
}
#else
inline
b4_parser_class_name::symbol_type
b4_parser_class_name::make_symbol (dnl
b4_join([int tok],
]b4_parser_class_name[::symbol_type::symbol_type (]b4_join(
[int tok],
b4_symbol_if([$1], [has_type],
[const b4_symbol([$1], [type])& v]),
b4_locations_if([const location_type& l])))
{b4_parse_assert_if([
assert (m4_join([ || ], m4_map_sep([_b4_type_clause], [, ], [$@])));])[
return symbol_type (]b4_join([token_type (tok)],
b4_symbol_if([$1], [has_type], [v]),
b4_locations_if([l])));
b4_locations_if([const location_type& l]))[)
: super_type(]b4_join([token_type (tok)],
b4_symbol_if([$1], [has_type], [v]),
b4_locations_if([l]))[)
{
YYASSERT (]m4_join([ || ], m4_map_sep([_b4_type_clause], [, ], [$@]))[);
}
#endif
])])
]])])
# b4_basic_symbol_constructor_declare(SYMBOL-NUM)
@@ -532,5 +525,5 @@ m4_define([b4_basic_symbol_constructor_define],
# Define the overloaded versions of make_symbol for all the value types.
m4_define([b4_symbol_constructor_define],
[ // Implementation of make_symbol for each symbol type.
b4_type_foreach([_b4_type_constructor_define])
b4_type_foreach([_b4_symbol_constructor_define])
b4_symbol_foreach([_b4_token_maker_define])])

View File

@@ -11500,6 +11500,57 @@ additional arguments.
For each token type, Bison generates constructors as follows.
@deftypeop {Constructor} {parser::symbol_type} {} {symbol_type} (int @var{token}, const @var{value_type}& @var{value}, const location_type& @var{location})
@deftypeopx {Constructor} {parser::symbol_type} {} {symbol_type} (int @var{token}, const location_type& @var{location})
@deftypeopx {Constructor} {parser::symbol_type} {} {symbol_type} (int @var{token}, const @var{value_type}& @var{value})
@deftypeopx {Constructor} {parser::symbol_type} {} {symbol_type} (int @var{token})
Build a complete terminal symbol for the token type @var{token} (including
the @code{api.token.prefix}), whose semantic value, if it has one, is
@var{value} of adequate @var{value_type}. Pass the @var{location} iff
location tracking is enabled.
Consistency between @var{token} and @var{value_type} is checked via an
@code{assert}.
@end deftypeop
For instance, given the following declarations:
@example
%define api.token.prefix @{TOK_@}
%token <std::string> IDENTIFIER;
%token <int> INTEGER;
%token ':';
@end example
@noindent
you may use these constructors:
@example
symbol_type (int token, const std::string&, const location_type&);
symbol_type (int token, const int&, const location_type&);
symbol_type (int token, const location_type&);
@end example
@noindent
which should be used in a Flex-scanner as follows.
@example
%%
[a-z]+ return yy::parser::symbol_type (TOK_IDENTIFIER, yytext, loc);
[0-9]+ return yy::parser::symbol_type (TOK_INTEGER, text_to_int (yytext), loc);
":" return yy::parser::symbol_type (':', loc);
<<EOF>> return yy::parser::symbol_type (0, loc);
@end example
@sp 1
Note that it is possible to generate and compile type incorrect code
(e.g. @samp{symbol_type (':', yytext, loc)}). It will fail at run time,
provided the assertions are enabled (i.e., @option{-DNDEBUG} was not passed
to the compiler). Bison supports an alternative that guarantees that type
incorrect code will not even compile. Indeed, it generates @emph{named
constructors} as follows.
@deftypemethod {parser} {symbol_type} {make_@var{token}} (const @var{value_type}& @var{value}, const location_type& @var{location})
@deftypemethodx {parser} {symbol_type} {make_@var{token}} (const location_type& @var{location})
@deftypemethodx {parser} {symbol_type} {make_@var{token}} (const @var{value_type}& @var{value})
@@ -11531,7 +11582,7 @@ symbol_type make_EOF (const location_type&);
@end example
@noindent
which should be used in a Flex-scanner as follows.
which should be used in a scanner as follows.
@example
[a-z]+ return yy::parser::make_IDENTIFIER (yytext, loc);
@@ -11544,6 +11595,7 @@ Tokens that do not have an identifier are not accessible: you cannot simply
use characters such as @code{':'}, they must be declared with @code{%token},
including the end-of-file token.
@node A Complete C++ Example
@subsection A Complete C++ Example

View File

@@ -288,6 +288,24 @@ m4_foreach([b4_skel], [[yacc.c], [glr.c], [lalr1.cc], [glr.cc]],
AT_VAL.build (std::pair<std::string, std::string> ("two", "deux"));],
[10:11, two:deux])
# Type-based token constructors on move-only types, and types with commas.
AT_TEST([%skeleton "]b4_skel["
%define api.value.type variant
%define api.token.constructor],
[[%token <std::pair<int, int>> '1' '2';]],
['1' '2'
{
std::cout << $1.first << ':' << $1.second << ", "
<< $2.first << ':' << $2.second << '\n';
}],
["12"],
[[typedef yy::parser::symbol_type symbol;
if (res)
return symbol (res, std::make_pair (res - '0', res - '0' + 1));
else
return symbol (res)]],
[1:2, 2:3])
# Move-only types, and variadic emplace.
AT_TEST([%skeleton "]b4_skel["
%code requires { #include <memory> }
@@ -336,11 +354,11 @@ m4_foreach([b4_skel], [[yacc.c], [glr.c], [lalr1.cc], [glr.cc]],
<< $2.first << ':' << $2.second << '\n'; }],
["12"],
[[if (res == '1')
return yy::parser::make_symbol ('1', std::make_unique<int> (10));
return {res, std::make_unique<int> (10)};
else if (res == '2')
return yy::parser::make_symbol ('2', std::make_pair (21, 22));
return {res, std::make_pair (21, 22)};
else
return yy::parser::make_symbol (0)]],
return res]],
[10, 21:22],
[AT_REQUIRE_CXX_STD(14, [echo "$at_std not supported"; continue])])