mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-09 04:13:03 +00:00
c++: exhibit a safe symbol_type
Instead of introducing make_symbol (whose name, btw, somewhat
infringes on the user's "name space", if she defines a token named
"symbol"), let's make the construction of symbol_type safer, using
assertions.
For instance with:
%token ':' <std::string> ID <int> INT;
generate:
symbol_type (int token, const std::string&);
symbol_type (int token, const int&);
symbol_type (int token);
It does mean that now named token constructors (make_ID, make_INT,
etc.) go through a useless assert, but I think we can ignore this: I
assume any decent compiler will inline the symbol_type ctor inside the
make_TOKEN functions, which will show that the assert is trivially
verified, hence I expect no code will be emitted for it. And anyway,
that's an assert, NDEBUG controls it.
* data/c++.m4 (symbol_type): Turn into a subclass of
basic_symbol<by_type>.
Declare symbol constructors when variants are enabled.
* data/variant.hh (_b4_type_constructor_declare)
(_b4_type_constructor_define): Replace with...
(_b4_symbol_constructor_declare, _b4_symbol_constructor_def): these.
Generate symbol_type constructors.
* doc/bison.texi (Complete Symbols): Document.
* tests/types.at: Check.
This commit is contained in:
30
NEWS
30
NEWS
@@ -96,10 +96,36 @@ GNU Bison NEWS
|
||||
until it sees the '='. So we notate the two possible reductions to
|
||||
indicate that each conflicts in one rule.
|
||||
|
||||
*** C++: Actual token constructors
|
||||
|
||||
When variants and token constructors are enabled, in addition to the
|
||||
type-safe named token constructors (make_ID, make_INT, etc.), we now
|
||||
generate genuine constructors for symbol_type.
|
||||
|
||||
For instance with these declarations
|
||||
|
||||
%token ':'
|
||||
<std::string> ID
|
||||
<int> INT;
|
||||
|
||||
you may use these constructors:
|
||||
|
||||
symbol_type (int token, const std::string&);
|
||||
symbol_type (int token, const int&);
|
||||
symbol_type (int token);
|
||||
|
||||
which should be used in a Flex-scanner as follows.
|
||||
|
||||
%%
|
||||
[a-z]+ return yy::parser::symbol_type (ID, yytext);
|
||||
[0-9]+ return yy::parser::symbol_type (INT, text_to_int (yytext));
|
||||
":" return yy::parser::symbol_type (':');
|
||||
<<EOF>> return yy::parser::symbol_type (0);
|
||||
|
||||
*** C++: Variadic emplace
|
||||
|
||||
If your application requires C++11, you may now use a variadic emplace for
|
||||
semantic values:
|
||||
If your application requires C++11 and you don't use symbol constructors,
|
||||
you may now use a variadic emplace for semantic values:
|
||||
|
||||
%define api.value.type variant
|
||||
%token <std::pair<int, int>> PAIR
|
||||
|
||||
12
data/c++.m4
12
data/c++.m4
@@ -332,7 +332,17 @@ m4_define([b4_symbol_type_declare],
|
||||
};
|
||||
|
||||
/// "External" symbols: returned by the scanner.
|
||||
typedef basic_symbol<by_type> symbol_type;
|
||||
struct symbol_type : basic_symbol<by_type>
|
||||
{]b4_variant_if([[
|
||||
/// Superclass.
|
||||
typedef basic_symbol<by_type> super_type;
|
||||
|
||||
/// Empty symbol.
|
||||
symbol_type () {};
|
||||
|
||||
/// Constructor for valueless symbols, and symbols from each type.
|
||||
]b4_type_foreach([_b4_symbol_constructor_declare])[
|
||||
]])[};
|
||||
]])
|
||||
|
||||
|
||||
|
||||
@@ -368,25 +368,21 @@ m4_define([_b4_token_maker_declare],
|
||||
])])
|
||||
|
||||
|
||||
# _b4_type_constructor_declare(SYMBOL-NUM...)
|
||||
# -------------------------------------------
|
||||
# _b4_symbol_constructor_declare(SYMBOL-NUM...)
|
||||
# ---------------------------------------------
|
||||
# Declare a unique make_symbol for all the SYMBOL-NUM (they
|
||||
# have the same type). Use at class-level.
|
||||
m4_define([_b4_type_constructor_declare],
|
||||
m4_define([_b4_symbol_constructor_declare],
|
||||
[m4_ifval(_b4_includes_tokens($@),
|
||||
[#if 201103L <= YY_CPLUSPLUS
|
||||
static
|
||||
symbol_type
|
||||
make_symbol (dnl
|
||||
b4_join([int tok],
|
||||
symbol_type (b4_join(
|
||||
[int tok],
|
||||
b4_symbol_if([$1], [has_type],
|
||||
[b4_symbol([$1], [type]) v]),
|
||||
b4_locations_if([location_type l])));
|
||||
#else
|
||||
static
|
||||
symbol_type
|
||||
make_symbol (dnl
|
||||
b4_join([int tok],
|
||||
symbol_type (b4_join(
|
||||
[int tok],
|
||||
b4_symbol_if([$1], [has_type],
|
||||
[const b4_symbol([$1], [type])& v]),
|
||||
b4_locations_if([const location_type& l])));
|
||||
@@ -399,7 +395,6 @@ b4_join([int tok],
|
||||
# Declare symbol constructors. Use at class-level.
|
||||
m4_define([b4_symbol_constructor_declare],
|
||||
[ // Symbol constructors declarations.
|
||||
b4_type_foreach([_b4_type_constructor_declare])
|
||||
b4_symbol_foreach([_b4_token_maker_declare])])
|
||||
|
||||
|
||||
@@ -437,8 +432,8 @@ m4_define([_b4_token_maker_define],
|
||||
])])
|
||||
|
||||
|
||||
# _b4_type_constructor_define(SYMBOL-NUM...)
|
||||
# ------------------------------------------
|
||||
# _b4_symbol_constructor_define(SYMBOL-NUM...)
|
||||
# --------------------------------------------
|
||||
# Declare a unique make_symbol for all the SYMBOL-NUM (they
|
||||
# have the same type). Use at class-level.
|
||||
m4_define([_b4_type_clause],
|
||||
@@ -447,38 +442,36 @@ m4_define([_b4_type_clause],
|
||||
[tok == token::b4_symbol([$1], [id])],
|
||||
[tok == b4_symbol([$1], [user_number])])])])
|
||||
|
||||
m4_define([_b4_type_constructor_define],
|
||||
m4_define([_b4_symbol_constructor_define],
|
||||
[m4_ifval(_b4_includes_tokens($@),
|
||||
[#if 201103L <= YY_CPLUSPLUS
|
||||
[[#if 201103L <= YY_CPLUSPLUS
|
||||
inline
|
||||
b4_parser_class_name::symbol_type
|
||||
b4_parser_class_name::make_symbol (dnl
|
||||
b4_join([int tok],
|
||||
]b4_parser_class_name[::symbol_type::symbol_type (]b4_join(
|
||||
[int tok],
|
||||
b4_symbol_if([$1], [has_type],
|
||||
[b4_symbol([$1], [type]) v]),
|
||||
b4_locations_if([location_type l])))
|
||||
{b4_parse_assert_if([
|
||||
assert (m4_join([ || ], m4_map_sep([_b4_type_clause], [, ], [$@])));])[
|
||||
return symbol_type (]b4_join([token_type (tok)],
|
||||
b4_symbol_if([$1], [has_type], [std::move (v)]),
|
||||
b4_locations_if([std::move (l)])));
|
||||
b4_locations_if([location_type l]))[)
|
||||
: super_type(]b4_join([token_type (tok)],
|
||||
b4_symbol_if([$1], [has_type], [std::move (v)]),
|
||||
b4_locations_if([std::move (l)]))[)
|
||||
{
|
||||
YYASSERT (]m4_join([ || ], m4_map_sep([_b4_type_clause], [, ], [$@]))[);
|
||||
}
|
||||
#else
|
||||
inline
|
||||
b4_parser_class_name::symbol_type
|
||||
b4_parser_class_name::make_symbol (dnl
|
||||
b4_join([int tok],
|
||||
]b4_parser_class_name[::symbol_type::symbol_type (]b4_join(
|
||||
[int tok],
|
||||
b4_symbol_if([$1], [has_type],
|
||||
[const b4_symbol([$1], [type])& v]),
|
||||
b4_locations_if([const location_type& l])))
|
||||
{b4_parse_assert_if([
|
||||
assert (m4_join([ || ], m4_map_sep([_b4_type_clause], [, ], [$@])));])[
|
||||
return symbol_type (]b4_join([token_type (tok)],
|
||||
b4_symbol_if([$1], [has_type], [v]),
|
||||
b4_locations_if([l])));
|
||||
b4_locations_if([const location_type& l]))[)
|
||||
: super_type(]b4_join([token_type (tok)],
|
||||
b4_symbol_if([$1], [has_type], [v]),
|
||||
b4_locations_if([l]))[)
|
||||
{
|
||||
YYASSERT (]m4_join([ || ], m4_map_sep([_b4_type_clause], [, ], [$@]))[);
|
||||
}
|
||||
#endif
|
||||
])])
|
||||
]])])
|
||||
|
||||
|
||||
# b4_basic_symbol_constructor_declare(SYMBOL-NUM)
|
||||
@@ -532,5 +525,5 @@ m4_define([b4_basic_symbol_constructor_define],
|
||||
# Define the overloaded versions of make_symbol for all the value types.
|
||||
m4_define([b4_symbol_constructor_define],
|
||||
[ // Implementation of make_symbol for each symbol type.
|
||||
b4_type_foreach([_b4_type_constructor_define])
|
||||
b4_type_foreach([_b4_symbol_constructor_define])
|
||||
b4_symbol_foreach([_b4_token_maker_define])])
|
||||
|
||||
@@ -11500,6 +11500,57 @@ additional arguments.
|
||||
|
||||
For each token type, Bison generates @code{symbol_type} constructors as follows.
|
||||
|
||||
@deftypeop {Constructor} {parser::symbol_type} {} {symbol_type} (int @var{token}, const @var{value_type}& @var{value}, const location_type& @var{location})
|
||||
@deftypeopx {Constructor} {parser::symbol_type} {} {symbol_type} (int @var{token}, const location_type& @var{location})
|
||||
@deftypeopx {Constructor} {parser::symbol_type} {} {symbol_type} (int @var{token}, const @var{value_type}& @var{value})
|
||||
@deftypeopx {Constructor} {parser::symbol_type} {} {symbol_type} (int @var{token})
|
||||
Build a complete terminal symbol for the token type @var{token} (including
|
||||
the @code{api.token.prefix}), whose semantic value, if it has one, is
|
||||
@var{value} of adequate @var{value_type}. Pass the @var{location} iff
|
||||
location tracking is enabled.
|
||||
|
||||
Consistency between @var{token} and @var{value_type} is checked via an
|
||||
@code{assert}.
|
||||
@end deftypeop
|
||||
|
||||
For instance, given the following declarations:
|
||||
|
||||
@example
|
||||
%define api.token.prefix @{TOK_@}
|
||||
%token <std::string> IDENTIFIER;
|
||||
%token <int> INTEGER;
|
||||
%token ':';
|
||||
@end example
|
||||
|
||||
@noindent
|
||||
you may use these constructors:
|
||||
|
||||
@example
|
||||
symbol_type (int token, const std::string&, const location_type&);
|
||||
symbol_type (int token, const int&, const location_type&);
|
||||
symbol_type (int token, const location_type&);
|
||||
@end example
|
||||
|
||||
@noindent
|
||||
which should be used in a Flex-scanner as follows.
|
||||
|
||||
@example
|
||||
%%
|
||||
[a-z]+ return yy::parser::symbol_type (TOK_IDENTIFIER, yytext, loc);
|
||||
[0-9]+ return yy::parser::symbol_type (TOK_INTEGER, text_to_int (yytext), loc);
|
||||
":" return yy::parser::symbol_type (':', loc);
|
||||
<<EOF>> return yy::parser::symbol_type (0, loc);
|
||||
@end example
|
||||
|
||||
@sp 1
|
||||
|
||||
Note that it is possible to generate and compile type incorrect code
|
||||
(e.g. @samp{symbol_type (':', yytext, loc)}). It will fail at run time,
|
||||
provided the assertions are enabled (i.e., @option{-DNDEBUG} was not passed
|
||||
to the compiler). Bison supports an alternative that guarantees that type
|
||||
incorrect code will not even compile. Indeed, it generates @emph{named
|
||||
constructors} as follows.
|
||||
|
||||
@deftypemethod {parser} {symbol_type} {make_@var{token}} (const @var{value_type}& @var{value}, const location_type& @var{location})
|
||||
@deftypemethodx {parser} {symbol_type} {make_@var{token}} (const location_type& @var{location})
|
||||
@deftypemethodx {parser} {symbol_type} {make_@var{token}} (const @var{value_type}& @var{value})
|
||||
@@ -11531,7 +11582,7 @@ symbol_type make_EOF (const location_type&);
|
||||
@end example
|
||||
|
||||
@noindent
|
||||
which should be used in a Flex-scanner as follows.
|
||||
which should be used in a scanner as follows.
|
||||
|
||||
@example
|
||||
[a-z]+ return yy::parser::make_IDENTIFIER (yytext, loc);
|
||||
@@ -11544,6 +11595,7 @@ Tokens that do not have an identifier are not accessible: you cannot simply
|
||||
use characters such as @code{':'}, they must be declared with @code{%token},
|
||||
including the end-of-file token.
|
||||
|
||||
|
||||
@node A Complete C++ Example
|
||||
@subsection A Complete C++ Example
|
||||
|
||||
|
||||
@@ -288,6 +288,24 @@ m4_foreach([b4_skel], [[yacc.c], [glr.c], [lalr1.cc], [glr.cc]],
|
||||
AT_VAL.build (std::pair<std::string, std::string> ("two", "deux"));],
|
||||
[10:11, two:deux])
|
||||
|
||||
# Type-based token constructors on move-only types, and types with commas.
|
||||
AT_TEST([%skeleton "]b4_skel["
|
||||
%define api.value.type variant
|
||||
%define api.token.constructor],
|
||||
[[%token <std::pair<int, int>> '1' '2';]],
|
||||
['1' '2'
|
||||
{
|
||||
std::cout << $1.first << ':' << $1.second << ", "
|
||||
<< $2.first << ':' << $2.second << '\n';
|
||||
}],
|
||||
["12"],
|
||||
[[typedef yy::parser::symbol_type symbol;
|
||||
if (res)
|
||||
return symbol (res, std::make_pair (res - '0', res - '0' + 1));
|
||||
else
|
||||
return symbol (res)]],
|
||||
[1:2, 2:3])
|
||||
|
||||
# Move-only types, and variadic emplace.
|
||||
AT_TEST([%skeleton "]b4_skel["
|
||||
%code requires { #include <memory> }
|
||||
@@ -336,11 +354,11 @@ m4_foreach([b4_skel], [[yacc.c], [glr.c], [lalr1.cc], [glr.cc]],
|
||||
<< $2.first << ':' << $2.second << '\n'; }],
|
||||
["12"],
|
||||
[[if (res == '1')
|
||||
return yy::parser::make_symbol ('1', std::make_unique<int> (10));
|
||||
return {res, std::make_unique<int> (10)};
|
||||
else if (res == '2')
|
||||
return yy::parser::make_symbol ('2', std::make_pair (21, 22));
|
||||
return {res, std::make_pair (21, 22)};
|
||||
else
|
||||
return yy::parser::make_symbol (0)]],
|
||||
return res]],
|
||||
[10, 21:22],
|
||||
[AT_REQUIRE_CXX_STD(14, [echo "$at_std not supported"; continue])])
|
||||
|
||||
|
||||
Reference in New Issue
Block a user