mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-14 23:03:04 +00:00
c++: exhibit a safe symbol_type
Instead of introducing make_symbol (whose name, btw, somewhat
infringes on the user's "name space", if she defines a token named
"symbol"), let's make the construction of symbol_type safer, using
assertions.
For instance with:
%token ':' <std::string> ID <int> INT;
generate:
symbol_type (int token, const std::string&);
symbol_type (int token, const int&);
symbol_type (int token);
It does mean that now named token constructors (make_ID, make_INT,
etc.) go through a useless assert, but I think we can ignore this: I
assume any decent compiler will inline the symbol_type ctor inside the
make_TOKEN functions, which will show that the assert is trivially
verified, hence I expect no code will be emitted for it. And anyway,
that's an assert, NDEBUG controls it.
* data/c++.m4 (symbol_type): Turn into a subclass of
basic_symbol<by_type>.
Declare symbol constructors when variants are enabled.
* data/variant.hh (_b4_type_constructor_declare)
(_b4_type_constructor_define): Replace with...
(_b4_symbol_constructor_declare, _b4_symbol_constructor_define): these.
Generate symbol_type constructors.
* doc/bison.texi (Complete Symbols): Document.
* tests/types.at: Check.
This commit is contained in:
30
NEWS
30
NEWS
@@ -96,10 +96,36 @@ GNU Bison NEWS
|
|||||||
until it sees the '='. So we notate the two possible reductions to
|
until it sees the '='. So we notate the two possible reductions to
|
||||||
indicate that each conflicts in one rule.
|
indicate that each conflicts in one rule.
|
||||||
|
|
||||||
|
*** C++: Actual token constructors
|
||||||
|
|
||||||
|
When variants and token constructors are enabled, in addition to the
|
||||||
|
type-safe named token constructors (make_ID, make_INT, etc.), we now
|
||||||
|
generate genuine constructors for symbol_type.
|
||||||
|
|
||||||
|
For instance with these declarations
|
||||||
|
|
||||||
|
%token ':'
|
||||||
|
<std::string> ID
|
||||||
|
<int> INT;
|
||||||
|
|
||||||
|
you may use these constructors:
|
||||||
|
|
||||||
|
symbol_type (int token, const std::string&);
|
||||||
|
symbol_type (int token, const int&);
|
||||||
|
symbol_type (int token);
|
||||||
|
|
||||||
|
which should be used in a Flex-scanner as follows.
|
||||||
|
|
||||||
|
%%
|
||||||
|
[a-z]+ return yy::parser::symbol_type (ID, yytext);
|
||||||
|
[0-9]+ return yy::parser::symbol_type (INT, text_to_int (yytext));
|
||||||
|
":" return yy::parser::symbol_type (':');
|
||||||
|
<<EOF>> return yy::parser::symbol_type (0);
|
||||||
|
|
||||||
*** C++: Variadic emplace
|
*** C++: Variadic emplace
|
||||||
|
|
||||||
If your application requires C++11, you may now use a variadic emplace for
|
If your application requires C++11 and you don't use symbol constructors,
|
||||||
semantic values:
|
you may now use a variadic emplace for semantic values:
|
||||||
|
|
||||||
%define api.value.type variant
|
%define api.value.type variant
|
||||||
%token <std::pair<int, int>> PAIR
|
%token <std::pair<int, int>> PAIR
|
||||||
|
|||||||
12
data/c++.m4
12
data/c++.m4
@@ -332,7 +332,17 @@ m4_define([b4_symbol_type_declare],
|
|||||||
};
|
};
|
||||||
|
|
||||||
/// "External" symbols: returned by the scanner.
|
/// "External" symbols: returned by the scanner.
|
||||||
typedef basic_symbol<by_type> symbol_type;
|
struct symbol_type : basic_symbol<by_type>
|
||||||
|
{]b4_variant_if([[
|
||||||
|
/// Superclass.
|
||||||
|
typedef basic_symbol<by_type> super_type;
|
||||||
|
|
||||||
|
/// Empty symbol.
|
||||||
|
symbol_type () {};
|
||||||
|
|
||||||
|
/// Constructor for valueless symbols, and symbols from each type.
|
||||||
|
]b4_type_foreach([_b4_symbol_constructor_declare])[
|
||||||
|
]])[};
|
||||||
]])
|
]])
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -368,25 +368,21 @@ m4_define([_b4_token_maker_declare],
|
|||||||
])])
|
])])
|
||||||
|
|
||||||
|
|
||||||
# _b4_type_constructor_declare(SYMBOL-NUM...)
|
# _b4_symbol_constructor_declare(SYMBOL-NUM...)
|
||||||
# -------------------------------------------
|
# ---------------------------------------------
|
||||||
# Declare a unique make_symbol for all the SYMBOL-NUM (they
|
# Declare a unique make_symbol for all the SYMBOL-NUM (they
|
||||||
# have the same type). Use at class-level.
|
# have the same type). Use at class-level.
|
||||||
m4_define([_b4_type_constructor_declare],
|
m4_define([_b4_symbol_constructor_declare],
|
||||||
[m4_ifval(_b4_includes_tokens($@),
|
[m4_ifval(_b4_includes_tokens($@),
|
||||||
[#if 201103L <= YY_CPLUSPLUS
|
[#if 201103L <= YY_CPLUSPLUS
|
||||||
static
|
symbol_type (b4_join(
|
||||||
symbol_type
|
[int tok],
|
||||||
make_symbol (dnl
|
|
||||||
b4_join([int tok],
|
|
||||||
b4_symbol_if([$1], [has_type],
|
b4_symbol_if([$1], [has_type],
|
||||||
[b4_symbol([$1], [type]) v]),
|
[b4_symbol([$1], [type]) v]),
|
||||||
b4_locations_if([location_type l])));
|
b4_locations_if([location_type l])));
|
||||||
#else
|
#else
|
||||||
static
|
symbol_type (b4_join(
|
||||||
symbol_type
|
[int tok],
|
||||||
make_symbol (dnl
|
|
||||||
b4_join([int tok],
|
|
||||||
b4_symbol_if([$1], [has_type],
|
b4_symbol_if([$1], [has_type],
|
||||||
[const b4_symbol([$1], [type])& v]),
|
[const b4_symbol([$1], [type])& v]),
|
||||||
b4_locations_if([const location_type& l])));
|
b4_locations_if([const location_type& l])));
|
||||||
@@ -399,7 +395,6 @@ b4_join([int tok],
|
|||||||
# Declare symbol constructors. Use at class-level.
|
# Declare symbol constructors. Use at class-level.
|
||||||
m4_define([b4_symbol_constructor_declare],
|
m4_define([b4_symbol_constructor_declare],
|
||||||
[ // Symbol constructors declarations.
|
[ // Symbol constructors declarations.
|
||||||
b4_type_foreach([_b4_type_constructor_declare])
|
|
||||||
b4_symbol_foreach([_b4_token_maker_declare])])
|
b4_symbol_foreach([_b4_token_maker_declare])])
|
||||||
|
|
||||||
|
|
||||||
@@ -437,8 +432,8 @@ m4_define([_b4_token_maker_define],
|
|||||||
])])
|
])])
|
||||||
|
|
||||||
|
|
||||||
# _b4_type_constructor_define(SYMBOL-NUM...)
|
# _b4_symbol_constructor_define(SYMBOL-NUM...)
|
||||||
# ------------------------------------------
|
# --------------------------------------------
|
||||||
# Declare a unique make_symbol for all the SYMBOL-NUM (they
|
# Declare a unique make_symbol for all the SYMBOL-NUM (they
|
||||||
# have the same type). Use at class-level.
|
# have the same type). Use at class-level.
|
||||||
m4_define([_b4_type_clause],
|
m4_define([_b4_type_clause],
|
||||||
@@ -447,38 +442,36 @@ m4_define([_b4_type_clause],
|
|||||||
[tok == token::b4_symbol([$1], [id])],
|
[tok == token::b4_symbol([$1], [id])],
|
||||||
[tok == b4_symbol([$1], [user_number])])])])
|
[tok == b4_symbol([$1], [user_number])])])])
|
||||||
|
|
||||||
m4_define([_b4_type_constructor_define],
|
m4_define([_b4_symbol_constructor_define],
|
||||||
[m4_ifval(_b4_includes_tokens($@),
|
[m4_ifval(_b4_includes_tokens($@),
|
||||||
[#if 201103L <= YY_CPLUSPLUS
|
[[#if 201103L <= YY_CPLUSPLUS
|
||||||
inline
|
inline
|
||||||
b4_parser_class_name::symbol_type
|
]b4_parser_class_name[::symbol_type::symbol_type (]b4_join(
|
||||||
b4_parser_class_name::make_symbol (dnl
|
[int tok],
|
||||||
b4_join([int tok],
|
|
||||||
b4_symbol_if([$1], [has_type],
|
b4_symbol_if([$1], [has_type],
|
||||||
[b4_symbol([$1], [type]) v]),
|
[b4_symbol([$1], [type]) v]),
|
||||||
b4_locations_if([location_type l])))
|
b4_locations_if([location_type l]))[)
|
||||||
{b4_parse_assert_if([
|
: super_type(]b4_join([token_type (tok)],
|
||||||
assert (m4_join([ || ], m4_map_sep([_b4_type_clause], [, ], [$@])));])[
|
b4_symbol_if([$1], [has_type], [std::move (v)]),
|
||||||
return symbol_type (]b4_join([token_type (tok)],
|
b4_locations_if([std::move (l)]))[)
|
||||||
b4_symbol_if([$1], [has_type], [std::move (v)]),
|
{
|
||||||
b4_locations_if([std::move (l)])));
|
YYASSERT (]m4_join([ || ], m4_map_sep([_b4_type_clause], [, ], [$@]))[);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
inline
|
inline
|
||||||
b4_parser_class_name::symbol_type
|
]b4_parser_class_name[::symbol_type::symbol_type (]b4_join(
|
||||||
b4_parser_class_name::make_symbol (dnl
|
[int tok],
|
||||||
b4_join([int tok],
|
|
||||||
b4_symbol_if([$1], [has_type],
|
b4_symbol_if([$1], [has_type],
|
||||||
[const b4_symbol([$1], [type])& v]),
|
[const b4_symbol([$1], [type])& v]),
|
||||||
b4_locations_if([const location_type& l])))
|
b4_locations_if([const location_type& l]))[)
|
||||||
{b4_parse_assert_if([
|
: super_type(]b4_join([token_type (tok)],
|
||||||
assert (m4_join([ || ], m4_map_sep([_b4_type_clause], [, ], [$@])));])[
|
b4_symbol_if([$1], [has_type], [v]),
|
||||||
return symbol_type (]b4_join([token_type (tok)],
|
b4_locations_if([l]))[)
|
||||||
b4_symbol_if([$1], [has_type], [v]),
|
{
|
||||||
b4_locations_if([l])));
|
YYASSERT (]m4_join([ || ], m4_map_sep([_b4_type_clause], [, ], [$@]))[);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
])])
|
]])])
|
||||||
|
|
||||||
|
|
||||||
# b4_basic_symbol_constructor_declare(SYMBOL-NUM)
|
# b4_basic_symbol_constructor_declare(SYMBOL-NUM)
|
||||||
@@ -532,5 +525,5 @@ m4_define([b4_basic_symbol_constructor_define],
|
|||||||
# Define the overloaded versions of make_symbol for all the value types.
|
# Define the overloaded versions of make_symbol for all the value types.
|
||||||
m4_define([b4_symbol_constructor_define],
|
m4_define([b4_symbol_constructor_define],
|
||||||
[ // Implementation of make_symbol for each symbol type.
|
[ // Implementation of make_symbol for each symbol type.
|
||||||
b4_type_foreach([_b4_type_constructor_define])
|
b4_type_foreach([_b4_symbol_constructor_define])
|
||||||
b4_symbol_foreach([_b4_token_maker_define])])
|
b4_symbol_foreach([_b4_token_maker_define])])
|
||||||
|
|||||||
@@ -11500,6 +11500,57 @@ additional arguments.
|
|||||||
|
|
||||||
For each token type, Bison generates named constructors as follows.
|
For each token type, Bison generates named constructors as follows.
|
||||||
|
|
||||||
|
@deftypeop {Constructor} {parser::symbol_type} {} {symbol_type} (int @var{token}, const @var{value_type}& @var{value}, const location_type& @var{location})
|
||||||
|
@deftypeopx {Constructor} {parser::symbol_type} {} {symbol_type} (int @var{token}, const location_type& @var{location})
|
||||||
|
@deftypeopx {Constructor} {parser::symbol_type} {} {symbol_type} (int @var{token}, const @var{value_type}& @var{value})
|
||||||
|
@deftypeopx {Constructor} {parser::symbol_type} {} {symbol_type} (int @var{token})
|
||||||
|
Build a complete terminal symbol for the token type @var{token} (including
|
||||||
|
the @code{api.token.prefix}), whose semantic value, if it has one, is
|
||||||
|
@var{value} of adequate @var{value_type}. Pass the @var{location} iff
|
||||||
|
location tracking is enabled.
|
||||||
|
|
||||||
|
Consistency between @var{token} and @var{value_type} is checked via an
|
||||||
|
@code{assert}.
|
||||||
|
@end deftypeop
|
||||||
|
|
||||||
|
For instance, given the following declarations:
|
||||||
|
|
||||||
|
@example
|
||||||
|
%define api.token.prefix @{TOK_@}
|
||||||
|
%token <std::string> IDENTIFIER;
|
||||||
|
%token <int> INTEGER;
|
||||||
|
%token ':';
|
||||||
|
@end example
|
||||||
|
|
||||||
|
@noindent
|
||||||
|
you may use these constructors:
|
||||||
|
|
||||||
|
@example
|
||||||
|
symbol_type (int token, const std::string&, const location_type&);
|
||||||
|
symbol_type (int token, const int&, const location_type&);
|
||||||
|
symbol_type (int token, const location_type&);
|
||||||
|
@end example
|
||||||
|
|
||||||
|
@noindent
|
||||||
|
which should be used in a Flex-scanner as follows.
|
||||||
|
|
||||||
|
@example
|
||||||
|
%%
|
||||||
|
[a-z]+ return yy::parser::symbol_type (TOK_IDENTIFIER, yytext, loc);
|
||||||
|
[0-9]+ return yy::parser::symbol_type (TOK_INTEGER, text_to_int (yytext), loc);
|
||||||
|
":" return yy::parser::symbol_type (':', loc);
|
||||||
|
<<EOF>> return yy::parser::symbol_type (0, loc);
|
||||||
|
@end example
|
||||||
|
|
||||||
|
@sp 1
|
||||||
|
|
||||||
|
Note that it is possible to generate and compile type incorrect code
|
||||||
|
(e.g. @samp{symbol_type (':', yytext, loc)}). It will fail at run time,
|
||||||
|
provided the assertions are enabled (i.e., @option{-DNDEBUG} was not passed
|
||||||
|
to the compiler). Bison supports an alternative that guarantees that type
|
||||||
|
incorrect code will not even compile. Indeed, it generates @emph{named
|
||||||
|
constructors} as follows.
|
||||||
|
|
||||||
@deftypemethod {parser} {symbol_type} {make_@var{token}} (const @var{value_type}& @var{value}, const location_type& @var{location})
|
@deftypemethod {parser} {symbol_type} {make_@var{token}} (const @var{value_type}& @var{value}, const location_type& @var{location})
|
||||||
@deftypemethodx {parser} {symbol_type} {make_@var{token}} (const location_type& @var{location})
|
@deftypemethodx {parser} {symbol_type} {make_@var{token}} (const location_type& @var{location})
|
||||||
@deftypemethodx {parser} {symbol_type} {make_@var{token}} (const @var{value_type}& @var{value})
|
@deftypemethodx {parser} {symbol_type} {make_@var{token}} (const @var{value_type}& @var{value})
|
||||||
@@ -11531,7 +11582,7 @@ symbol_type make_EOF (const location_type&);
|
|||||||
@end example
|
@end example
|
||||||
|
|
||||||
@noindent
|
@noindent
|
||||||
which should be used in a Flex-scanner as follows.
|
which should be used in a scanner as follows.
|
||||||
|
|
||||||
@example
|
@example
|
||||||
[a-z]+ return yy::parser::make_IDENTIFIER (yytext, loc);
|
[a-z]+ return yy::parser::make_IDENTIFIER (yytext, loc);
|
||||||
@@ -11544,6 +11595,7 @@ Tokens that do not have an identifier are not accessible: you cannot simply
|
|||||||
use characters such as @code{':'}, they must be declared with @code{%token},
|
use characters such as @code{':'}, they must be declared with @code{%token},
|
||||||
including the end-of-file token.
|
including the end-of-file token.
|
||||||
|
|
||||||
|
|
||||||
@node A Complete C++ Example
|
@node A Complete C++ Example
|
||||||
@subsection A Complete C++ Example
|
@subsection A Complete C++ Example
|
||||||
|
|
||||||
|
|||||||
@@ -288,6 +288,24 @@ m4_foreach([b4_skel], [[yacc.c], [glr.c], [lalr1.cc], [glr.cc]],
|
|||||||
AT_VAL.build (std::pair<std::string, std::string> ("two", "deux"));],
|
AT_VAL.build (std::pair<std::string, std::string> ("two", "deux"));],
|
||||||
[10:11, two:deux])
|
[10:11, two:deux])
|
||||||
|
|
||||||
|
# Type-based token constructors on move-only types, and types with commas.
|
||||||
|
AT_TEST([%skeleton "]b4_skel["
|
||||||
|
%define api.value.type variant
|
||||||
|
%define api.token.constructor],
|
||||||
|
[[%token <std::pair<int, int>> '1' '2';]],
|
||||||
|
['1' '2'
|
||||||
|
{
|
||||||
|
std::cout << $1.first << ':' << $1.second << ", "
|
||||||
|
<< $2.first << ':' << $2.second << '\n';
|
||||||
|
}],
|
||||||
|
["12"],
|
||||||
|
[[typedef yy::parser::symbol_type symbol;
|
||||||
|
if (res)
|
||||||
|
return symbol (res, std::make_pair (res - '0', res - '0' + 1));
|
||||||
|
else
|
||||||
|
return symbol (res)]],
|
||||||
|
[1:2, 2:3])
|
||||||
|
|
||||||
# Move-only types, and variadic emplace.
|
# Move-only types, and variadic emplace.
|
||||||
AT_TEST([%skeleton "]b4_skel["
|
AT_TEST([%skeleton "]b4_skel["
|
||||||
%code requires { #include <memory> }
|
%code requires { #include <memory> }
|
||||||
@@ -336,11 +354,11 @@ m4_foreach([b4_skel], [[yacc.c], [glr.c], [lalr1.cc], [glr.cc]],
|
|||||||
<< $2.first << ':' << $2.second << '\n'; }],
|
<< $2.first << ':' << $2.second << '\n'; }],
|
||||||
["12"],
|
["12"],
|
||||||
[[if (res == '1')
|
[[if (res == '1')
|
||||||
return yy::parser::make_symbol ('1', std::make_unique<int> (10));
|
return {res, std::make_unique<int> (10)};
|
||||||
else if (res == '2')
|
else if (res == '2')
|
||||||
return yy::parser::make_symbol ('2', std::make_pair (21, 22));
|
return {res, std::make_pair (21, 22)};
|
||||||
else
|
else
|
||||||
return yy::parser::make_symbol (0)]],
|
return res]],
|
||||||
[10, 21:22],
|
[10, 21:22],
|
||||||
[AT_REQUIRE_CXX_STD(14, [echo "$at_std not supported"; continue])])
|
[AT_REQUIRE_CXX_STD(14, [echo "$at_std not supported"; continue])])
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user