mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-21 18:23:03 +00:00
yacc.c: also define a symbol number for the empty token
This is not only cleaner, it also protects us from mixing signed
values (YYEMPTY is #defined as -2) with unsigned types (the
yysymbol_type_t enum is typically compiled as a small unsigned).
For instance GCC 9:
input.c: In function 'yyparse':
input.c:1107:7: error: conversion to 'unsigned int' from 'int'
may change the sign of the result
[-Werror=sign-conversion]
1107 | yyn += yytoken;
| ^~
input.c:1107:10: error: conversion to 'int' from 'unsigned int'
may change the sign of the result
[-Werror=sign-conversion]
1107 | yyn += yytoken;
| ^~~~~~~
input.c:1108:47: error: comparison of integer expressions of
different signedness:
'yytype_int8' {aka 'const signed char'} and
'yysymbol_type_t' {aka 'enum yysymbol_type_t'}
[-Werror=sign-compare]
1108 | if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken)
| ^~
input.c:702:25: error: operand of ?: changes signedness from 'int'
to 'unsigned int' due to unsignedness of
other operand [-Werror=sign-compare]
702 | #define YYEMPTY (-2)
| ^~~~
input.c:1220:33: note: in expansion of macro 'YYEMPTY'
1220 | yytoken = yychar == YYEMPTY ? YYEMPTY : YYTRANSLATE (yychar);
| ^~~~~~~
input.c:1220:41: error: unsigned conversion from 'int' to
'unsigned int' changes value
from '-2' to '4294967294'
[-Werror=sign-conversion]
1220 | yytoken = yychar == YYEMPTY ? YYEMPTY : YYTRANSLATE (yychar);
| ^
Eventually, it might be interesting to move away from -2 (which is the
only possible negative symbol number) and use the next available
number, to save bits. We could actually even simply use "0" and shift
the rest, which would allow us to write "!yytoken" to really mean
"yytoken == YYEMPTY".
* data/skeletons/c.m4 (b4_declare_symbol_enum): Define YYSYMBOL_YYEMPTY.
* data/skeletons/yacc.c: Use it.
* src/parse-gram.y (yyreport_syntax_error): Use YYSYMBOL_YYEMPTY, not
YYEMPTY, when dealing with a symbol.
* tests/regression.at: Adjust.
This commit is contained in:
16
TODO
16
TODO
@@ -53,6 +53,22 @@ would actually also make the following point gracefully handled (status of
|
|||||||
YYERRCODE, YYUNDEFTOK, etc.). Possibly we could also define YYEMPTY (twice:
|
YYERRCODE, YYUNDEFTOK, etc.). Possibly we could also define YYEMPTY (twice:
|
||||||
as a token and as a symbol). And YYEOF.
|
as a token and as a symbol). And YYEOF.
|
||||||
|
|
||||||
|
It seems to work well. Yet we have a weird case: the "error" token:
|
||||||
|
|
||||||
|
enum yysymbol_type_t
|
||||||
|
{
|
||||||
|
YYSYMBOL_YYEMPTY = -2,
|
||||||
|
YYSYMBOL_YYEOF = 0,
|
||||||
|
YYSYMBOL_error = 1,
|
||||||
|
YYSYMBOL_YYUNDEF = 2,
|
||||||
|
YYSYMBOL_YYACCEPT = 61,
|
||||||
|
...
|
||||||
|
|
||||||
|
YYSYMBOL_error looks weird. We should maybe rename this as
|
||||||
|
"YYSYMBOL_YYERROR", even though it should not be confused with the YYERROR
|
||||||
|
macro.
|
||||||
|
|
||||||
|
|
||||||
** Consistency
|
** Consistency
|
||||||
YYUNDEFTOK is an internal symbol number, as is YYTERROR.
|
YYUNDEFTOK is an internal symbol number, as is YYTERROR.
|
||||||
But YYERRCODE is an external token number.
|
But YYERRCODE is an external token number.
|
||||||
|
|||||||
@@ -411,6 +411,7 @@ m4_define([_b4_symbol],
|
|||||||
# if that would produce an invalid symbol.
|
# if that would produce an invalid symbol.
|
||||||
m4_define([b4_symbol_sid],
|
m4_define([b4_symbol_sid],
|
||||||
[m4_case([$1],
|
[m4_case([$1],
|
||||||
|
[-2], [[YYSYMBOL_YYEMPTY]],
|
||||||
[0], [[YYSYMBOL_YYEOF]],
|
[0], [[YYSYMBOL_YYEOF]],
|
||||||
[m4_bmatch(m4_quote(b4_symbol([$1], [tag])),
|
[m4_bmatch(m4_quote(b4_symbol([$1], [tag])),
|
||||||
[^\$accept$], [[YYSYMBOL_YYACCEPT]],
|
[^\$accept$], [[YYSYMBOL_YYACCEPT]],
|
||||||
|
|||||||
@@ -504,12 +504,15 @@ m4_define([b4_symbol_enum],
|
|||||||
# b4_declare_symbol_enum
|
# b4_declare_symbol_enum
|
||||||
# ----------------------
|
# ----------------------
|
||||||
# The definition of the symbol internal numbers as an enum.
|
# The definition of the symbol internal numbers as an enum.
|
||||||
|
# Defining YYSYMBOL_YYEMPTY here is important: it forces the compiler
|
||||||
|
# to use a signed type, which matters for yytoken.
|
||||||
m4_define([b4_declare_symbol_enum],
|
m4_define([b4_declare_symbol_enum],
|
||||||
[[/* Symbol type. */
|
[[/* Symbol type. */
|
||||||
enum yysymbol_type_t
|
enum yysymbol_type_t
|
||||||
{
|
{
|
||||||
]m4_join([,
|
]m4_join([,
|
||||||
],
|
],
|
||||||
|
]b4_symbol_sid([-2])[ = -2,
|
||||||
b4_symbol_map([b4_symbol_enum]))[
|
b4_symbol_map([b4_symbol_enum]))[
|
||||||
};
|
};
|
||||||
typedef enum yysymbol_type_t yysymbol_type_t;
|
typedef enum yysymbol_type_t yysymbol_type_t;
|
||||||
|
|||||||
@@ -1348,7 +1348,7 @@ yysyntax_error_arguments (const yyparse_context_t *yyctx,
|
|||||||
one exception: it will still contain any token that will not be
|
one exception: it will still contain any token that will not be
|
||||||
accepted due to an error action in a later state.]])[
|
accepted due to an error action in a later state.]])[
|
||||||
*/
|
*/
|
||||||
if (yyctx->yytoken != YYEMPTY)
|
if (yyctx->yytoken != YYSYMBOL_YYEMPTY)
|
||||||
{
|
{
|
||||||
int yyn;]b4_lac_if([[
|
int yyn;]b4_lac_if([[
|
||||||
YYDPRINTF ((stderr, "Constructing syntax error message\n"));]])[
|
YYDPRINTF ((stderr, "Constructing syntax error message\n"));]])[
|
||||||
@@ -1576,7 +1576,7 @@ yyparse (]m4_ifset([b4_parse_param], [b4_formals(b4_parse_param)], [void])[)]])[
|
|||||||
/* The return value of yyparse. */
|
/* The return value of yyparse. */
|
||||||
int yyresult;
|
int yyresult;
|
||||||
/* Lookahead token as an internal (translated) token number. */
|
/* Lookahead token as an internal (translated) token number. */
|
||||||
yysymbol_type_t yytoken = 0;
|
yysymbol_type_t yytoken = YYSYMBOL_YYEMPTY;
|
||||||
/* The variables used to return semantic value and location from the
|
/* The variables used to return semantic value and location from the
|
||||||
action routines. */
|
action routines. */
|
||||||
YYSTYPE yyval;]b4_locations_if([[
|
YYSTYPE yyval;]b4_locations_if([[
|
||||||
@@ -1889,7 +1889,7 @@ yyreduce:
|
|||||||
yyerrlab:
|
yyerrlab:
|
||||||
/* Make sure we have latest lookahead translation. See comments at
|
/* Make sure we have latest lookahead translation. See comments at
|
||||||
user semantic actions for why this is necessary. */
|
user semantic actions for why this is necessary. */
|
||||||
yytoken = yychar == YYEMPTY ? YYEMPTY : YYTRANSLATE (yychar);
|
yytoken = yychar == YYEMPTY ? YYSYMBOL_YYEMPTY : YYTRANSLATE (yychar);
|
||||||
|
|
||||||
/* If not already recovering from an error, report this error. */
|
/* If not already recovering from an error, report this error. */
|
||||||
if (!yyerrstatus)
|
if (!yyerrstatus)
|
||||||
|
|||||||
@@ -808,7 +808,7 @@ yyreport_syntax_error (const yyparse_context_t *ctx)
|
|||||||
const char *argv[ARGS_MAX];
|
const char *argv[ARGS_MAX];
|
||||||
int argc = 0;
|
int argc = 0;
|
||||||
yysymbol_type_t unexpected = yyparse_context_token (ctx);
|
yysymbol_type_t unexpected = yyparse_context_token (ctx);
|
||||||
if (unexpected != YYEMPTY)
|
if (unexpected != YYSYMBOL_YYEMPTY)
|
||||||
{
|
{
|
||||||
argv[argc++] = yysymbol_name (unexpected);
|
argv[argc++] = yysymbol_name (unexpected);
|
||||||
yysymbol_type_t expected[ARGS_MAX - 1];
|
yysymbol_type_t expected[ARGS_MAX - 1];
|
||||||
|
|||||||
@@ -665,7 +665,7 @@ AT_BISON_CHECK([-v -o input.c input.y])
|
|||||||
[sed -n 's/ *$//;/^static const.*\[\] =/,/^}/p' input.c >tables.c]
|
[sed -n 's/ *$//;/^static const.*\[\] =/,/^}/p' input.c >tables.c]
|
||||||
|
|
||||||
AT_CHECK([[cat tables.c]], 0,
|
AT_CHECK([[cat tables.c]], 0,
|
||||||
[[static const yytype_int8 yytranslate[] =
|
[[static const yysymbol_type_t yytranslate[] =
|
||||||
{
|
{
|
||||||
0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||||
|
|||||||
Reference in New Issue
Block a user