mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-09 12:23:04 +00:00
yacc: use the most appropriate integral type for state numbers
Currently we properly use the "best" integral type for tables, including those storing state numbers. However the variables for state numbers used in yyparse (and its dependencies such as yy_stack_print) still use int16_t invariably. As a consequence, very large models overflow these variables. Let's use the "best" type for these variables too. It turns out that we can still use 16 bits for twice larger automata: stick to unsigned types. However using 'unsigned' when 16 bits are not enough is troublesome and generates tons of warnings about signedness issues. Instead, let's use 'int'. Reported by Tom Kramer. https://lists.gnu.org/archive/html/bug-bison/2019-09/msg00018.html * data/skeletons/yacc.c (b4_state_num_type): New. (yy_state_num): Be computed from YYNSTATES. * tests/linear: New. * tests/torture.at (State number type): New. Use it.
This commit is contained in:
1
THANKS
1
THANKS
@@ -179,6 +179,7 @@ Tim Landscheidt tim@tim-landscheidt.de
|
|||||||
Tim Van Holder tim.van.holder@pandora.be
|
Tim Van Holder tim.van.holder@pandora.be
|
||||||
Tobias Frost tobi@debian.org
|
Tobias Frost tobi@debian.org
|
||||||
Todd Freed todd.freed@gmail.com
|
Todd Freed todd.freed@gmail.com
|
||||||
|
Tom Kramer kramer@nist.gov
|
||||||
Tom Lane tgl@sss.pgh.pa.us
|
Tom Lane tgl@sss.pgh.pa.us
|
||||||
Tom Tromey tromey@cygnus.com
|
Tom Tromey tromey@cygnus.com
|
||||||
Tomasz Kłoczko kloczko.tomasz@gmail.com
|
Tomasz Kłoczko kloczko.tomasz@gmail.com
|
||||||
|
|||||||
@@ -127,6 +127,18 @@ m4_define([b4_int_type],
|
|||||||
|
|
||||||
[int])])
|
[int])])
|
||||||
|
|
||||||
|
# b4_state_num_type(MIN, MAX)
|
||||||
|
# ---------------------------
|
||||||
|
# Likewise, but prefer 'int' to 'unsigned' for large integers.
|
||||||
|
m4_define([b4_state_num_type],
|
||||||
|
[m4_if(b4_ints_in($@, [0], [255]), [1], [yytype_uint8],
|
||||||
|
b4_ints_in($@, [-128], [127]), [1], [yytype_int8],
|
||||||
|
|
||||||
|
b4_ints_in($@, [0], [65535]), [1], [yytype_uint16],
|
||||||
|
b4_ints_in($@, [-32768], [32767]), [1], [yytype_int16],
|
||||||
|
|
||||||
|
[int])])
|
||||||
|
|
||||||
|
|
||||||
## ----------------- ##
|
## ----------------- ##
|
||||||
## Semantic Values. ##
|
## Semantic Values. ##
|
||||||
@@ -432,7 +444,7 @@ typedef short yytype_int16;
|
|||||||
|
|
||||||
|
|
||||||
/* State numbers. */
|
/* State numbers. */
|
||||||
typedef yytype_int16 yy_state_num;
|
typedef ]b4_state_num_type(0, m4_eval(b4_states_number - 1))[ yy_state_num;
|
||||||
|
|
||||||
|
|
||||||
#ifndef YY_
|
#ifndef YY_
|
||||||
@@ -740,10 +752,7 @@ do { \
|
|||||||
{
|
{
|
||||||
YYFPRINTF (stderr, "Stack now");
|
YYFPRINTF (stderr, "Stack now");
|
||||||
for (; yybottom <= yytop; yybottom++)
|
for (; yybottom <= yytop; yybottom++)
|
||||||
{
|
YYFPRINTF (stderr, " %u", (unsigned) *yybottom);
|
||||||
int yybot = *yybottom;
|
|
||||||
YYFPRINTF (stderr, " %d", yybot);
|
|
||||||
}
|
|
||||||
YYFPRINTF (stderr, "\n");
|
YYFPRINTF (stderr, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -985,7 +994,7 @@ yy_lac (yy_state_num *yyesa, yy_state_num **yyes,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
yyrule = yytable[yyrule];
|
yyrule = (int) yytable[yyrule];
|
||||||
if (yytable_value_is_error (yyrule))
|
if (yytable_value_is_error (yyrule))
|
||||||
{
|
{
|
||||||
YYDPRINTF ((stderr, " Err\n"));
|
YYDPRINTF ((stderr, " Err\n"));
|
||||||
@@ -1022,11 +1031,10 @@ yy_lac (yy_state_num *yyesa, yy_state_num **yyes,
|
|||||||
int yystate;
|
int yystate;
|
||||||
{
|
{
|
||||||
const int yylhs = yyr1[yyrule] - YYNTOKENS;
|
const int yylhs = yyr1[yyrule] - YYNTOKENS;
|
||||||
const int yyi = yypgoto[yylhs] + *yyesp;
|
const int yyi = yypgoto[yylhs] + (int) *yyesp;
|
||||||
yystate = ((yy_state_num)
|
yystate = (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyesp
|
||||||
(0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyesp
|
? (int) yytable[yyi]
|
||||||
? yytable[yyi]
|
: yydefgoto[yylhs]);
|
||||||
: yydefgoto[yylhs]));
|
|
||||||
}
|
}
|
||||||
if (yyesp == yyes_prev)
|
if (yyesp == yyes_prev)
|
||||||
{
|
{
|
||||||
@@ -1046,7 +1054,7 @@ yy_lac (yy_state_num *yyesa, yy_state_num **yyes,
|
|||||||
}
|
}
|
||||||
*++yyesp = (yy_state_num) yystate;
|
*++yyesp = (yy_state_num) yystate;
|
||||||
}
|
}
|
||||||
YYDPRINTF ((stderr, " G%d", (int) yystate));
|
YYDPRINTF ((stderr, " G%d", yystate));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}]])[
|
}]])[
|
||||||
@@ -1639,7 +1647,7 @@ yyread_pushed_token:]])[
|
|||||||
goto yydefault;
|
goto yydefault;
|
||||||
}]], [[
|
}]], [[
|
||||||
goto yydefault;]])[
|
goto yydefault;]])[
|
||||||
yyn = yytable[yyn];
|
yyn = (int) yytable[yyn];
|
||||||
if (yyn <= 0)
|
if (yyn <= 0)
|
||||||
{
|
{
|
||||||
if (yytable_value_is_error (yyn))
|
if (yytable_value_is_error (yyn))
|
||||||
@@ -1661,7 +1669,7 @@ yyread_pushed_token:]])[
|
|||||||
yychar = YYEMPTY;]b4_lac_if([[
|
yychar = YYEMPTY;]b4_lac_if([[
|
||||||
YY_LAC_DISCARD ("shift");]])[
|
YY_LAC_DISCARD ("shift");]])[
|
||||||
|
|
||||||
yystate = (yy_state_num) yyn;
|
yystate = yyn;
|
||||||
YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
|
YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
|
||||||
*++yyvsp = yylval;
|
*++yyvsp = yylval;
|
||||||
YY_IGNORE_MAYBE_UNINITIALIZED_END]b4_locations_if([
|
YY_IGNORE_MAYBE_UNINITIALIZED_END]b4_locations_if([
|
||||||
@@ -1741,9 +1749,9 @@ yyreduce:
|
|||||||
number reduced by. */
|
number reduced by. */
|
||||||
{
|
{
|
||||||
const int yylhs = yyr1[yyn] - YYNTOKENS;
|
const int yylhs = yyr1[yyn] - YYNTOKENS;
|
||||||
const int yyi = yypgoto[yylhs] + *yyssp;
|
const int yyi = yypgoto[yylhs] + (int) *yyssp;
|
||||||
yystate = (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyssp
|
yystate = (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyssp
|
||||||
? yytable[yyi]
|
? (int) yytable[yyi]
|
||||||
: yydefgoto[yylhs]);
|
: yydefgoto[yylhs]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1859,7 +1867,7 @@ yyerrlab1:
|
|||||||
yyn += YYTERROR;
|
yyn += YYTERROR;
|
||||||
if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
|
if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
|
||||||
{
|
{
|
||||||
yyn = yytable[yyn];
|
yyn = (int) yytable[yyn];
|
||||||
if (0 < yyn)
|
if (0 < yyn)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -1894,7 +1902,7 @@ yyerrlab1:
|
|||||||
/* Shift the error token. */
|
/* Shift the error token. */
|
||||||
YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp);
|
YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp);
|
||||||
|
|
||||||
yystate = (yy_state_num) yyn;
|
yystate = yyn;
|
||||||
goto yynewstate;
|
goto yynewstate;
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
94
tests/linear
Executable file
94
tests/linear
Executable file
@@ -0,0 +1,94 @@
|
|||||||
|
#! /usr/bin/env ruby
|
||||||
|
|
||||||
|
# Build a grammar whose LALR(1) parser has a given number of states.
|
||||||
|
# Useful to test edge cases (e.g., 256 and 257 states, etc.).
|
||||||
|
|
||||||
|
class Linear
|
||||||
|
def initialize(states)
|
||||||
|
@states = states - 4
|
||||||
|
@cols = Math.sqrt(@states).to_i
|
||||||
|
@lines = @states / @cols
|
||||||
|
@rest = @states % @cols
|
||||||
|
|
||||||
|
@n = @lines * (@cols - 1) + (@rest == 0 ? 1 : @rest == 1 ? 2 : @rest)
|
||||||
|
end
|
||||||
|
|
||||||
|
def nterms
|
||||||
|
last = @lines + ([0, 1].include?(@rest) ? 0 : 1)
|
||||||
|
(0...last).map { |i| "t#{i}" }.join(' ')
|
||||||
|
end
|
||||||
|
|
||||||
|
def rules
|
||||||
|
res = (0...@lines).map { |i| "t#{i}:#{' N' * (@cols - 1)}" }.join("\n")
|
||||||
|
case @rest
|
||||||
|
when 0
|
||||||
|
res += ' N'
|
||||||
|
when 1
|
||||||
|
res += ' N N'
|
||||||
|
else
|
||||||
|
res += "\nt#{@lines}:#{" N" * @rest}"
|
||||||
|
end
|
||||||
|
res
|
||||||
|
end
|
||||||
|
|
||||||
|
def to_s
|
||||||
|
puts <<~EOF
|
||||||
|
// states: #{@states}
|
||||||
|
// cols: #{@cols}
|
||||||
|
// lines: #{@lines}
|
||||||
|
// rest: #{@rest}
|
||||||
|
// n: #{@n}
|
||||||
|
|
||||||
|
%code {
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
static int yylex (void);
|
||||||
|
static void yyerror (const char *msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
%debug
|
||||||
|
%define api.value.type union
|
||||||
|
%define parse.lac full
|
||||||
|
%define parse.error verbose
|
||||||
|
%printer { fprintf (yyo, "%ld", $$); } <long>
|
||||||
|
%token <long> N
|
||||||
|
|
||||||
|
%%
|
||||||
|
|
||||||
|
exp: #{nterms}
|
||||||
|
|
||||||
|
#{rules}
|
||||||
|
|
||||||
|
%%
|
||||||
|
|
||||||
|
static
|
||||||
|
int yylex (void)
|
||||||
|
{
|
||||||
|
static long count = 0;
|
||||||
|
if (count++ < #{@n})
|
||||||
|
{
|
||||||
|
yylval.N = count;
|
||||||
|
return N;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void yyerror (const char *msg)
|
||||||
|
{
|
||||||
|
fprintf (stderr, "%s\\n", msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main (void)
|
||||||
|
{
|
||||||
|
yydebug = !!getenv ("YYDEBUG");
|
||||||
|
return yyparse ();
|
||||||
|
}
|
||||||
|
EOF
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
puts Linear.new(ARGV[0].to_i).to_s
|
||||||
@@ -15,7 +15,7 @@
|
|||||||
## You should have received a copy of the GNU General Public License
|
## You should have received a copy of the GNU General Public License
|
||||||
## along with this program. If not, see <http://www.gnu.org/licenses/>.
|
## along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
EXTRA_DIST += $(TESTSUITE_AT) %D%/testsuite %D%/testsuite.h
|
EXTRA_DIST += %D%/linear $(TESTSUITE_AT) %D%/testsuite %D%/testsuite.h
|
||||||
|
|
||||||
DISTCLEANFILES += %D%/atconfig $(check_SCRIPTS)
|
DISTCLEANFILES += %D%/atconfig $(check_SCRIPTS)
|
||||||
MAINTAINERCLEANFILES += $(TESTSUITE)
|
MAINTAINERCLEANFILES += $(TESTSUITE)
|
||||||
|
|||||||
@@ -233,7 +233,7 @@ AT_DATA_HORIZONTAL_GRAMMAR([input.y], [1000])
|
|||||||
# Ask for 200 MiB, which should be plenty even on a 64-bit host.
|
# Ask for 200 MiB, which should be plenty even on a 64-bit host.
|
||||||
AT_INCREASE_DATA_SIZE(204000)
|
AT_INCREASE_DATA_SIZE(204000)
|
||||||
|
|
||||||
AT_BISON_CHECK_NO_XML([-v -o input.c input.y])
|
AT_BISON_CHECK_NO_XML([-o input.c input.y])
|
||||||
AT_COMPILE([input])
|
AT_COMPILE([input])
|
||||||
AT_PARSER_CHECK([input])
|
AT_PARSER_CHECK([input])
|
||||||
|
|
||||||
@@ -241,6 +241,43 @@ AT_CLEANUP
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## ------------------- ##
|
||||||
|
## State number type. ##
|
||||||
|
## ------------------- ##
|
||||||
|
|
||||||
|
# AT_TEST(NUM-STATES, TYPE)
|
||||||
|
# -------------------------
|
||||||
|
# Check that automaton with NUM-STATES uses TYPE has state number type.
|
||||||
|
# Check that parser works.
|
||||||
|
|
||||||
|
m4_pushdef([AT_TEST],
|
||||||
|
[AT_SETUP([State number type: $1 states])
|
||||||
|
|
||||||
|
AT_BISON_OPTION_PUSHDEFS
|
||||||
|
|
||||||
|
AT_CHECK([ruby $abs_top_srcdir/tests/linear $1 >input.y || { echo "ruby does not work"; exit 77; }])
|
||||||
|
# Old versions of GCC reject large values given to #line.
|
||||||
|
AT_FULL_COMPILE([input], [], [], [], [--no-line])
|
||||||
|
AT_CHECK([grep 'define YYNSTATES *$1' input.c], [], [ignore])
|
||||||
|
AT_CHECK([grep 'typedef $2 yy_state_num' input.c], [], [ignore])
|
||||||
|
AT_PARSER_CHECK([input])
|
||||||
|
|
||||||
|
AT_BISON_OPTION_POPDEFS
|
||||||
|
AT_CLEANUP])
|
||||||
|
|
||||||
|
AT_TEST( [256], [yytype_uint8])
|
||||||
|
AT_TEST( [257], [yytype_uint16])
|
||||||
|
AT_TEST([65536], [yytype_uint16])
|
||||||
|
AT_TEST([65537], [int])
|
||||||
|
|
||||||
|
m4_popdef([AT_TEST])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## ------------------------ ##
|
||||||
|
## Many lookahead tokens. ##
|
||||||
|
## ------------------------ ##
|
||||||
|
|
||||||
# AT_DATA_LOOKAHEAD_TOKENS_GRAMMAR(FILE-NAME, SIZE)
|
# AT_DATA_LOOKAHEAD_TOKENS_GRAMMAR(FILE-NAME, SIZE)
|
||||||
# --------------------------------------------------
|
# --------------------------------------------------
|
||||||
# Create FILE-NAME, containing a self checking parser for a grammar
|
# Create FILE-NAME, containing a self checking parser for a grammar
|
||||||
@@ -340,11 +377,6 @@ mv stdout $1
|
|||||||
AT_BISON_OPTION_POPDEFS
|
AT_BISON_OPTION_POPDEFS
|
||||||
])
|
])
|
||||||
|
|
||||||
|
|
||||||
## ------------------------ ##
|
|
||||||
## Many lookahead tokens. ##
|
|
||||||
## ------------------------ ##
|
|
||||||
|
|
||||||
AT_SETUP([Many lookahead tokens])
|
AT_SETUP([Many lookahead tokens])
|
||||||
|
|
||||||
AT_DATA_LOOKAHEAD_TOKENS_GRAMMAR([input.y], [1000])
|
AT_DATA_LOOKAHEAD_TOKENS_GRAMMAR([input.y], [1000])
|
||||||
|
|||||||
Reference in New Issue
Block a user