yacc: use the most appropriate integral type for state numbers

Currently we properly use the "best" integral type for tables,
including those storing state numbers.  However the variables for
state numbers used in yyparse (and its dependencies such as
yy_stack_print) still use int16_t invariably.  As a consequence, very
large models overflow these variables.

Let's use the "best" type for these variables too.  It turns out that
we can still use 16 bits for twice larger automata: stick to unsigned
types.

However using 'unsigned' when 16 bits are not enough is troublesome
and generates tons of warnings about signedness issues.  Instead,
let's use 'int'.

Reported by Tom Kramer.
https://lists.gnu.org/archive/html/bug-bison/2019-09/msg00018.html

* data/skeletons/yacc.c (b4_state_num_type): New.
(yy_state_num): Be computed from YYNSTATES.
* tests/linear: New.
* tests/torture.at (State number type): New.
Use it.
This commit is contained in:
Akim Demaille
2019-09-28 13:48:35 +02:00
parent 871c02b327
commit 2ca6b71967
5 changed files with 160 additions and 25 deletions

1
THANKS
View File

@@ -179,6 +179,7 @@ Tim Landscheidt tim@tim-landscheidt.de
Tim Van Holder tim.van.holder@pandora.be Tim Van Holder tim.van.holder@pandora.be
Tobias Frost tobi@debian.org Tobias Frost tobi@debian.org
Todd Freed todd.freed@gmail.com Todd Freed todd.freed@gmail.com
Tom Kramer kramer@nist.gov
Tom Lane tgl@sss.pgh.pa.us Tom Lane tgl@sss.pgh.pa.us
Tom Tromey tromey@cygnus.com Tom Tromey tromey@cygnus.com
Tomasz Kłoczko kloczko.tomasz@gmail.com Tomasz Kłoczko kloczko.tomasz@gmail.com

View File

@@ -127,6 +127,18 @@ m4_define([b4_int_type],
[int])]) [int])])
# b4_state_num_type(MIN, MAX)
# ---------------------------
# Likewise, but prefer 'int' to 'unsigned' for large integers.
m4_define([b4_state_num_type],
[m4_if(b4_ints_in($@, [0], [255]), [1], [yytype_uint8],
b4_ints_in($@, [-128], [127]), [1], [yytype_int8],
b4_ints_in($@, [0], [65535]), [1], [yytype_uint16],
b4_ints_in($@, [-32768], [32767]), [1], [yytype_int16],
[int])])
## ----------------- ## ## ----------------- ##
## Semantic Values. ## ## Semantic Values. ##
@@ -432,7 +444,7 @@ typedef short yytype_int16;
/* State numbers. */ /* State numbers. */
typedef yytype_int16 yy_state_num; typedef ]b4_state_num_type(0, m4_eval(b4_states_number - 1))[ yy_state_num;
#ifndef YY_ #ifndef YY_
@@ -740,10 +752,7 @@ do { \
{ {
YYFPRINTF (stderr, "Stack now"); YYFPRINTF (stderr, "Stack now");
for (; yybottom <= yytop; yybottom++) for (; yybottom <= yytop; yybottom++)
{ YYFPRINTF (stderr, " %u", (unsigned) *yybottom);
int yybot = *yybottom;
YYFPRINTF (stderr, " %d", yybot);
}
YYFPRINTF (stderr, "\n"); YYFPRINTF (stderr, "\n");
} }
@@ -985,7 +994,7 @@ yy_lac (yy_state_num *yyesa, yy_state_num **yyes,
} }
else else
{ {
yyrule = yytable[yyrule]; yyrule = (int) yytable[yyrule];
if (yytable_value_is_error (yyrule)) if (yytable_value_is_error (yyrule))
{ {
YYDPRINTF ((stderr, " Err\n")); YYDPRINTF ((stderr, " Err\n"));
@@ -1022,11 +1031,10 @@ yy_lac (yy_state_num *yyesa, yy_state_num **yyes,
int yystate; int yystate;
{ {
const int yylhs = yyr1[yyrule] - YYNTOKENS; const int yylhs = yyr1[yyrule] - YYNTOKENS;
const int yyi = yypgoto[yylhs] + *yyesp; const int yyi = yypgoto[yylhs] + (int) *yyesp;
yystate = ((yy_state_num) yystate = (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyesp
(0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyesp ? (int) yytable[yyi]
? yytable[yyi] : yydefgoto[yylhs]);
: yydefgoto[yylhs]));
} }
if (yyesp == yyes_prev) if (yyesp == yyes_prev)
{ {
@@ -1046,7 +1054,7 @@ yy_lac (yy_state_num *yyesa, yy_state_num **yyes,
} }
*++yyesp = (yy_state_num) yystate; *++yyesp = (yy_state_num) yystate;
} }
YYDPRINTF ((stderr, " G%d", (int) yystate)); YYDPRINTF ((stderr, " G%d", yystate));
} }
} }
}]])[ }]])[
@@ -1639,7 +1647,7 @@ yyread_pushed_token:]])[
goto yydefault; goto yydefault;
}]], [[ }]], [[
goto yydefault;]])[ goto yydefault;]])[
yyn = yytable[yyn]; yyn = (int) yytable[yyn];
if (yyn <= 0) if (yyn <= 0)
{ {
if (yytable_value_is_error (yyn)) if (yytable_value_is_error (yyn))
@@ -1661,7 +1669,7 @@ yyread_pushed_token:]])[
yychar = YYEMPTY;]b4_lac_if([[ yychar = YYEMPTY;]b4_lac_if([[
YY_LAC_DISCARD ("shift");]])[ YY_LAC_DISCARD ("shift");]])[
yystate = (yy_state_num) yyn; yystate = yyn;
YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
*++yyvsp = yylval; *++yyvsp = yylval;
YY_IGNORE_MAYBE_UNINITIALIZED_END]b4_locations_if([ YY_IGNORE_MAYBE_UNINITIALIZED_END]b4_locations_if([
@@ -1741,9 +1749,9 @@ yyreduce:
number reduced by. */ number reduced by. */
{ {
const int yylhs = yyr1[yyn] - YYNTOKENS; const int yylhs = yyr1[yyn] - YYNTOKENS;
const int yyi = yypgoto[yylhs] + *yyssp; const int yyi = yypgoto[yylhs] + (int) *yyssp;
yystate = (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyssp yystate = (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyssp
? yytable[yyi] ? (int) yytable[yyi]
: yydefgoto[yylhs]); : yydefgoto[yylhs]);
} }
@@ -1859,7 +1867,7 @@ yyerrlab1:
yyn += YYTERROR; yyn += YYTERROR;
if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
{ {
yyn = yytable[yyn]; yyn = (int) yytable[yyn];
if (0 < yyn) if (0 < yyn)
break; break;
} }
@@ -1894,7 +1902,7 @@ yyerrlab1:
/* Shift the error token. */ /* Shift the error token. */
YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp); YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp);
yystate = (yy_state_num) yyn; yystate = yyn;
goto yynewstate; goto yynewstate;

94
tests/linear Executable file
View File

@@ -0,0 +1,94 @@
#! /usr/bin/env ruby
# Build a grammar whose LALR(1) parser has a given number of states.
# Useful to test edge cases (e.g., 256 and 257 states, etc.).
class Linear
def initialize(states)
@states = states - 4
@cols = Math.sqrt(@states).to_i
@lines = @states / @cols
@rest = @states % @cols
@n = @lines * (@cols - 1) + (@rest == 0 ? 1 : @rest == 1 ? 2 : @rest)
end
def nterms
last = @lines + ([0, 1].include?(@rest) ? 0 : 1)
(0...last).map { |i| "t#{i}" }.join(' ')
end
def rules
res = (0...@lines).map { |i| "t#{i}:#{' N' * (@cols - 1)}" }.join("\n")
case @rest
when 0
res += ' N'
when 1
res += ' N N'
else
res += "\nt#{@lines}:#{" N" * @rest}"
end
res
end
def to_s
puts <<~EOF
// states: #{@states}
// cols: #{@cols}
// lines: #{@lines}
// rest: #{@rest}
// n: #{@n}
%code {
#include <stdio.h>
#include <stdlib.h>
static int yylex (void);
static void yyerror (const char *msg);
}
%debug
%define api.value.type union
%define parse.lac full
%define parse.error verbose
%printer { fprintf (yyo, "%ld", $$); } <long>
%token <long> N
%%
exp: #{nterms}
#{rules}
%%
static
int yylex (void)
{
static long count = 0;
if (count++ < #{@n})
{
yylval.N = count;
return N;
}
else
return 0;
}
static
void yyerror (const char *msg)
{
fprintf (stderr, "%s\\n", msg);
}
int
main (void)
{
yydebug = !!getenv ("YYDEBUG");
return yyparse ();
}
EOF
end
end
puts Linear.new(ARGV[0].to_i).to_s

View File

@@ -15,7 +15,7 @@
## You should have received a copy of the GNU General Public License ## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>. ## along with this program. If not, see <http://www.gnu.org/licenses/>.
EXTRA_DIST += $(TESTSUITE_AT) %D%/testsuite %D%/testsuite.h EXTRA_DIST += %D%/linear $(TESTSUITE_AT) %D%/testsuite %D%/testsuite.h
DISTCLEANFILES += %D%/atconfig $(check_SCRIPTS) DISTCLEANFILES += %D%/atconfig $(check_SCRIPTS)
MAINTAINERCLEANFILES += $(TESTSUITE) MAINTAINERCLEANFILES += $(TESTSUITE)

View File

@@ -233,7 +233,7 @@ AT_DATA_HORIZONTAL_GRAMMAR([input.y], [1000])
# Ask for 200 MiB, which should be plenty even on a 64-bit host. # Ask for 200 MiB, which should be plenty even on a 64-bit host.
AT_INCREASE_DATA_SIZE(204000) AT_INCREASE_DATA_SIZE(204000)
AT_BISON_CHECK_NO_XML([-v -o input.c input.y]) AT_BISON_CHECK_NO_XML([-o input.c input.y])
AT_COMPILE([input]) AT_COMPILE([input])
AT_PARSER_CHECK([input]) AT_PARSER_CHECK([input])
@@ -241,6 +241,43 @@ AT_CLEANUP
## ------------------- ##
## State number type. ##
## ------------------- ##
# AT_TEST(NUM-STATES, TYPE)
# -------------------------
# Check that automaton with NUM-STATES uses TYPE has state number type.
# Check that parser works.
m4_pushdef([AT_TEST],
[AT_SETUP([State number type: $1 states])
AT_BISON_OPTION_PUSHDEFS
AT_CHECK([ruby $abs_top_srcdir/tests/linear $1 >input.y || { echo "ruby does not work"; exit 77; }])
# Old versions of GCC reject large values given to #line.
AT_FULL_COMPILE([input], [], [], [], [--no-line])
AT_CHECK([grep 'define YYNSTATES *$1' input.c], [], [ignore])
AT_CHECK([grep 'typedef $2 yy_state_num' input.c], [], [ignore])
AT_PARSER_CHECK([input])
AT_BISON_OPTION_POPDEFS
AT_CLEANUP])
AT_TEST( [256], [yytype_uint8])
AT_TEST( [257], [yytype_uint16])
AT_TEST([65536], [yytype_uint16])
AT_TEST([65537], [int])
m4_popdef([AT_TEST])
## ------------------------ ##
## Many lookahead tokens. ##
## ------------------------ ##
# AT_DATA_LOOKAHEAD_TOKENS_GRAMMAR(FILE-NAME, SIZE) # AT_DATA_LOOKAHEAD_TOKENS_GRAMMAR(FILE-NAME, SIZE)
# -------------------------------------------------- # --------------------------------------------------
# Create FILE-NAME, containing a self checking parser for a grammar # Create FILE-NAME, containing a self checking parser for a grammar
@@ -340,11 +377,6 @@ mv stdout $1
AT_BISON_OPTION_POPDEFS AT_BISON_OPTION_POPDEFS
]) ])
## ------------------------ ##
## Many lookahead tokens. ##
## ------------------------ ##
AT_SETUP([Many lookahead tokens]) AT_SETUP([Many lookahead tokens])
AT_DATA_LOOKAHEAD_TOKENS_GRAMMAR([input.y], [1000]) AT_DATA_LOOKAHEAD_TOKENS_GRAMMAR([input.y], [1000])