fix: crash when redefining the EOF token

Reported by Agency for Defense Development.
https://lists.gnu.org/r/bug-bison/2020-08/msg00008.html

On an input such as

    %token FOO
           BAR
           FOO 0
    %%
    input: %empty

we crash because when we find FOO 0, we decrement ntokens (since FOO
was discovered to be EOF, which is already known to be a token, so we
increment ntokens for it, and need to cancel this).  This "works well"
when EOF is properly defined in one go, but here it is first defined
and later only assigned token code 0.  In the meantime BAR was given
the token number that we just decremented.

To fix this, assign symbol numbers after parsing, not during parsing,
so that we have also seen all the explicit token codes.  To maintain the
current numbers (I'd like to keep no difference in the output, not
just equivalence), we need to make sure the symbols are numbered in
the same order: that of appearance in the source file.  So we need the
locations to be correct, which was almost the case, except for nterms
that appeared several times as LHS (i.e., several times as "foo:
...").  Fixing the use of location_of_lhs sufficed (it appears it was
intended for this use, but its implementation was unfinished: it was
always set to "false" only).

* src/symtab.c (symbol_location_as_lhs_set): Update location_of_lhs.
(symbol_code_set): Remove broken hack that decremented ntokens.
(symbol_class_set, dummy_symbol_get): Don't set number, ntokens and
nnterms.
(symbol_check_defined): Do it.
(symbols): Don't count nsyms here.
Actually, don't count nsyms at all: let it be done in...
* src/reader.c (check_and_convert_grammar): here.  Define nsyms from
ntokens and nnterms after parsing.
* tests/input.at (EOF redeclared): New.

* examples/c/bistromathic/bistromathic.test: Adjust the traces: in
"%nterm <double> exp %% input: ...", exp used to be numbered before
input.
This commit is contained in:
Akim Demaille
2020-08-05 08:50:48 +02:00
parent 89e42ffb4b
commit b7aab2dbad
4 changed files with 53 additions and 28 deletions

View File

@@ -251,29 +251,29 @@ err: Next token is token ) (1.4: )
err: Shifting token ) (1.4: ) err: Shifting token ) (1.4: )
err: Entering state 20 err: Entering state 20
err: Stack now 0 2 10 20 err: Stack now 0 2 10 20
err: Reducing stack by rule 15 (line 151): err: Reducing stack by rule XX (line XXX):
err: $1 = token ( (1.1: ) err: $1 = token ( (1.1: )
err: $2 = token error (1.2-3: ) err: $2 = token error (1.2-3: )
err: $3 = token ) (1.4: ) err: $3 = token ) (1.4: )
err: -> $$ = nterm exp (1.1-4: 666) err: -> $$ = nterm exp (1.1-4: 666)
err: Entering state 7 err: Entering state 8
err: Stack now 0 7 err: Stack now 0 8
err: Return for a new token: err: Return for a new token:
err: Reading a token err: Reading a token
err: Now at end of input. err: Now at end of input.
err: LAC: initial context established for end of file err: LAC: initial context established for end of file
err: LAC: checking lookahead end of file: R2 G8 S19 err: LAC: checking lookahead end of file: R2 G7 S14
err: Reducing stack by rule 2 (line 126): err: Reducing stack by rule XX (line XXX):
err: $1 = nterm exp (1.1-4: 666) err: $1 = nterm exp (1.1-4: 666)
err: -> $$ = nterm input (1.1-4: ) err: -> $$ = nterm input (1.1-4: )
err: Entering state 8 err: Entering state 7
err: Stack now 0 8 err: Stack now 0 7
err: Now at end of input. err: Now at end of input.
err: Shifting token end of file (1.5: ) err: Shifting token end of file (1.5: )
err: LAC: initial context discarded due to shift err: LAC: initial context discarded due to shift
err: Entering state 19 err: Entering state 14
err: Stack now 0 8 19 err: Stack now 0 7 14
err: Stack now 0 8 19 err: Stack now 0 7 14
err: Cleanup: popping token end of file (1.5: ) err: Cleanup: popping token end of file (1.5: )
err: Cleanup: popping nterm input (1.1-4: )' -p err: Cleanup: popping nterm input (1.1-4: )' -p

View File

@@ -815,8 +815,11 @@ check_and_convert_grammar (void)
grammar = p; grammar = p;
} }
aver (nsyms <= SYMBOL_NUMBER_MAXIMUM); if (SYMBOL_NUMBER_MAXIMUM - nnterms < ntokens)
aver (nsyms == ntokens + nnterms); complain (NULL, fatal, "too many symbols in input grammar (limit is %d)",
SYMBOL_NUMBER_MAXIMUM);
nsyms = ntokens + nnterms;
/* Assign the symbols their symbol numbers. */ /* Assign the symbols their symbol numbers. */
symbols_pack (); symbols_pack ();

View File

@@ -137,11 +137,6 @@ symbol_new (uniqstr tag, location loc)
res->alias = NULL; res->alias = NULL;
res->content = sym_content_new (res); res->content = sym_content_new (res);
res->is_alias = false; res->is_alias = false;
if (nsyms == SYMBOL_NUMBER_MAXIMUM)
complain (NULL, fatal, _("too many symbols in input grammar (limit is %d)"),
SYMBOL_NUMBER_MAXIMUM);
nsyms++;
return res; return res;
} }
@@ -406,7 +401,10 @@ void
symbol_location_as_lhs_set (symbol *sym, location loc) symbol_location_as_lhs_set (symbol *sym, location loc)
{ {
if (!sym->location_of_lhs) if (!sym->location_of_lhs)
sym->location = loc; {
sym->location = loc;
sym->location_of_lhs = true;
}
} }
@@ -556,10 +554,6 @@ symbol_class_set (symbol *sym, symbol_class class, location loc, bool declaring)
if (class == token_sym && s->class == pct_type_sym) if (class == token_sym && s->class == pct_type_sym)
complain_pct_type_on_token (&sym->location); complain_pct_type_on_token (&sym->location);
if (class == nterm_sym && s->class != nterm_sym)
s->number = nnterms++;
else if (class == token_sym && s->number == NUMBER_UNDEFINED)
s->number = ntokens++;
s->class = class; s->class = class;
if (declaring) if (declaring)
@@ -606,10 +600,6 @@ symbol_code_set (symbol *sym, int code, location loc)
if (code == 0 && !eoftoken) if (code == 0 && !eoftoken)
{ {
eoftoken = sym->content->symbol; eoftoken = sym->content->symbol;
/* It is always mapped to 0, so it was already counted in
NTOKENS. */
if (eoftoken->content->number != NUMBER_UNDEFINED)
--ntokens;
eoftoken->content->number = 0; eoftoken->content->number = 0;
} }
} }
@@ -629,9 +619,11 @@ symbol_check_defined (symbol *sym)
{ {
complain_symbol_undeclared (sym); complain_symbol_undeclared (sym);
s->class = nterm_sym; s->class = nterm_sym;
s->number = nnterms++;
} }
if (s->number == NUMBER_UNDEFINED)
s->number = s->class == token_sym ? ntokens++ : nnterms++;
if (s->class == token_sym if (s->class == token_sym
&& sym->tag[0] == '"' && sym->tag[0] == '"'
&& !sym->is_alias) && !sym->is_alias)
@@ -975,7 +967,6 @@ dummy_symbol_get (location loc)
assure (len < sizeof buf); assure (len < sizeof buf);
symbol *sym = symbol_get (buf, loc); symbol *sym = symbol_get (buf, loc);
sym->content->class = nterm_sym; sym->content->class = nterm_sym;
sym->content->number = nnterms++;
return sym; return sym;
} }

View File

@@ -135,6 +135,10 @@ input.y:9.1-10.0: error: missing '%}' at end of file
AT_CLEANUP AT_CLEANUP
## ------------------------ ##
## Invalid inputs with {}. ##
## ------------------------ ##
AT_SETUP([Invalid inputs with {}]) AT_SETUP([Invalid inputs with {}])
# We used to SEGV here. See # We used to SEGV here. See
@@ -816,6 +820,33 @@ input.y:3.8-10: note: previous declaration
AT_CLEANUP AT_CLEANUP
## ---------------- ##
## EOF redeclared. ##
## ---------------- ##
AT_SETUP([EOF redeclared])
# We used to crash when redefining a token after having defined EOF.
# See https://lists.gnu.org/r/bug-bison/2020-08/msg00008.html.
AT_DATA([[input.y]],
[[%token FOO BAR FOO 0
%%
input: %empty
]])
AT_BISON_CHECK([-fcaret input.y], [0], [],
[[input.y:1.16-18: warning: symbol FOO redeclared [-Wother]
1 | %token FOO BAR FOO 0
| ^~~
input.y:1.8-10: note: previous declaration
1 | %token FOO BAR FOO 0
| ^~~
]])
AT_CLEANUP
## --------------------------- ## ## --------------------------- ##
## Symbol class redefinition. ## ## Symbol class redefinition. ##
## --------------------------- ## ## --------------------------- ##