mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-09 04:13:03 +00:00
Violation of C standard detected by clang -fsanitize=undefined with clang 19.1.7 on Fedora 41 x86-64. * src/counterexample.c (si_bfs_free): * src/files.c (prefix_map_free, add_prefix_map): * src/fixits.c (fixit_cmp, fixit_free, fixits_register): (expand_to_conflict, nonunifying_shift_path) (search_state_free_children, search_state_free, ssb_free) (ssb_hasher, ssb_comp, ssb_equals, visited_hasher) (visited_comparator, ssb_append, unifying_example): * src/lssi.c (lssi_free, lssi_hasher, lssi_comparator) (shortest_path_from_start): * src/parse-simulation.c (free_parse_state) (parse_state_list_new, parser_pop): * src/state-item.c (hash_pair_hasher, hash_pair_comparator) (hash_pair_free, hash_pair_table_create): Avoid undefined behavior in C, which does not allow you to cast a function pointer to some other function type and then call it via that other type. Instead, use functions with correct types according to the C standard, and cast their parameters. * src/getargs.c (xargmatch_fn): Return int const, not int, to match what ARGMATCH_DEFINE_GROUP does. In all uses of ARGMATCH_DEFINE_GROUP, say that they return int, to match xargmatch_fn. (FLAGS_ARGMATCH): Do not cast function pointer. * src/parse-simulation.c (vc_derivation_list_append): New function. * src/system.h (deconst): New static function.
375 lines
11 KiB
C
375 lines
11 KiB
C
/* Lookahead sensitive state item searches for counterexample generation
|
|
|
|
Copyright (C) 2020-2022 Free Software Foundation, Inc.
|
|
|
|
This file is part of Bison, the GNU Compiler Compiler.
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
|
|
|
#include <config.h>
|
|
|
|
#include "lssi.h"
|
|
|
|
#include <gl_linked_list.h>
|
|
#include <gl_xlist.h>
|
|
#include <stdlib.h>
|
|
|
|
#include "getargs.h"
|
|
#include "nullable.h"
|
|
|
|
// Lookahead sensitive state item.
|
|
typedef struct lssi
|
|
{
|
|
state_item_number si;
|
|
struct lssi *parent;
|
|
// this is the precise lookahead set (follow_L from the CupEx paper)
|
|
bitset lookahead;
|
|
bool free_lookahead;
|
|
} lssi;
|
|
|
|
static lssi *
|
|
new_lssi (state_item_number si, lssi *p, bitset l, bool free_l)
|
|
{
|
|
lssi *res = xmalloc (sizeof *res);
|
|
res->si = si;
|
|
res->parent = p;
|
|
res->lookahead = l;
|
|
res->free_lookahead = free_l;
|
|
return res;
|
|
}
|
|
|
|
static void
|
|
lssi_free (void *vsn)
|
|
{
|
|
lssi *sn = vsn;
|
|
if (sn == NULL)
|
|
return;
|
|
if (sn->free_lookahead)
|
|
bitset_free (sn->lookahead);
|
|
free (sn);
|
|
}
|
|
|
|
static size_t
|
|
lssi_hasher (void const *vsn, size_t max)
|
|
{
|
|
lssi const *sn = vsn;
|
|
size_t hash = sn->si;
|
|
bitset_iterator biter;
|
|
symbol_number syn;
|
|
BITSET_FOR_EACH (biter, sn->lookahead, syn, 0)
|
|
hash += syn;
|
|
return hash % max;
|
|
}
|
|
|
|
static bool
|
|
lssi_comparator (void const *vs1, void const *vs2)
|
|
{
|
|
lssi const *s1 = vs1;
|
|
lssi const *s2 = vs2;
|
|
if (s1->si == s2->si)
|
|
{
|
|
if (s1->lookahead == s2->lookahead)
|
|
return true;
|
|
return bitset_equal_p (s1->lookahead, s2->lookahead);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
typedef gl_list_t lssi_list;
|
|
|
|
static inline bool
|
|
append_lssi (lssi *sn, Hash_table *visited, lssi_list queue)
|
|
{
|
|
if (hash_lookup (visited, sn))
|
|
{
|
|
sn->free_lookahead = false;
|
|
lssi_free (sn);
|
|
return false;
|
|
}
|
|
hash_xinsert (visited, sn);
|
|
gl_list_add_last (queue, sn);
|
|
return true;
|
|
}
|
|
|
|
#if 0
|
|
static void
|
|
lssi_print (lssi *l)
|
|
{
|
|
FILE *out = stderr;
|
|
print_state_item (&state_items[l->si], out);
|
|
if (l->lookahead)
|
|
{
|
|
fprintf (out, "FOLLOWL = { ");
|
|
bitset_iterator biter;
|
|
symbol_number sin;
|
|
BITSET_FOR_EACH (biter, l->lookahead, sin, 0)
|
|
fprintf (out, "%s, \n", symbols[sin]->tag);
|
|
fprintf (out, "}\n");
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/**
|
|
* Compute the set of state-items that can reach the given conflict item via
|
|
* a combination of transitions or production steps.
|
|
*/
|
|
static bitset
|
|
eligible_state_items (state_item *target)
|
|
{
|
|
bitset result = bitset_create (nstate_items, BITSET_FIXED);
|
|
state_item_list queue =
|
|
gl_list_create (GL_LINKED_LIST, NULL, NULL, NULL, true, 1,
|
|
(const void **) &target);
|
|
while (gl_list_size (queue) > 0)
|
|
{
|
|
state_item *si = (state_item *) gl_list_get_at (queue, 0);
|
|
gl_list_remove_at (queue, 0);
|
|
if (bitset_test (result, si - state_items))
|
|
continue;
|
|
bitset_set (result, si - state_items);
|
|
// search all reverse edges.
|
|
bitset rsi = si->revs;
|
|
bitset_iterator biter;
|
|
state_item_number sin;
|
|
BITSET_FOR_EACH (biter, rsi, sin, 0)
|
|
gl_list_add_last (queue, &state_items[sin]);
|
|
}
|
|
gl_list_free (queue);
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Compute the shortest lookahead-sensitive path from the start state to
|
|
* this conflict. If optimized is true, only consider parser states
|
|
* that can reach the conflict state.
|
|
*/
|
|
state_item_list
|
|
shortest_path_from_start (state_item_number target, symbol_number next_sym)
|
|
{
|
|
bitset eligible = eligible_state_items (&state_items[target]);
|
|
Hash_table *visited = hash_initialize (32, NULL, lssi_hasher,
|
|
lssi_comparator, lssi_free);
|
|
bitset il = bitset_create (nsyms, BITSET_FIXED);
|
|
bitset_set (il, 0);
|
|
lssi *init = new_lssi (0, NULL, il, true);
|
|
lssi_list queue = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL,
|
|
NULL, true);
|
|
append_lssi (init, visited, queue);
|
|
// breadth-first search
|
|
bool finished = false;
|
|
lssi *n;
|
|
while (gl_list_size (queue) > 0)
|
|
{
|
|
n = (lssi *) gl_list_get_at (queue, 0);
|
|
gl_list_remove_at (queue, 0);
|
|
state_item_number last = n->si;
|
|
if (target == last && bitset_test (n->lookahead, next_sym))
|
|
{
|
|
finished = true;
|
|
break;
|
|
}
|
|
state_item *si = &state_items[last];
|
|
// Transitions don't change follow_L
|
|
if (si->trans >= 0)
|
|
{
|
|
if (bitset_test (eligible, si->trans))
|
|
{
|
|
lssi *next = new_lssi (si->trans, n, n->lookahead, false);
|
|
append_lssi (next, visited, queue);
|
|
}
|
|
}
|
|
// For production steps, follow_L is based on the symbol after the
|
|
// nonterminal being produced.
|
|
// if no such symbol exists, follow_L is unchanged
|
|
// if the symbol is a terminal, follow_L only contains that terminal
|
|
// if the symbol is not nullable, follow_L is its FIRSTS set
|
|
// if the symbol is nullable, follow_L is its FIRSTS set unioned with
|
|
// this logic applied to the next symbol in the rule
|
|
if (si->prods)
|
|
{
|
|
// Compute follow_L as above
|
|
bitset lookahead = bitset_create (nsyms, BITSET_FIXED);
|
|
item_number *pos = si->item + 1;
|
|
for (; !item_number_is_rule_number (*pos); ++pos)
|
|
{
|
|
item_number it = *pos;
|
|
if (ISTOKEN (it))
|
|
{
|
|
bitset_set (lookahead, it);
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
bitset_union (lookahead, lookahead, FIRSTS (it));
|
|
if (!nullable[it - ntokens])
|
|
break;
|
|
}
|
|
}
|
|
if (item_number_is_rule_number (*pos))
|
|
bitset_union (lookahead, n->lookahead, lookahead);
|
|
|
|
bool lookahead_used = false;
|
|
// Try all possible production steps within this parser state.
|
|
bitset_iterator biter;
|
|
state_item_number nextSI;
|
|
BITSET_FOR_EACH (biter, si->prods, nextSI, 0)
|
|
{
|
|
if (!bitset_test (eligible, nextSI))
|
|
continue;
|
|
lssi *next = new_lssi (nextSI, n, lookahead,
|
|
!lookahead_used);
|
|
lookahead_used = append_lssi (next, visited, queue)
|
|
|| lookahead_used;
|
|
}
|
|
if (!lookahead_used)
|
|
bitset_free (lookahead);
|
|
}
|
|
}
|
|
|
|
bitset_free (eligible);
|
|
if (!finished)
|
|
{
|
|
gl_list_free (queue);
|
|
fputs ("Cannot find shortest path to conflict state.", stderr);
|
|
abort ();
|
|
}
|
|
state_item_list res =
|
|
gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
|
|
for (lssi *sn = n; sn != NULL; sn = sn->parent)
|
|
gl_list_add_first (res, &state_items[sn->si]);
|
|
|
|
hash_free (visited);
|
|
gl_list_free (queue);
|
|
|
|
if (trace_flag & trace_cex)
|
|
{
|
|
fputs ("REDUCE ITEM PATH:\n", stderr);
|
|
gl_list_iterator_t it = gl_list_iterator (res);
|
|
const void *sip;
|
|
while (gl_list_iterator_next (&it, &sip, NULL))
|
|
state_item_print ((state_item *) sip, stderr, "");
|
|
}
|
|
return res;
|
|
}
|
|
|
|
/**
|
|
* Determine if the given terminal is in the given symbol set or can begin
|
|
* a nonterminal in the given symbol set.
|
|
*/
|
|
bool
|
|
intersect_symbol (symbol_number sym, bitset syms)
|
|
{
|
|
if (!syms)
|
|
return true;
|
|
bitset_iterator biter;
|
|
symbol_number sn;
|
|
BITSET_FOR_EACH (biter, syms, sn, 0)
|
|
{
|
|
if (sym == sn)
|
|
return true;
|
|
if (ISVAR (sn) && bitset_test (FIRSTS (sn), sym))
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* Determine if any symbol in ts is in syms
|
|
* or can begin a nonterminal syms.
|
|
*/
|
|
bool
|
|
intersect (bitset ts, bitset syms)
|
|
{
|
|
if (!syms || !ts)
|
|
return true;
|
|
bitset_iterator biter;
|
|
symbol_number sn;
|
|
BITSET_FOR_EACH (biter, syms, sn, 0)
|
|
{
|
|
if (bitset_test (ts, sn))
|
|
return true;
|
|
if (ISVAR (sn) && !bitset_disjoint_p (ts, FIRSTS (sn)))
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
/**
|
|
* Compute a list of state_items that have a production to n with respect
|
|
* to its lookahead
|
|
*/
|
|
state_item_list
|
|
lssi_reverse_production (const state_item *si, bitset lookahead)
|
|
{
|
|
state_item_list result =
|
|
gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
|
|
if (SI_TRANSITION (si))
|
|
return result;
|
|
// A production step was made to the current lalr_item.
|
|
// Check that the next symbol in the parent lalr_item is
|
|
// compatible with the lookahead.
|
|
bitset_iterator biter;
|
|
state_item_number sin;
|
|
BITSET_FOR_EACH (biter, si->revs, sin, 0)
|
|
{
|
|
state_item *prevsi = &state_items[sin];
|
|
if (!production_allowed (prevsi, si))
|
|
continue;
|
|
bitset prev_lookahead = prevsi->lookahead;
|
|
if (item_number_is_rule_number (*(prevsi->item)))
|
|
{
|
|
// reduce item
|
|
// Check that some lookaheads can be preserved.
|
|
if (!intersect (prev_lookahead, lookahead))
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
// shift item
|
|
if (lookahead)
|
|
{
|
|
// Check that lookahead is compatible with the first
|
|
// possible symbols in the rest of the production.
|
|
// Alternatively, if the rest of the production is
|
|
// nullable, the lookahead must be compatible with
|
|
// the lookahead of the corresponding item.
|
|
bool applicable = false;
|
|
bool nlable = true;
|
|
for (item_number *pos = prevsi->item + 1;
|
|
!applicable && nlable && item_number_is_symbol_number (*pos);
|
|
++pos)
|
|
{
|
|
symbol_number next_sym = item_number_as_symbol_number (*pos);
|
|
if (ISTOKEN (next_sym))
|
|
{
|
|
applicable = intersect_symbol (next_sym, lookahead);
|
|
nlable = false;
|
|
}
|
|
else
|
|
{
|
|
applicable = intersect (FIRSTS (next_sym), lookahead);
|
|
if (!applicable)
|
|
nlable = nullable[next_sym - ntokens];
|
|
}
|
|
}
|
|
if (!applicable && !nlable)
|
|
continue;
|
|
}
|
|
}
|
|
gl_list_add_last (result, prevsi);
|
|
}
|
|
return result;
|
|
}
|