mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-09 12:23:04 +00:00
cex: introduce the parse simulator
* src/derivation.h, src/derivation.c, * src/parse-simulation.h, src/parse-simulation.c: New.
This commit is contained in:
committed by
Akim Demaille
parent
5807dd9279
commit
ac54d19eba
107
src/derivation.c
Normal file
107
src/derivation.c
Normal file
@@ -0,0 +1,107 @@
|
||||
/* Counterexample derivation trees
|
||||
|
||||
Copyright (C) 2020 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of Bison, the GNU Compiler Compiler.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
#include "derivation.h"
|
||||
#include "system.h"
|
||||
|
||||
static derivation d_dot = { -1, NULL };
|
||||
|
||||
const derivation *
|
||||
derivation_dot (void)
|
||||
{
|
||||
return &d_dot;
|
||||
}
|
||||
|
||||
derivation *
|
||||
derivation_new (symbol_number sym, gl_list_t children)
|
||||
{
|
||||
derivation *deriv = xmalloc (sizeof (derivation));
|
||||
deriv->sym = sym;
|
||||
deriv->children = children;
|
||||
return deriv;
|
||||
}
|
||||
|
||||
void
|
||||
derivation_free (derivation *d)
|
||||
{
|
||||
if (d && d != &d_dot)
|
||||
{
|
||||
if (d->children)
|
||||
gl_list_free (d->children);
|
||||
free (d);
|
||||
}
|
||||
}
|
||||
|
||||
size_t
|
||||
derivation_size (const derivation *deriv)
|
||||
{
|
||||
if (!deriv->children)
|
||||
return 1;
|
||||
int size = 1;
|
||||
gl_list_iterator_t it = gl_list_iterator (deriv->children);
|
||||
derivation *child;
|
||||
while (gl_list_iterator_next (&it, (const void **) &child, NULL))
|
||||
size += derivation_size (child);
|
||||
return size;
|
||||
}
|
||||
|
||||
// these are used rarely enough that I don't think they should be interned.
|
||||
void
|
||||
derivation_print (const derivation *deriv, FILE *f)
|
||||
{
|
||||
if (deriv == &d_dot)
|
||||
{
|
||||
fputs (" • ", f);
|
||||
return;
|
||||
}
|
||||
symbol *sym = symbols[deriv->sym];
|
||||
if (!deriv->children)
|
||||
{
|
||||
fprintf (f, " %s ", sym->tag);
|
||||
return;
|
||||
}
|
||||
gl_list_iterator_t it = gl_list_iterator (deriv->children);
|
||||
derivation *child;
|
||||
fprintf (f, " %s ::=[", sym->tag);
|
||||
while (gl_list_iterator_next (&it, (const void **) &child, NULL))
|
||||
derivation_print (child, f);
|
||||
fputs ("] ", f);
|
||||
}
|
||||
|
||||
void
|
||||
derivation_print_leaves (const derivation *deriv, FILE *f)
|
||||
{
|
||||
if (deriv == &d_dot)
|
||||
{
|
||||
fputs (" • ", f);
|
||||
return;
|
||||
}
|
||||
if (!deriv->children)
|
||||
{
|
||||
symbol *sym = symbols[deriv->sym];
|
||||
fprintf (f, " %s ", sym->tag);
|
||||
return;
|
||||
}
|
||||
|
||||
gl_list_iterator_t it = gl_list_iterator (deriv->children);
|
||||
derivation *child;
|
||||
while (gl_list_iterator_next (&it, (const void **) &child, NULL))
|
||||
derivation_print_leaves (child, f);
|
||||
}
|
||||
46
src/derivation.h
Normal file
46
src/derivation.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/* Counterexample derivation trees
|
||||
|
||||
Copyright (C) 2020 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of Bison, the GNU Compiler Compiler.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef DERIVATION_H
|
||||
# define DERIVATION_H
|
||||
|
||||
# include <gl_list.h>
|
||||
|
||||
# include "gram.h"
|
||||
|
||||
/* Derivations are trees of symbols such that each non terminal's
|
||||
children are symbols that produce that nonterminal if they are
|
||||
relevant to the counterexample. The leaves of a derivation form a
|
||||
counterexample when printed. */
|
||||
|
||||
typedef struct derivation
|
||||
{
|
||||
symbol_number sym;
|
||||
gl_list_t children;
|
||||
} derivation;
|
||||
|
||||
derivation *derivation_new (symbol_number sym, gl_list_t children);
|
||||
size_t derivation_size (const derivation *deriv);
|
||||
void derivation_print (const derivation *deriv, FILE *f);
|
||||
void derivation_print_leaves (const derivation *deriv, FILE *f);
|
||||
void derivation_free (derivation *deriv);
|
||||
|
||||
const derivation *derivation_dot (void);
|
||||
|
||||
#endif /* DERIVATION_H */
|
||||
556
src/parse-simulation.c
Normal file
556
src/parse-simulation.c
Normal file
@@ -0,0 +1,556 @@
|
||||
/* Parser simulator for unifying counterexample search
|
||||
|
||||
Copyright (C) 2020 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of Bison, the GNU Compiler Compiler.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#include <gl_linked_list.h>
|
||||
#include <gl_xlist.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "lssi.h"
|
||||
#include "nullable.h"
|
||||
#include "parse-simulation.h"
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// elements newly added in this chunk
|
||||
gl_list_t contents;
|
||||
// properties of the linked list this chunk represents
|
||||
const void *head_elt;
|
||||
const void *tail_elt;
|
||||
size_t total_size;
|
||||
} ps_chunk;
|
||||
|
||||
struct parse_state;
|
||||
typedef struct parse_state
|
||||
{
|
||||
// path of state-items the parser has traversed
|
||||
ps_chunk state_items;
|
||||
// list of derivations of the symbols
|
||||
ps_chunk derivs;
|
||||
struct parse_state *parent;
|
||||
int reference_count;
|
||||
// incremented during productions,
|
||||
// decremented during reductions
|
||||
int depth;
|
||||
// whether the contents of the chunks should be
|
||||
// prepended or appended to the list the chunks
|
||||
// represent
|
||||
bool prepend;
|
||||
// causes chunk contents to be freed when the
|
||||
// reference count is one. Used when only the chunk metadata
|
||||
// will be needed.
|
||||
bool free_contents_early;
|
||||
} parse_state;
|
||||
|
||||
|
||||
static void
|
||||
ps_chunk_prepend (ps_chunk *chunk, const void *element)
|
||||
{
|
||||
gl_list_add_first (chunk->contents, element);
|
||||
chunk->head_elt = element;
|
||||
++chunk->total_size;
|
||||
if (!chunk->tail_elt)
|
||||
chunk->tail_elt = element;
|
||||
}
|
||||
|
||||
static void
|
||||
ps_chunk_append (ps_chunk *chunk, const void *element)
|
||||
{
|
||||
gl_list_add_last (chunk->contents, element);
|
||||
chunk->tail_elt = element;
|
||||
++chunk->total_size;
|
||||
if (!chunk->head_elt)
|
||||
chunk->head_elt = element;
|
||||
}
|
||||
|
||||
static int allocs = 0;
|
||||
static int frees = 0;
|
||||
|
||||
static parse_state *
|
||||
empty_parse_state (void)
|
||||
{
|
||||
parse_state *ret = xcalloc (1, sizeof (parse_state));
|
||||
ret->state_items.contents = gl_list_create_empty (GL_LINKED_LIST, NULL,
|
||||
NULL, NULL, true);
|
||||
ret->derivs.contents = gl_list_create_empty (GL_LINKED_LIST, NULL,
|
||||
NULL, NULL, true);
|
||||
++allocs;
|
||||
return ret;
|
||||
}
|
||||
|
||||
parse_state *
|
||||
new_parse_state (const state_item *si)
|
||||
{
|
||||
parse_state *ret = empty_parse_state ();
|
||||
ps_chunk_append (&ret->state_items, si);
|
||||
ps_chunk_append (&ret->derivs, derivation_dot ());
|
||||
return ret;
|
||||
}
|
||||
|
||||
static parse_state *
|
||||
copy_parse_state (bool prepend, parse_state *parent)
|
||||
{
|
||||
parse_state *ret = xmalloc (sizeof (parse_state));
|
||||
memcpy (ret, parent, sizeof (parse_state));
|
||||
ret->state_items.contents = gl_list_create_empty (GL_LINKED_LIST, NULL,
|
||||
NULL, NULL, true);
|
||||
ret->derivs.contents = gl_list_create_empty (GL_LINKED_LIST, NULL,
|
||||
NULL, NULL, true);
|
||||
ret->parent = parent;
|
||||
ret->prepend = prepend;
|
||||
ret->reference_count = 0;
|
||||
ret->free_contents_early = false;
|
||||
++parent->reference_count;
|
||||
++allocs;
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool
|
||||
parse_state_derivation_completed (const parse_state *ps)
|
||||
{
|
||||
return ps->derivs.total_size == 1;
|
||||
}
|
||||
|
||||
const derivation *
|
||||
parse_state_derivation (const parse_state *ps)
|
||||
{
|
||||
return ps->derivs.head_elt;
|
||||
}
|
||||
|
||||
const state_item *
|
||||
parse_state_head (const parse_state *ps)
|
||||
{
|
||||
return ps->state_items.head_elt;
|
||||
}
|
||||
|
||||
const state_item *
|
||||
parse_state_tail (const parse_state *ps)
|
||||
{
|
||||
return ps->state_items.tail_elt;
|
||||
}
|
||||
|
||||
int
|
||||
parse_state_length (const parse_state *ps)
|
||||
{
|
||||
return ps->state_items.total_size;
|
||||
}
|
||||
|
||||
int
|
||||
parse_state_depth (const parse_state *ps)
|
||||
{
|
||||
return ps->depth;
|
||||
}
|
||||
|
||||
void
|
||||
parse_state_retain (parse_state *ps)
|
||||
{
|
||||
++ps->reference_count;
|
||||
}
|
||||
|
||||
void
|
||||
parse_state_free_contents_early (parse_state *ps)
|
||||
{
|
||||
ps->free_contents_early = true;
|
||||
}
|
||||
|
||||
void
|
||||
parse_state_retain_deriv (parse_state *ps)
|
||||
{
|
||||
ps->derivs.contents = NULL;
|
||||
}
|
||||
|
||||
void
|
||||
free_parse_state (parse_state *ps)
|
||||
{
|
||||
if (ps == NULL)
|
||||
return;
|
||||
--ps->reference_count;
|
||||
// need to keep the parse state around
|
||||
// for visited, but its contents can be freed
|
||||
if ((ps->reference_count == 1 && ps->free_contents_early) ||
|
||||
(ps->reference_count == 0 && !ps->free_contents_early))
|
||||
{
|
||||
if (ps->state_items.contents)
|
||||
gl_list_free (ps->state_items.contents);
|
||||
if (ps->derivs.contents)
|
||||
gl_list_free (ps->derivs.contents);
|
||||
free_parse_state (ps->parent);
|
||||
}
|
||||
if (ps->reference_count <= 0)
|
||||
{
|
||||
free (ps);
|
||||
++frees;
|
||||
}
|
||||
}
|
||||
|
||||
size_t
|
||||
parse_state_hasher (const parse_state *ps, size_t max)
|
||||
{
|
||||
const ps_chunk *sis = &ps->state_items;
|
||||
return ((state_item *) sis->head_elt - state_items +
|
||||
(state_item *) sis->tail_elt - state_items + sis->total_size) % max;
|
||||
}
|
||||
|
||||
bool
|
||||
parse_state_comparator (const parse_state *ps1, const parse_state *ps2)
|
||||
{
|
||||
const ps_chunk *sis1 = &ps1->state_items;
|
||||
const ps_chunk *sis2 = &ps2->state_items;
|
||||
return sis1->head_elt == sis2->head_elt &&
|
||||
sis1->tail_elt == sis2->tail_elt && sis1->total_size == sis2->total_size;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
parse_state_completed_steps (const parse_state *ps, int *shifts, int *productions)
|
||||
{
|
||||
// traverse to the root parse_state,
|
||||
// which will have a list of all completed productions.
|
||||
const parse_state *root_ps = ps;
|
||||
while (root_ps->parent)
|
||||
root_ps = root_ps->parent;
|
||||
|
||||
gl_list_t sis = root_ps->state_items.contents;
|
||||
int count = 0;
|
||||
gl_list_iterator_t it = gl_list_iterator (sis);
|
||||
state_item *last = NULL;
|
||||
state_item *next = NULL;
|
||||
while (gl_list_iterator_next (&it, (const void **) &next, NULL))
|
||||
{
|
||||
if (last && last->state == next->state)
|
||||
++count;
|
||||
last = next;
|
||||
}
|
||||
*productions = count;
|
||||
*shifts = root_ps->state_items.total_size - count;
|
||||
}
|
||||
|
||||
// takes an array of n gl_lists and flattens them into two list
|
||||
// based off of the index split
|
||||
static void
|
||||
list_flatten_and_split (gl_list_t *list, gl_list_t *rets, int split, int n)
|
||||
{
|
||||
int ret_index = 0;
|
||||
int ret_array = 0;
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
gl_list_iterator_t it = gl_list_iterator (list[i]);
|
||||
gl_list_t l;
|
||||
while (gl_list_iterator_next (&it, (const void **) &l, NULL))
|
||||
{
|
||||
if (!l)
|
||||
continue;
|
||||
gl_list_iterator_t it2 = gl_list_iterator (l);
|
||||
void *si;
|
||||
while (gl_list_iterator_next (&it2, (const void **) &si, NULL))
|
||||
{
|
||||
if (ret_index++ == split)
|
||||
++ret_array;
|
||||
if (rets[ret_array])
|
||||
gl_list_add_last (rets[ret_array], si);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Emulates a reduction on a parse state by popping some amount of
|
||||
// derivations and state_items off of the parse_state and returning
|
||||
// the result in ret. Returns the derivation of what's popped.
|
||||
static gl_list_t
|
||||
parser_pop (parse_state *ps, int deriv_index,
|
||||
int si_index, parse_state *ret)
|
||||
{
|
||||
// prepend sis, append sis, prepend derivs, append derivs
|
||||
gl_list_t chunks[4];
|
||||
for (int i = 0; i < 4; ++i)
|
||||
chunks[i] = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, 1);
|
||||
for (parse_state *pn = ps; pn != NULL; pn = pn->parent)
|
||||
{
|
||||
if (pn->prepend)
|
||||
{
|
||||
gl_list_add_last (chunks[0], pn->state_items.contents);
|
||||
gl_list_add_last (chunks[2], pn->derivs.contents);
|
||||
}
|
||||
else
|
||||
{
|
||||
gl_list_add_first (chunks[1], pn->state_items.contents);
|
||||
gl_list_add_first (chunks[3], pn->derivs.contents);
|
||||
}
|
||||
}
|
||||
gl_list_t popped_derivs =
|
||||
gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, 1);
|
||||
gl_list_t ret_chunks[4] = { ret->state_items.contents, NULL,
|
||||
ret->derivs.contents, popped_derivs
|
||||
};
|
||||
list_flatten_and_split (chunks, ret_chunks, si_index, 2);
|
||||
list_flatten_and_split (chunks + 2, ret_chunks + 2, deriv_index, 2);
|
||||
size_t s_size = gl_list_size (ret->state_items.contents);
|
||||
ret->state_items.total_size = s_size;
|
||||
if (s_size > 0)
|
||||
{
|
||||
ret->state_items.tail_elt = gl_list_get_at (ret->state_items.contents,
|
||||
s_size - 1);
|
||||
ret->state_items.head_elt =
|
||||
gl_list_get_at (ret->state_items.contents, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
ret->state_items.tail_elt = NULL;
|
||||
ret->state_items.head_elt = NULL;
|
||||
}
|
||||
size_t d_size = gl_list_size (ret->derivs.contents);
|
||||
ret->derivs.total_size = d_size;
|
||||
if (d_size > 0)
|
||||
{
|
||||
ret->derivs.tail_elt = gl_list_get_at (ret->derivs.contents,
|
||||
d_size - 1);
|
||||
ret->derivs.head_elt = gl_list_get_at (ret->derivs.contents, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
ret->derivs.tail_elt = NULL;
|
||||
ret->derivs.head_elt = NULL;
|
||||
}
|
||||
for (int i = 0; i < 4; ++i)
|
||||
gl_list_free (chunks[i]);
|
||||
return popped_derivs;
|
||||
}
|
||||
|
||||
void
|
||||
parse_state_lists (parse_state *ps, gl_list_t *sitems,
|
||||
gl_list_t *derivs)
|
||||
{
|
||||
parse_state *temp = empty_parse_state ();
|
||||
size_t si_size = ps->state_items.total_size;
|
||||
size_t deriv_size = ps->derivs.total_size;
|
||||
parser_pop (ps, si_size, deriv_size, temp);
|
||||
*sitems = temp->state_items.contents;
|
||||
*derivs = temp->derivs.contents;
|
||||
// prevent the return lists from being freed
|
||||
temp->state_items.contents = NULL;
|
||||
temp->derivs.contents = NULL;
|
||||
free_parse_state (temp);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the parse states that result from taking a transition on
|
||||
* nullable symbols whenever possible from the given state_item.
|
||||
*/
|
||||
void
|
||||
nullable_closure (parse_state *ps, state_item *si, gl_list_t state_list)
|
||||
{
|
||||
parse_state *current_ps = ps;
|
||||
state_item_number prev_sin = si - state_items;
|
||||
for (state_item_number sin = si_trans[prev_sin];
|
||||
sin != -1; prev_sin = sin, sin = si_trans[sin])
|
||||
{
|
||||
state_item *psi = state_items + prev_sin;
|
||||
symbol_number sp = item_number_as_symbol_number (*psi->item);
|
||||
if (ISTOKEN (sp) || !nullable[sp - ntokens])
|
||||
break;
|
||||
|
||||
state_item *nsi = state_items + sin;
|
||||
current_ps = copy_parse_state (false, current_ps);
|
||||
ps_chunk_append (¤t_ps->state_items, nsi);
|
||||
ps_chunk_append (¤t_ps->derivs, derivation_new (sp, NULL));
|
||||
++current_ps->reference_count;
|
||||
gl_list_add_last (state_list, current_ps);
|
||||
}
|
||||
}
|
||||
|
||||
gl_list_t
|
||||
simulate_transition (parse_state *ps)
|
||||
{
|
||||
const state_item *si = ps->state_items.tail_elt;
|
||||
symbol_number sym = item_number_as_symbol_number (*si->item);
|
||||
// Transition on the same next symbol, taking nullable
|
||||
// symbols into account.
|
||||
gl_list_t result =
|
||||
gl_list_create_empty (GL_LINKED_LIST, NULL, NULL,
|
||||
(gl_listelement_dispose_fn)free_parse_state,
|
||||
true);
|
||||
state_item_number si_next = si_trans[si - state_items];
|
||||
// check for disabled transition, shouldn't happen
|
||||
// as any state_items that lead to these should be
|
||||
// disabled.
|
||||
if (si_next < 0)
|
||||
return result;
|
||||
parse_state *next_ps = copy_parse_state (false, ps);
|
||||
ps_chunk_append (&next_ps->state_items, state_items + si_next);
|
||||
ps_chunk_append (&next_ps->derivs, derivation_new (sym, NULL));
|
||||
++next_ps->reference_count;
|
||||
gl_list_add_last (result, next_ps);
|
||||
|
||||
nullable_closure (next_ps, state_items + si_next, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine if the given symbols are equal or their first sets
|
||||
* intersect.
|
||||
*/
|
||||
static bool
|
||||
compatible (symbol_number sym1, symbol_number sym2)
|
||||
{
|
||||
if (sym1 == sym2)
|
||||
return true;
|
||||
if (ISTOKEN (sym1) && ISVAR (sym2))
|
||||
return bitset_test (FIRSTS (sym2), sym1);
|
||||
else if (ISVAR (sym1) && ISTOKEN (sym2))
|
||||
return bitset_test (FIRSTS (sym1), sym2);
|
||||
else if (ISVAR (sym1) && ISVAR (sym2))
|
||||
return !bitset_disjoint_p (FIRSTS (sym1), FIRSTS (sym2));
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
gl_list_t
|
||||
simulate_production (parse_state *ps, symbol_number compat_sym)
|
||||
{
|
||||
gl_list_t result =
|
||||
gl_list_create_empty (GL_LINKED_LIST, NULL, NULL,
|
||||
(gl_listelement_dispose_fn)free_parse_state,
|
||||
true);
|
||||
const state_item *si = parse_state_tail (ps);
|
||||
bitset prod = si_prods_lookup (si - state_items);
|
||||
if (prod)
|
||||
{
|
||||
bitset_iterator biter;
|
||||
state_item_number sin;
|
||||
BITSET_FOR_EACH (biter, prod, sin, 0)
|
||||
{
|
||||
// Take production step only if lhs is not nullable and
|
||||
// if first rhs symbol is compatible with compat_sym
|
||||
state_item *next = state_items + sin;
|
||||
item_number *itm1 = next->item;
|
||||
if (!compatible (*itm1, compat_sym) || !production_allowed (si, next))
|
||||
continue;
|
||||
parse_state *next_ps = copy_parse_state (false, ps);
|
||||
ps_chunk_append (&next_ps->state_items, next);
|
||||
++next_ps->reference_count;
|
||||
gl_list_add_last (result, next_ps);
|
||||
if (next_ps->depth >= 0)
|
||||
++next_ps->depth;
|
||||
nullable_closure (next_ps, next, result);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// simulates a reduction on the given parse state, conflict_item is the
|
||||
// item associated with ps's conflict. symbol_set is a lookahead set this
|
||||
// reduction must be compatible with
|
||||
gl_list_t
|
||||
simulate_reduction (parse_state *ps, int rule_len, bitset symbol_set)
|
||||
{
|
||||
gl_list_t result =
|
||||
gl_list_create_empty (GL_LINKED_LIST, NULL, NULL,
|
||||
(gl_listelement_dispose_fn)free_parse_state,
|
||||
true);
|
||||
|
||||
int s_size = ps->state_items.total_size;
|
||||
int d_size = ps->derivs.total_size;
|
||||
if (ps->depth >= 0)
|
||||
d_size--; // account for dot
|
||||
parse_state *new_root = empty_parse_state ();
|
||||
gl_list_t popped_derivs = parser_pop (ps, d_size - rule_len,
|
||||
s_size - rule_len - 1,
|
||||
new_root);
|
||||
|
||||
// update derivation
|
||||
state_item *si = (state_item *) ps->state_items.tail_elt;
|
||||
const rule *r = item_rule (si->item);
|
||||
symbol_number lhs = r->lhs->number;
|
||||
derivation *deriv = derivation_new (lhs, popped_derivs);
|
||||
--new_root->depth;
|
||||
ps_chunk_append (&new_root->derivs, deriv);
|
||||
|
||||
if (s_size != rule_len + 1)
|
||||
{
|
||||
state_item *tail = (state_item *) new_root->state_items.tail_elt;
|
||||
ps_chunk_append (&new_root->state_items,
|
||||
state_items + si_trans[tail - state_items]);
|
||||
++new_root->reference_count;
|
||||
gl_list_add_last (result, new_root);
|
||||
}
|
||||
else
|
||||
{
|
||||
// The head state_item is a production item, so we need to prepend
|
||||
// with possible source state-items.
|
||||
const state_item *head = ps->state_items.head_elt;
|
||||
gl_list_t prev = lssi_reverse_production (head, symbol_set);
|
||||
gl_list_iterator_t it = gl_list_iterator (prev);
|
||||
state_item *psis;
|
||||
while (gl_list_iterator_next (&it, (const void **) &psis, NULL))
|
||||
{
|
||||
//Prepend the result from the reverse production
|
||||
parse_state *copy = copy_parse_state (true, new_root);
|
||||
ps_chunk_prepend (©->state_items, psis);
|
||||
|
||||
// Append the left hand side to the end of the parser state
|
||||
copy = copy_parse_state (false, copy);
|
||||
ps_chunk *sis = ©->state_items;
|
||||
const state_item *tail = sis->tail_elt;
|
||||
ps_chunk_append (sis, state_items + si_trans[tail - state_items]);
|
||||
++copy->reference_count;
|
||||
gl_list_add_last (result, copy);
|
||||
nullable_closure (copy, (state_item *) sis->tail_elt, result);
|
||||
}
|
||||
gl_list_free (prev);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
gl_list_t
|
||||
parser_prepend (parse_state *ps)
|
||||
{
|
||||
gl_list_t result = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL,
|
||||
(gl_listelement_dispose_fn)
|
||||
free_parse_state,
|
||||
true);
|
||||
const state_item *head = ps->state_items.head_elt;
|
||||
bitset prev = si_revs[head - state_items];
|
||||
symbol_number prepend_sym =
|
||||
item_number_as_symbol_number (*(head->item - 1));
|
||||
bitset_iterator biter;
|
||||
state_item_number sin;
|
||||
BITSET_FOR_EACH (biter, prev, sin, 0)
|
||||
{
|
||||
parse_state *copy = copy_parse_state (true, ps);
|
||||
copy->reference_count++;
|
||||
ps_chunk_prepend (©->state_items, state_items + sin);
|
||||
if (SI_TRANSITION (head))
|
||||
ps_chunk_prepend (©->derivs, derivation_new (prepend_sym, NULL));
|
||||
gl_list_add_last (result, copy);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void
|
||||
print_parse_state (parse_state *ps)
|
||||
{
|
||||
printf ("(size %zu depth %d rc %d)\n",
|
||||
ps->state_items.total_size, ps->depth, ps->reference_count);
|
||||
print_state_item (ps->state_items.head_elt, stdout);
|
||||
print_state_item (ps->state_items.tail_elt, stdout);
|
||||
if (ps->derivs.total_size > 0)
|
||||
derivation_print (ps->derivs.head_elt, stdout);
|
||||
putc ('\n', stdout);
|
||||
}
|
||||
133
src/parse-simulation.h
Normal file
133
src/parse-simulation.h
Normal file
@@ -0,0 +1,133 @@
|
||||
/* Parser simulator for unifying counterexample search
|
||||
|
||||
Copyright (C) 2020 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of Bison, the GNU Compiler Compiler.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef PARSE_SIMULATION_H
|
||||
# define PARSE_SIMULATION_H
|
||||
|
||||
# include <stdio.h>
|
||||
# include <gl_xlist.h>
|
||||
|
||||
# include "derivation.h"
|
||||
# include "state-item.h"
|
||||
|
||||
/*
|
||||
Simulating states of the parser:
|
||||
Each state is an array of state-items and an array of derivations.
|
||||
Each consecutive state-item represents a transition/goto or production,
|
||||
and the derivations are the dereivation trees associated with the symbols
|
||||
transitioned on each step. In more detail:
|
||||
|
||||
Parse states are stored as a tree. Each new parse state contains two "chunks,"
|
||||
one corresponding to its state-items and the other corresponding to its derivations.
|
||||
Chunks only have new elements which weren't present in its parent.
|
||||
Each chunk also stores the head, tail, and total_size of the list it represents.
|
||||
So if a parse state was to be copied it retains the list metadata but its
|
||||
contents are empty.
|
||||
|
||||
A transition gets the state-item which the last state-item of the parse state
|
||||
transitions to. This is appended to the state-item list, and a derivation with
|
||||
just the symbol being transitioned on is appended to the derivation list.
|
||||
A production appends the new state-item, but does not have a derivation
|
||||
associated with it.
|
||||
|
||||
A reduction looks at the rule of the last state-item in the state, and pops
|
||||
the last few state-items that make up the rhs of the rule along with their
|
||||
derivations. The derivations become the derivation of the lhs which is then
|
||||
shifted over.
|
||||
|
||||
Effectively, everytime a derivation is appended, it represents a shift in
|
||||
the parser. So a parse state that contains
|
||||
start: A . B C D
|
||||
start: A B C D .
|
||||
and the state-items in between will represent a parser that has BCD on the
|
||||
parse stack.
|
||||
|
||||
However, the above example cannot be reduced, as it's missing A.
|
||||
Since we start at a state-item that can have a dot in the middle of a rule,
|
||||
it's necessary to support a prepend operation. Luckily the prepend operations
|
||||
are very similar to transitions and productions with the difference being that
|
||||
they operate on the head of the state-item list instead of the tail.
|
||||
|
||||
A production
|
||||
A transition gets the state-item which the last state-item of the parse state
|
||||
transitions to. This is appended to the state-item list, and a derivation with
|
||||
just the symbol being transitioned on is appended to the derivation list.
|
||||
|
||||
*/
|
||||
|
||||
typedef struct parse_state parse_state;
|
||||
|
||||
parse_state *new_parse_state (const state_item *conflict);
|
||||
|
||||
size_t parse_state_hasher (const parse_state *ps, size_t max);
|
||||
|
||||
bool parse_state_comparator (const parse_state *ps1, const parse_state *ps2);
|
||||
|
||||
/* Memory management */
|
||||
|
||||
void parse_state_retain (parse_state *ps);
|
||||
/* This allows a parse_state to free its contents list
|
||||
* when its reference count reaches 1. This is used to
|
||||
* free memory while the parse state is in a hash set. */
|
||||
void parse_state_free_contents_early (parse_state *ps);
|
||||
void parse_state_retain_deriv (parse_state *ps);
|
||||
void free_parse_state (parse_state *ps);
|
||||
|
||||
/* counts the amount of shift and production steps in this parse state */
|
||||
void parse_state_completed_steps (const parse_state *ps, int *shifts, int *productions);
|
||||
|
||||
/* parse state getters */
|
||||
bool parse_state_derivation_completed (const parse_state *ps);
|
||||
const derivation *parse_state_derivation (const parse_state *ps);
|
||||
const state_item *parse_state_head (const parse_state *ps);
|
||||
const state_item *parse_state_tail (const parse_state *ps);
|
||||
int parse_state_length (const parse_state *ps);
|
||||
int parse_state_depth (const parse_state *ps);
|
||||
|
||||
/* returns the linked lists that the parse state is supposed to represent */
|
||||
void parse_state_lists (parse_state *ps, gl_list_t *state_items,
|
||||
gl_list_t *derivs);
|
||||
|
||||
/* various functions that return a list of states based off of
|
||||
* whatever operation is simulated. After whatever operation, every possible
|
||||
* transition on nullable nonterminals will be added to the returned list. */
|
||||
|
||||
/* Look at the tail state-item of the parse state and transition on the symbol
|
||||
* after its dot. The symbol gets added to derivs, and the resulting state-item
|
||||
* is appended to state-items. */
|
||||
gl_list_t simulate_transition (parse_state *ps);
|
||||
|
||||
/* Look at all of the productions for the non-terminal following the dot in the tail
|
||||
* state-item. Appends to state-items each production state-item which may start with
|
||||
* compat_sym. */
|
||||
gl_list_t simulate_production (parse_state *ps, symbol_number compat_sym);
|
||||
|
||||
/* Removes the last rule_len state-items along with their derivations. A new state-item is
|
||||
* appended representing the goto after the reduction. A derivation for the non-terminal that
|
||||
* was just reduced is appended which consists of the list of derivations that were just removed. */
|
||||
gl_list_t simulate_reduction (parse_state *ps, int rule_len,
|
||||
bitset symbol_set);
|
||||
|
||||
/* Generate states with a state-item prepended for each state-item that has a
|
||||
* transition or production step to ps's head. */
|
||||
gl_list_t parser_prepend (parse_state *ps);
|
||||
|
||||
void print_parse_state (parse_state *ps);
|
||||
|
||||
#endif /* PARSE_SIMULATION_H */
|
||||
Reference in New Issue
Block a user