Use lib/hash for the symbol table.

* src/gram.c (ntokens): Initialize to 1, to reserve a slot for
EOF.
* src/lex.c (lex): Set the `number' member of new terminals.
* src/reader.c (bucket_check_defined, bucket_make_alias)
(bucket_check_alias_consistence, bucket_translation): New.
(reader, grammar_free, readgram, token_translations_init)
(packsymbols): Adjust.
(reader): Number the predefined tokens.
* src/reduce.c (inaccessable_symbols): Just use hard coded numbers
for predefined tokens.
* src/symtab.h (bucket): Remove all the hash table related
members.
* src/symtab.c (symtab): Replace by...
(bucket_table): this.
(bucket_new, bucket_free, hash_compare_bucket, hash_bucket)
(buckets_new, buckets_do): New.
This commit is contained in:
Akim Demaille
2002-04-07 17:43:21 +00:00
parent 280a38c306
commit 72a23c9797
7 changed files with 360 additions and 232 deletions

View File

@@ -1,3 +1,24 @@
2002-04-07 Akim Demaille <akim@epita.fr>
Use lib/hash for the symbol table.
* src/gram.c (ntokens): Initialize to 1, to reserve a slot for
EOF.
* src/lex.c (lex): Set the `number' member of new terminals.
* src/reader.c (bucket_check_defined, bucket_make_alias)
(bucket_check_alias_consistence, bucket_translation): New.
(reader, grammar_free, readgram, token_translations_init)
(packsymbols): Adjust.
(reader): Number the predefined tokens.
* src/reduce.c (inaccessable_symbols): Just use hard coded numbers
for predefined tokens.
* src/symtab.h (bucket): Remove all the hash table related
members.
* src/symtab.c (symtab): Replace by...
(bucket_table): this.
(bucket_new, bucket_free, hash_compare_bucket, hash_bucket)
(buckets_new, buckets_do): New.
2002-04-07 Akim Demaille <akim@epita.fr>
* src/gram.c (nitems, nrules, nsyms, ntokens, nvars, nritems)

View File

@@ -30,7 +30,7 @@
int nitems = 0;
int nrules = 0;
int nsyms = 0;
int ntokens = 0;
int ntokens = 1;
int nvars = 0;
short *ritem = NULL;

View File

@@ -364,9 +364,13 @@ lex (void)
obstack_1grow (&token_obstack, '\0');
token_buffer = obstack_finish (&token_obstack);
symval = getsym (token_buffer);
symval->class = token_sym;
if (symval->user_token_number == SUNDEF)
symval->user_token_number = code;
if (symval->number == -1)
{
symval->number = ntokens++;
symval->class = token_sym;
if (symval->user_token_number == SUNDEF)
symval->user_token_number = code;
}
return tok_identifier;
}
@@ -388,7 +392,11 @@ lex (void)
token_buffer = obstack_finish (&token_obstack);
symval = getsym (token_buffer);
symval->class = token_sym;
if (symval->number == -1)
{
symval->number = ntokens++;
symval->class = token_sym;
}
return tok_identifier;
}

View File

@@ -86,6 +86,181 @@ symbol_list_new (bucket *sym)
return res;
}
/*------------------------.
| Operations on buckets. |
`------------------------*/
/*-----------------------------------------------------------.
| If THIS is not defined, report an error, and consider it a |
| nonterminal. |
`-----------------------------------------------------------*/
static bool
bucket_check_defined (bucket *this)
{
if (this->class == unknown_sym)
{
complain
(_("symbol %s is used, but is not defined as a token and has no rules"),
this->tag);
this->class = nterm_sym;
this->number = nvars++;
}
return TRUE;
}
/*-------------------------------------------------------------------.
| Assign a symbol number, and write the definition of the token name |
| into FDEFINES. Put in SYMBOLS. |
`-------------------------------------------------------------------*/
static bool
bucket_make_alias (bucket *symbol, char *typename)
{
if (symval->alias)
warn (_("symbol `%s' used more than once as a literal string"),
symval->tag);
else if (symbol->alias)
warn (_("symbol `%s' given more than one literal string"),
symbol->tag);
else
{
symval->class = token_sym;
symval->type_name = typename;
symval->user_token_number = symbol->user_token_number;
symbol->user_token_number = SALIAS;
symval->alias = symbol;
symbol->alias = symval;
/* symbol and symval combined are only one symbol */
nsyms--;
ntokens--;
assert (ntokens == symbol->number || ntokens == symval->number);
symbol->number = symval->number =
(symval->number < symbol->number) ? symval->number : symbol->number;
}
return TRUE;
}
/*---------------------------------------------------------.
| Check that THIS, and its alias, have same precedence and |
| associativity. |
`---------------------------------------------------------*/
static bool
bucket_check_alias_consistence (bucket *this)
{
/* Check only those who _are_ the aliases. */
if (this->alias && this->user_token_number == SALIAS)
{
if (this->prec != this->alias->prec)
{
if (this->prec != 0 && this->alias->prec != 0)
complain (_("conflicting precedences for %s and %s"),
this->tag, this->alias->tag);
if (this->prec != 0)
this->alias->prec = this->prec;
else
this->prec = this->alias->prec;
}
if (this->assoc != this->alias->assoc)
{
if (this->assoc != 0 && this->alias->assoc != 0)
complain (_("conflicting assoc values for %s and %s"),
this->tag, this->alias->tag);
if (this->assoc != 0)
this->alias->assoc = this->assoc;
else
this->assoc = this->alias->assoc;
}
}
return TRUE;
}
/*-------------------------------------------------------------------.
| Assign a symbol number, and write the definition of the token name |
| into FDEFINES. Put in SYMBOLS. |
`-------------------------------------------------------------------*/
static bool
bucket_pack (bucket *this)
{
if (getenv ("DEBUG"))
fprintf (stderr, "Packing %s, %s, number = %d\n",
this->tag,
this->class == nterm_sym ? "nterm" : "TERM",
this->number);
if (this->class == nterm_sym)
{
this->number += ntokens;
}
else if (this->alias)
{
/* This symbol and its alias are a single token defn.
Allocate a tokno, and assign to both check agreement of
prec and assoc fields and make both the same */
if (this->number == -1)
{
if (this == eoftoken || this->alias == eoftoken)
this->number = this->alias->number = 0;
else
{
assert (this->alias->number != -1);
this->number = this->alias->number;
}
}
/* Do not do processing below for SALIASs. */
if (this->user_token_number == SALIAS)
return TRUE;
}
else /* this->class == token_sym */
{
assert (this->number != -1);
}
if (getenv ("DEBUG"))
fprintf (stderr, "Setting %d = %s\n", this->number, this->tag);
symbols[this->number] = this;
return TRUE;
}
/*--------------------------------------------------.
| Put THIS in TOKEN_TRANSLATIONS if it is a token. |
`--------------------------------------------------*/
static bool
bucket_translation (bucket *this)
{
if (getenv ("DEBUG"))
fprintf (stderr, "Considering Setting UserVal %s = %d (val = %d)\n",
this->tag, this->user_token_number, this->number);
/* Non-terminal? */
if (this->class == token_sym
&& this->user_token_number != SALIAS)
{
/* A token which translation has already been set? */
if (token_translations[this->user_token_number] != 2)
complain (_("tokens %s and %s both assigned number %d"),
symbols[token_translations[this->user_token_number]]->tag,
this->tag, this->user_token_number);
if (getenv ("DEBUG"))
fprintf (stderr, "Setting UserVal %s = %d (val = %d)\n",
this->tag, this->user_token_number, this->number);
token_translations[this->user_token_number] = this->number;
}
return TRUE;
}
/*===================\
@@ -531,23 +706,7 @@ parse_token_decl (symbol_class what_is, symbol_class what_is_not)
}
else if (token == tok_identifier && *symval->tag == '\"' && symbol)
{
if (symval->alias)
warn (_("symbol `%s' used more than once as a literal string"),
symval->tag);
else if (symbol->alias)
warn (_("symbol `%s' given more than one literal string"),
symbol->tag);
else
{
symval->class = token_sym;
symval->type_name = typename;
symval->user_token_number = symbol->user_token_number;
symbol->user_token_number = SALIAS;
symval->alias = symbol;
symbol->alias = symval;
/* symbol and symval combined are only one symbol */
nsyms--;
}
bucket_make_alias (symbol, typename);
symbol = NULL;
}
else if (token == tok_identifier)
@@ -560,6 +719,13 @@ parse_token_decl (symbol_class what_is, symbol_class what_is_not)
symbol->class = what_is;
if (what_is == nterm_sym && oldclass != nterm_sym)
symbol->number = nvars++;
if (what_is == token_sym && symbol->number == -1)
{
symbol->number = ntokens++;
if (getenv ("DEBUG"))
fprintf (stderr, "Set %s to %d\n",
symbol->tag, symbol->number);
}
if (typename)
{
@@ -574,7 +740,13 @@ parse_token_decl (symbol_class what_is, symbol_class what_is_not)
symbol->user_token_number = numval;
/* User defined EOF token? */
if (numval == 0)
eoftoken = symbol;
{
eoftoken = symbol;
eoftoken->number = 0;
/* It is always mapped to 0, so it was already counted in
NTOKENS. */
--ntokens;
}
}
else
{
@@ -703,7 +875,11 @@ parse_assoc_decl (associativity assoc)
symval->assoc = assoc;
if (symval->class == nterm_sym)
complain (_("symbol %s redefined"), symval->tag);
symval->class = token_sym;
if (symval->number == -1)
{
symval->number = ntokens++;
symval->class = token_sym;
}
if (name)
{ /* record the type, if one is specified */
if (symval->type_name == NULL)
@@ -720,9 +896,9 @@ parse_assoc_decl (associativity assoc)
}
else
{
complain (_
("invalid text (%s) - number should be after identifier"),
token_buffer);
complain
(_("invalid text (%s) - number should be after identifier"),
token_buffer);
skip_to_char ('%');
}
break;
@@ -1073,7 +1249,7 @@ read_declarations (void)
case tok_stropt:
case tok_intopt:
case tok_obsolete:
abort ();
assert (0);
break;
case tok_illegal:
@@ -1231,7 +1407,6 @@ readgram (void)
bucket *lhs = NULL;
symbol_list *p = NULL;
symbol_list *p1 = NULL;
bucket *bp;
/* Points to first symbol_list of current rule. its symbol is the
lhs of the rule. */
@@ -1463,16 +1638,7 @@ readgram (void)
fatal (_("no rules in the input grammar"));
/* Report any undefined symbols and consider them nonterminals. */
for (bp = firstsymbol; bp; bp = bp->next)
if (bp->class == unknown_sym)
{
complain (_
("symbol %s is used, but is not defined as a token and has no rules"),
bp->tag);
bp->class = nterm_sym;
bp->number = nvars++;
}
buckets_do (bucket_check_defined, NULL);
/* Insert the initial rule, which line is that of the first rule
(not that of the start symbol):
@@ -1493,7 +1659,10 @@ readgram (void)
fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
MAXSHORT);
ntokens = nsyms - nvars;
if (getenv ("DEBUG"))
fprintf (stderr, "nsyms == ntokens + nvars: %d == %d + %d\n",
nsyms, ntokens, nvars);
assert (nsyms == ntokens + nvars);
}
/* At the end of the grammar file, some C source code must
@@ -1530,9 +1699,25 @@ read_additionnal_code (void)
static void
token_translations_init (void)
{
bucket *bp = NULL;
int last_user_token_number = 256;
int i;
/* Set the user numbers. */
for (i = 0; i < ntokens; ++i)
{
bucket *this = symbols[i];
if (getenv ("DEBUG"))
fprintf (stderr, "UserVal %s = %d (val = %d)\n",
this->tag, this->user_token_number, this->number);
if (this->user_token_number == SUNDEF)
this->user_token_number = ++last_user_token_number;
if (this->user_token_number > max_user_token_number)
max_user_token_number = this->user_token_number;
if (getenv ("DEBUG"))
fprintf (stderr, "Now: UserVal %s = %d (val = %d)\n",
this->tag, this->user_token_number, this->number);
}
token_translations = XCALLOC (short, max_user_token_number + 1);
/* Initialize all entries for literal tokens to 2, the internal
@@ -1541,24 +1726,7 @@ token_translations_init (void)
for (i = 0; i < max_user_token_number + 1; i++)
token_translations[i] = 2;
for (bp = firstsymbol; bp; bp = bp->next)
{
/* Non-terminal? */
if (bp->number >= ntokens)
continue;
/* A token string alias? */
if (bp->user_token_number == SALIAS)
continue;
assert (bp->user_token_number != SUNDEF);
/* A token which translation has already been set? */
if (token_translations[bp->user_token_number] != 2)
complain (_("tokens %s and %s both assigned number %d"),
symbols[token_translations[bp->user_token_number]]->tag,
bp->tag, bp->user_token_number);
token_translations[bp->user_token_number] = bp->number;
}
buckets_do (bucket_translation, NULL);
}
@@ -1570,83 +1738,10 @@ token_translations_init (void)
static void
packsymbols (void)
{
bucket *bp = NULL;
int tokno = 1;
int last_user_token_number;
symbols = XCALLOC (bucket *, nsyms);
max_user_token_number = 256;
last_user_token_number = 256;
for (bp = firstsymbol; bp; bp = bp->next)
{
if (bp->class == nterm_sym)
{
bp->number += ntokens;
}
else if (bp->alias)
{
/* This symbol and its alias are a single token defn.
Allocate a tokno, and assign to both check agreement of
prec and assoc fields and make both the same */
if (bp->number == -1)
{
if (bp == eoftoken || bp->alias == eoftoken)
bp->number = bp->alias->number = 0;
else
{
bp->number = bp->alias->number = tokno++;
}
}
if (bp->prec != bp->alias->prec)
{
if (bp->prec != 0 && bp->alias->prec != 0
&& bp->user_token_number == SALIAS)
complain (_("conflicting precedences for %s and %s"),
bp->tag, bp->alias->tag);
if (bp->prec != 0)
bp->alias->prec = bp->prec;
else
bp->prec = bp->alias->prec;
}
if (bp->assoc != bp->alias->assoc)
{
if (bp->assoc != 0 && bp->alias->assoc != 0
&& bp->user_token_number == SALIAS)
complain (_("conflicting assoc values for %s and %s"),
bp->tag, bp->alias->tag);
if (bp->assoc != 0)
bp->alias->assoc = bp->assoc;
else
bp->assoc = bp->alias->assoc;
}
/* Do not do processing below for SALIASs. */
if (bp->user_token_number == SALIAS)
continue;
}
else /* bp->class == token_sym */
{
if (bp == eoftoken)
bp->number = 0;
else
bp->number = tokno++;
}
if (bp->class == token_sym)
{
if (bp->user_token_number == SUNDEF)
bp->user_token_number = ++last_user_token_number;
if (bp->user_token_number > max_user_token_number)
max_user_token_number = bp->user_token_number;
}
symbols[bp->number] = bp;
}
buckets_do (bucket_check_alias_consistence, NULL);
buckets_do (bucket_pack, NULL);
token_translations_init ();
@@ -1750,7 +1845,7 @@ reader (void)
obstack_init (&muscle_obstack);
/* Initialize the symbol table. */
tabinit ();
buckets_new ();
/* Construct the axiom symbol. */
axiom = getsym ("$axiom");
@@ -1760,12 +1855,14 @@ reader (void)
/* Construct the error token */
errtoken = getsym ("error");
errtoken->class = token_sym;
errtoken->number = ntokens++;
errtoken->user_token_number = 256; /* Value specified by POSIX. */
/* Construct a token that represents all undefined literal tokens.
It is always token number 2. */
undeftoken = getsym ("$undefined.");
undeftoken->class = token_sym;
undeftoken->number = ntokens++;
undeftoken->user_token_number = 2;
/* Initialize the obstacks. */
@@ -1785,6 +1882,7 @@ reader (void)
{
eoftoken = getsym ("$");
eoftoken->class = token_sym;
eoftoken->number = 0;
/* Value specified by POSIX. */
eoftoken->user_token_number = 0;
}
@@ -1815,5 +1913,5 @@ grammar_free (void)
XFREE (ritem);
free (rules + 1);
/* Free the symbol table data structure. */
free_symtab ();
buckets_free ();
}

View File

@@ -199,9 +199,9 @@ inaccessable_symbols (void)
V = Vp;
/* Tokens 0, 1, and 2 are internal to Bison. Consider them useful. */
bitset_set (V, 0); /* end-of-input token */
bitset_set (V, 1); /* error token */
bitset_set (V, 2); /* some undefined token */
bitset_set (V, eoftoken->number); /* end-of-input token */
bitset_set (V, errtoken->number); /* error token */
bitset_set (V, undeftoken->number); /* some undefined token */
bitset_free (P);
P = Pp;

View File

@@ -1,5 +1,5 @@
/* Symbol table manager for Bison,
Copyright 1984, 1989, 2000, 2001, 2002 Free Software Foundation, Inc.
Copyright (C) 1984, 1989, 2000, 2001, 2002 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
@@ -20,39 +20,19 @@
#include "system.h"
#include "hash.h"
#include "symtab.h"
#include "gram.h"
bucket *firstsymbol;
static bucket *lastsymbol;
static bucket **symtab;
static int
hash (const char *key)
{
const char *cp;
int k;
cp = key;
k = 0;
while (*cp)
k = ((k << 1) ^ (*cp++)) & 0x3fff;
return k % TABSIZE;
}
/*--------------------------------------------------------------.
| Create a new symbol, named TAG, which hash value is HASHVAL. |
`--------------------------------------------------------------*/
/*---------------------------------.
| Create a new symbol, named TAG. |
`---------------------------------*/
static bucket *
bucket_new (const char *tag, int hashval)
bucket_new (const char *tag)
{
bucket *res = XMALLOC (bucket, 1);
res->link = symtab[hashval];
res->next = NULL;
res->tag = xstrdup (tag);
res->type_name = NULL;
res->number = -1;
@@ -62,19 +42,67 @@ bucket_new (const char *tag, int hashval)
res->alias = NULL;
res->class = unknown_sym;
if (getenv ("DEBUG"))
fprintf (stderr, "Creating: nsyms = %d, ntokens = %d: %s\n",
nsyms, ntokens, tag);
nsyms++;
return res;
}
void
tabinit (void)
{
symtab = XCALLOC (bucket *, TABSIZE);
/*------------.
| Free THIS. |
`------------*/
firstsymbol = NULL;
lastsymbol = NULL;
static void
bucket_free (bucket *this)
{
#if 0
/* This causes crashes because one string can appear more
than once. */
XFREE (this->type_name);
#endif
XFREE (this->tag);
XFREE (this);
}
/*----------------------.
| A bucket hash table. |
`----------------------*/
/* Initial capacity of buckets hash table. */
#define HT_INITIAL_CAPACITY 257
static struct hash_table *bucket_table = NULL;
static bool
hash_compare_bucket (const bucket *m1, const bucket *m2)
{
return strcmp (m1->tag, m2->tag) ? FALSE : TRUE;
}
static unsigned int
hash_bucket (const bucket *m, unsigned int tablesize)
{
return hash_string (m->tag, tablesize);
}
/*-------------------------------.
| Create the bucket hash table. |
`-------------------------------*/
void
buckets_new (void)
{
bucket_table = hash_initialize (HT_INITIAL_CAPACITY,
NULL,
(Hash_hasher) hash_bucket,
(Hash_comparator) hash_compare_bucket,
(Hash_data_freer) bucket_free);
}
@@ -86,66 +114,42 @@ tabinit (void)
bucket *
getsym (const char *key)
{
int hashval;
bucket *bp;
int found;
bucket probe;
bucket *entry;
hashval = hash (key);
bp = symtab[hashval];
(const char *) probe.tag = key;
entry = hash_lookup (bucket_table, &probe);
found = 0;
while (bp != NULL && found == 0)
if (!entry)
{
if (strcmp (key, bp->tag) == 0)
found = 1;
else
bp = bp->link;
/* First insertion in the hash. */
entry = bucket_new (key);
hash_insert (bucket_table, entry);
}
if (found == 0)
{
bp = bucket_new (key, hashval);
if (firstsymbol == NULL)
{
firstsymbol = bp;
lastsymbol = bp;
}
else
{
lastsymbol->next = bp;
lastsymbol = bp;
}
symtab[hashval] = bp;
}
return bp;
return entry;
}
/*-------------------.
| Free the buckets. |
`-------------------*/
void
free_symtab (void)
buckets_free (void)
{
int i;
bucket *bp, *bptmp; /* JF don't use ptr after free */
for (i = 0; i < TABSIZE; i++)
{
bp = symtab[i];
while (bp)
{
bptmp = bp->link;
#if 0
/* This causes crashes because one string can appear more
than once. */
if (bp->type_name)
XFREE (bp->type_name);
#endif
XFREE (bp->tag);
XFREE (bp);
bp = bptmp;
}
}
XFREE (symtab);
hash_free (bucket_table);
}
/*---------------------------------------------------------------.
| Look for undefined buckets, report an error, and consider them |
| terminals. |
`---------------------------------------------------------------*/
void
buckets_do (bucket_processor processor, void *processor_data)
{
hash_do_for_each (bucket_table,
(Hash_processor) processor,
processor_data);
}

View File

@@ -46,10 +46,6 @@ typedef enum
typedef struct bucket
{
/* Needed for the hash table. */
struct bucket *link;
struct bucket *next;
/* The key, name of the symbol. */
char *tag;
/* Its type. */
@@ -66,12 +62,13 @@ typedef struct bucket
symbol_class class;
} bucket;
extern bucket *firstsymbol;
/* A function to apply to each symbol. */
typedef bool (*bucket_processor) PARAMS ((bucket *));
bucket *getsym PARAMS ((const char *));
void tabinit PARAMS ((void));
void free_symtab PARAMS ((void));
void buckets_new PARAMS ((void));
void buckets_do PARAMS ((bucket_processor processor, void *processor_data));
void buckets_free PARAMS ((void));
#endif /* !SYMTAB_H_ */