Move symbols handling code out of the reader.

* src/reader.h, src/reader.c (errtoken, undeftoken, eoftoken)
(axiom): Move to...
* src/symtab.h, src/symtab.c: here.
* src/gram.c (start_symbol): Remove: use startsymbol->number.
* src/reader.c (startval): Rename as...
* src/symtab.h, src/symtab.c (startsymbol): this.
* src/reader.c: Adjust.
* src/reader.c (symbol_check_defined, symbol_make_alias)
(symbol_check_alias_consistence, symbol_pack, symbol_translation)
(token_translations_init)
Move to...
* src/symtab.c: here.
* src/reader.c (packsymbols): Move to...
* src/symtab.h, src/symtab.c (symbols_pack): here.
* src/symtab.h, src/symtab.c (symbol_make_alias): Takes SYMVAL as
argument.
This commit is contained in:
Akim Demaille
2002-06-10 08:35:39 +00:00
parent e9bca3aded
commit 2f1afb737f
18 changed files with 1071 additions and 1192 deletions

View File

@@ -41,8 +41,6 @@ int nvars = 0;
symbol_number_t *token_translations = NULL;
symbol_number_t start_symbol = 0;
int max_user_token_number = 256;
int pure_parser = 0;

View File

@@ -51,7 +51,6 @@ typedef struct symbol_list
int lineno;
static symbol_list *grammar = NULL;
static int start_flag = 0;
static symbol_t *startval = NULL;
/* Nonzero if components of semantic values are used, implying
they must be unions. */
@@ -63,11 +62,6 @@ static int typed = 0;
/* Incremented for each %left, %right or %nonassoc seen */
static int lastprec = 0;
/* The error token.  */
symbol_t *errtoken = NULL;
/* The token whose number marks unclaimed TOKEN_TRANSLATIONS entries.  */
symbol_t *undeftoken = NULL;
/* The token that closes the initial rule built by readgram.  */
symbol_t *eoftoken = NULL;
/* Left-hand side of the initial rule built by readgram.  */
symbol_t *axiom = NULL;
static symbol_list *
symbol_list_new (symbol_t *sym)
{
@@ -81,169 +75,6 @@ symbol_list_new (symbol_t *sym)
return res;
}
/*------------------------.
| Operations on symbols. |
`------------------------*/
/*-------------------------------------------------------------.
| If THIS still has class unknown_sym, complain and make it a  |
| nonterminal numbered from NVARS.  Always return TRUE.        |
`-------------------------------------------------------------*/

static bool
symbol_check_defined (symbol_t *this)
{
  /* Symbols whose class is already known need no repair.  */
  if (this->class != unknown_sym)
    return TRUE;

  complain
    (_("symbol %s is used, but is not defined as a token and has no rules"),
     this->tag);
  this->class = nterm_sym;
  this->number = nvars++;
  return TRUE;
}
/*-------------------------------------------------------------.
| Make SYMBOL and SYMVAL (which is defined outside this        |
| function) aliases of one another, typing SYMVAL with         |
| TYPENAME.  Only warn if either one already has an alias.     |
| Always return TRUE.                                          |
`-------------------------------------------------------------*/

static bool
symbol_make_alias (symbol_t *symbol, char *typename)
{
  /* SYMVAL may serve as a literal string only once.  */
  if (symval->alias)
    {
      warn (_("symbol `%s' used more than once as a literal string"),
            symval->tag);
      return TRUE;
    }

  /* SYMBOL may be given only one literal string.  */
  if (symbol->alias)
    {
      warn (_("symbol `%s' given more than one literal string"),
            symbol->tag);
      return TRUE;
    }

  symval->class = token_sym;
  symval->type_name = typename;

  /* SYMVAL takes over the user number; SYMBOL is flagged as alias.  */
  symval->user_token_number = symbol->user_token_number;
  symbol->user_token_number = USER_NUMBER_ALIAS;

  symval->alias = symbol;
  symbol->alias = symval;

  /* The pair counts as a single symbol, hence one token less.  */
  nsyms--;
  ntokens--;
  assert (ntokens == symbol->number || ntokens == symval->number);

  /* Both halves end up with the smaller of their two numbers.  */
  if (symval->number < symbol->number)
    symbol->number = symval->number;
  else
    symval->number = symbol->number;

  return TRUE;
}
/*-------------------------------------------------------------.
| When THIS is the alias half of a pair, make its precedence   |
| and associativity agree with its partner's, complaining      |
| when both sides set different values.  Always return TRUE.   |
`-------------------------------------------------------------*/

static bool
symbol_check_alias_consistence (symbol_t *this)
{
  symbol_t *twin = this->alias;

  /* Only the USER_NUMBER_ALIAS half of a pair does the checking.  */
  if (!twin || this->user_token_number != USER_NUMBER_ALIAS)
    return TRUE;

  if (this->prec != twin->prec)
    {
      if (this->prec != 0 && twin->prec != 0)
        complain (_("conflicting precedences for %s and %s"),
                  this->tag, twin->tag);
      /* A nonzero value on THIS takes over; otherwise THIS copies
         its twin.  */
      if (this->prec == 0)
        this->prec = twin->prec;
      else
        twin->prec = this->prec;
    }

  if (this->assoc != twin->assoc)
    {
      if (this->assoc != 0 && twin->assoc != 0)
        complain (_("conflicting assoc values for %s and %s"),
                  this->tag, twin->tag);
      /* Same rule as for the precedence.  */
      if (this->assoc == 0)
        this->assoc = twin->assoc;
      else
        twin->assoc = this->assoc;
    }

  return TRUE;
}
/*-------------------------------------------------------------.
| Give THIS its final symbol number and record it in SYMBOLS.  |
| Always return TRUE.                                          |
`-------------------------------------------------------------*/

static bool
symbol_pack (symbol_t *this)
{
  if (this->class == nterm_sym)
    {
      /* Nonterminals are numbered after all the tokens.  */
      this->number += ntokens;
    }
  else if (!this->alias)
    {
      /* Without an alias, the number must already be set.  */
      assert (this->number != NUMBER_UNDEFINED);
    }
  else
    {
      /* THIS and its alias are one token: they share a number.  */
      if (this->number == NUMBER_UNDEFINED)
        {
          if (this != eoftoken && this->alias != eoftoken)
            {
              assert (this->alias->number != NUMBER_UNDEFINED);
              this->number = this->alias->number;
            }
          else
            this->number = this->alias->number = 0;
        }

      /* The USER_NUMBER_ALIAS half is not stored in SYMBOLS.  */
      if (this->user_token_number == USER_NUMBER_ALIAS)
        return TRUE;
    }

  symbols[this->number] = this;
  return TRUE;
}
/*-------------------------------------------------------------.
| If THIS is a token that is not a USER_NUMBER_ALIAS, enter it |
| in TOKEN_TRANSLATIONS, complaining if its user number is     |
| already taken.  Always return TRUE.                          |
`-------------------------------------------------------------*/

static bool
symbol_translation (symbol_t *this)
{
  /* Only genuine tokens get a translation.  */
  if (this->class != token_sym
      || this->user_token_number == USER_NUMBER_ALIAS)
    return TRUE;

  /* An entry that no longer holds undeftoken's number has already
     been claimed by another token.  */
  if (token_translations[this->user_token_number] != undeftoken->number)
    complain (_("tokens %s and %s both assigned number %d"),
              symbols[token_translations[this->user_token_number]]->tag,
              this->tag, this->user_token_number);

  token_translations[this->user_token_number] = this->number;
  return TRUE;
}
/*===================\
| Low level lexing. |
\===================*/
@@ -699,7 +530,7 @@ parse_token_decl (symbol_class what_is, symbol_class what_is_not)
}
else if (token == tok_identifier && *symval->tag == '\"' && symbol)
{
symbol_make_alias (symbol, typename);
symbol_make_alias (symbol, symval, typename);
symbol = NULL;
}
else if (token == tok_identifier)
@@ -762,7 +593,7 @@ parse_start_decl (void)
else
{
start_flag = 1;
startval = symval;
startsymbol = symval;
}
}
@@ -1401,7 +1232,7 @@ readgram (void)
if (!start_flag)
{
startval = lhs;
startsymbol = lhs;
start_flag = 1;
}
@@ -1599,7 +1430,7 @@ readgram (void)
fatal (_("no rules in the input grammar"));
/* Report any undefined symbols and consider them nonterminals. */
symbols_do (symbol_check_defined, NULL);
symbols_check_defined ();
/* Insert the initial rule, whose line is that of the first rule
(not that of the start symbol):
@@ -1607,14 +1438,13 @@ readgram (void)
axiom: %start EOF. */
p = symbol_list_new (axiom);
p->line = grammar->line;
p->next = symbol_list_new (startval);
p->next = symbol_list_new (startsymbol);
p->next->next = symbol_list_new (eoftoken);
p->next->next->next = symbol_list_new (NULL);
p->next->next->next->next = grammar;
nrules += 1;
nritems += 3;
grammar = p;
startval = axiom;
if (nsyms > SHRT_MAX)
fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
@@ -1649,85 +1479,6 @@ read_additionnal_code (void)
}
/*-------------------------------------------------------------.
| Number the tokens the user left unnumbered, then build       |
| TOKEN_TRANSLATIONS, reporting user numbers claimed twice.    |
`-------------------------------------------------------------*/

static void
token_translations_init (void)
{
  int slot_256_free = TRUE;
  int i;

  /* First pass: record the largest user number in use, and note
     whether 256, the POSIX preferred user number for the error
     token, has been taken.  */
  max_user_token_number = 0;
  for (i = 0; i < ntokens; ++i)
    {
      symbol_t *sym = symbols[i];

      if (sym->user_token_number == USER_NUMBER_UNDEFINED)
        continue;
      if (sym->user_token_number > max_user_token_number)
        max_user_token_number = sym->user_token_number;
      if (sym->user_token_number == 256)
        slot_256_free = FALSE;
    }

  /* Give 256 to the error token when nobody else uses it and the
     error token has no number yet.  */
  if (slot_256_free
      && errtoken->user_token_number == USER_NUMBER_UNDEFINED)
    errtoken->user_token_number = 256;

  /* Second pass: automatic numbers start above 256.  */
  if (max_user_token_number < 256)
    max_user_token_number = 256;

  for (i = 0; i < ntokens; ++i)
    {
      symbol_t *sym = symbols[i];

      if (sym->user_token_number == USER_NUMBER_UNDEFINED)
        sym->user_token_number = ++max_user_token_number;
      if (sym->user_token_number > max_user_token_number)
        max_user_token_number = sym->user_token_number;
    }

  token_translations = XCALLOC (symbol_number_t, max_user_token_number + 1);

  /* Every entry starts out as undeftoken's number; the entries that
     symbol_translation does not overwrite keep it.  */
  for (i = 0; i < max_user_token_number + 1; i++)
    token_translations[i] = undeftoken->number;

  symbols_do (symbol_translation, NULL);
}
/*-------------------------------------------------------------.
| Reconcile aliases, number the symbols into SYMBOLS, set up   |
| TOKEN_TRANSLATIONS, and reject an unusable start symbol.     |
| Finally store its number in START_SYMBOL.                    |
`-------------------------------------------------------------*/

static void
packsymbols (void)
{
  symbols = XCALLOC (symbol_t *, nsyms);

  symbols_do (symbol_check_alias_consistence, NULL);
  symbols_do (symbol_pack, NULL);

  token_translations_init ();

  /* The start symbol must be neither undefined nor a token.  */
  switch (startval->class)
    {
    case unknown_sym:
      fatal (_("the start symbol %s is undefined"), startval->tag);
      break;
    case token_sym:
      fatal (_("the start symbol %s is a token"), startval->tag);
      break;
    default:
      break;
    }

  start_symbol = startval->number;
}
/*---------------------------------------------------------------.
| Convert the rules into the representation using RRHS, RLHS and |
| RITEM. |
@@ -1862,7 +1613,7 @@ reader (void)
/* Assign the symbols their symbol numbers. Write #defines for the
token symbols into FDEFINES if requested. */
packsymbols ();
symbols_pack ();
/* Convert the grammar into the format described in gram.h. */
packgram ();

View File

@@ -30,9 +30,4 @@ void reader PARAMS ((void));
extern int lineno;
extern symbol_t *errtoken;
extern symbol_t *undeftoken;
extern symbol_t *eoftoken;
extern symbol_t *axiom;
#endif /* !READER_H_ */

View File

@@ -165,9 +165,9 @@ inaccessable_symbols (void)
Pp = bitset_create (nrules + 1, BITSET_FIXED);
/* If the start symbol isn't useful, then nothing will be useful. */
if (bitset_test (N, start_symbol - ntokens))
if (bitset_test (N, axiom->number - ntokens))
{
bitset_set (V, start_symbol);
bitset_set (V, axiom->number);
while (1)
{
@@ -315,7 +315,7 @@ nonterminals_reduce (void)
if (ISVAR (*rhsp))
*rhsp = symbol_number_as_item_number (nontermmap[*rhsp]);
}
start_symbol = nontermmap[start_symbol];
axiom->number = nontermmap[axiom->number];
}
nsyms -= nuseless_nonterminals;
@@ -435,9 +435,9 @@ reduce_grammar (void)
reduce_print ();
if (!bitset_test (N, start_symbol - ntokens))
if (!bitset_test (N, axiom->number - ntokens))
fatal (_("Start symbol %s does not derive any sentence"),
quotearg_style (escape_quoting_style, symbols[start_symbol]->tag));
quotearg_style (escape_quoting_style, symbols[axiom->number]->tag));
/* First reduce the nonterminals, as they renumber themselves in the
whole grammar. If you change the order, nonterms would be

View File

@@ -21,9 +21,20 @@
#include "system.h"
#include "hash.h"
#include "complain.h"
#include "symtab.h"
#include "gram.h"
/*------------------------.
| Distinguished symbols. |
`------------------------*/
/* The error token.  */
symbol_t *errtoken = NULL;
/* The token whose number marks unclaimed TOKEN_TRANSLATIONS entries.  */
symbol_t *undeftoken = NULL;
/* The token checked for in symbol_pack to receive number 0.  */
symbol_t *eoftoken = NULL;
/* The real start symbol, the one used by the automaton.  */
symbol_t *axiom = NULL;
/* The start symbol of the user's grammar; symbols_pack rejects it
   if it is undefined or a token.  */
symbol_t *startsymbol = NULL;
/*---------------------------------.
| Create a new symbol, named TAG. |
`---------------------------------*/
@@ -65,9 +76,165 @@ symbol_free (symbol_t *this)
}
/*-------------------------------------------------------------.
| If THIS still has class unknown_sym, complain and make it a  |
| nonterminal numbered from NVARS.  Always return TRUE.        |
`-------------------------------------------------------------*/

static bool
symbol_check_defined (symbol_t *this)
{
  /* Symbols whose class is already known need no repair.  */
  if (this->class != unknown_sym)
    return TRUE;

  complain
    (_("symbol %s is used, but is not defined as a token and has no rules"),
     this->tag);
  this->class = nterm_sym;
  this->number = nvars++;
  return TRUE;
}
/*-------------------------------------------------------------.
| Make SYMBOL and SYMVAL aliases of one another, typing SYMVAL |
| with TYPENAME.  Only warn if either one already has an       |
| alias.                                                       |
`-------------------------------------------------------------*/

void
symbol_make_alias (symbol_t *symbol, symbol_t *symval, char *typename)
{
  /* SYMVAL may serve as a literal string only once.  */
  if (symval->alias)
    {
      warn (_("symbol `%s' used more than once as a literal string"),
            symval->tag);
      return;
    }

  /* SYMBOL may be given only one literal string.  */
  if (symbol->alias)
    {
      warn (_("symbol `%s' given more than one literal string"),
            symbol->tag);
      return;
    }

  symval->class = token_sym;
  symval->type_name = typename;

  /* SYMVAL takes over the user number; SYMBOL is flagged as alias.  */
  symval->user_token_number = symbol->user_token_number;
  symbol->user_token_number = USER_NUMBER_ALIAS;

  symval->alias = symbol;
  symbol->alias = symval;

  /* The pair counts as a single symbol, hence one token less.  */
  nsyms--;
  ntokens--;
  assert (ntokens == symbol->number || ntokens == symval->number);

  /* Both halves end up with the smaller of their two numbers.  */
  if (symval->number < symbol->number)
    symbol->number = symval->number;
  else
    symval->number = symbol->number;
}
/*-------------------------------------------------------------.
| When THIS is the alias half of a pair, make its precedence   |
| and associativity agree with its partner's, complaining      |
| when both sides set different values.  Always return TRUE.   |
`-------------------------------------------------------------*/

static bool
symbol_check_alias_consistence (symbol_t *this)
{
  symbol_t *twin = this->alias;

  /* Only the USER_NUMBER_ALIAS half of a pair does the checking.  */
  if (!twin || this->user_token_number != USER_NUMBER_ALIAS)
    return TRUE;

  if (this->prec != twin->prec)
    {
      if (this->prec != 0 && twin->prec != 0)
        complain (_("conflicting precedences for %s and %s"),
                  this->tag, twin->tag);
      /* A nonzero value on THIS takes over; otherwise THIS copies
         its twin.  */
      if (this->prec == 0)
        this->prec = twin->prec;
      else
        twin->prec = this->prec;
    }

  if (this->assoc != twin->assoc)
    {
      if (this->assoc != 0 && twin->assoc != 0)
        complain (_("conflicting assoc values for %s and %s"),
                  this->tag, twin->tag);
      /* Same rule as for the precedence.  */
      if (this->assoc == 0)
        this->assoc = twin->assoc;
      else
        twin->assoc = this->assoc;
    }

  return TRUE;
}
/*-------------------------------------------------------------.
| Give THIS its final symbol number and record it in SYMBOLS.  |
| Always return TRUE.                                          |
`-------------------------------------------------------------*/

static bool
symbol_pack (symbol_t *this)
{
  if (this->class == nterm_sym)
    {
      /* Nonterminals are numbered after all the tokens.  */
      this->number += ntokens;
    }
  else if (!this->alias)
    {
      /* Without an alias, the number must already be set.  */
      assert (this->number != NUMBER_UNDEFINED);
    }
  else
    {
      /* THIS and its alias are one token: they share a number.  */
      if (this->number == NUMBER_UNDEFINED)
        {
          if (this != eoftoken && this->alias != eoftoken)
            {
              assert (this->alias->number != NUMBER_UNDEFINED);
              this->number = this->alias->number;
            }
          else
            this->number = this->alias->number = 0;
        }

      /* The USER_NUMBER_ALIAS half is not stored in SYMBOLS.  */
      if (this->user_token_number == USER_NUMBER_ALIAS)
        return TRUE;
    }

  symbols[this->number] = this;
  return TRUE;
}
/*-------------------------------------------------------------.
| If THIS is a token that is not a USER_NUMBER_ALIAS, enter it |
| in TOKEN_TRANSLATIONS, complaining if its user number is     |
| already taken.  Always return TRUE.                          |
`-------------------------------------------------------------*/

static bool
symbol_translation (symbol_t *this)
{
  /* Only genuine tokens get a translation.  */
  if (this->class != token_sym
      || this->user_token_number == USER_NUMBER_ALIAS)
    return TRUE;

  /* An entry that no longer holds undeftoken's number has already
     been claimed by another token.  */
  if (token_translations[this->user_token_number] != undeftoken->number)
    complain (_("tokens %s and %s both assigned number %d"),
              symbols[token_translations[this->user_token_number]]->tag,
              this->tag, this->user_token_number);

  token_translations[this->user_token_number] = this->number;
  return TRUE;
}
/*----------------------.
| A symbol_t hash table. |
| A symbol hash table. |
`----------------------*/
/* Initial capacity of symbols hash table. */
@@ -89,7 +256,7 @@ hash_symbol_t (const symbol_t *m, unsigned int tablesize)
/*-------------------------------.
| Create the symbol_t hash table. |
| Create the symbol hash table. |
`-------------------------------*/
void
@@ -150,3 +317,91 @@ symbols_do (symbol_processor processor, void *processor_data)
(Hash_processor) processor,
processor_data);
}
/*--------------------------------------------------------------.
| Check that all the symbols are defined.  Report any undefined |
| symbols and consider them nonterminals.                       |
`--------------------------------------------------------------*/
void
symbols_check_defined (void)
{
/* Run symbol_check_defined over every symbol; no extra data is passed.  */
symbols_do (symbol_check_defined, NULL);
}
/*-------------------------------------------------------------.
| Number the tokens the user left unnumbered, then build       |
| TOKEN_TRANSLATIONS, reporting user numbers claimed twice.    |
`-------------------------------------------------------------*/

static void
symbols_token_translations_init (void)
{
  int slot_256_free = TRUE;
  int i;

  /* First pass: record the largest user number in use, and note
     whether 256, the POSIX preferred user number for the error
     token, has been taken.  */
  max_user_token_number = 0;
  for (i = 0; i < ntokens; ++i)
    {
      symbol_t *sym = symbols[i];

      if (sym->user_token_number == USER_NUMBER_UNDEFINED)
        continue;
      if (sym->user_token_number > max_user_token_number)
        max_user_token_number = sym->user_token_number;
      if (sym->user_token_number == 256)
        slot_256_free = FALSE;
    }

  /* Give 256 to the error token when nobody else uses it and the
     error token has no number yet.  */
  if (slot_256_free
      && errtoken->user_token_number == USER_NUMBER_UNDEFINED)
    errtoken->user_token_number = 256;

  /* Second pass: automatic numbers start above 256.  */
  if (max_user_token_number < 256)
    max_user_token_number = 256;

  for (i = 0; i < ntokens; ++i)
    {
      symbol_t *sym = symbols[i];

      if (sym->user_token_number == USER_NUMBER_UNDEFINED)
        sym->user_token_number = ++max_user_token_number;
      if (sym->user_token_number > max_user_token_number)
        max_user_token_number = sym->user_token_number;
    }

  token_translations = XCALLOC (symbol_number_t, max_user_token_number + 1);

  /* Every entry starts out as undeftoken's number; the entries that
     symbol_translation does not overwrite keep it.  */
  for (i = 0; i < max_user_token_number + 1; i++)
    token_translations[i] = undeftoken->number;

  symbols_do (symbol_translation, NULL);
}
/*-------------------------------------------------------------.
| Reconcile aliases, number the symbols into SYMBOLS, set up   |
| TOKEN_TRANSLATIONS, and reject an unusable start symbol.     |
`-------------------------------------------------------------*/

void
symbols_pack (void)
{
  symbols = XCALLOC (symbol_t *, nsyms);

  symbols_do (symbol_check_alias_consistence, NULL);
  symbols_do (symbol_pack, NULL);

  symbols_token_translations_init ();

  /* The start symbol must be neither undefined nor a token.  */
  switch (startsymbol->class)
    {
    case unknown_sym:
      fatal (_("the start symbol %s is undefined"), startsymbol->tag);
      break;
    case token_sym:
      fatal (_("the start symbol %s is a token"), startsymbol->tag);
      break;
    default:
      break;
    }
}

View File

@@ -22,7 +22,9 @@
#ifndef SYMTAB_H_
# define SYMTAB_H_
#define TABSIZE 1009
/*----------.
| Symbols. |
`----------*/
/* Associativity values for tokens and rules. */
typedef enum
@@ -79,13 +81,48 @@ struct symbol_s
#define NUMBER_UNDEFINED ((symbol_number_t) -1)
/* Fetch (or create) the symbol associated to KEY. */
symbol_t *getsym PARAMS ((const char *key));
/* Declare the new SYMBOL. Make it an alias of SYMVAL, and type */
/* them with TYPENAME. */
void symbol_make_alias PARAMS ((symbol_t *symbol, symbol_t *symval,
char *typename));
/* Distinguished symbols. AXIOM is the real start symbol, that used
by the automaton. STARTSYMBOL is the one specified by the user.
*/
extern symbol_t *errtoken;
extern symbol_t *undeftoken;
extern symbol_t *eoftoken;
extern symbol_t *axiom;
extern symbol_t *startsymbol;
/*---------------.
| Symbol table. |
`---------------*/
/* Create the symbol table. */
void symbols_new PARAMS ((void));
/* A function to apply to each symbol. */
typedef bool (*symbol_processor) PARAMS ((symbol_t *));
symbol_t *getsym PARAMS ((const char *));
void symbols_new PARAMS ((void));
/* Apply PROCESSOR to all the symbols. PROCESSOR must return TRUE: on
FALSE, the processing stops. */
void symbols_do PARAMS ((symbol_processor processor, void *processor_data));
/* Free all the memory allocated for symbols. */
void symbols_free PARAMS ((void));
/* Check that all the symbols are defined. Report any undefined */
/* symbols and consider them nonterminals. */
void symbols_check_defined PARAMS ((void));
/* Perform various sanity checks, assign symbol numbers, and set up
TOKEN_TRANSLATIONS. */
void symbols_pack PARAMS ((void));
#endif /* !SYMTAB_H_ */