* src/symtab.h (SALIAS, SUNDEF): Rename as...

(USER_NUMBER_ALIAS, USER_NUMBER_UNDEFINED): these.
Adjust dependencies.
* src/output.c (token_definitions_output): Be sure not to output a
`#define 'a'' when fed with `%token 'a' "a"'.
* tests/regression.at (Token definitions): New.
This commit is contained in:
Akim Demaille
2002-05-04 08:48:04 +00:00
parent 8bb936e4f7
commit b87f8b2159
9 changed files with 89 additions and 68 deletions

View File

@@ -1,3 +1,12 @@
2002-05-04 Akim Demaille <akim@epita.fr>
* src/symtab.h (SALIAS, SUNDEF): Rename as...
(USER_NUMBER_ALIAS, USER_NUMBER_UNDEFINED): these.
Adjust dependencies.
* src/output.c (token_definitions_output): Be sure not to output a
`#define 'a'' when fed with `%token 'a' "a"'.
* tests/regression.at (Token definitions): New.
2002-05-03 Paul Eggert <eggert@twinsun.com>
* data/bison.simple (b4_token_defines): Also define YYTOKENTYPE

3
NEWS
View File

@@ -64,6 +64,9 @@ Changes in version 1.49b:
* New translations
Croatian, thanks to Denis Lackovic.
* Token definitions
When fed with `%token 'a' "A"', Bison used to output `#define 'a' 65'.
Changes in version 1.35, 2002-03-25:

32
TODO
View File

@@ -277,38 +277,6 @@ Rewrite the reader in Flex/Bison. There will be delicate parts, in
particular, expect the scanner to be hard to write. Many interesting
features cannot be implemented without such a new reader.
* Problems with aliases
From: "Baum, Nathan I" <s0009525@chelt.ac.uk>
Subject: Token Alias Bug
To: "'bug-bison@gnu.org'" <bug-bison@gnu.org>
I've noticed a bug in bison. Sadly, our eternally wise sysadmins won't let
us use CVS, so I can't find out if it's been fixed already...
Basically, I made a program (in flex) that went through a .y file looking
for "..."-tokens, and then output a %token
line for it. For single-character ""-tokens, I reasoned, I could just use
[%token 'A' "A"]. However, this causes Bison to output a [#define 'A' 65],
which cpp chokes on, not unreasonably. (And even if cpp didn't choke, I
obviously wouldn't want (char)'A' to be replaced with (int)65 throughout my
code.)
Bison normally forgoes outputting a #define for a character token. However,
it always outputs an aliased token -- even if the token is an alias for a
character token. We don't want that. The problem is in /output.c/, as I
recall. When it outputs the token definitions, it checks for a character
token, and then checks for an alias token. If the character token check is
placed after the alias check, then it works correctly.
Alias tokens seem to be something of a kludge. What about an [%alias "..."]
command...
%alias T_IF "IF"
Hmm. I can't help thinking... What about a --generate-lex option that
creates an .l file for the alias tokens used... (Or an option to make a
gperf file, etc...)
* Presentation of the report file
From: "Baum, Nathan I" <s0009525@chelt.ac.uk>
Subject: Token Alias Bug

View File

@@ -368,7 +368,7 @@ lex (void)
{
symval->number = ntokens++;
symval->class = token_sym;
if (symval->user_token_number == SUNDEF)
if (symval->user_token_number == USER_NUMBER_UNDEFINED)
symval->user_token_number = code;
}
return tok_identifier;

View File

@@ -582,21 +582,25 @@ token_definitions_output (FILE *out)
symbol_t *symbol = symbols[i];
int number = symbol->user_token_number;
if (number == SALIAS)
continue;
/* At this stage, if there are literal aliases, they are part of
SYMBOLS, so we should not find symbols which are the aliases
here. */
assert (number != USER_NUMBER_ALIAS);
/* Skip error token. */
if (symbol == errtoken)
continue;
if (symbol->tag[0] == '\'')
continue; /* skip literal character */
if (symbol->tag[0] == '\"')
{
/* use literal string only if given a symbol with an alias */
/* If this string has an alias, then it is necessarily the alias
which is to be output. */
if (symbol->alias)
symbol = symbol->alias;
else
/* Don't output literal chars or strings (when defined only as a
string). Note that this must be done after the alias resolution:
think about `%token 'f' "f"'. */
if (symbol->tag[0] == '\'' || symbol->tag[0] == '\"')
continue;
}
/* Don't #define nonliteral tokens whose names contain periods
or '$' (as does the default value of the EOF token). */
@@ -605,6 +609,7 @@ token_definitions_output (FILE *out)
fprintf (out, "%s[[[%s]], [%d]]",
first ? "" : ",\n", symbol->tag, number);
first = 0;
}
}

View File

@@ -126,7 +126,7 @@ symbol_make_alias (symbol_t *symbol, char *typename)
symval->class = token_sym;
symval->type_name = typename;
symval->user_token_number = symbol->user_token_number;
symbol->user_token_number = SALIAS;
symbol->user_token_number = USER_NUMBER_ALIAS;
symval->alias = symbol;
symbol->alias = symval;
/* symbol and symval combined are only one symbol */
@@ -149,7 +149,7 @@ static bool
symbol_check_alias_consistence (symbol_t *this)
{
/* Check only those who _are_ the aliases. */
if (this->alias && this->user_token_number == SALIAS)
if (this->alias && this->user_token_number == USER_NUMBER_ALIAS)
{
if (this->prec != this->alias->prec)
{
@@ -204,8 +204,8 @@ symbol_pack (symbol_t *this)
this->number = this->alias->number;
}
}
/* Do not do processing below for SALIASs. */
if (this->user_token_number == SALIAS)
/* Do not do processing below for USER_NUMBER_ALIASs. */
if (this->user_token_number == USER_NUMBER_ALIAS)
return TRUE;
}
else /* this->class == token_sym */
@@ -229,7 +229,7 @@ symbol_translation (symbol_t *this)
{
/* Non-terminal? */
if (this->class == token_sym
&& this->user_token_number != SALIAS)
&& this->user_token_number != USER_NUMBER_ALIAS)
{
/* A token whose translation has already been set? */
if (token_translations[this->user_token_number] != undeftoken->number)
@@ -998,10 +998,10 @@ parse_expect_decl (void)
| |
| Two symbols are entered in the table, one for the token symbol and |
| one for the literal. Both are given the <type>, if any, from the |
| declaration. The ->user_token_number of the first is SALIAS and |
| the ->user_token_number of the second is set to the number, if |
| any, from the declaration. The two symbols are linked via |
| pointers in their ->alias fields. |
| declaration. The ->user_token_number of the first is |
| USER_NUMBER_ALIAS and the ->user_token_number of the second is set |
| to the number, if any, from the declaration. The two symbols are |
| linked via pointers in their ->alias fields. |
| |
| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
| only the literal string is retained it is the literal string that |
@@ -1014,7 +1014,7 @@ parse_thong_decl (void)
token_t token;
symbol_t *symbol;
char *typename = 0;
int usrtoknum = SUNDEF;
int usrtoknum = USER_NUMBER_UNDEFINED;
token = lex (); /* fetch typename or first token */
if (token == tok_typename)
@@ -1035,7 +1035,7 @@ parse_thong_decl (void)
}
symval->class = token_sym;
symval->type_name = typename;
symval->user_token_number = SALIAS;
symval->user_token_number = USER_NUMBER_ALIAS;
symbol = symval;
token = lex (); /* get number or literal string */
@@ -1666,7 +1666,7 @@ token_translations_init (void)
for (i = 0; i < ntokens; ++i)
{
symbol_t *this = symbols[i];
if (this->user_token_number != SUNDEF)
if (this->user_token_number != USER_NUMBER_UNDEFINED)
{
if (this->user_token_number > max_user_token_number)
max_user_token_number = this->user_token_number;
@@ -1676,7 +1676,8 @@ token_translations_init (void)
}
/* If 256 is not used, assign it to error, to follow POSIX. */
if (num_256_available_p && errtoken->user_token_number == SUNDEF)
if (num_256_available_p
&& errtoken->user_token_number == USER_NUMBER_UNDEFINED)
errtoken->user_token_number = 256;
/* Set the missing user numbers. */
@@ -1686,7 +1687,7 @@ token_translations_init (void)
for (i = 0; i < ntokens; ++i)
{
symbol_t *this = symbols[i];
if (this->user_token_number == SUNDEF)
if (this->user_token_number == USER_NUMBER_UNDEFINED)
this->user_token_number = ++max_user_token_number;
if (this->user_token_number > max_user_token_number)
max_user_token_number = this->user_token_number;

View File

@@ -38,7 +38,7 @@ symbol_new (const char *tag)
res->number = NUMBER_UNDEFINED;
res->prec = 0;
res->assoc = right_assoc;
res->user_token_number = SUNDEF;
res->user_token_number = USER_NUMBER_UNDEFINED;
res->alias = NULL;
res->class = unknown_sym;

View File

@@ -41,14 +41,12 @@ typedef enum
nterm_sym /* non-terminal */
} symbol_class;
/* Internal token numbers. */
typedef short token_number_t;
#define SUNDEF -1 /* For undefined user number. */
#define SALIAS -9991 /* for symbol generated with an alias */
#define NUMBER_UNDEFINED ((token_number_t) -1)
typedef struct symbol_s symbol_t;
struct symbol_s
{
/* The key, name of the symbol. */
@@ -61,14 +59,25 @@ struct symbol_s
associativity assoc;
int user_token_number;
/* Points to the other in the identifier-symbol pair for an
alias. Special value SALIAS in the identifier half of the
/* Points to the other in the identifier-symbol pair for an alias.
Special value USER_NUMBER_ALIAS in the identifier half of the
identifier-symbol pair for an alias. */
struct symbol_s *alias;
symbol_t *alias;
symbol_class class;
};
typedef struct symbol_s symbol_t;
/* Undefined user number. */
#define USER_NUMBER_UNDEFINED -1
/* `symbol->user_token_number == USER_NUMBER_ALIAS' means this symbol
*has* (not is) a string literal alias. For instance, `%token foo
"foo"' has `"foo"' numbered regularly, and `foo' numbered as
USER_NUMBER_ALIAS. */
#define USER_NUMBER_ALIAS -9991
/* Undefined internal token number. */
#define NUMBER_UNDEFINED ((token_number_t) -1)
/* A function to apply to each symbol. */
typedef bool (*symbol_processor) PARAMS ((symbol_t *));

View File

@@ -287,6 +287,32 @@ AT_CLEANUP
## ------------------- ##
## Token definitions. ##
## ------------------- ##
AT_SETUP([Token definitions])
# Bison managed, when fed with `%token 'f' "f"' to #define 'f'!
AT_DATA([input.y],
[[%token "end of file"
%token 'a' "a"
%token "b" 'b'
%token "c" c
%token d "d"
%token e 'e'
%token 'f' e
%%
exp: "a";
]])
AT_CHECK([bison input.y -o input.c])
AT_CHECK([$CC $CFLAGS $CPPFLAGS input.c -c])
AT_CLEANUP
## -------------- ##
## Web2c Report. ##
## -------------- ##