diff --git a/ChangeLog b/ChangeLog index 8fa43858..3a683551 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,28 @@ +2006-08-18 Joel E. Denny + + Don't allow an undeclared string literal, but allow a string literal to + be used before its declaration. + * src/reader.c (check_and_convert_grammar): Don't invoke packgram if + symbols_pack complained. + * src/symtab.c (symbol_new): Don't count a string literal as a new + symbol. + (symbol_class_set): Don't count a string literal as a new token, and + don't assign it a symbol number since symbol_make_alias does that. + (symbol_make_alias): It's not necessary to decrement the symbol and + token counts anymore. Don't assume that an alias declaration occurs + before any uses of the identifier or string, and thus don't assert that + one of them has the highest symbol number so far. + (symbol_check_alias_consistency): Complain if there's a string literal + that wasn't declared as an alias. + (symbols_pack): Bail if symbol_check_alias_consistency failed since + symbol_pack asserts that every token has been assigned a symbol number + although undeclared string literals have not. + * tests/regression.at (String alias declared after use, Undeclared + string literal): New test cases. + (Characters Escapes, Web2c Actions): Declare string literals as + aliases. + * tests/sets.at (Firsts): Likewise. + 2006-08-14 Joel E. Denny In the grammar scanner, STRING_FINISH unclosed constructs and return diff --git a/src/reader.c b/src/reader.c index 5084618f..e31fe4c5 100644 --- a/src/reader.c +++ b/src/reader.c @@ -630,7 +630,8 @@ check_and_convert_grammar (void) symbols_pack (); /* Convert the grammar into the format described in gram.h. */ - packgram (); + if (!complaint_issued) + packgram (); /* The grammar as a symbol_list is no longer needed. 
*/ LIST_FREE (symbol_list, grammar); diff --git a/src/symtab.c b/src/symtab.c index 8b3e30d3..28f49fb4 100644 --- a/src/symtab.c +++ b/src/symtab.c @@ -79,7 +79,8 @@ symbol_new (uniqstr tag, location loc) if (nsyms == SYMBOL_NUMBER_MAXIMUM) fatal (_("too many symbols in input grammar (limit is %d)"), SYMBOL_NUMBER_MAXIMUM); - nsyms++; + if (tag[0] != '"') + nsyms++; return res; } @@ -266,7 +267,8 @@ symbol_class_set (symbol *sym, symbol_class class, location loc, bool declaring) if (class == nterm_sym && sym->class != nterm_sym) sym->number = nvars++; - else if (class == token_sym && sym->number == NUMBER_UNDEFINED) + else if (class == token_sym && sym->number == NUMBER_UNDEFINED + && sym->tag[0] != '"') sym->number = ntokens++; sym->class = class; @@ -361,12 +363,7 @@ symbol_make_alias (symbol *sym, symbol *symval, location loc) sym->user_token_number = USER_NUMBER_ALIAS; symval->alias = sym; sym->alias = symval; - /* sym and symval combined are only one symbol. */ - nsyms--; - ntokens--; - assert (ntokens == sym->number || ntokens == symval->number); - sym->number = symval->number = - (symval->number < sym->number) ? symval->number : sym->number; + symval->number = sym->number; symbol_type_set (symval, sym->type_name, loc); } } @@ -383,6 +380,9 @@ symbol_check_alias_consistency (symbol *this) symbol *alias = this; symbol *orig = this->alias; + if (this->tag[0] == '"' && !this->alias) + complain_at (this->location, _("%s undeclared"), this->tag); + /* Check only those that _are_ the aliases. 
*/ if (!(this->alias && this->user_token_number == USER_NUMBER_ALIAS)) return; @@ -723,6 +723,8 @@ symbols_pack (void) symbols = xcalloc (nsyms, sizeof *symbols); symbols_do (symbol_check_alias_consistency_processor, NULL); + if (complaint_issued) + return; symbols_do (symbol_pack_processor, NULL); symbols_token_translations_init (); diff --git a/tests/regression.at b/tests/regression.at index 923a8933..1c5ccbbf 100644 --- a/tests/regression.at +++ b/tests/regression.at @@ -489,7 +489,9 @@ AT_DATA_GRAMMAR([input.y], void yyerror (const char *s); int yylex (void); %} -[%% +[%token QUOTES "\"" +%token TICK "'" +%% exp: '\'' "\'" | '\"' "\"" @@ -700,6 +702,10 @@ statement: struct_stat; struct_stat: /* empty. */ | if else; if: "if" "const" "then" statement; else: "else" statement; +%token IF "if"; +%token CONST "const"; +%token THEN "then"; +%token ELSE "else"; %% ]]) @@ -1108,3 +1114,48 @@ Stack now 0 ]]) AT_CLEANUP + + + +## --------------------------------- ## +## String alias declared after use. ## +## --------------------------------- ## + +AT_SETUP([String alias declared after use]) + +# Bison once incorrectly asserted that the symbol number for either a token or +# its alias was the highest symbol number so far at the point of the alias +# declaration. That was true unless the declaration appeared after their first +# uses. + +AT_DATA([input.y], +[[%% +start: 'a' "A" 'b'; +%token 'a' "A"; +]]) + +AT_CHECK([bison -t -o input.c input.y]) + +AT_CLEANUP + + + +## --------------------------- ## +## Undeclared string literal. ## +## --------------------------- ## + +AT_SETUP([Undeclared string literal]) + +# Bison once allowed a string literal to be used in the grammar without any +# declaration assigning it as an alias of another token. 
+ +AT_DATA([input.y], +[[%% +start: "abc"; +]]) + +AT_CHECK([bison -t -o input.c input.y], [1], [], +[[input.y:2.8-12: "abc" undeclared +]]) + +AT_CLEANUP diff --git a/tests/sets.at b/tests/sets.at index a731315b..941d0cc2 100644 --- a/tests/sets.at +++ b/tests/sets.at @@ -196,6 +196,7 @@ AT_DATA([input.y], [[%nonassoc '<' '>' %left '+' '-' %right '^' '=' +%token EXP "exp" %% exp: exp '<' exp