Named symbol references.

Discussed in:
http://lists.gnu.org/archive/html/bison-patches/2009-01/msg00000.html
http://lists.gnu.org/archive/html/bison-patches/2009-02/msg00002.html
http://lists.gnu.org/archive/html/bison-patches/2009-03/msg00009.html

	* src/parse-gram.y: Add new syntax (named_ref.opt).
	* src/reader.c: Store named refs in symbol lists.
	* src/reader.h: New argument for symbol_append and
	action_append functions.
	* src/scan-code.h: Add new field (named_ref) into
	code_props data structure. Keeps named ref of midrule
	actions.
	* src/scan-code.l: Support for named refs in semantic
	action code. New function 'parse_named_ref'.
	* src/scan-gram.l: Support bracketed id.
	* src/symlist.c: Store named refs in symbol lists.
	* src/symlist.h: New field in symbol list: named_ref.
	* src/named-ref.h: New file, a struct for named_ref.
	* src/named-ref.c: New file, named_ref_new function.
	* src/Makefile.am: Add two new files.
	* tests/testsuite.at: Include new test group:
	* tests/named-refs.at: this new file.
This commit is contained in:
Alex Rozenman
2009-05-23 18:48:03 +03:00
parent 67f8cf51c3
commit 7685e2f7ba
14 changed files with 1161 additions and 86 deletions

View File

@@ -48,6 +48,10 @@ YY_DECL;
static void handle_action_dollar (symbol_list *rule, char *cp,
location dollar_loc);
static void handle_action_at (symbol_list *rule, char *cp, location at_loc);
/* A string to be pushed to obstack after dollar/at has been handled */
static char *ref_tail_fields;
static location the_location;
static location *loc = &the_location;
@@ -56,6 +60,7 @@ static char *last_string;
/* True if an untyped $$ or $n was seen. */
static bool untyped_var_seen;
%}
/* C and C++ comments in code. */
%x SC_COMMENT SC_LINE_COMMENT
@@ -75,6 +80,12 @@ tag [^\0\n>]+
white space between the backslash and the newline. */
splice (\\[ \f\t\v]*\n)*
/* C style identifier, extended to also allow '.' and '-'. Must start
with a {letter} as defined below. Will be used for named symbol references. */
letter [-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
id {letter}({letter}|[0-9])*
ref -?[0-9]+|{id}|"["{id}"]"|"$"
%%
%{
@@ -167,15 +178,22 @@ splice (\\[ \f\t\v]*\n)*
<SC_RULE_ACTION>
{
"$"("<"{tag}">")?(-?[0-9]+|"$") {
"$"("<"{tag}">")?{ref} {
ref_tail_fields = 0;
handle_action_dollar (self->rule, yytext, *loc);
if (ref_tail_fields != NULL) {
obstack_sgrow (&obstack_for_string, ref_tail_fields);
}
need_semicolon = true;
}
"@"(-?[0-9]+|"$") {
"@"{ref} {
ref_tail_fields = 0;
handle_action_at (self->rule, yytext, *loc);
if (ref_tail_fields != NULL) {
obstack_sgrow (&obstack_for_string, ref_tail_fields);
}
need_semicolon = true;
}
"$" {
warn_at (*loc, _("stray `$'"));
obstack_sgrow (&obstack_for_string, "$][");
@@ -267,6 +285,317 @@ splice (\\[ \f\t\v]*\n)*
%%
/* Return true when L marks the end of a symbol list: either L is a
   null pointer, or it is a SYMLIST_SYMBOL node whose symbol is NULL.  */
static inline bool
symbol_list_null (symbol_list *l)
{
  return !l || (l->content_type == SYMLIST_SYMBOL && l->content.sym == NULL);
}
/* Return true if CH is one of the two separator characters '.' or '-'.  */
static inline bool
is_dot_or_dash (char ch)
{
  switch (ch)
    {
    case '.':
    case '-':
      return true;
    default:
      return false;
    }
}
/* Return true if CH is an ASCII decimal digit ('0' through '9').  */
static inline bool
is_digit (char ch)
{
  if (ch < '0' || '9' < ch)
    return false;
  return true;
}
/* Return true if the NUL-terminated string STR contains at least one
   '.' or '-' character.  */
static inline bool
contains_dot_or_dash (const char *str)
{
  const char *scan;

  for (scan = str; *scan != '\0'; ++scan)
    if (*scan == '.' || *scan == '-')
      return true;
  return false;
}
/* Error flags stored (OR-ed together) in the 'err' field of a variant.
   A variant whose 'err' is zero is considered valid. */
#define VARIANT_HIDDEN (1 << 0)
#define VARIANT_BAD_BRACKETING (1 << 1)
#define VARIANT_NOT_VISIBLE_FROM_MIDRULE (1 << 2)
/* One candidate symbol that matched the text of a named reference. */
typedef struct
{
/* Index in symbol list. */
long int ind;
/* Matched symbol id and loc. */
uniqstr id;
location loc;
/* Named reference that hides this symbol, or NULL if there is none. */
named_ref* hidden_by;
/* Error flags: bitwise OR of the VARIANT_* values above. */
unsigned err;
} variant;
/* Growable table of variants.  VARIANT_TABLE_SIZE is the number of
   entries currently allocated, VARIANT_COUNT the number in use.  The
   allocation is kept between uses; the table is emptied by resetting
   VARIANT_COUNT to zero.  */
static variant *variant_table = NULL;
static unsigned variant_table_size = 0;
static unsigned variant_count = 0;

/* Append one entry to VARIANT_TABLE, enlarging the allocation when the
   table is full, and return a pointer to the new entry.  The entry's
   fields are left uninitialized; the caller must fill them in.  */
static variant *
variant_table_grow (void)
{
  ++variant_count;
  if (variant_count > variant_table_size)
    {
      /* Grow geometrically (0 -> 3 -> 9 -> 21 ...) until the new entry
         fits.  */
      while (variant_count > variant_table_size)
        variant_table_size = 2 * variant_table_size + 3;
      variant_table = xnrealloc (variant_table, variant_table_size,
                                 sizeof *variant_table);
    }
  return &variant_table[variant_count - 1];
}
/* Check whether the character range [BEGIN, END) starts with the
   NUL-terminated string PREFIX.  On success return the position in
   the range just past the matched prefix; return a null pointer if a
   character differs or the range ends before PREFIX does.  */
static char *
find_prefix_end (const char *prefix, char *begin, char *end)
{
  char *cur;

  for (cur = begin; *prefix != '\0'; ++prefix, ++cur)
    {
      /* Ran out of input, or the next character does not match.  */
      if (cur == end || *cur != *prefix)
        return NULL;
    }
  return cur;
}
/* Try to match symbol ID against the reference text [CP, CP_END).
   The match succeeds when ID is a prefix of the text and either
   covers it completely or, unless EXACT_MODE is set, is immediately
   followed by '.' or '-'.  On success append a new variant (with LOC
   and IND recorded, no hiding reference and no error flags) to the
   variant table and return it; otherwise return NULL.  */
static variant *
variant_add (uniqstr id, location loc, long int ind,
             char *cp, char *cp_end, bool exact_mode)
{
  char *id_end = find_prefix_end (id, cp, cp_end);
  variant *entry;

  if (!id_end)
    return NULL;
  /* A partial match is only acceptable outside exact mode, and only
     when the next character is a dot or a dash.  */
  if (id_end != cp_end && (exact_mode || !is_dot_or_dash (*id_end)))
    return NULL;

  entry = variant_table_grow ();
  entry->ind = ind;
  entry->id = id;
  entry->loc = loc;
  entry->hidden_by = NULL;
  entry->err = 0;
  return entry;
}
/* Special results of parse_named_ref: the reference could not be
   resolved (an error has been reported), or it designates the
   left-hand side.  */
#define INVALID_REF (INT_MIN)
#define LHS_REF (INT_MIN + 1)

/* Resolve the reference that starts at CP (the text following '$',
   '$<tag>' or '@') within RULE.

   CP may be "$", a possibly negative integer, a bare identifier, or a
   bracketed identifier "[id]".  RULE_LENGTH bounds numeric references.
   MIDRULE_RHS_INDEX is compared against the index of each matched
   symbol to decide visibility; zero disables that check.  TEXT (the
   whole reference) and LOC are used for diagnostics, and
   DOLLAR_OR_AT is the introducing character printed in them.

   Return LHS_REF for "$", the number itself for an in-range numeric
   reference, and for a name the symbol-list index of the single valid
   match (LHS_REF when that index equals MIDRULE_RHS_INDEX).  In every
   other case complain and return INVALID_REF.

   Side effects: for an unbracketed name, ref_tail_fields is set to
   the first '.' or '-' in CP, if any; the variant table is reset and
   refilled.  */
static long int
parse_named_ref (char *cp, symbol_list *rule, int rule_length,
                 int midrule_rhs_index, char *text, location loc,
                 char dollar_or_at)
{
  symbol_list *l;
  char *cp_end;
  bool exact_mode;
  bool has_error;
  bool has_valid;
  long int ind, i;
  variant *var;
  char *p;

  if ('$' == *cp)
    return LHS_REF;

  if (is_digit (*cp) || (*cp == '-' && is_digit (cp[1])))
    {
      long int num = strtol (cp, &cp, 10);
      if (1 - INT_MAX + rule_length <= num && num <= rule_length)
        return num;
      else
        {
          complain_at (loc, _("integer out of range: %s"), quote (text));
          return INVALID_REF;
        }
    }

  if ('[' == *cp)
    {
      exact_mode = true;
      /* Ignore the brackets. */
      ++cp;
      for (p = cp; *p != ']'; ++p)
        continue;
      cp_end = p;
    }
  else
    {
      exact_mode = false;
      /* Remember where the first '.' or '-' is: it may start a tail
         that is not part of the symbol name.  */
      for (p = cp; *p; ++p)
        if (is_dot_or_dash (*p))
          {
            ref_tail_fields = p;
            break;
          }
      /* Take all characters of the name. */
      for (p = cp; *p; ++p)
        continue;
      cp_end = p;
    }

  /* Add all relevant variants. */
  variant_count = 0;
  for (ind = 0, l = rule; !symbol_list_null (l); ++ind, l = l->next)
    {
      if (l->content_type != SYMLIST_SYMBOL)
        continue;

      var = variant_add (l->content.sym->tag, l->sym_loc, ind,
                         cp, cp_end, exact_mode);
      /* A symbol that carries a named reference is hidden by it.  */
      if (var && l->named_ref)
        var->hidden_by = l->named_ref;

      if (l->named_ref)
        variant_add (l->named_ref->id, l->named_ref->loc, ind,
                     cp, cp_end, exact_mode);
    }

  /* Check errors. */
  has_error = false;
  has_valid = false;
  for (i = 0; i < variant_count; ++i)
    {
      var = &variant_table[i];
      ind = var->ind;

      /* Check visibility from mid-rule actions. */
      if (midrule_rhs_index != 0 &&
          (ind == 0 || ind > midrule_rhs_index))
        {
          var->err |= VARIANT_NOT_VISIBLE_FROM_MIDRULE;
          has_error = true;
        }

      /* Check correct bracketing. */
      if (!exact_mode && contains_dot_or_dash (var->id))
        {
          var->err |= VARIANT_BAD_BRACKETING;
          has_error = true;
        }

      /* Check using of hidden symbols. */
      if (var->hidden_by != NULL)
        {
          var->err |= VARIANT_HIDDEN;
          has_error = true;
        }

      if (!var->err)
        has_valid = true;
    }

  if (variant_count == 1 && has_valid)
    {
      /* The only "good" case is here. */
      ind = variant_table[0].ind;
      if (ind == midrule_rhs_index)
        return LHS_REF;
      else
        return ind;
    }

  /* Start complaining. */
  if (variant_count == 0)
    complain_at (loc, _("reference is invalid: %s, symbol not found"),
                 quote (text));
  else if (variant_count > 1 && !has_error)
    complain_at (loc, _("reference is ambiguous: %s"),
                 quote (text));
  else if (variant_count > 1 && has_valid && has_error)
    complain_at (loc, _("reference is misleading: %s"),
                 quote (text));
  else
    complain_at (loc, _("reference is invalid: %s"),
                 quote (text));

  /* Explain every candidate that was considered.  */
  for (i = 0; i < variant_count; ++i)
    {
      static char at_buf[20];

      var = &variant_table[i];
      if (var->ind == 0)
        strcpy (at_buf, "$$");
      else
        /* 'ind' is a long int, so it must be printed with %ld.  */
        snprintf (at_buf, sizeof at_buf, "$%ld", var->ind);

      if (var->err == 0)
        complain_at (var->loc, _(" refers to: %c%s at %s"),
                     dollar_or_at, var->id, at_buf);
      else
        {
          static struct obstack msg_buf;
          const char *tail = "";
          const char *shown_id;
          location shown_loc;

          /* Outside exact mode, whatever follows the matched name is
             repeated after the suggested spelling.  */
          if (!exact_mode)
            tail = cp + strlen (var->id);

          /* Report at the hiding named reference when there is one.  */
          if (var->hidden_by)
            {
              shown_id = var->hidden_by->id;
              shown_loc = var->hidden_by->loc;
            }
          else
            {
              shown_id = var->id;
              shown_loc = var->loc;
            }

          /* Create the explanation message. */
          obstack_init (&msg_buf);

          obstack_fgrow1 (&msg_buf, " possibly meant: %c", dollar_or_at);
          if (contains_dot_or_dash (shown_id))
            obstack_fgrow1 (&msg_buf, "[%s]", shown_id);
          else
            obstack_sgrow (&msg_buf, shown_id);
          obstack_sgrow (&msg_buf, tail);

          if (var->err & VARIANT_HIDDEN)
            {
              obstack_fgrow1 (&msg_buf, ", hiding %c", dollar_or_at);
              if (contains_dot_or_dash (var->id))
                obstack_fgrow1 (&msg_buf, "[%s]", var->id);
              else
                obstack_sgrow (&msg_buf, var->id);
              obstack_sgrow (&msg_buf, tail);
            }

          obstack_fgrow1 (&msg_buf, " at %s", at_buf);

          if (var->err & VARIANT_NOT_VISIBLE_FROM_MIDRULE)
            obstack_fgrow1 (&msg_buf, ", cannot be accessed from "
                            "mid-rule action at $%d", midrule_rhs_index);

          obstack_1grow (&msg_buf, '\0');
          complain_at (shown_loc, _("%s"), obstack_finish (&msg_buf));
          obstack_free (&msg_buf, 0);
        }
    }

  return INVALID_REF;
}
/* Keeps track of the maximum number of semantic values to the left of
a handle (those referenced by $0, $-1, etc.) that are required by the
semantic actions of this grammar. */
@@ -286,8 +615,9 @@ handle_action_dollar (symbol_list *rule, char *text, location dollar_loc)
{
char const *type_name = NULL;
char *cp = text + 1;
char *gt_ptr = 0;
symbol_list *effective_rule;
int effective_rule_length;
int effective_rule_length, n;
if (rule->midrule_parent_rule)
{
@@ -306,15 +636,28 @@ handle_action_dollar (symbol_list *rule, char *text, location dollar_loc)
type_name = ++cp;
while (*cp != '>')
++cp;
*cp = '\0';
/* The '>' symbol will be later replaced by '\0'. Original
'text' is needed for error messages. */
gt_ptr = cp;
++cp;
if (untyped_var_seen)
complain_at (dollar_loc, _("explicit type given in untyped grammar"));
tag_seen = true;
}
if (*cp == '$')
n = parse_named_ref (cp, effective_rule, effective_rule_length,
rule->midrule_parent_rhs_index, text, dollar_loc, '$');
if (gt_ptr)
*gt_ptr = '\0';
switch (n)
{
case INVALID_REF:
break;
case LHS_REF:
if (!type_name)
type_name = symbol_list_n_type_name_get (rule, dollar_loc, 0);
@@ -340,39 +683,31 @@ handle_action_dollar (symbol_list *rule, char *text, location dollar_loc)
obstack_fgrow1 (&obstack_for_string,
"]b4_lhs_value([%s])[", type_name);
rule->action_props.is_value_used = true;
}
else
{
long int num = strtol (cp, NULL, 10);
break;
if (1 - INT_MAX + effective_rule_length <= num
&& num <= effective_rule_length)
default:
if (max_left_semantic_context < 1 - n)
max_left_semantic_context = 1 - n;
if (!type_name && 0 < n)
type_name =
symbol_list_n_type_name_get (effective_rule, dollar_loc, n);
if (!type_name)
{
int n = num;
if (max_left_semantic_context < 1 - n)
max_left_semantic_context = 1 - n;
if (!type_name && 0 < n)
type_name =
symbol_list_n_type_name_get (effective_rule, dollar_loc, n);
if (!type_name)
{
if (union_seen | tag_seen)
complain_at (dollar_loc, _("$%d of `%s' has no declared type"),
n, effective_rule->content.sym->tag);
else
untyped_var_seen = true;
type_name = "";
}
obstack_fgrow3 (&obstack_for_string,
"]b4_rhs_value(%d, %d, [%s])[",
effective_rule_length, n, type_name);
if (n > 0)
symbol_list_n_get (effective_rule, n)->action_props.is_value_used =
true;
if (union_seen | tag_seen)
complain_at (dollar_loc, _("$%s of `%s' has no declared type"),
cp, effective_rule->content.sym->tag);
else
untyped_var_seen = true;
type_name = "";
}
else
complain_at (dollar_loc, _("integer out of range: %s"), quote (text));
obstack_fgrow3 (&obstack_for_string,
"]b4_rhs_value(%d, %d, [%s])[",
effective_rule_length, n, type_name);
if (n > 0)
symbol_list_n_get (effective_rule, n)->action_props.is_value_used =
true;
break;
}
}
@@ -386,28 +721,37 @@ static void
handle_action_at (symbol_list *rule, char *text, location at_loc)
{
char *cp = text + 1;
int effective_rule_length =
(rule->midrule_parent_rule
? rule->midrule_parent_rhs_index - 1
: symbol_list_length (rule->next));
symbol_list *effective_rule;
int effective_rule_length, n;
if (rule->midrule_parent_rule)
{
effective_rule = rule->midrule_parent_rule;
effective_rule_length = rule->midrule_parent_rhs_index - 1;
}
else
{
effective_rule = rule;
effective_rule_length = symbol_list_length (rule->next);
}
locations_flag = true;
if (*cp == '$')
obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
else
n = parse_named_ref (cp, effective_rule, effective_rule_length,
rule->midrule_parent_rhs_index, text, at_loc, '@');
switch (n)
{
long int num = strtol (cp, NULL, 10);
case INVALID_REF:
break;
if (1 - INT_MAX + effective_rule_length <= num
&& num <= effective_rule_length)
{
int n = num;
obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location(%d, %d)[",
effective_rule_length, n);
}
else
complain_at (at_loc, _("integer out of range: %s"), quote (text));
case LHS_REF:
obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
break;
default:
obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location(%d, %d)[",
effective_rule_length, n);
break;
}
}
@@ -459,6 +803,7 @@ code_props_plain_init (code_props *self, char const *code, location code_loc)
self->location = code_loc;
self->is_value_used = false;
self->rule = NULL;
self->named_ref = NULL;
}
void
@@ -470,17 +815,20 @@ code_props_symbol_action_init (code_props *self, char const *code,
self->location = code_loc;
self->is_value_used = false;
self->rule = NULL;
self->named_ref = NULL;
}
void
code_props_rule_action_init (code_props *self, char const *code,
location code_loc, symbol_list *rule)
location code_loc, symbol_list *rule,
named_ref *named_ref)
{
self->kind = CODE_PROPS_RULE_ACTION;
self->code = code;
self->location = code_loc;
self->is_value_used = false;
self->rule = rule;
self->named_ref = named_ref;
}
void