mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-09 12:23:04 +00:00
This patch contains more fixes to prefer signed to unsigned integer types, as modern tools like 'gcc -fsanitize=undefined' can check for signed integer overflow but not unsigned overflow. * NEWS: Document the API change. * boostrap.conf (gnulib_modules): Add intprops. * data/skeletons/glr.c: Include stddef.h and stdint.h, since this skeleton can assume C99 or later. (YYSIZEMAX): Now signed, and the minimum of SIZE_MAX and PTRDIFF_MAX. (yybool) [!__cplusplus]: Now signed (which is how bool behaves). (YYTRANSLATE): Avoid use of unsigned, and make the macro safe even for values greater than UINT_MAX. (yytnamerr, struct yyGLRState, struct yyGLRStateSet, struct yyGLRStack) (yyaddDeferredAction, yyinitStateSet, yyinitGLRStack) (yyexpandGLRStack, yymarkStackDeleted, yyremoveDeletes) (yyglrShift, yyglrShiftDefer, yy_reduce_print, yydoAction) (yyglrReduce, yysplitStack, yyreportTree, yycompressStack) (yyprocessOneStack, yyreportSyntaxError, yyrecoverSyntaxError) (yyparse, yy_yypstack, yypstack, yypdumpstack): * tests/input.at (Torturing the Scanner): Prefer ptrdiff_t to size_t. 
* data/skeletons/c++.m4 (b4_yytranslate_define): * src/AnnotationList.c (AnnotationList__computePredecessorAnnotations): * src/AnnotationList.h (AnnotationIndex): * src/InadequacyList.h (InadequacyListNodeCount): * src/closure.c (closure_new): * src/complain.c (error_message, complains, complain_indent) (complain_args, duplicate_directive, duplicate_rule_directive): * src/gram.c (nritems, ritem_print, grammar_dump): * src/ielr.c (ielr_compute_ritem_sees_lookahead_set) (ielr_item_has_lookahead, ielr_compute_annotation_lists) (ielr_compute_lookaheads): * src/location.c (columns, boundary_print, location_print): * src/muscle-tab.c (muscle_percent_define_insert) (muscle_percent_define_check_values): * src/output.c (prepare_rules, prepare_actions): * src/parse-gram.y (id, handle_require): * src/reader.c (record_merge_function_type, packgram): * src/reduce.c (nuseless_productions, nuseless_nonterminals) (inaccessable_symbols): * src/relation.c (relation_print): * src/scan-code.l (variant, variant_table_size, variant_count) (variant_add, get_at_spec, show_sub_message, show_sub_messages) (parse_ref): * src/scan-gram.l (<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>) (scan_integer, convert_ucn_to_byte, handle_syncline): * src/scan-skel.l (at_complain): * src/symtab.c (complain_symbol_redeclared) (complain_semantic_type_redeclared, complain_class_redeclared) (symbol_class_set, complain_user_token_number_redeclared): * src/tables.c (conflict_tos, conflrow, conflict_table) (conflict_list, save_row, pack_vector): * tests/local.at (AT_YYLEX_DEFINE(c)): Prefer signed to unsigned integer. * data/skeletons/lalr1.cc (yy_lac_check_): * tests/actions.at (_AT_CHECK_PRINTER_AND_DESTRUCTOR): * tests/local.at (AT_YYLEX_DEFINE(c)): Omit now-unnecessary casts. * data/skeletons/location.cc (b4_location_define): * doc/bison.texi (Mfcalc Lexer, C++ position, C++ location): Prefer int to unsigned for line and column numbers. 
Change example to abort explicitly on memory exhaustion, and fix an off-by-one bug that led to undefined behavior. * data/skeletons/stack.hh (stack::operator[]): Also allow ptrdiff_t indexes. (stack::pop, slice::slice, slice::operator[]): Index arg is now ptrdiff_t, not int. (stack::ssize): New method. (slice::range_): Now ptrdiff_t, not int. * data/skeletons/yacc.c (b4_state_num_type): Remove. All uses replaced by b4_int_type. (YY_CONVERT_INT_BEGIN, YY_CONVERT_INT_END): New macros. (yylac, yyparse): Use them around conversions that -Wconversion would give false alarms about. Omit unnecessary casts. (yy_stack_print): Use int rather than unsigned, and omit a cast that doesn’t seem to be needed here any more. * examples/c++/variant.yy (yylex): * examples/c++/variant-11.yy (yylex): Omit no-longer-needed conversions to unsigned. * src/InadequacyList.c (InadequacyList__new_conflict): Don’t assume *node_count is unsigned. * src/output.c (muscle_insert_unsigned_table): Remove; no longer used.
302 lines
9.3 KiB
C
302 lines
9.3 KiB
C
/* Data definitions for internal representation of Bison's input.
|
|
|
|
Copyright (C) 1984, 1986, 1989, 1992, 2001-2007, 2009-2015, 2018-2019
|
|
Free Software Foundation, Inc.
|
|
|
|
This file is part of Bison, the GNU Compiler Compiler.
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
|
|
|
#ifndef GRAM_H_
|
|
# define GRAM_H_
|
|
|
|
/* Representation of the grammar rules:
|
|
|
|
NTOKENS is the number of tokens, and NVARS is the number of
|
|
variables (nonterminals). NSYMS is the total number, ntokens +
|
|
nvars.
|
|
|
|
Each symbol (either token or variable) receives a symbol number.
|
|
Numbers 0 to NTOKENS - 1 are for tokens, and NTOKENS to NSYMS - 1
|
|
are for variables. Symbol number zero is the end-of-input token.
|
|
This token is counted in ntokens. The true number of token values
|
|
assigned is NTOKENS reduced by one for each alias declaration.
|
|
|
|
The rules receive rule numbers 1 to NRULES in the order they are
|
|
written. More precisely Bison augments the grammar with the
|
|
initial rule, '$accept: START-SYMBOL $end', which is numbered 1,
|
|
all the user rules are 2, 3 etc. Each time a rule number is
|
|
presented to the user, we subtract 1, so *displayed* rule numbers
|
|
are 0, 1, 2...
|
|
|
|
Internally, we cannot use the number 0 for a rule because for
|
|
instance RITEM stores both symbol (the RHS) and rule numbers: the
|
|
symbols are shorts >= 0, and rule numbers are stored negative.
|
|
Therefore 0 cannot be used, since it would be both the rule number
|
|
0, and the token $end).
|
|
|
|
Actions are accessed via the rule number.
|
|
|
|
The rules themselves are described by several arrays: amongst which
|
|
RITEM, and RULES.
|
|
|
|
RULES is an array of rules, whose members are:
|
|
|
|
RULES[R].lhs -- the symbol of the left hand side of rule R.
|
|
|
|
RULES[R].rhs -- the index in RITEM of the beginning of the portion
|
|
for rule R.
|
|
|
|
RULES[R].prec -- the symbol providing the precedence level of R.
|
|
|
|
RULES[R].precsym -- the symbol attached (via %prec) to give its
|
|
precedence to R. Of course, if set, it is equal to 'prec', but we
|
|
need to distinguish one from the other when reducing: a symbol used
|
|
in a %prec is not useless.
|
|
|
|
RULES[R].assoc -- the associativity of R.
|
|
|
|
RULES[R].dprec -- the dynamic precedence level of R (for GLR
|
|
parsing).
|
|
|
|
RULES[R].merger -- index of merging function for R (for GLR
|
|
parsing).
|
|
|
|
RULES[R].line -- the line where R was defined.
|
|
|
|
RULES[R].useful -- whether the rule is used (i.e., false if thrown
|
|
away by reduce).
|
|
|
|
The right hand side is stored as symbol numbers in a portion of
|
|
RITEM.
|
|
|
|
The length of the portion is one greater than the number of symbols
|
|
in the rule's right hand side. The last element in the portion
|
|
contains minus R, which identifies it as the end of a portion and
|
|
says which rule it is for.
|
|
|
|
The portions of RITEM come in order of increasing rule number.
|
|
NRITEMS is the total length of RITEM. Each element of RITEM is
|
|
called an "item" and its index in RITEM is an item number.
|
|
|
|
Item numbers are used in the finite state machine to represent
|
|
places that parsing can get to.
|
|
|
|
SYMBOLS[I]->prec records the precedence level of each symbol.
|
|
|
|
Precedence levels are assigned in increasing order starting with 1
|
|
so that numerically higher precedence values mean tighter binding
|
|
as they ought to. Zero as a symbol or rule's precedence means none
|
|
is assigned.
|
|
|
|
Associativities are recorded similarly in SYMBOLS[I]->assoc. */
|
|
|
|
# include "location.h"
|
|
# include "symtab.h"
|
|
|
|
/* True if symbol number I denotes a token, i.e. lies below NTOKENS.
   Only the upper bound is checked.  */
# define ISTOKEN(i) (ntokens > (i))
/* True if symbol number I denotes a variable (nonterminal), i.e. is
   at least NTOKENS.  No check against NSYMS is made.  */
# define ISVAR(i) (ntokens <= (i))
|
|
|
|
/* Symbol counts: NSYMS is the total number of symbols, NTOKENS the
   number of tokens and NVARS the number of variables (nonterminals),
   so that NSYMS is NTOKENS + NVARS.  */
extern int nsyms;
extern int ntokens;
extern int nvars;
|
|
|
|
/* Type of the elements of RITEM.  */
typedef int item_number;
/* Largest value an item_number can hold.  */
# define ITEM_NUMBER_MAX INT_MAX
/* The RITEM array itself.  */
extern item_number *ritem;
/* Total length of RITEM.  */
extern int nritems;
|
|
|
|
/* There is a weird relationship between OT1H item_number and OTOH
|
|
symbol_number and rule_number: we store the latter in
|
|
item_number. symbol_number values are stored as-is, while
|
|
the negation of (rule_number + 1) is stored.
|
|
|
|
Therefore, a symbol_number must be a valid item_number, and we
|
|
sometimes have to perform the converse transformation. */
|
|
|
|
static inline item_number
|
|
symbol_number_as_item_number (symbol_number sym)
|
|
{
|
|
return sym;
|
|
}
|
|
|
|
static inline symbol_number
|
|
item_number_as_symbol_number (item_number i)
|
|
{
|
|
return i;
|
|
}
|
|
|
|
static inline bool
|
|
item_number_is_symbol_number (item_number i)
|
|
{
|
|
return i >= 0;
|
|
}
|
|
|
|
/* Type of rule numbers, and the largest value it can hold.  */
typedef int rule_number;
# define RULE_NUMBER_MAX INT_MAX
|
|
|
|
static inline item_number
|
|
rule_number_as_item_number (rule_number r)
|
|
{
|
|
return -1 - r;
|
|
}
|
|
|
|
static inline rule_number
|
|
item_number_as_rule_number (item_number i)
|
|
{
|
|
return -1 - i;
|
|
}
|
|
|
|
static inline bool
|
|
item_number_is_rule_number (item_number i)
|
|
{
|
|
return i < 0;
|
|
}
|
|
|
|
|
|
/*--------.
|
|
| Rules. |
|
|
`--------*/
|
|
|
|
typedef struct
|
|
{
|
|
/* The number of the rule in the source. It is usually the index in
|
|
RULES too, except if there are useless rules. */
|
|
rule_number user_number;
|
|
|
|
/* The index in RULES. Usually the rule number in the source,
|
|
except if some rules are useless. */
|
|
rule_number number;
|
|
|
|
sym_content *lhs;
|
|
item_number *rhs;
|
|
|
|
/* This symbol provides both the associativity, and the precedence. */
|
|
sym_content *prec;
|
|
|
|
int dprec;
|
|
int merger;
|
|
|
|
/* This symbol was attached to the rule via %prec. */
|
|
sym_content *precsym;
|
|
|
|
/* Location of the rhs. */
|
|
location location;
|
|
bool useful;
|
|
bool is_predicate;
|
|
|
|
/* Counts of the numbers of expected conflicts for this rule, or -1 if none
|
|
given. */
|
|
int expected_sr_conflicts;
|
|
int expected_rr_conflicts;
|
|
|
|
const char *action;
|
|
location action_loc;
|
|
} rule;
|
|
|
|
/* The used rules (size NRULES). */
|
|
extern rule *rules;
|
|
extern rule_number nrules;
|
|
|
|
/* Get the rule associated to this item. ITEM points inside RITEM. */
|
|
rule const *item_rule (item_number const *item);
|
|
|
|
/* Pretty-print this ITEM (as in the report). ITEM points inside
|
|
RITEM. PREVIOUS_RULE is used to see if the lhs is common, in which
|
|
case LHS is factored. Passing NULL is fine. */
|
|
void item_print (item_number *item, rule const *previous_rule,
|
|
FILE *out);
|
|
|
|
/* A function that selects a rule. */
|
|
typedef bool (*rule_filter) (rule const *);
|
|
|
|
/* Whether the rule has a 'number' smaller than NRULES. That is, it
|
|
is useful in the grammar. */
|
|
bool rule_useful_in_grammar_p (rule const *r);
|
|
|
|
/* Whether the rule has a 'number' higher than NRULES. That is, it is
|
|
useless in the grammar. */
|
|
bool rule_useless_in_grammar_p (rule const *r);
|
|
|
|
/* Whether the rule is not flagged as useful but is useful in the
|
|
grammar. In other words, it was discarded because of conflicts. */
|
|
bool rule_useless_in_parser_p (rule const *r);
|
|
|
|
/* Whether the rule has a single RHS, and no user action. */
|
|
bool rule_useless_chain_p (rule const *r);
|
|
|
|
/* Print this rule's number and lhs on OUT. If a PREVIOUS_LHS was
|
|
already displayed (by a previous call for another rule), avoid
|
|
useless repetitions. */
|
|
void rule_lhs_print (rule const *r, sym_content const *previous_lhs,
|
|
FILE *out);
|
|
void rule_lhs_print_xml (rule const *r, FILE *out, int level);
|
|
|
|
/* The length of the RHS. */
|
|
size_t rule_rhs_length (rule const *r);
|
|
|
|
/* Print this rule's RHS on OUT. */
|
|
void rule_rhs_print (rule const *r, FILE *out);
|
|
|
|
/* Print this rule on OUT. If a PREVIOUS_RULE was already displayed,
|
|
avoid useless repetitions of their LHS. */
|
|
void rule_print (rule const *r, rule const *prev_rule, FILE *out);
|
|
|
|
|
|
|
|
/* Table of the symbols, indexed by the symbol number. */
|
|
extern symbol **symbols;
|
|
|
|
/* TOKEN_TRANSLATION -- a table indexed by a token number as returned
|
|
by the user's yylex routine, it yields the internal token number
|
|
used by the parser and throughout bison. */
|
|
extern symbol_number *token_translations;
|
|
extern int max_user_token_number;
|
|
|
|
|
|
|
|
/* Dump RITEM on OUT, for traces.  */
void ritem_print (FILE *out);

/* Return the size of the longest rule RHS.  */
size_t ritem_longest_rhs (void);
|
|
|
|
/* Print the grammar's rules that match FILTER on OUT under TITLE. */
|
|
void grammar_rules_partial_print (FILE *out, const char *title,
|
|
rule_filter filter);
|
|
|
|
/* Print the grammar's useful rules on OUT. */
|
|
void grammar_rules_print (FILE *out);
|
|
/* Print all of the grammar's rules with a "usefulness" attribute. */
|
|
void grammar_rules_print_xml (FILE *out, int level);
|
|
|
|
/* Dump the grammar. */
|
|
void grammar_dump (FILE *out, const char *title);
|
|
|
|
/* Report on STDERR the rules that are not flagged USEFUL, using the
|
|
MESSAGE (which can be 'rule useless in grammar' when invoked after grammar
|
|
reduction, or 'rule useless in parser due to conflicts' after conflicts
|
|
were taken into account). */
|
|
void grammar_rules_useless_report (const char *message);
|
|
|
|
/* Release the packed grammar.  */
void grammar_free (void);
|
|
|
|
/* Version that the grammar file %requires, encoded as the int
   100 * major + minor, or 0 if the grammar does not specify one.  */
extern int required_version;
|
|
|
|
#endif /* !GRAM_H_ */
|