Extract the parsing of user actions from the grammar scanner.

As a consequence, the relation between the grammar scanner and
parser is much simpler.  We can also split "composite tokens" back
into simple tokens.
* src/gram.h (ITEM_NUMBER_MAX, RULE_NUMBER_MAX): New.
* src/scan-gram.l (add_column_width, adjust_location): Move to and
rename as...
* src/location.h, src/location.c (add_column_width)
(location_compute): these.
Fix the column count: the initial column is 0.
(location_print): Be robust to ending column being 0.
* src/location.h (boundary_set): New.
* src/main.c: Adjust to scanner_free being renamed as
gram_scanner_free.
* src/output.c: Include scan-code.h.
* src/parse-gram.y: Include scan-gram.h and scan-code.h.
Use boundary_set.
(PERCENT_DESTRUCTOR, PERCENT_PRINTER, PERCENT_INITIAL_ACTION)
(PERCENT_LEX_PARAM, PERCENT_PARSE_PARAM): Remove the {...} part,
which is now, again, a separate token.
Adjust all dependencies.
Wherever actions with $ and @ are used, use translate_code.
(action): Remove this nonterminal which is now useless.
* src/reader.c: Include assert.h, scan-gram.h and scan-code.h.
(grammar_current_rule_action_append): Use translate_code.
(packgram): Bound check ruleno, itemno, and rule_length.
* src/reader.h (gram_in, gram__flex_debug, scanner_cursor)
(last_string, last_braced_code_loc, max_left_semantic_context)
(scanner_initialize, scanner_free, scanner_last_string_free)
(gram_out, gram_lineno, YY_DECL_): Move to...
* src/scan-gram.h: this new file.
(YY_DECL): Rename as...
(GRAM_DECL): this.
* src/scan-code.h, src/scan-code.l, src/scan-code-c.c: New.
* src/scan-gram.l (gram_get_lineno, gram_get_in, gram_get_out):
(gram_get_leng, gram_get_text, gram_set_lineno, gram_set_in):
(gram_set_out, gram_get_debug, gram_set_debug, gram_lex_destroy):
Move these declarations, and...
(obstack_for_string, STRING_GROW, STRING_FINISH, STRING_FREE):
these to...
* src/flex-scanner.h: this new file.
* src/scan-gram.l (rule_length, rule_length_overflow)
(increment_rule_length): Remove.
(last_braced_code_loc): Rename as...
(gram_last_braced_code_loc): this.
Adjust to the changes of the parser.
Move all the handling of $ and @ into...
* src/scan-code.l: here.
* src/scan-gram.l (handle_dollar, handle_at): Remove.
(handle_action_dollar, handle_action_at): Move to...
* src/scan-code.l: here.
* src/Makefile.am (bison_SOURCES): Add flex-scanner.h,
scan-code.h, scan-code-c.c, scan-gram.h.
(EXTRA_bison_SOURCES): Add scan-code.l.
(BUILT_SOURCES): Add scan-code.c.
(yacc): Be robust to white spaces.
* tests/conflicts.at, tests/input.at, tests/reduce.at,
* tests/regression.at: Adjust the column numbers.
* tests/regression.at: Adjust the error message.
This commit is contained in:
Akim Demaille
2006-06-06 16:40:06 +00:00
parent 184e42f065
commit e9071366c3
21 changed files with 1857 additions and 776 deletions

View File

@@ -29,112 +29,48 @@
#undef gram_wrap
#define gram_wrap() 1
#include "system.h"
#include <mbswidth.h>
#include <quote.h>
#define FLEX_PREFIX(Id) gram_ ## Id
#include "flex-scanner.h"
#include "complain.h"
#include "files.h"
#include "getargs.h"
#include "getargs.h" /* yacc_flag */
#include "gram.h"
#include "quotearg.h"
#include "reader.h"
#include "uniqstr.h"
#define YY_USER_INIT \
do \
{ \
scanner_cursor.file = current_file; \
scanner_cursor.line = 1; \
scanner_cursor.column = 1; \
code_start = scanner_cursor; \
} \
while (0)
#include <mbswidth.h>
#include <quote.h>
/* Pacify "gcc -Wmissing-prototypes" when flex 2.5.31 is used. */
int gram_get_lineno (void);
FILE *gram_get_in (void);
FILE *gram_get_out (void);
int gram_get_leng (void);
char *gram_get_text (void);
void gram_set_lineno (int);
void gram_set_in (FILE *);
void gram_set_out (FILE *);
int gram_get_debug (void);
void gram_set_debug (int);
int gram_lex_destroy (void);
#include "scan-gram.h"
#define YY_DECL GRAM_LEX_DECL
/* Initialization hook for the generated scanner: seed both CODE_START
   and the scanner cursor from the start of LOC.  The last line of the
   definition deliberately carries no trailing backslash, so the macro
   cannot swallow whatever line happens to follow it.  */
#define YY_USER_INIT \
code_start = scanner_cursor = loc->start;
/* Location of scanner cursor. */
boundary scanner_cursor;
static void adjust_location (location *, char const *, size_t);
#define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
static size_t no_cr_read (FILE *, char *, size_t);
#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
keep (to construct ID, STRINGS etc.). Use the following macros to
use it.
Use STRING_GROW to append what has just been matched, and
STRING_FINISH to end the string (it puts the ending 0).
STRING_FINISH also stores this string in LAST_STRING, which can be
used, and which is used by STRING_FREE to free the last string. */
static struct obstack obstack_for_string;
/* A string representing the most recently saved token. */
char *last_string;
/* The location of the most recently saved token, if it was a
BRACED_CODE token; otherwise, this has an unspecified value. */
location last_braced_code_loc;
#define STRING_GROW \
obstack_grow (&obstack_for_string, yytext, yyleng)
#define STRING_FINISH \
do { \
obstack_1grow (&obstack_for_string, '\0'); \
last_string = obstack_finish (&obstack_for_string); \
} while (0)
#define STRING_FREE \
obstack_free (&obstack_for_string, last_string)
/* Free the most recently finished string (LAST_STRING) from the string
   obstack; this is a function wrapper around the STRING_FREE macro. */
void
scanner_last_string_free (void)
gram_scanner_last_string_free (void)
{
STRING_FREE;
}
/* Within well-formed rules, RULE_LENGTH is the number of values in
the current rule so far, which says where to find `$0' with respect
to the top of the stack. It is not the same as the rule->length in
the case of mid rule actions.
/* The location of the most recently saved token, if it was a
BRACED_CODE token; otherwise, this has an unspecified value. */
location gram_last_braced_code_loc;
Outside of well-formed rules, RULE_LENGTH has an undefined value. */
static int rule_length;
static void rule_length_overflow (location) __attribute__ ((__noreturn__));
/* Increment the rule length by one, checking for overflow. */
static inline void
increment_rule_length (location loc)
{
  ++rule_length;

  /* INT_MAX itself is refused: if INT_MAX == LONG_MAX it could be
     confused with strtol's overflow result.  */
  if (rule_length != INT_MAX)
    return;

  /* Does not return.  */
  rule_length_overflow (loc);
}
static void handle_dollar (int token_type, char *cp, location loc);
static void handle_at (int token_type, char *cp, location loc);
static void handle_syncline (char *, location);
static unsigned long int scan_integer (char const *p, int base, location loc);
static int convert_ucn_to_byte (char const *hex_text);
@@ -142,11 +78,26 @@ static void unexpected_eof (boundary, char const *);
static void unexpected_newline (boundary, char const *);
%}
%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
%x SC_STRING SC_CHARACTER
%x SC_AFTER_IDENTIFIER
/* A C-like comment in directives/rules. */
%x SC_YACC_COMMENT
/* Strings and characters in directives/rules. */
%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
/* An identifier was just read in directives/rules. Special state
to capture the sequence `identifier :'. */
%x SC_AFTER_IDENTIFIER
/* A keyword that should be followed by some code was read (e.g.
%printer). */
%x SC_PRE_CODE
/* Three types of user code:
- prologue (code between `%{' `%}' in the first section, before %%);
- actions, printers, union, etc. (between braces in the middle section);
- epilogue (everything after the second %%). */
%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
/* C and C++ comments in code. */
%x SC_COMMENT SC_LINE_COMMENT
/* Strings and characters in code. */
%x SC_STRING SC_CHARACTER
letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
id {letter}({letter}|[0-9])*
@@ -221,17 +172,17 @@ splice (\\[ \f\t\v]*\n)*
"%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
"%define" return PERCENT_DEFINE;
"%defines" return PERCENT_DEFINES;
"%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
"%destructor" /* FIXME: Remove once %union handled differently. */ token_type = BRACED_CODE; return PERCENT_DESTRUCTOR;
"%dprec" return PERCENT_DPREC;
"%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
"%expect" return PERCENT_EXPECT;
"%expect"[-_]"rr" return PERCENT_EXPECT_RR;
"%file-prefix" return PERCENT_FILE_PREFIX;
"%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
"%initial-action" token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
"%initial-action" /* FIXME: Remove once %union handled differently. */ token_type = BRACED_CODE; return PERCENT_INITIAL_ACTION;
"%glr-parser" return PERCENT_GLR_PARSER;
"%left" return PERCENT_LEFT;
"%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
"%lex-param" /* FIXME: Remove once %union handled differently. */ token_type = BRACED_CODE; return PERCENT_LEX_PARAM;
"%locations" return PERCENT_LOCATIONS;
"%merge" return PERCENT_MERGE;
"%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
@@ -241,9 +192,9 @@ splice (\\[ \f\t\v]*\n)*
"%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
"%nterm" return PERCENT_NTERM;
"%output" return PERCENT_OUTPUT;
"%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
"%prec" rule_length--; return PERCENT_PREC;
"%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
"%parse-param" /* FIXME: Remove once %union handled differently. */ token_type = BRACED_CODE; return PERCENT_PARSE_PARAM;
"%prec" return PERCENT_PREC;
"%printer" /* FIXME: Remove once %union handled differently. */ token_type = BRACED_CODE; return PERCENT_PRINTER;
"%pure"[-_]"parser" return PERCENT_PURE_PARSER;
"%require" return PERCENT_REQUIRE;
"%right" return PERCENT_RIGHT;
@@ -262,13 +213,12 @@ splice (\\[ \f\t\v]*\n)*
}
"=" return EQUAL;
"|" rule_length = 0; return PIPE;
"|" return PIPE;
";" return SEMICOLON;
{id} {
val->symbol = symbol_get (yytext, *loc);
id_loc = *loc;
increment_rule_length (*loc);
BEGIN SC_AFTER_IDENTIFIER;
}
@@ -335,7 +285,6 @@ splice (\\[ \f\t\v]*\n)*
<SC_AFTER_IDENTIFIER>
{
":" {
rule_length = 0;
*loc = id_loc;
BEGIN INITIAL;
return ID_COLON;
@@ -401,7 +350,6 @@ splice (\\[ \f\t\v]*\n)*
STRING_FINISH;
loc->start = token_start;
val->chars = last_string;
increment_rule_length (*loc);
BEGIN INITIAL;
return STRING;
}
@@ -428,7 +376,6 @@ splice (\\[ \f\t\v]*\n)*
last_string_1 = last_string[1];
symbol_user_token_number_set (val->symbol, last_string_1, *loc);
STRING_FREE;
increment_rule_length (*loc);
BEGIN INITIAL;
return ID;
}
@@ -501,7 +448,7 @@ splice (\\[ \f\t\v]*\n)*
<SC_CHARACTER,SC_STRING>
{
{splice}|\\{splice}[^\n$@\[\]] STRING_GROW;
{splice}|\\{splice}[^\n\[\]] STRING_GROW;
}
<SC_CHARACTER>
@@ -622,8 +569,7 @@ splice (\\[ \f\t\v]*\n)*
STRING_FINISH;
loc->start = code_start;
val->chars = last_string;
increment_rule_length (*loc);
last_braced_code_loc = *loc;
gram_last_braced_code_loc = *loc;
BEGIN INITIAL;
return token_type;
}
@@ -633,18 +579,6 @@ splice (\\[ \f\t\v]*\n)*
(as `<' `<%'). */
"<"{splice}"<" STRING_GROW;
"$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc);
"@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc);
"$" {
warn_at (*loc, _("stray `$'"));
obstack_sgrow (&obstack_for_string, "$][");
}
"@" {
warn_at (*loc, _("stray `@'"));
obstack_sgrow (&obstack_for_string, "@@");
}
<<EOF>> unexpected_eof (code_start, "}"); BEGIN INITIAL;
}
@@ -684,19 +618,6 @@ splice (\\[ \f\t\v]*\n)*
}
/*-----------------------------------------.
| Escape M4 quoting characters in C code. |
`-----------------------------------------*/
<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
{
\$ obstack_sgrow (&obstack_for_string, "$][");
\@ obstack_sgrow (&obstack_for_string, "@@");
\[ obstack_sgrow (&obstack_for_string, "@{");
\] obstack_sgrow (&obstack_for_string, "@}");
}
/*-----------------------------------------------------.
| By default, grow the string obstack with the input. |
`-----------------------------------------------------*/
@@ -706,79 +627,6 @@ splice (\\[ \f\t\v]*\n)*
%%
/* The maximum number of semantic values to the left of a handle
(those referenced by $0, $-1, etc.) that are required by the
semantic actions of this grammar. */
int max_left_semantic_context = 0;
/* If BUF is null, add BUFSIZE (which in this case must be less than
INT_MAX) to COLUMN; otherwise, add mbsnwidth (BUF, BUFSIZE, 0) to
COLUMN. If an overflow occurs, or might occur but is undetectable,
return INT_MAX. Assume COLUMN is nonnegative. */
static inline int
add_column_width (int column, char const *buf, size_t bufsize)
{
  /* How far COLUMN can still grow before reaching INT_MAX.  */
  unsigned int room = INT_MAX - column;

  /* Without a buffer, BUFSIZE is itself the width to add.  */
  size_t added = bufsize;

  if (buf)
    {
      /* Refuse buffers so large that the width computation might
         overflow without our being able to notice.  */
      if (INT_MAX / 2 <= bufsize)
        return INT_MAX;
      added = mbsnwidth (buf, bufsize, 0);
    }

  if (room < added)
    return INT_MAX;
  return column + added;
}
/* Set *LOC and adjust scanner cursor to account for token TOKEN of
size SIZE. */
static void
adjust_location (location *loc, char const *token, size_t size)
{
  char const *end = token + size;
  /* Start of the run of bytes whose width has not been counted yet.  */
  char const *pending = token;
  char const *cur;
  int line = scanner_cursor.line;
  int column = scanner_cursor.column;

  /* The token begins where the previous one stopped.  */
  loc->start = scanner_cursor;

  for (cur = token; cur < end; cur++)
    {
      if (*cur == '\n')
        {
          /* Saturate at INT_MAX instead of overflowing.  */
          if (line < INT_MAX)
            line++;
          column = 1;
          pending = cur + 1;
        }
      else if (*cur == '\t')
        {
          /* Count what precedes the tab, then advance to the next
             multiple-of-8 tab stop (columns are 1-based here).  */
          column = add_column_width (column, pending, cur - pending);
          column = add_column_width (column, NULL, 8 - ((column - 1) & 7));
          pending = cur + 1;
        }
    }

  /* Account for the bytes after the last newline or tab.  */
  column = add_column_width (column, pending, cur - pending);

  scanner_cursor.line = line;
  scanner_cursor.column = column;
  loc->end = scanner_cursor;

  /* Warn only when this very token caused the saturation.  */
  if (line == INT_MAX && loc->start.line != INT_MAX)
    warn_at (*loc, _("line number overflow"));
  if (column == INT_MAX && loc->start.column != INT_MAX)
    warn_at (*loc, _("column number overflow"));
}
/* Read bytes from FP into buffer BUF of size SIZE. Return the
number of bytes read. Remove '\r' from input, treating \r\n
and isolated \r as \n. */
@@ -826,173 +674,6 @@ no_cr_read (FILE *fp, char *buf, size_t size)
}
/*------------------------------------------------------------------.
| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
| |
| Possible inputs: $[<TYPENAME>]($|integer) |
| |
| Output to OBSTACK_FOR_STRING a reference to this semantic value. |
`------------------------------------------------------------------*/
/* Translate the semantic-value reference TEXT (`$$', `$N', optionally
   with an explicit `<TYPE>') found at LOC and append the matching
   b4_lhs_value / b4_rhs_value call to OBSTACK_FOR_STRING.
   Return false, emitting nothing, when there is no current rule;
   return true otherwise, including when an error was reported.
   TEXT is modified in place when it carries an explicit type. */
static inline bool
handle_action_dollar (char *text, location loc)
{
const char *type_name = NULL;
/* Skip the leading `$'. */
char *cp = text + 1;
if (! current_rule)
return false;
/* Get the type name if explicit. */
if (*cp == '<')
{
type_name = ++cp;
/* No bound check: the scanner pattern that calls us only matches
   a `<' that is followed by a closing `>'. */
while (*cp != '>')
++cp;
/* Overwrite `>' so that TYPE_NAME is a NUL-terminated string. */
*cp = '\0';
++cp;
}
if (*cp == '$')
{
/* `$$': fall back on the type recorded for position 0 of the
   current rule when none was given explicitly. */
if (!type_name)
type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
if (!type_name && typed)
complain_at (loc, _("$$ of `%s' has no declared type"),
current_rule->sym->tag);
if (!type_name)
type_name = "";
obstack_fgrow1 (&obstack_for_string,
"]b4_lhs_value([%s])[", type_name);
/* Record that this rule's action references $$. */
current_rule->used = true;
}
else
{
long int num = strtol (cp, NULL, 10);
/* NUM may not exceed the current rule length; the lower bound
   presumably keeps `1 - n' below within int range. */
if (1 - INT_MAX + rule_length <= num && num <= rule_length)
{
int n = num;
/* Remember the deepest reference to the left of the handle
   ($0, $-1, ...). */
if (max_left_semantic_context < 1 - n)
max_left_semantic_context = 1 - n;
/* A type is looked up only for positive N. */
if (!type_name && 0 < n)
type_name = symbol_list_n_type_name_get (current_rule, loc, n);
if (!type_name && typed)
complain_at (loc, _("$%d of `%s' has no declared type"),
n, current_rule->sym->tag);
if (!type_name)
type_name = "";
obstack_fgrow3 (&obstack_for_string,
"]b4_rhs_value(%d, %d, [%s])[",
rule_length, n, type_name);
symbol_list_n_used_set (current_rule, n, true);
}
else
complain_at (loc, _("integer out of range: %s"), quote (text));
}
return true;
}
/*----------------------------------------------------------------.
| Map `$?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
| (are we in an action?). |
`----------------------------------------------------------------*/
static void
handle_dollar (int token_type, char *text, location loc)
{
  /* Set once TEXT has been translated (or diagnosed) by a handler.  */
  bool handled = false;

  if (token_type == BRACED_CODE)
    {
      /* Rule action: the action-specific handler knows `$$' and `$N'.  */
      handled = handle_action_dollar (text, loc);
    }
  else if (token_type == PERCENT_DESTRUCTOR
           || token_type == PERCENT_INITIAL_ACTION
           || token_type == PERCENT_PRINTER)
    {
      /* Only `$$' is meaningful in these directives.  */
      if (text[1] == '$')
        {
          obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
          handled = true;
        }
    }

  if (!handled)
    complain_at (loc, _("invalid value: %s"), quote (text));
}
/*------------------------------------------------------.
| TEXT is a location token (i.e., a `@...'). Output to |
| OBSTACK_FOR_STRING a reference to this location. |
`------------------------------------------------------*/
static inline bool
handle_action_at (char *text, location loc)
{
  /* What follows the leading `@'.  */
  char *ref = text + 1;
  long int num;

  /* Any `@' reference turns location tracking on, even if it turns
     out to be unusable below.  */
  locations_flag = true;

  if (! current_rule)
    return false;

  if (*ref == '$')
    {
      obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
      return true;
    }

  num = strtol (ref, NULL, 10);
  if (num < 1 - INT_MAX + rule_length || rule_length < num)
    complain_at (loc, _("integer out of range: %s"), quote (text));
  else
    {
      int n = num;
      obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location(%d, %d)[",
                      rule_length, n);
    }

  /* The reference was consumed, whether translated or diagnosed.  */
  return true;
}
/*----------------------------------------------------------------.
| Map `@?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
| (are we in an action?). |
`----------------------------------------------------------------*/
/* TOKEN_TYPE is the kind of user code being scanned, TEXT the matched
   `@...' reference and LOC its location.  Either a translation of
   TEXT is appended to OBSTACK_FOR_STRING, or an error is reported.  */
static void
handle_at (int token_type, char *text, location loc)
{
  switch (token_type)
    {
    case BRACED_CODE:
      /* handle_action_at returns false, emitting nothing, when there
         is no current rule.  Fall through to the diagnostic in that
         case, as handle_dollar does, instead of silently dropping
         the reference.  */
      if (handle_action_at (text, loc))
        return;
      break;

    case PERCENT_INITIAL_ACTION:
    case PERCENT_DESTRUCTOR:
    case PERCENT_PRINTER:
      /* Only `@$' is meaningful in these directives.  */
      if (text[1] == '$')
        {
          obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
          return;
        }
      break;

    default:
      break;
    }

  complain_at (loc, _("invalid value: %s"), quote (text));
}
/*------------------------------------------------------.
| Scan NUMBER for a base-BASE integer at location LOC. |
@@ -1087,20 +768,8 @@ handle_syncline (char *args, location loc)
warn_at (loc, _("line number overflow"));
lineno = INT_MAX;
}
scanner_cursor.file = current_file = uniqstr_new (file);
scanner_cursor.line = lineno;
scanner_cursor.column = 1;
}
/*---------------------------------.
| Report a rule that is too long. |
`---------------------------------*/
static void
rule_length_overflow (location loc)
{
fatal_at (loc, _("rule is too long"));
current_file = uniqstr_new (file);
boundary_set, (&scanner_cursor, current_file, lineno, 1);
}
@@ -1148,7 +817,7 @@ unexpected_newline (boundary start, char const *token_end)
`-------------------------*/
void
scanner_initialize (void)
gram_scanner_initialize (void)
{
/* Set up the obstack that STRING_GROW and STRING_FINISH append to. */
obstack_init (&obstack_for_string);
}
@@ -1159,7 +828,7 @@ scanner_initialize (void)
`-----------------------------------------------*/
void
scanner_free (void)
gram_scanner_free (void)
{
obstack_free (&obstack_for_string, 0);
/* Reclaim Flex's buffers. */