Extract the parsing of user actions from the grammar scanner.

As a consequence, the relation between the grammar scanner and
parser is much simpler.  We can also split "composite tokens" back
into simple tokens.
* src/gram.h (ITEM_NUMBER_MAX, RULE_NUMBER_MAX): New.
* src/scan-gram.l (add_column_width, adjust_location): Move to and
rename as...
* src/location.h, src/location.c (add_column_width)
(location_compute): these.
Fix the column count: the initial column is 0.
(location_print): Be robust to ending column being 0.
* src/location.h (boundary_set): New.
* src/main.c: Adjust to scanner_free being renamed as
gram_scanner_free.
* src/output.c: Include scan-code.h.
* src/parse-gram.y: Include scan-gram.h and scan-code.h.
Use boundary_set.
(PERCENT_DESTRUCTOR, PERCENT_PRINTER, PERCENT_INITIAL_ACTION)
(PERCENT_LEX_PARAM, PERCENT_PARSE_PARAM): Remove the {...} part,
which is now, again, a separate token.
Adjust all dependencies.
Wherever actions with $ and @ are used, use translate_code.
(action): Remove this nonterminal which is now useless.
* src/reader.c: Include assert.h, scan-gram.h and scan-code.h.
(grammar_current_rule_action_append): Use translate_code.
(packgram): Bound check ruleno, itemno, and rule_length.
* src/reader.h (gram_in, gram__flex_debug, scanner_cursor)
(last_string, last_braced_code_loc, max_left_semantic_context)
(scanner_initialize, scanner_free, scanner_last_string_free)
(gram_out, gram_lineno, YY_DECL_): Move to...
* src/scan-gram.h: this new file.
(YY_DECL): Rename as...
(GRAM_DECL): this.
* src/scan-code.h, src/scan-code.l, src/scan-code-c.c: New.
* src/scan-gram.l (gram_get_lineno, gram_get_in, gram_get_out)
(gram_get_leng, gram_get_text, gram_set_lineno, gram_set_in)
(gram_set_out, gram_get_debug, gram_set_debug, gram_lex_destroy):
Move these declarations, and...
(obstack_for_string, STRING_GROW, STRING_FINISH, STRING_FREE):
these to...
* src/flex-scanner.h: this new file.
* src/scan-gram.l (rule_length, rule_length_overflow)
(increment_rule_length): Remove.
(last_braced_code_loc): Rename as...
(gram_last_braced_code_loc): this.
Adjust to the changes of the parser.
Move all the handling of $ and @ into...
* src/scan-code.l: here.
* src/scan-gram.l (handle_dollar, handle_at): Remove.
(handle_action_dollar, handle_action_at): Move to...
* src/scan-code.l: here.
* src/Makefile.am (bison_SOURCES): Add flex-scanner.h,
scan-code.h, scan-code-c.c, scan-gram.h.
(EXTRA_bison_SOURCES): Add scan-code.l.
(BUILT_SOURCES): Add scan-code.c.
(yacc): Be robust to white spaces.
* tests/conflicts.at, tests/input.at, tests/reduce.at,
* tests/regression.at: Adjust the column numbers.
* tests/regression.at: Adjust the error message.
This commit is contained in:
Akim Demaille
2006-06-06 16:40:06 +00:00
parent 184e42f065
commit e9071366c3
21 changed files with 1857 additions and 776 deletions

View File

@@ -32,6 +32,8 @@
#include "quotearg.h"
#include "reader.h"
#include "symlist.h"
#include "scan-gram.h"
#include "scan-code.h"
#include "strverscmp.h"
#define YYLLOC_DEFAULT(Current, Rhs, N) (Current) = lloc_default (Rhs, N)
@@ -84,9 +86,8 @@ static int current_prec = 0;
{
/* Bison's grammar can initial empty locations, hence a default
location is needed. */
@$.start.file = @$.end.file = current_file;
@$.start.line = @$.end.line = 1;
@$.start.column = @$.end.column = 0;
boundary_set (&@$.start, current_file, 1, 0);
boundary_set (&@$.end, current_file, 1, 0);
}
/* Only NUMBERS have a value. */
@@ -109,8 +110,8 @@ static int current_prec = 0;
%token PERCENT_NTERM "%nterm"
%token PERCENT_TYPE "%type"
%token PERCENT_DESTRUCTOR "%destructor {...}"
%token PERCENT_PRINTER "%printer {...}"
%token PERCENT_DESTRUCTOR "%destructor"
%token PERCENT_PRINTER "%printer"
%token PERCENT_UNION "%union {...}"
@@ -137,8 +138,8 @@ static int current_prec = 0;
PERCENT_EXPECT_RR "%expect-rr"
PERCENT_FILE_PREFIX "%file-prefix"
PERCENT_GLR_PARSER "%glr-parser"
PERCENT_INITIAL_ACTION "%initial-action {...}"
PERCENT_LEX_PARAM "%lex-param {...}"
PERCENT_INITIAL_ACTION "%initial-action"
PERCENT_LEX_PARAM "%lex-param"
PERCENT_LOCATIONS "%locations"
PERCENT_NAME_PREFIX "%name-prefix"
PERCENT_NO_DEFAULT_PREC "%no-default-prec"
@@ -146,7 +147,7 @@ static int current_prec = 0;
PERCENT_NONDETERMINISTIC_PARSER
"%nondeterministic-parser"
PERCENT_OUTPUT "%output"
PERCENT_PARSE_PARAM "%parse-param {...}"
PERCENT_PARSE_PARAM "%parse-param"
PERCENT_PURE_PARSER "%pure-parser"
PERCENT_REQUIRE "%require"
PERCENT_SKELETON "%skeleton"
@@ -167,23 +168,14 @@ static int current_prec = 0;
%token EPILOGUE "epilogue"
%token BRACED_CODE "{...}"
%type <chars> STRING string_content
"%destructor {...}"
"%initial-action {...}"
"%lex-param {...}"
"%parse-param {...}"
"%printer {...}"
"{...}"
"%union {...}"
PROLOGUE EPILOGUE
%printer { fprintf (stderr, "\"%s\"", $$); }
STRING string_content
%printer { fprintf (stderr, "{\n%s\n}", $$); }
"%destructor {...}"
"%initial-action {...}"
"%lex-param {...}"
"%parse-param {...}"
"%printer {...}"
"{...}"
"%union {...}"
PROLOGUE EPILOGUE
%type <uniqstr> TYPE
@@ -214,7 +206,8 @@ declarations:
declaration:
grammar_declaration
| PROLOGUE { prologue_augment ($1, @1); }
| PROLOGUE { prologue_augment (translate_code ($1, @1),
@1); }
| "%debug" { debug_flag = true; }
| "%define" string_content
{
@@ -232,17 +225,17 @@ declaration:
nondeterministic_parser = true;
glr_parser = true;
}
| "%initial-action {...}"
| "%initial-action" "{...}"
{
muscle_code_grow ("initial_action", $1, @1);
muscle_code_grow ("initial_action", translate_symbol_action ($2, @2), @2);
}
| "%lex-param {...}" { add_param ("lex_param", $1, @1); }
| "%lex-param" "{...}" { add_param ("lex_param", $2, @2); }
| "%locations" { locations_flag = true; }
| "%name-prefix" "=" string_content { spec_name_prefix = $3; }
| "%no-lines" { no_lines_flag = true; }
| "%nondeterministic-parser" { nondeterministic_parser = true; }
| "%output" "=" string_content { spec_outfile = $3; }
| "%parse-param {...}" { add_param ("parse_param", $1, @1); }
| "%parse-param" "{...}" { add_param ("parse_param", $2, @2); }
| "%pure-parser" { pure_parser = true; }
| "%require" string_content { version_check (&@2, $2); }
| "%skeleton" string_content { skeleton = $2; }
@@ -275,19 +268,21 @@ grammar_declaration:
typed = true;
muscle_code_grow ("stype", body, @1);
}
| "%destructor {...}" symbols.1
| "%destructor" "{...}" symbols.1
{
symbol_list *list;
for (list = $2; list; list = list->next)
symbol_destructor_set (list->sym, $1, @1);
symbol_list_free ($2);
const char *action = translate_symbol_action ($2, @2);
for (list = $3; list; list = list->next)
symbol_destructor_set (list->sym, action, @2);
symbol_list_free ($3);
}
| "%printer {...}" symbols.1
| "%printer" "{...}" symbols.1
{
symbol_list *list;
for (list = $2; list; list = list->next)
symbol_printer_set (list->sym, $1, @1);
symbol_list_free ($2);
const char *action = translate_symbol_action ($2, @2);
for (list = $3; list; list = list->next)
symbol_printer_set (list->sym, action, @2);
symbol_list_free ($3);
}
| "%default-prec"
{
@@ -346,7 +341,6 @@ type.opt:
;
/* One or more nonterminals to be %typed. */
symbols.1:
symbol { $$ = symbol_list_new ($1, @1); }
| symbols.1 symbol { $$ = symbol_list_prepend ($1, $2, @2); }
@@ -426,7 +420,9 @@ rhs:
{ grammar_current_rule_begin (current_lhs, current_lhs_location); }
| rhs symbol
{ grammar_current_rule_symbol_append ($2, @2); }
| rhs action
| rhs "{...}"
{ grammar_current_rule_action_append (gram_last_string,
gram_last_braced_code_loc); }
| rhs "%prec" symbol
{ grammar_current_rule_prec_set ($3, @3); }
| rhs "%dprec" INT
@@ -440,23 +436,6 @@ symbol:
| string_as_id { $$ = $1; }
;
/* Handle the semantics of an action specially, with a mid-rule
action, so that grammar_current_rule_action_append is invoked
immediately after the braced code is read by the scanner.
This implementation relies on the LALR(1) parsing algorithm.
If grammar_current_rule_action_append were executed in a normal
action for this rule, then when the input grammar contains two
successive actions, the scanner would have to read both actions
before reducing this rule. That wouldn't work, since the scanner
relies on all preceding input actions being processed by
grammar_current_rule_action_append before it scans the next
action. */
action:
{ grammar_current_rule_action_append (last_string, last_braced_code_loc); }
BRACED_CODE
;
/* A string used as an ID: quote it. */
string_as_id:
STRING
@@ -477,8 +456,8 @@ epilogue.opt:
/* Nothing. */
| "%%" EPILOGUE
{
muscle_code_grow ("epilogue", $2, @2);
scanner_last_string_free ();
muscle_code_grow ("epilogue", translate_code ($2, @2), @2);
gram_scanner_last_string_free ();
}
;
@@ -563,7 +542,7 @@ add_param (char const *type, char *decl, location loc)
free (name);
}
scanner_last_string_free ();
gram_scanner_last_string_free ();
}
static void