Support parametric types.

There are two issues to handle: first, scanning nested angle bracket pairs to support types such as std::pair< std::string, std::list<std::string> >. Another issue is to address idiosyncrasies of C++: do not glue two closing angle brackets together (otherwise it's operator>>), and avoid blindly sticking a TYPE to the opening <, as it can result in '<:' which is a digraph for '['. * src/scan-gram.l (braces_level): Rename as... (nesting): this. (SC_TAG): New. Implement support for complex tags. (tag): Accept \n, but not <. * data/lalr1.cc (b4_symbol_value, b4_symbol_value_template) (b4_symbol_variant): Leave space around types as parameters. * examples/variant.yy: Use nested template types and leading ::. * src/parse-gram.y (TYPE, TYPE_TAG_ANY, TYPE_TAG_NONE, type.opt): Rename as... (TAG, TAG_ANY, TAG_NONE, tag.opt): these. * tests/c++.at: Test parametric types.
2026-04-25 19:19:37 +00:00 · 2008-10-23 20:01:48 -05:00
parent 7b6e67533e
commit cb823b6f0c
6 changed files with 128 additions and 48 deletions
@@ -167,9 +167,9 @@ static int current_prec = 0;
 %token PIPE            "|"
 %token PROLOGUE        "%{...%}"
 %token SEMICOLON       ";"
-%token TYPE            "type"
-%token TYPE_TAG_ANY    "<*>"
-%token TYPE_TAG_NONE   "<>"
+%token TAG             "<tag>"
+%token TAG_ANY         "<*>"
+%token TAG_NONE        "<>"

 %type <character> CHAR
 %printer { fputs (char_name ($$), stderr); } CHAR
@@ -183,8 +183,8 @@ static int current_prec = 0;
 %printer { fprintf (stderr, "{\n%s\n}", $$); }
 	 braceless content.opt "{...}" "%{...%}" EPILOGUE

-%type <uniqstr> TYPE ID ID_COLON variable
-%printer { fprintf (stderr, "<%s>", $$); } TYPE
+%type <uniqstr> TAG ID ID_COLON variable
+%printer { fprintf (stderr, "<%s>", $$); } TAG
 %printer { fputs ($$, stderr); } ID variable
 %printer { fprintf (stderr, "%s:", $$); } ID_COLON

@@ -387,7 +387,7 @@ symbol_declaration:
      current_class = unknown_sym;
      current_type = NULL;
    }
-| "%type" TYPE symbols.1
+| "%type" TAG symbols.1
    {
      symbol_list *list;
      tag_seen = true;
@@ -398,7 +398,7 @@ symbol_declaration:
 ;

 precedence_declaration:
-  precedence_declarator type.opt symbols.prec
+  precedence_declarator tag.opt symbols.prec
    {
      symbol_list *list;
      ++current_prec;
@@ -419,9 +419,9 @@ precedence_declarator:
 | "%precedence" { $$ = precedence_assoc; }
 ;

-type.opt:
+tag.opt:
  /* Nothing. */ { current_type = NULL; }
-| TYPE           { current_type = $1; tag_seen = true; }
+| TAG            { current_type = $1; tag_seen = true; }
 ;

 /* Just like symbols.1 but accept INT for the sake of POSIX.  */
@@ -451,15 +451,15 @@ generic_symlist:
 ;

 generic_symlist_item:
-  symbol            { $$ = symbol_list_sym_new ($1, @1); }
-| TYPE              { $$ = symbol_list_type_new ($1, @1); }
-| "<*>"             { $$ = symbol_list_default_tagged_new (@1); }
-| "<>"             { $$ = symbol_list_default_tagless_new (@1); }
+  symbol    { $$ = symbol_list_sym_new ($1, @1); }
+| TAG       { $$ = symbol_list_type_new ($1, @1); }
+| "<*>"     { $$ = symbol_list_default_tagged_new (@1); }
+| "<>"      { $$ = symbol_list_default_tagless_new (@1); }
 ;

 /* One token definition.  */
 symbol_def:
-  TYPE
+  TAG
     {
       current_type = $1;
       tag_seen = true;
@@ -538,7 +538,7 @@ rhs:
    { grammar_current_rule_prec_set ($3, @3); }
 | rhs "%dprec" INT
    { grammar_current_rule_dprec_set ($3, @3); }
-| rhs "%merge" TYPE
+| rhs "%merge" TAG
    { grammar_current_rule_merge_set ($3, @3); }
 ;

@@ -78,6 +78,8 @@ static void unexpected_newline (boundary, char const *);
 /* A identifier was just read in directives/rules.  Special state
    to capture the sequence `identifier :'. */
 %x SC_AFTER_IDENTIFIER
+ /* A complex tag, with nested angle brackets. */
+%x SC_TAG

 /* Three types of user code:
    - prologue (code between `%{' `%}' in the first section, before %%);
@@ -96,8 +98,10 @@ int	  [0-9]+

 /* POSIX says that a tag must be both an id and a C union member, but
   historically almost any character is allowed in a tag.  We disallow
-   NUL and newline, as this simplifies our implementation.  */
-tag	 [^\0\n>]+
+   NUL, as this simplifies our implementation.  We disallow angle
+   bracket to match them in nested pairs: several languages use them
+   for generics/template types.  */
+tag	 [^\0<>]+

 /* Zero or more instances of backslash-newline.  Following GCC, allow
   white space between the backslash and the newline.  */
@@ -105,8 +109,9 @@ splice	 (\\[ \f\t\v]*\n)*

 %%
 %{
-  /* Nesting level of the current code in braces.  */
-  int braces_level IF_LINT (= 0);
+  /* Nesting level.  Either for nested braces, or nested angle brackets
+     (but not mixed).  */
+  int nesting IF_LINT (= 0);

  /* Parent context state, when applicable.  */
  int context_state IF_LINT (= 0);
@@ -205,8 +210,6 @@ splice	 (\\[ \f\t\v]*\n)*
  "="                     return EQUAL;
  "|"                     return PIPE;
  ";"                     return SEMICOLON;
-  "<*>"                   return TYPE_TAG_ANY;
-  "<>"                    return TYPE_TAG_NONE;

  {id} {
    val->uniqstr = uniqstr_new (yytext);
@@ -235,18 +238,25 @@ splice	 (\\[ \f\t\v]*\n)*
  /* Code in between braces.  */
  "{" {
    STRING_GROW;
-    braces_level = 0;
+    nesting = 0;
    code_start = loc->start;
    BEGIN SC_BRACED_CODE;
  }

  /* A type. */
+  "<*>"       return TAG_ANY;
+  "<>"        return TAG_NONE;
  "<"{tag}">" {
    obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
    STRING_FINISH;
    val->uniqstr = uniqstr_new (last_string);
    STRING_FREE;
-    return TYPE;
+    return TAG;
+  }
+  "<"         {
+    nesting = 0;
+    token_start = loc->start;
+    BEGIN SC_TAG;
  }

  "%%" {
@@ -267,6 +277,17 @@ splice	 (\\[ \f\t\v]*\n)*
 }


+  /*--------------------------------------------------------------.
+  | Supporting \0 complexifies our implementation for no expected |
+  | added value.                                                  |
+  `--------------------------------------------------------------*/
+
+<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
+{
+  \0	    complain_at (*loc, _("invalid null character"));
+}
+
+
  /*-----------------------------------------------------------------.
  | Scanning after an identifier, checking whether a colon is next.  |
  `-----------------------------------------------------------------*/
@@ -386,11 +407,40 @@ splice	 (\\[ \f\t\v]*\n)*
  }
 }

-<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
-{
-  \0	    complain_at (*loc, _("invalid null character"));
-}
+  /*-----------------------------------------------------------.
+  | Scanning a Bison nested tag.  The initial angle bracket is |
+  | already eaten.                                             |
+  `-----------------------------------------------------------*/

+<SC_TAG>
+{
+  ">" {
+    --nesting;
+    if (nesting < 0)
+      {
+        STRING_FINISH;
+        loc->start = token_start;
+        val->uniqstr = uniqstr_new (last_string);
+        STRING_FREE;
+        BEGIN INITIAL;
+        return TAG;
+      }
+    STRING_GROW;
+  }
+
+  [^<>]+ STRING_GROW;
+  "<"+   STRING_GROW; nesting += yyleng;
+
+  <<EOF>> {
+    unexpected_eof (token_start, ">");
+    STRING_FINISH;
+    loc->start = token_start;
+    val->uniqstr = uniqstr_new (last_string);
+    STRING_FREE;
+    BEGIN INITIAL;
+    return TAG;
+  }
+}

  /*----------------------------.
  | Decode escaped characters.  |
@@ -509,13 +559,13 @@ splice	 (\\[ \f\t\v]*\n)*

 <SC_BRACED_CODE>
 {
-  "{"|"<"{splice}"%"  STRING_GROW; braces_level++;
-  "%"{splice}">"      STRING_GROW; braces_level--;
+  "{"|"<"{splice}"%"  STRING_GROW; nesting++;
+  "%"{splice}">"      STRING_GROW; nesting--;
  "}" {
    obstack_1grow (&obstack_for_string, '}');

-    --braces_level;
-    if (braces_level < 0)
+    --nesting;
+    if (nesting < 0)
      {
 	STRING_FINISH;
 	loc->start = code_start;