bison/src/scan-gram.l

/* Bison Grammar Scanner                             -*- C -*-
   Copyright (C) 2002 Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307  USA
*/

/* Scanner generation options.  The start condition stack requested
   here (`stack') is what provides the yy_push_state, yy_pop_state and
   yy_top_state calls used by the rules below.  `nodefault' means that
   every start condition must match all its possible input by itself.
   The generated symbols are prefixed with `gram_'.  */
%option debug nodefault noyywrap nounput never-interactive stack
%option prefix="gram_" outfile="lex.yy.c"

%{
#include "system.h"
#include "complain.h"
#include "quote.h"
#include "getargs.h"
#include "gram.h"
#include "reader.h"

/* Each time we match a string, move the end cursor to its end. */
#define YY_USER_ACTION  LOCATION_COLUMNS (*yylloc, yyleng)

/* Account for the end-of-lines that were just matched.  Wrapped in
   `do ... while (0)' so that it behaves as a single statement, even
   as the body of an unbraced `if'.
   NOTE(review): YYLENG is a number of characters, so a two-character
   end-of-line (`\r\n') is counted as two lines.  */
#define YY_LINES					\
  do {							\
    LOCATION_LINES (*yylloc, yyleng);			\
    lineno += yyleng;					\
  } while (0)

/* Make the current position the start of the next token. */
#define YY_STEP         LOCATION_STEP (*yylloc)

/* Appending to the STRING_OBSTACK. */
#define YY_INIT         obstack_init (&string_obstack)
#define YY_GROW         obstack_grow (&string_obstack, yytext, yyleng)

/* NUL-terminate the string being built and store it in
   YYLVAL->STRING.  A single statement, for the same reason as
   YY_LINES.  */
#define YY_FINISH					\
  do {							\
    obstack_1grow (&string_obstack, '\0');		\
    yylval->string = obstack_finish (&string_obstack);	\
  } while (0)

/* This is only to avoid GCC warnings. */
#define YY_USER_INIT    if (yycontrol) {;};

/* The text of the token being built. */
static struct obstack string_obstack;

/* Number of `{' not yet closed in the braced code being scanned. */
static int braces_level = 0;

/* Number of `%%' seen so far; the second one starts the epilogue. */
static int percent_percent_count = 0;

static void handle_dollar PARAMS ((char *cp));
static void handle_at PARAMS ((char *cp));

%}
%x SC_COMMENT
%x SC_STRING SC_CHARACTER
%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
%x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE

/* An identifier is made of letters, periods, underscores and digits,
   and may not start with a digit.  The underscore is accepted as
   first character too: `_foo' is a valid name.  */
id	 [.a-zA-Z_][.a-zA-Z_0-9]*
int 	 [0-9]+
eols     (\n|\r|\n\r|\r\n)+
blanks   [ \t\f]+

%%
%{
  /* At each yylex invocation, mark the current position as the
     start of the next token.  */

  /* TR_POS is a compile-time switch: when set to a nonzero value, the
     location is printed on stderr before (`FOO1') and after (`BAR1')
     the step.  */
#define TR_POS 0
#if TR_POS
  fprintf (stderr, "FOO1: ");
  LOCATION_PRINT (stderr, *yylloc);
  fprintf (stderr, "\n");
#endif
  YY_STEP;
#if TR_POS
  fprintf (stderr, "BAR1: ");
  LOCATION_PRINT (stderr, *yylloc);
  fprintf (stderr, "\n");
#endif
%}


  /*----------------------------.
  | Scanning Bison directives.  |
  `----------------------------*/
<INITIAL>
{
  /* Directives.  Compound names accept either `-' or `_' as
     separator.  `%binary' yields the same token as `%nonassoc',
     `%term' the same as `%token', and `%fixed-output-files' the same
     as `%yacc'.  */
  "%binary"               return PERCENT_NONASSOC;
  "%debug"                return PERCENT_DEBUG;
  "%define"               return PERCENT_DEFINE;
  "%defines"              return PERCENT_DEFINES;
  "%error"[-_]"verbose"   return PERCENT_ERROR_VERBOSE;
  "%expect"               return PERCENT_EXPECT;
  "%file-prefix"          return PERCENT_FILE_PREFIX;
  "%fixed"[-_]"output"[-_]"files"   return PERCENT_YACC;
  "%left"                 return PERCENT_LEFT;
  "%locations"            return PERCENT_LOCATIONS;
  "%name"[-_]"prefix"     return PERCENT_NAME_PREFIX;
  "%no"[-_]"lines"        return PERCENT_NO_LINES;
  "%nonassoc"             return PERCENT_NONASSOC;
  "%nterm"                return PERCENT_NTERM;
  "%output"               return PERCENT_OUTPUT;
  "%prec"                 return PERCENT_PREC;
  "%pure"[-_]"parser"     return PERCENT_PURE_PARSER;
  "%right"                return PERCENT_RIGHT;
  "%skeleton"             return PERCENT_SKELETON;
  "%start"                return PERCENT_START;
  "%term"                 return PERCENT_TOKEN;
  "%token"                return PERCENT_TOKEN;
  "%token"[-_]"table"     return PERCENT_TOKEN_TABLE;
  "%type"                 return PERCENT_TYPE;
  "%union"                return PERCENT_UNION;
  "%verbose"              return PERCENT_VERBOSE;
  "%yacc"                 return PERCENT_YACC;

  /* Punctuation.  */
  "="                     return EQUAL;
  ":"                     return COLON;
  "|"                     return PIPE;
  ";"                     return SEMICOLON;

  /* White space is skipped: the start of the next token is moved
     past it.  */
  {eols}      YY_LINES; YY_STEP;
  {blanks}    YY_STEP;

  /* Identifiers.  The text is first stored as a string, which is
     then used to fetch the symbol returned in YYLVAL.  */
  {id}        {
    YY_INIT; YY_GROW; YY_FINISH;
    yylval->symbol = getsym (yylval->string);
    return ID;
  }

  /* Integers, read in base 10.  */
  {int}       yylval->integer = strtol (yytext, 0, 10); return INT;

  /* Characters.  We don't check there is only one.  */
  \'          YY_INIT; YY_GROW; yy_push_state (SC_ESCAPED_CHARACTER);

  /* Strings. */
  \"          YY_INIT; YY_GROW; yy_push_state (SC_ESCAPED_STRING);

  /* Comments. */
  "/*"        yy_push_state (SC_COMMENT);
  "//".*      YY_STEP;

  /* Prologue. */
  "%{"        YY_INIT; yy_push_state (SC_PROLOGUE);

  /* Code in between braces.  */
  "{"         YY_INIT; YY_GROW; ++braces_level; yy_push_state (SC_BRACED_CODE);

  /* A type.  The enclosing `<' and `>' are not part of the value.  */
  "<"[^>]+">" YY_INIT; obstack_grow (&string_obstack, yytext + 1, yyleng - 2); YY_FINISH; return TYPE;

  /* The second `%%' starts the epilogue; both occurrences are
     returned to the parser.  */
  "%%"   {
    if (++percent_percent_count == 2)
      yy_push_state (SC_EPILOGUE);
    return PERCENT_PERCENT;
  }

  /* Anything else is reported on stderr and skipped.  */
  .           {
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": invalid character: `%c'\n", *yytext);
    YY_STEP;
  }
}


  /*------------------------------------------------------------.
  | Whatever the start condition (but those which correspond to |
  | entity `swallowed' by Bison: SC_ESCAPED_STRING and          |
  | SC_ESCAPED_CHARACTER), no M4 character must escape as is.   |
  `------------------------------------------------------------*/

<SC_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
{
  /* Output the quadrigraphs that stand for `[' and `]'.  A comment
     opened directly from INITIAL is discarded, not accumulated, and
     may even be met before STRING_OBSTACK was ever initialized: its
     brackets must not be appended to the obstack.  */
  \[ {
    if (YY_START != SC_COMMENT || yy_top_state () != INITIAL)
      {
	obstack_sgrow (&string_obstack, "@<:@");
      }
  }
  \] {
    if (YY_START != SC_COMMENT || yy_top_state () != INITIAL)
      {
	obstack_sgrow (&string_obstack, "@:>@");
      }
  }
}


  /*-----------------------------------------------------------.
  | Scanning a C comment. The initial `/ *' is already eaten.  |
  `-----------------------------------------------------------*/

<SC_COMMENT>
{
  /* A comment opened from INITIAL is skipped; a comment opened from
     any other start condition is appended to the string obstack.  In
     both cases the enclosing start condition is restored at the end
     of the comment.  */
  "*/" { /* End of the comment. */
    if (yy_top_state () == INITIAL)
      {
	YY_STEP;
      }
    else
      {
	YY_GROW;
      }
    yy_pop_state ();
  }

  /* The body of the comment: kept only when not opened from
     INITIAL.  Lines are counted in both cases.  */
  [^\[\]*\n\r]+  	if (yy_top_state () != INITIAL) YY_GROW;
  {eols}	if (yy_top_state () != INITIAL) YY_GROW; YY_LINES;
  .             /* Stray `*'. */if (yy_top_state () != INITIAL) YY_GROW;

  /* Unterminated comment: report it on stderr and go back to the
     enclosing start condition.  */
  <<EOF>> {
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a comment\n");
    yy_pop_state ();
  }
}


  /*----------------------------------------------------------------.
  | Scanning a C string, including its escapes.  The initial `"' is |
  | already eaten.                                                  |
  `----------------------------------------------------------------*/

<SC_ESCAPED_STRING>
{
  /* The closing quote: it is part of the value, as is the opening
     one.  This start condition is only entered from INITIAL.  */
  \" {
    assert (yy_top_state () == INITIAL);
    YY_GROW;
    YY_FINISH;
    yy_pop_state ();
    return STRING;
  }

  /* Regular characters are copied as is.  */
  [^\"\n\r\\]+      YY_GROW;

  /* Whatever the end-of-lines matched, a single `\n' is stored.  */
  {eols}    obstack_1grow (&string_obstack, '\n'); YY_LINES;

  /* Unterminated string: report it on stderr, and return what was
     read so far as a STRING.  */
  <<EOF>> {
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a string\n");
    assert (yy_top_state () == INITIAL);
    YY_FINISH;
    yy_pop_state ();
    return STRING;
  }
}

  /*---------------------------------------------------------------.
  | Scanning a C character, decoding its escapes.  The initial "'" |
  | is already eaten.                                              |
  `---------------------------------------------------------------*/

<SC_ESCAPED_CHARACTER>
{
  /* The closing quote.  The symbol is named after the whole literal,
     quotes included, is made a token, and its user token number is
     the value of the first character after the opening quote.  */
  \' {
    YY_GROW;
    assert (yy_top_state () == INITIAL);
    {
      /* Read the character as an unsigned char: with a plain (possibly
	 signed) char, characters above 127 would be converted into
	 huge token numbers.  */
      unsigned char c;
      YY_FINISH;
      /* Fetch the character before YYLVAL->SYMBOL is assigned: the
	 string is still needed at that point.  */
      c = yylval->string[1];
      yylval->symbol = getsym (yylval->string);
      symbol_class_set (yylval->symbol, token_sym);
      symbol_user_token_number_set (yylval->symbol, (unsigned int) c);
      yy_pop_state ();
      return ID;
    }
  }

  /* Regular characters are copied as is.  */
  [^\'\n\r\\]      YY_GROW;

  /* Whatever the end-of-lines matched, a single `\n' is stored.  */
  {eols}    obstack_1grow (&string_obstack, '\n'); YY_LINES;

  /* Unterminated character: report it on stderr, and return what was
     read so far.  */
  <<EOF>> {
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a character\n");
    assert (yy_top_state () == INITIAL);
    YY_FINISH;
    yy_pop_state ();
    return CHARACTER;
  }
}


  /*----------------------------.
  | Decode escaped characters.  |
  `----------------------------*/

<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
{
  /* Octal escapes: one to three digits, as in C (so that `\0' is
     accepted).  Values that do not fit in a byte are rejected.  */
  \\[0-7]{1,3}		{
    long c = strtol (yytext + 1, 0, 8);
    if (c > 255)
      {
	LOCATION_PRINT (stderr, *yylloc);
	fprintf (stderr, ": invalid escape: %s\n", yytext);
	YY_STEP;
      }
    else
      obstack_1grow (&string_obstack, c);
  }

  /* Hexadecimal escapes, exactly two digits.  */
  \\x[0-9a-fA-F]{2}	{
    obstack_1grow (&string_obstack, strtol (yytext + 2, 0, 16));
  }

  /* Simple escapes standing for a control character.  */
  \\a	obstack_1grow (&string_obstack, '\a');
  \\b	obstack_1grow (&string_obstack, '\b');
  \\f	obstack_1grow (&string_obstack, '\f');
  \\n	obstack_1grow (&string_obstack, '\n');
  \\r	obstack_1grow (&string_obstack, '\r');
  \\t	obstack_1grow (&string_obstack, '\t');
  \\v	obstack_1grow (&string_obstack, '\v');

  /* Simple escapes standing for the character itself: backslash,
     double quote, single quote (needed to write '\''), and question
     mark.  */
  \\[\\\"\'?]   obstack_1grow (&string_obstack, yytext[1]);

  /* Anything else is reported on stderr, and kept as is, backslash
     included.  */
  \\.	{
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unrecognized escape: %s\n", yytext);
    YY_GROW;
  }
}


  /*----------------------------------------------------------.
  | Scanning a C character without decoding its escapes.  The |
  | initial "'" is already eaten.                             |
  `----------------------------------------------------------*/

<SC_CHARACTER>
{
  /* The closing quote: copy it, and go back to the enclosing start
     condition, which is never INITIAL.  */
  \' {
    YY_GROW;
    assert (yy_top_state () != INITIAL);
    yy_pop_state ();
  }

  /* The contents are copied verbatim: escapes are kept as written,
     backslash included.  */
  [^\[\]\'\n\r\\]      YY_GROW;
  \\.                  YY_GROW;

  {eols}               YY_GROW; YY_LINES;

  /* Unterminated character: report it on stderr and go back to the
     enclosing start condition.  */
  <<EOF>> {
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a character\n");
    assert (yy_top_state () != INITIAL);
    yy_pop_state ();
  }
}


  /*----------------------------------------------------------------.
  | Scanning a C string, without decoding its escapes.  The initial |
  | `"' is already eaten.                                           |
  `----------------------------------------------------------------*/

<SC_STRING>
{
  /* The closing quote: copy it, and go back to the enclosing start
     condition, which is never INITIAL.  */
  \" {
    assert (yy_top_state () != INITIAL);
    YY_GROW;
    yy_pop_state ();
  }

  /* The contents are copied verbatim: escapes are kept as written,
     backslash included.  */
  [^\[\]\"\n\r\\]+      YY_GROW;
  \\.                   YY_GROW;

  {eols}                YY_GROW; YY_LINES;

  /* Unterminated string: report it on stderr and go back to the
     enclosing start condition.  */
  <<EOF>> {
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a string\n");
    assert (yy_top_state () != INITIAL);
    yy_pop_state ();
  }
}


  /*---------------------------------------------------.
  | Strings, comments etc. can be found in user code.  |
  `---------------------------------------------------*/

<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
{
  /* Characters.  We don't check there is only one.  */
  \'          YY_GROW; yy_push_state (SC_CHARACTER);

  /* Strings. */
  \"          YY_GROW; yy_push_state (SC_STRING);

  /* Comments. */
  "/*"        YY_GROW; yy_push_state (SC_COMMENT);

  /* One-line comments are matched as a whole, so the `[' and `]'
     they contain never reach the rules that protect them from M4:
     output the corresponding quadrigraphs here.  */
  "//".*      {
    const char *cp;
    for (cp = yytext; *cp; ++cp)
      {
	if (*cp == '[')
	  {
	    obstack_sgrow (&string_obstack, "@<:@");
	  }
	else if (*cp == ']')
	  {
	    obstack_sgrow (&string_obstack, "@:>@");
	  }
	else
	  {
	    obstack_1grow (&string_obstack, *cp);
	  }
      }
  }
}


  /*---------------------------------------------------------------.
  | Scanning some code in braces (%union and actions). The initial |
  | "{" is already eaten.                                          |
  `---------------------------------------------------------------*/

<SC_BRACED_CODE>
{
  /* A closing brace: the code is complete once the outermost one is
     reached.  */
  "}" {
    YY_GROW;
    if (--braces_level == 0)
      {
	yy_pop_state ();
	YY_FINISH;
	return BRACED_CODE;
      }
  }

  "{"  	        YY_GROW; braces_level++;

  /* References to semantic values and locations.  The type name of
     `$<TYPE>N' stops at the first `>': a greedy match would swallow
     everything up to the last `>' of the line, e.g., the whole of
     `$<a>$ = $<b>1'.  */
  "$"("<"[^>\n\r]+">")?(-?[0-9]+|"$") { handle_dollar (yytext); }
  "@"(-?[0-9]+|"$")            { handle_at (yytext); }

  [^\[\]$/\'\"@\{\}\n\r]+ YY_GROW;
  {eols}	YY_GROW; YY_LINES;

  /* A loose $, or /, or etc. */
  .             YY_GROW;

  /* Unterminated code: report it on stderr, and return what was read
     so far as braced code, which is what was being scanned.  */
  <<EOF>> {
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a braced code\n");
    yy_pop_state ();
    YY_FINISH;
    return BRACED_CODE;
  }

}


  /*--------------------------------------------------------------.
  | Scanning some prologue: from "%{" (already scanned) to "%}".  |
  `--------------------------------------------------------------*/

<SC_PROLOGUE>
{
  /* End of the prologue.  */
  "%}" {
    yy_pop_state ();
    YY_FINISH;
    return PROLOGUE;
  }

  /* Plain code.  Brackets, quotes and slashes are excluded so that
     the rules that protect M4 characters, and those that recognize
     characters, strings and comments, get a chance to match.  */
  [^\[\]%/\'\"\n\r]+	  YY_GROW;
  {eols}	  YY_GROW; YY_LINES;

  /* A loose `%' or `/'.  This scanner has no default rule, so every
     character must be matched, including a `%' followed by an
     end-of-line, a `%' or a `}'.  */
  .		  YY_GROW;

  /* Unterminated prologue: report it on stderr, and return what was
     read so far.  */
  <<EOF>> {
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a prologue\n");
    yy_pop_state ();
    YY_FINISH;
    return PROLOGUE;
  }

}


  /*---------------------------------------------------------------.
  | Scanning the epilogue (everything after the second "%%", which |
  | has already been eaten).                                       |
  `---------------------------------------------------------------*/

<SC_EPILOGUE>
{
  /* Everything but `[' and `]' is copied as is.  */
  ([^\[\]]|{eols})+  YY_GROW;

  /* The end of the file is the regular end of the epilogue.  */
  <<EOF>> {
    yy_pop_state ();
    YY_FINISH;
    return EPILOGUE;
  }
}


%%

/*------------------------------------------------------------------.
| CP points to a reference to a semantic value, i.e., to a `$'.     |
|                                                                   |
| Possible inputs: $[<TYPENAME>]($|integer)                         |
|                                                                   |
| Output to the STRING_OBSTACK a reference to this semantic value.  |
| The string CP points to is modified: the `>' that ends TYPENAME   |
| is overwritten with a NUL.  Invalid references are reported with  |
| `complain', and produce no output.                                |
`------------------------------------------------------------------*/

static void
handle_dollar (char *cp)
{
  const char *type_name = NULL;

  /* RULE_LENGTH is the number of values in the current rule so far,
     which says where to find `$0' with respect to the top of the
     stack.  It is not the same as the rule->length in the case of mid
     rule actions.  */
  int rule_length = 0;
  symbol_list *rhs;
  for (rhs = current_rule->next; rhs; rhs = rhs->next)
    ++rule_length;

  /* Skip the `$'.  */
  ++cp;

  /* Get the type name if explicit.  Never run past the end of the
     string: an unterminated type name leaves CP on the final NUL,
     which is then reported as invalid below.  */
  if (*cp == '<')
    {
      type_name = ++cp;
      while (*cp != '\0' && *cp != '>')
	++cp;
      if (*cp == '>')
	*cp++ = '\0';
    }

  if (*cp == '$')
    {
      /* `$$': the value of the left-hand side.  */
      if (!type_name)
	type_name = get_type_name (0, current_rule);
      if (!type_name && typed)
	complain (_("$$ of `%s' has no declared type"),
		  current_rule->sym->tag);
      if (!type_name)
	type_name = "";
      obstack_fgrow1 (&string_obstack,
		      "]b4_lhs_value([%s])[", type_name);
    }
  /* The <ctype.h> functions require a value representable as an
     unsigned char, hence the cast.  */
  else if (isdigit ((unsigned char) *cp) || *cp == '-')
    {
      /* `$N': the value of the Nth symbol of the right-hand side.  */
      int n = strtol (cp, &cp, 10);

      if (n > rule_length)
	complain (_("invalid value: %s%d"), "$", n);
      else
	{
	  /* The declared type is looked up for positive N only.  */
	  if (!type_name && n > 0)
	    type_name = get_type_name (n, current_rule);
	  if (!type_name && typed)
	    complain (_("$%d of `%s' has no declared type"),
		      n, current_rule->sym->tag);
	  if (!type_name)
	    type_name = "";
	  obstack_fgrow3 (&string_obstack,
			  "]b4_rhs_value([%d], [%d], [%s])[",
			  rule_length, n, type_name);
	}
    }
  else
    {
      char buf[] = "$c";
      buf[1] = *cp;
      complain (_("%s is invalid"), quote (buf));
    }
}

/*-------------------------------------------------------.
| CP points to a reference to a location, i.e., to a     |
| `@'.  Possible inputs: @($|integer)                    |
|                                                        |
| Output to STRING_OBSTACK a reference to this location, |
| and set LOCATIONS_FLAG.  Invalid references are        |
| reported with `complain', and produce no output.       |
`-------------------------------------------------------*/

static void
handle_at (char *cp)
{
  /* RULE_LENGTH is the number of values in the current rule so far,
     which says where to find `$0' with respect to the top of the
     stack.  It is not the same as the rule->length in the case of mid
     rule actions.  */
  int rule_length = 0;
  symbol_list *rhs;
  for (rhs = current_rule->next; rhs; rhs = rhs->next)
    ++rule_length;

  /* Any `@' met in the code sets the flag, valid or not.  */
  locations_flag = 1;

  /* Skip the `@'.  */
  ++cp;

  if (*cp == '$')
    {
      /* `@$': the location of the left-hand side.  */
      obstack_sgrow (&string_obstack, "]b4_lhs_location[");
    }
  /* The <ctype.h> functions require a value representable as an
     unsigned char, hence the cast.  */
  else if (isdigit ((unsigned char) *cp) || *cp == '-')
    {
      /* `@N': the location of the Nth symbol of the right-hand
	 side.  */
      int n = strtol (cp, &cp, 10);
      if (n > rule_length)
	complain (_("invalid value: %s%d"), "@", n);
      else
	obstack_fgrow2 (&string_obstack, "]b4_rhs_location([%d], [%d])[",
			rule_length, n);
    }
  else
    {
      char buf[] = "@c";
      buf[1] = *cp;
      complain (_("%s is invalid"), quote (buf));
    }
}