parsers: support translatable token aliases

In addition to %token NUM "number", accept %token NUM _("number"), in which case the token will be translated in error messages. Do not use _() in the output if there are no translatable tokens. * src/symtab.h, src/symtab.c (symbol): Add a 'translatable' member. * src/parse-gram.y (TSTRING): New token. (string_as_id.opt): Replace with... (alias): this. Use it. * src/scan-gram.l (SC_ESCAPED_TSTRING): New start condition, to match TSTRINGs. * src/output.c (prepare_symbols): Define b4_translatable if there are translatable strings. * data/skeletons/glr.c, data/skeletons/lalr1.cc, * data/skeletons/yacc.c (yytnamerr): Receive b4_translatable, and use it.
2026-07-25 09:20:34 +00:00 · 2020-01-19 21:23:11 +01:00
parent e9d404415a
commit 9096955fba
6 changed files with 79 additions and 18 deletions
@@ -631,8 +631,17 @@ yysymbol_name (int yysymbol)
  static const char *const yy_sname[] =
  {
  ]b4_symbol_names[
+  };]m4_ifdef([b4_translatable], [[
+  /* YYTRANSLATABLE[SYMBOL-NUM] -- Whether YY_SNAME[SYMBOL-NUM] is
+     internationalizable.  Only token numbers index this table.  */
+  static const ]b4_int_type_for([b4_translatable])[ yytranslatable[] =
+  {
+  ]b4_translatable[
  };
-  return yy_sname[yysymbol];
+  return (yysymbol < YYNTOKENS && yytranslatable[yysymbol]
+          ? _(yy_sname[yysymbol])
+          : yy_sname[yysymbol]);]], [[
+  return yy_sname[yysymbol];]])[
 }]])[
 #endif

@@ -1248,7 +1257,6 @@ yytnamerr (char *yyres, const char *yystr)
    {
      YYPTRDIFF_T yyn = 0;
      char const *yyp = yystr;
-
      for (;;)
        switch (*++yyp)
          {
@@ -50,6 +50,10 @@ static struct obstack format_obstack;
 | result of formatting the FIRST and then TABLE_DATA[BEGIN..END[ (of |
 | TYPE), and to the muscle NAME_max, the max value of the            |
 | TABLE_DATA.                                                        |
+|                                                                    |
+| For the typical case of outputting a complete table from 0, pass   |
+| TABLE[0] as FIRST, and 1 as BEGIN.  For instance                   |
+| muscle_insert_base_table ("pact", base, base[0], 1, nstates);      |
 `-------------------------------------------------------------------*/


@@ -248,6 +252,26 @@ prepare_symbols (void)
  prepare_symbol_names ("tname");
  prepare_symbol_names ("symbol_names");

+  /* translatable -- per-token flags; defined only if some token is. */
+  {
+    bool translatable = false;
+    for (int i = 0; i < ntokens; ++i)
+      if (symbols[i]->translatable)
+        {
+          translatable = true;
+          break;
+        }
+    if (translatable)
+      {
+        int *values = xnmalloc (ntokens, sizeof *values);
+        for (int i = 0; i < ntokens; ++i)
+          values[i] = symbols[i]->translatable;
+        muscle_insert_int_table ("translatable", values,
+                                 values[0], 1, ntokens);
+        free (values);
+      }
+  }
+
  /* Output YYTOKNUM. */
  {
    int *values = xnmalloc (ntokens, sizeof *values);
@@ -143,6 +143,7 @@
 /* Define the tokens together with their human representation.  */
 %token GRAM_EOF 0 "end of file"
 %token STRING     "string"
+       TSTRING    "translatable string"

 %token PERCENT_TOKEN       "%token"
 %token PERCENT_NTERM       "%nterm"
@@ -216,8 +217,8 @@
 %type <unsigned char> CHAR
 %printer { fputs (char_name ($$), yyo); } <unsigned char>

-%type <char*> "{...}" "%?{...}" "%{...%}" EPILOGUE STRING
-%printer { fputs ($$, yyo); }  <char*>
+%type <char*> "{...}" "%?{...}" "%{...%}" EPILOGUE STRING TSTRING
+%printer { fputs ($$, yyo); } <char*>

 %type <uniqstr>
  BRACKETED_ID ID ID_COLON
@@ -532,7 +533,7 @@ token_decl.1:

 // One symbol declaration for %token or %nterm.
 token_decl:
-  id int.opt[num] string_as_id.opt[alias]
+  id int.opt[num] alias
    {
      $$ = $id;
      symbol_class_set ($id, current_class, @id, true);
@@ -549,6 +550,19 @@ int.opt:
 | INT
 ;

+%type <symbol*> alias;
+alias:
+  %empty         { $$ = NULL; }   // No alias given.
+| string_as_id   { $$ = $1; }     // Alias symbol built by string_as_id.
+| TSTRING                         // Alias scanned from _("...").
+    {
+      $$ = symbol_get (quotearg_style (c_quoting_style, $1), @1);
+      symbol_class_set ($$, token_sym, @1, false);
+      $$->translatable = true;  /* Read by prepare_symbols.  */
+    }
+;
+
+
 /*-------------------------------------.
 | token_decls_for_prec (%left, etc.).  |
 `-------------------------------------*/
@@ -782,12 +796,6 @@ string_as_id:
    }
 ;

-%type <symbol*> string_as_id.opt;
-string_as_id.opt:
-  %empty             { $$ = NULL; }
-| string_as_id
-;
-
 epilogue.opt:
  %empty
 | "%%" EPILOGUE
@@ -110,8 +110,8 @@ static void unexpected_newline (boundary, char const *);
 %}
 /* A C-like comment in directives/rules. */
 %x SC_YACC_COMMENT
- /* Strings and characters in directives/rules. */
-%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
+ /* Characters and strings in directives/rules. */
+%x SC_ESCAPED_CHARACTER SC_ESCAPED_STRING SC_ESCAPED_TSTRING
 /* A identifier was just read in directives/rules.  Special state
    to capture the sequence 'identifier :'. */
 %x SC_AFTER_IDENTIFIER
@@ -318,6 +318,7 @@ eqopt    ({sp}=)?

  /* Strings. */
  "\""        token_start = loc->start; BEGIN SC_ESCAPED_STRING;
+  "_(\""      token_start = loc->start; BEGIN SC_ESCAPED_TSTRING;

  /* Prologue. */
  "%{"        code_start = loc->start; BEGIN SC_PROLOGUE;
@@ -378,7 +379,7 @@ eqopt    ({sp}=)?
  | added value.                                                  |
  `--------------------------------------------------------------*/

-<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
+<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_TSTRING,SC_TAG>
 {
  \0        complain (loc, complaint, _("invalid null character"));
 }
@@ -539,6 +540,20 @@ eqopt    ({sp}=)?
  {eol}     unexpected_newline (token_start, "\"");
 }

+<SC_ESCAPED_TSTRING>
+{
+  "\")" { /* End of a translatable string: finish and return it.  */
+    STRING_FINISH;
+    BEGIN INITIAL;
+    loc->start = token_start;
+    complain (loc, Wyacc,
+              _("POSIX Yacc does not support string literals"));
+    RETURN_VALUE (TSTRING, last_string);
+  }
+  <<EOF>>   unexpected_eof (token_start, "\")");
+  "\n"      unexpected_newline (token_start, "\")");
+}
+
  /*----------------------------------------------------------.
  | Scanning a Bison character literal, decoding its escapes. |
  | The initial quote is already eaten.                       |
@@ -601,7 +616,7 @@ eqopt    ({sp}=)?
  | Decode escaped characters.  |
  `----------------------------*/

-<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
+<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_TSTRING>
 {
  \\[0-7]{1,3} {
    verify (UCHAR_MAX < ULONG_MAX);
@@ -797,7 +812,7 @@ eqopt    ({sp}=)?
  | By default, grow the string obstack with the input.  |
  `-----------------------------------------------------*/

-<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
+<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_TSTRING>
 {
  /* Accept multibyte characters in one block instead of byte after
     byte, so that add_column_width and mbsnwidth can compute correct
@@ -109,6 +109,7 @@ symbol_new (uniqstr tag, location loc)

  res->tag = tag;
  res->location = loc;
+  res->translatable = false;
  res->location_of_lhs = false;
  res->alias = NULL;
  res->content = sym_content_new (res);
@@ -954,7 +955,7 @@ dummy_symbol_get (location loc)
 }

 bool
-symbol_is_dummy (const symbol *sym)
+symbol_is_dummy (symbol const *sym)
 {
  return sym->tag[0] == '@' || (sym->tag[0] == '$' && sym->tag[1] == '@');
 }
@@ -97,6 +97,9 @@ struct symbol
  /** The "defining" location.  */
  location location;

+  /** Whether this symbol is translatable. */
+  bool translatable;
+
  /** Whether \a location is about the first uses as left-hand side
      symbol of a rule (true), or simply the first occurrence (e.g.,
      in a %type, or as a rhs symbol of a rule).  The former type of
@@ -117,6 +120,8 @@ struct symbol

 struct sym_content
 {
+  /** The main symbol that denotes this content (it contains the
+      possible alias). */
  symbol *symbol;

  /** Its \c \%type.
@@ -179,7 +184,7 @@ symbol *dummy_symbol_get (location loc);
 void symbol_print (symbol const *s, FILE *f);

 /** Is this a dummy nonterminal?  */
-bool symbol_is_dummy (const symbol *sym);
+bool symbol_is_dummy (symbol const *sym);

 /** The name of the code_props type: "\%destructor" or "\%printer".  */
 char const *code_props_type_string (code_props_type kind);