parsers: support translatable token aliases

In addition to

    %token NUM "number"

accept

    %token NUM _("number")

in which case the token will be translated in error messages.
Do not use _() in the output if there are no translatable tokens.

* src/symtab.h, src/symtab.c (symbol): Add a 'translatable' member.
* src/parse-gram.y (TSTRING): New token.
(string_as_id.opt): Replace with...
(alias): this.
Use it.
* src/scan-gram.l (SC_ESCAPED_TSTRING): New start condition, to match
TSTRINGs.
* src/output.c (prepare_symbols): Define b4_translatable if there are
translatable strings.

* data/skeletons/glr.c, data/skeletons/lalr1.cc,
* data/skeletons/yacc.c (yytnamerr): Receive b4_translatable, and use it.
This commit is contained in:
Akim Demaille
2018-12-28 08:47:04 +01:00
parent e9d404415a
commit 9096955fba
6 changed files with 79 additions and 18 deletions

View File

@@ -631,8 +631,17 @@ yysymbol_name (int yysymbol)
static const char *const yy_sname[] =
{
]b4_symbol_names[
};]m4_ifdef([b4_translatable], [[
/* YYTRANSLATABLE[SYMBOL-NUM] -- Whether YYTNAME[SYMBOL-NUM] is
internationalizable. */
static ]b4_int_type_for([b4_translate])[ yytranslatable[] =
{
]b4_translatable[
};
return yy_sname[yysymbol];
return (yysymbol < YYNTOKENS && yytranslatable[yysymbol]
? _(yy_sname[yysymbol])
: yy_sname[yysymbol]);]], [[
return yy_sname[yysymbol];]])[
}]])[
#endif
@@ -1248,7 +1257,6 @@ yytnamerr (char *yyres, const char *yystr)
{
YYPTRDIFF_T yyn = 0;
char const *yyp = yystr;
for (;;)
switch (*++yyp)
{

View File

@@ -50,6 +50,10 @@ static struct obstack format_obstack;
| result of formatting the FIRST and then TABLE_DATA[BEGIN..END[ (of |
| TYPE), and to the muscle NAME_max, the max value of the |
| TABLE_DATA. |
| |
| For the typical case of outputting a complete table from 0, pass |
| TABLE[0] as FIRST, and 1 as BEGIN. For instance |
| muscle_insert_base_table ("pact", base, base[0], 1, nstates); |
`-------------------------------------------------------------------*/
@@ -248,6 +252,26 @@ prepare_symbols (void)
prepare_symbol_names ("tname");
prepare_symbol_names ("symbol_names");
/* translatable -- whether a token is translatable.  The table is
   inserted only when at least one token is translatable.  */
{
  bool translatable = false;
  for (int i = 0; i < ntokens; ++i)
    if (symbols[i]->translatable)
      {
        translatable = true;
        break;
      }
  if (translatable)
    {
      /* Only the first NTOKENS symbols are recorded below, so that
         is all the room that is needed.  */
      int *values = xnmalloc (ntokens, sizeof *values);
      for (int i = 0; i < ntokens; ++i)
        values[i] = symbols[i]->translatable;
      /* Complete table from 0: pass VALUES[0] as FIRST, and 1 as
         BEGIN.  */
      muscle_insert_int_table ("translatable", values,
                               values[0], 1, ntokens);
      free (values);
    }
}
/* Output YYTOKNUM. */
{
int *values = xnmalloc (ntokens, sizeof *values);

View File

@@ -143,6 +143,7 @@
/* Define the tokens together with their human representation. */
%token GRAM_EOF 0 "end of file"
%token STRING "string"
TSTRING "translatable string"
%token PERCENT_TOKEN "%token"
%token PERCENT_NTERM "%nterm"
@@ -216,8 +217,8 @@
%type <unsigned char> CHAR
%printer { fputs (char_name ($$), yyo); } <unsigned char>
%type <char*> "{...}" "%?{...}" "%{...%}" EPILOGUE STRING
%printer { fputs ($$, yyo); } <char*>
%type <char*> "{...}" "%?{...}" "%{...%}" EPILOGUE STRING TSTRING
%printer { fputs ($$, yyo); } <char*>
%type <uniqstr>
BRACKETED_ID ID ID_COLON
@@ -532,7 +533,7 @@ token_decl.1:
// One symbol declaration for %token or %nterm.
token_decl:
id int.opt[num] string_as_id.opt[alias]
id int.opt[num] alias
{
$$ = $id;
symbol_class_set ($id, current_class, @id, true);
@@ -549,6 +550,19 @@ int.opt:
| INT
;
%type <symbol*> alias;
/* Optional alias of a token: nothing, a plain string, or a
   translatable string.  */
alias:
  %empty                 { $$ = NULL; }
| string_as_id[str]      { $$ = $str; }
| TSTRING[tstr]
    {
      symbol *sym
        = symbol_get (quotearg_style (c_quoting_style, $tstr), @tstr);
      symbol_class_set (sym, token_sym, @tstr, false);
      /* Flag the alias as translatable.  */
      sym->translatable = true;
      $$ = sym;
    }
;
/*-------------------------------------.
| token_decls_for_prec (%left, etc.). |
`-------------------------------------*/
@@ -782,12 +796,6 @@ string_as_id:
}
;
%type <symbol*> string_as_id.opt;
string_as_id.opt:
%empty { $$ = NULL; }
| string_as_id
;
epilogue.opt:
%empty
| "%%" EPILOGUE

View File

@@ -110,8 +110,8 @@ static void unexpected_newline (boundary, char const *);
%}
/* A C-like comment in directives/rules. */
%x SC_YACC_COMMENT
/* Strings and characters in directives/rules. */
%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
/* Characters and strings in directives/rules. */
%x SC_ESCAPED_CHARACTER SC_ESCAPED_STRING SC_ESCAPED_TSTRING
/* An identifier was just read in directives/rules. Special state
to capture the sequence 'identifier :'. */
%x SC_AFTER_IDENTIFIER
@@ -318,6 +318,7 @@ eqopt ({sp}=)?
/* Strings. */
"\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
"_(\"" token_start = loc->start; BEGIN SC_ESCAPED_TSTRING;
/* Prologue. */
"%{" code_start = loc->start; BEGIN SC_PROLOGUE;
@@ -378,7 +379,7 @@ eqopt ({sp}=)?
| added value. |
`--------------------------------------------------------------*/
<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_TSTRING,SC_TAG>
{
\0 complain (loc, complaint, _("invalid null character"));
}
@@ -539,6 +540,20 @@ eqopt ({sp}=)?
{eol} unexpected_newline (token_start, "\"");
}
<SC_ESCAPED_TSTRING>
{
  /* Closing of a translatable string: finish the accumulated string
     and return it as a TSTRING.  */
  "\")" {
    STRING_FINISH;
    BEGIN INITIAL;
    loc->start = token_start;
    complain (loc, Wyacc,
              _("POSIX Yacc does not support string literals"));
    RETURN_VALUE (TSTRING, last_string);
  }
  /* This start condition is closed by the two characters `")', not
     by a lone `"' as in SC_ESCAPED_STRING: pass that terminator when
     the string is left open.  */
  <<EOF>>   unexpected_eof (token_start, "\")");
  "\n"      unexpected_newline (token_start, "\")");
}
/*----------------------------------------------------------.
| Scanning a Bison character literal, decoding its escapes. |
| The initial quote is already eaten. |
@@ -601,7 +616,7 @@ eqopt ({sp}=)?
| Decode escaped characters. |
`----------------------------*/
<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_TSTRING>
{
\\[0-7]{1,3} {
verify (UCHAR_MAX < ULONG_MAX);
@@ -797,7 +812,7 @@ eqopt ({sp}=)?
| By default, grow the string obstack with the input. |
`-----------------------------------------------------*/
<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_TSTRING>
{
/* Accept multibyte characters in one block instead of byte after
byte, so that add_column_width and mbsnwidth can compute correct

View File

@@ -109,6 +109,7 @@ symbol_new (uniqstr tag, location loc)
res->tag = tag;
res->location = loc;
res->translatable = false;
res->location_of_lhs = false;
res->alias = NULL;
res->content = sym_content_new (res);
@@ -954,7 +955,7 @@ dummy_symbol_get (location loc)
}
/* Whether SYM is a dummy symbol, i.e., whether its tag starts with
   '@' or with "$@".  */
bool
symbol_is_dummy (const symbol *sym)
symbol_is_dummy (symbol const *sym)
{
return sym->tag[0] == '@' || (sym->tag[0] == '$' && sym->tag[1] == '@');
}

View File

@@ -97,6 +97,9 @@ struct symbol
/** The "defining" location. */
location location;
/** Whether this symbol is translatable. */
bool translatable;
/** Whether \a location is about the first uses as left-hand side
symbol of a rule (true), or simply the first occurrence (e.g.,
in a %type, or as a rhs symbol of a rule). The former type of
@@ -117,6 +120,8 @@ struct symbol
struct sym_content
{
/** The main symbol that denotes this content (it contains the
possible alias). */
symbol *symbol;
/** Its \c \%type.
@@ -179,7 +184,7 @@ symbol *dummy_symbol_get (location loc);
void symbol_print (symbol const *s, FILE *f);
/** Is this a dummy nonterminal? */
bool symbol_is_dummy (const symbol *sym);
bool symbol_is_dummy (symbol const *sym);
/** The name of the code_props type: "\%destructor" or "\%printer". */
char const *code_props_type_string (code_props_type kind);