From 9096955fbac0d45c56829109a7b87805dde89912 Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Fri, 28 Dec 2018 08:47:04 +0100 Subject: [PATCH] parsers: support translatable token aliases In addition to %token NUM "number" accept %token NUM _("number") in which case the token will be translated in error messages. Do not use _() in the output if there are no translatable tokens. * src/symtab.h, src/symtab.c (symbol): Add a 'translatable' member. * src/parse-gram.y (TSTRING): New token. (string_as_id.opt): Replace with... (alias): this. Use it. * src/scan-gram.l (SC_ESCAPED_TSTRING): New start conditions, to match TSTRINGs. * src/output.c (prepare_symbols): Define b4_translatable if there are translatable strings. * data/skeletons/glr.c, data/skeletons/lalr1.cc, * data/skeletons/yacc.c (yytnamerr): Receive b4_translatable, and use it. --- data/skeletons/yacc.c | 12 ++++++++++-- src/output.c | 24 ++++++++++++++++++++++++ src/parse-gram.y | 26 +++++++++++++++++--------- src/scan-gram.l | 25 ++++++++++++++++++++----- src/symtab.c | 3 ++- src/symtab.h | 7 ++++++- 6 files changed, 79 insertions(+), 18 deletions(-) diff --git a/data/skeletons/yacc.c b/data/skeletons/yacc.c index 83777ebb..44904d6c 100644 --- a/data/skeletons/yacc.c +++ b/data/skeletons/yacc.c @@ -631,8 +631,17 @@ yysymbol_name (int yysymbol) static const char *const yy_sname[] = { ]b4_symbol_names[ + };]m4_ifdef([b4_translatable], [[ + /* YYTRANSLATABLE[SYMBOL-NUM] -- Whether YY_SNAME[SYMBOL-NUM] is + internationalizable. */ + static const ]b4_int_type_for([b4_translatable])[ yytranslatable[] = + { + ]b4_translatable[ }; - return yy_sname[yysymbol]; + return (yysymbol < YYNTOKENS && yytranslatable[yysymbol] + ? 
_(yy_sname[yysymbol]) + : yy_sname[yysymbol]);]], [[ + return yy_sname[yysymbol];]])[ }]])[ #endif @@ -1248,7 +1257,6 @@ yytnamerr (char *yyres, const char *yystr) { YYPTRDIFF_T yyn = 0; char const *yyp = yystr; - for (;;) switch (*++yyp) { diff --git a/src/output.c b/src/output.c index 8de00ed2..6f1e9f45 100644 --- a/src/output.c +++ b/src/output.c @@ -50,6 +50,10 @@ static struct obstack format_obstack; | result of formatting the FIRST and then TABLE_DATA[BEGIN..END[ (of | | TYPE), and to the muscle NAME_max, the max value of the | | TABLE_DATA. | +| | +| For the typical case of outputting a complete table from 0, pass | +| TABLE[0] as FIRST, and 1 as BEGIN. For instance | +| muscle_insert_base_table ("pact", base, base[0], 1, nstates); | `-------------------------------------------------------------------*/ @@ -248,6 +252,26 @@ prepare_symbols (void) prepare_symbol_names ("tname"); prepare_symbol_names ("symbol_names"); + /* translatable -- whether a token is translatable. */ + { + bool translatable = false; + for (int i = 0; i < ntokens; ++i) + if (symbols[i]->translatable) + { + translatable = true; + break; + } + if (translatable) + { + int *values = xnmalloc (nsyms, sizeof *values); + for (int i = 0; i < ntokens; ++i) + values[i] = symbols[i]->translatable; + muscle_insert_int_table ("translatable", values, + values[0], 1, ntokens); + free (values); + } + } + /* Output YYTOKNUM. */ { int *values = xnmalloc (ntokens, sizeof *values); diff --git a/src/parse-gram.y b/src/parse-gram.y index 199dfae8..328d7e62 100644 --- a/src/parse-gram.y +++ b/src/parse-gram.y @@ -143,6 +143,7 @@ /* Define the tokens together with their human representation. 
*/ %token GRAM_EOF 0 "end of file" %token STRING "string" + TSTRING "translatable string" %token PERCENT_TOKEN "%token" %token PERCENT_NTERM "%nterm" @@ -216,8 +217,8 @@ %type CHAR %printer { fputs (char_name ($$), yyo); } -%type "{...}" "%?{...}" "%{...%}" EPILOGUE STRING -%printer { fputs ($$, yyo); } +%type "{...}" "%?{...}" "%{...%}" EPILOGUE STRING TSTRING +%printer { fputs ($$, yyo); } %type BRACKETED_ID ID ID_COLON @@ -532,7 +533,7 @@ token_decl.1: // One symbol declaration for %token or %nterm. token_decl: - id int.opt[num] string_as_id.opt[alias] + id int.opt[num] alias { $$ = $id; symbol_class_set ($id, current_class, @id, true); @@ -549,6 +550,19 @@ int.opt: | INT ; +%type alias; +alias: + %empty { $$ = NULL; } +| string_as_id { $$ = $1; } +| TSTRING + { + $$ = symbol_get (quotearg_style (c_quoting_style, $1), @1); + symbol_class_set ($$, token_sym, @1, false); + $$->translatable = true; + } +; + + /*-------------------------------------. | token_decls_for_prec (%left, etc.). | `-------------------------------------*/ @@ -782,12 +796,6 @@ string_as_id: } ; -%type string_as_id.opt; -string_as_id.opt: - %empty { $$ = NULL; } -| string_as_id -; - epilogue.opt: %empty | "%%" EPILOGUE diff --git a/src/scan-gram.l b/src/scan-gram.l index 2b5eb7f9..94e8a8ba 100644 --- a/src/scan-gram.l +++ b/src/scan-gram.l @@ -110,8 +110,8 @@ static void unexpected_newline (boundary, char const *); %} /* A C-like comment in directives/rules. */ %x SC_YACC_COMMENT - /* Strings and characters in directives/rules. */ -%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER + /* Characters and strings in directives/rules. */ +%x SC_ESCAPED_CHARACTER SC_ESCAPED_STRING SC_ESCAPED_TSTRING /* A identifier was just read in directives/rules. Special state to capture the sequence 'identifier :'. */ %x SC_AFTER_IDENTIFIER @@ -318,6 +318,7 @@ eqopt ({sp}=)? /* Strings. */ "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING; + "_(\"" token_start = loc->start; BEGIN SC_ESCAPED_TSTRING; /* Prologue. 
*/ "%{" code_start = loc->start; BEGIN SC_PROLOGUE; @@ -378,7 +379,7 @@ eqopt ({sp}=)? | added value. | `--------------------------------------------------------------*/ - + { \0 complain (loc, complaint, _("invalid null character")); } @@ -539,6 +540,20 @@ eqopt ({sp}=)? {eol} unexpected_newline (token_start, "\""); } +<SC_ESCAPED_TSTRING> +{ + "\")" { + STRING_FINISH; + BEGIN INITIAL; + loc->start = token_start; + complain (loc, Wyacc, + _("POSIX Yacc does not support string literals")); + RETURN_VALUE (TSTRING, last_string); + } + <<EOF>> unexpected_eof (token_start, "\")"); + "\n" unexpected_newline (token_start, "\")"); +} + /*----------------------------------------------------------. | Scanning a Bison character literal, decoding its escapes. | | The initial quote is already eaten. | @@ -601,7 +616,7 @@ eqopt ({sp}=)? | Decode escaped characters. | `----------------------------*/ - + { \\[0-7]{1,3} { verify (UCHAR_MAX < ULONG_MAX); @@ -797,7 +812,7 @@ eqopt ({sp}=)? | By default, grow the string obstack with the input. | `-----------------------------------------------------*/ - + { /* Accept multibyte characters in one block instead of byte after byte, so that add_column_width and mbsnwidth can compute correct diff --git a/src/symtab.c b/src/symtab.c index 35c0930e..72833653 100644 --- a/src/symtab.c +++ b/src/symtab.c @@ -109,6 +109,7 @@ symbol_new (uniqstr tag, location loc) res->tag = tag; res->location = loc; + res->translatable = false; res->location_of_lhs = false; res->alias = NULL; res->content = sym_content_new (res); @@ -954,7 +955,7 @@ dummy_symbol_get (location loc) } bool -symbol_is_dummy (const symbol *sym) +symbol_is_dummy (symbol const *sym) { return sym->tag[0] == '@' || (sym->tag[0] == '$' && sym->tag[1] == '@'); } diff --git a/src/symtab.h b/src/symtab.h index a92e00a9..66017525 100644 --- a/src/symtab.h +++ b/src/symtab.h @@ -97,6 +97,9 @@ struct symbol /** The "defining" location. */ location location; + /** Whether this symbol is translatable. 
*/ + bool translatable; + /** Whether \a location is about the first uses as left-hand side symbol of a rule (true), or simply the first occurrence (e.g., in a %type, or as a rhs symbol of a rule). The former type of @@ -117,6 +120,8 @@ struct symbol struct sym_content { + /** The main symbol that denotes this content (it contains the + possible alias). */ symbol *symbol; /** Its \c \%type. @@ -179,7 +184,7 @@ symbol *dummy_symbol_get (location loc); void symbol_print (symbol const *s, FILE *f); /** Is this a dummy nonterminal? */ -bool symbol_is_dummy (const symbol *sym); +bool symbol_is_dummy (symbol const *sym); /** The name of the code_props type: "\%destructor" or "\%printer". */ char const *code_props_type_string (code_props_type kind);