From 2bc886dc02f00e7ebbaa008af8fdfce45cebadcd Mon Sep 17 00:00:00 2001 From: Adela Vais Date: Fri, 11 Sep 2020 15:53:45 +0300 Subject: [PATCH] d: make enum SymbolKind idiomatic D Taking into account comments from H. S. Teoh. https://lists.gnu.org/r/bison-patches/2020-09/msg00021.html * data/skeletons/d.m4, data/skeletons/lalr1.d (SymbolKind): Wrap the enum in a structure that contains its string representation. --- TODO | 84 ------------------------------------------ data/skeletons/d.m4 | 65 ++++++++++++++++++++++++++++++-- data/skeletons/lalr1.d | 64 +++++--------------------------- 3 files changed, 71 insertions(+), 142 deletions(-) diff --git a/TODO b/TODO index 98849e7a..3f4d1978 100644 --- a/TODO +++ b/TODO @@ -242,90 +242,6 @@ are. Keep the same variable names. If you change the wording in one place, do it in the others too. In other words: make sure to keep the maintenance *simple* by avoiding any gratuitous difference. -** Rename the D example -Move the current content of examples/d into examples/d/simple. - -** Create a second example -Duplicate examples/d/simple into examples/d/calc. - -** Add location tracking to d/calc -Look at the examples in the other languages to see how to do that. - -** yysymbol_name -The SymbolKind is an enum. For a given SymbolKind we want to get its string -representation. Currently it's a separate table in the parser that does -that: - - /* Symbol kinds. */ - public enum SymbolKind - { - S_YYEMPTY = -2, /* No symbol. */ - S_YYEOF = 0, /* "end of file" */ - S_YYerror = 1, /* error */ - S_YYUNDEF = 2, /* "invalid token" */ - S_EQ = 3, /* "=" */ - ... - S_input = 14, /* input */ - S_line = 15, /* line */ - S_exp = 16, /* exp */ - }; - - ... - - /* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. - First, the terminals, then, starting at \a yyntokens_, nonterminals. */ - private static immutable string[] yytname_ = - [ - "\"end of file\"", "error", "\"invalid token\"", "\"=\"", "\"+\"", - "\"-\"", "\"*\"", "\"/\"", "\"(\"", "\")\"", "\"end of line\"", - "\"number\"", "UNARY", "$accept", "input", "line", "exp", null - ]; - - ... - -So to get a symbol kind, one runs `yytname_[yykind]`. - -Is there a way to attach this conversion to string to SymbolKind? In Java -for instance, we have: - - public enum SymbolKind - { - S_YYEOF(0), /* "end of file" */ - S_YYerror(1), /* error */ - S_YYUNDEF(2), /* "invalid token" */ - ... - S_input(16), /* input */ - S_line(17), /* line */ - S_exp(18); /* exp */ - - private final int yycode_; - - SymbolKind (int n) { - this.yycode_ = n; - } - ... - /* YYNAMES_[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. - First, the terminals, then, starting at \a YYNTOKENS_, nonterminals. */ - private static final String[] yynames_ = yynames_init(); - private static final String[] yynames_init() - { - return new String[] - { - i18n("end of file"), i18n("error"), i18n("invalid token"), "!", "+", "-", "*", - "/", "^", "(", ")", "=", i18n("end of line"), i18n("number"), "NEG", - "$accept", "input", "line", "exp", null - }; - } - - /* The user-facing name of this symbol. */ - public final String getName() { - return yynames_[yycode_]; - } - }; - -which allows to write more naturally `yykind.getName()` rather than -`yytname_[yykind]`. Is there something comparable in (idiomatic) D? - ** Change the return value of yylex Historically people were allowed to return any int from the scanner (which is convenient and allows `return '+'` from the scanner). Akim tends to see diff --git a/data/skeletons/d.m4 b/data/skeletons/d.m4 index edb0c49e..7a9d3efe 100644 --- a/data/skeletons/d.m4 +++ b/data/skeletons/d.m4 @@ -204,13 +204,72 @@ m4_define([b4_symbol_enum], # to use a signed type, which matters for yytoken. m4_define([b4_declare_symbol_enum], [[ /* Symbol kinds. */ - public enum SymbolKind + struct SymbolKind { + enum + { ]b4_symbol(-2, kind_base)[ = -2, /* No symbol. */ ]b4_symbol_foreach([b4_symbol_enum])dnl -[ }; -]])]) +[ } + private int yycode_; + alias yycode_ this; + + this(int code) + { + yycode_ = code; + } + + /* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. + First, the terminals, then, starting at \a YYNTOKENS_, nonterminals. */ + static immutable string[] yytname_ = @{ + ]b4_tname[ + @}; + + /* Return YYSTR after stripping away unnecessary quotes and + backslashes, so that it's suitable for yyerror. The heuristic is + that double-quoting is unnecessary unless the string contains an + apostrophe, a comma, or backslash (other than backslash-backslash). + YYSTR is taken from yytname. */ + final void toString(W)(W sink) const + if (isOutputRange!(W, char)) + { + string yystr = yytname_[yycode_]; + + if (yystr[0] == '"') + { + string yyr; + strip_quotes: + for (int i = 1; i < yystr.length; i++) + switch (yystr[i]) + { + case '\'': + case ',': + break strip_quotes; + + case '\\': + if (yystr[++i] != '\\') + break strip_quotes; + goto default; + default: + yyr ~= yystr[i]; + break; + + case '"': + put(sink, yyr); + return; + } + } + else if (yystr == "$end") + { + put(sink, "end of input"); + return; + } + + put(sink, yystr); + } + } +]]) # b4-case(ID, CODE, [COMMENTS]) diff --git a/data/skeletons/lalr1.d b/data/skeletons/lalr1.d index 761125cb..e879dabf 100644 --- a/data/skeletons/lalr1.d +++ b/data/skeletons/lalr1.d @@ -361,41 +361,6 @@ b4_user_union_members return YYNEWSTATE; } - /* Return YYSTR after stripping away unnecessary quotes and - backslashes, so that it's suitable for yyerror. The heuristic is - that double-quoting is unnecessary unless the string contains an - apostrophe, a comma, or backslash (other than backslash-backslash). - YYSTR is taken from yytname. */ - private final string yytnamerr_ (string yystr) - { - if (yystr[0] == '"') - { - string yyr; - strip_quotes: - for (int i = 1; i < yystr.length; i++) - switch (yystr[i]) - { - case '\'': - case ',': - break strip_quotes; - - case '\\': - if (yystr[++i] != '\\') - break strip_quotes; - goto default; - default: - yyr ~= yystr[i]; - break; - - case '"': - return yyr; - } - } - else if (yystr == "$end") - return "end of input"; - - return yystr; - } ]b4_parse_trace_if([[ /*--------------------------------. | Print this symbol on YYOUTPUT. | @@ -408,12 +373,12 @@ b4_locations_if([, ref ]b4_location_type[ yylocationp])[) if (0 < yydebug) { string message = s ~ (yykind < yyntokens_ ? " token " : " nterm ") - ~ yytname_[yykind] ~ " ("]b4_locations_if([ + ~ format("%s", yykind) ~ " ("]b4_locations_if([ ~ yylocationp.toString() ~ ": "])[; - static if (__traits(compiles, message ~= yyvaluep.toString ())) - message ~= yyvaluep.toString (); + static if (__traits(compiles, message ~= yyvaluep.toString())) + message ~= yyvaluep.toString(); else - message ~= format ("%s", &yyvaluep); + message ~= format("%s", &yyvaluep); message ~= ")"; yycdebugln (message); } @@ -732,7 +697,7 @@ m4_popdef([b4_at_dollar])])dnl // FIXME: This method of building the message is not compatible // with internationalization. string res = "syntax error, unexpected "; - res ~= yytnamerr_ (yytname_[tok]); + res ~= format!"%s"(tok); int yyn = yypact_[yystate]; if (!yy_pact_value_is_default_ (yyn)) { @@ -757,7 +722,7 @@ m4_popdef([b4_at_dollar])])dnl && !yy_table_value_is_error_ (yytable_[x + yyn])) { res ~= count++ == 0 ? ", expecting " : " or "; - res ~= yytnamerr_ (yytname_[x]); + res ~= format!"%s"(SymbolKind(x)); } } } @@ -795,13 +760,6 @@ m4_popdef([b4_at_dollar])])dnl ]b4_parser_tables_define[ - /* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. - First, the terminals, then, starting at \a yyntokens_, nonterminals. */ - private static immutable string[] yytname_ = - @{ - ]b4_tname[ - @}; - ]b4_parse_trace_if([[ /* YYRLINE[YYN] -- Source line where rule number YYN was defined. */ private static immutable ]b4_int_type_for([b4_rline])[[] yyrline_ = @@ -831,11 +789,10 @@ m4_popdef([b4_at_dollar])])dnl } ]])[ - private static SymbolKind yytranslate_ (int t) + private static auto yytranslate_ (int t) { ]b4_api_token_raw_if( -[[ import std.conv : to; - return to!SymbolKind (t);]], +[[ return SymbolKind(t);]], [[ /* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */ immutable ]b4_int_type_for([b4_translate])[[] translate_table = @{ @@ -848,10 +805,7 @@ m4_popdef([b4_at_dollar])])dnl if (t <= 0) return ]b4_symbol(0, kind)[; else if (t <= code_max) - { - import std.conv : to; - return to!SymbolKind (translate_table[t]); - } + return SymbolKind(translate_table[t]); else return ]b4_symbol(2, kind)[;]])[ }