d: make enum SymbolKind idiomatic D

Taking into account comments from H. S. Teoh.
https://lists.gnu.org/r/bison-patches/2020-09/msg00021.html

* data/skeletons/d.m4, data/skeletons/lalr1.d (SymbolKind): Wrap the
enum in a structure that contains its string representation.
This commit is contained in:
Adela Vais
2020-09-11 15:53:45 +03:00
committed by Akim Demaille
parent b5e6d9c4ca
commit 2bc886dc02
3 changed files with 71 additions and 142 deletions

84
TODO
View File

@@ -242,90 +242,6 @@ are. Keep the same variable names. If you change the wording in one place,
do it in the others too. In other words: make sure to keep the
maintenance *simple* by avoiding any gratuitous difference.
** Rename the D example
Move the current content of examples/d into examples/d/simple.
** Create a second example
Duplicate examples/d/simple into examples/d/calc.
** Add location tracking to d/calc
Look at the examples in the other languages to see how to do that.
** yysymbol_name
The SymbolKind is an enum. For a given SymbolKind we want to get its string
representation. Currently it's a separate table in the parser that does
that:
/* Symbol kinds. */
public enum SymbolKind
{
S_YYEMPTY = -2, /* No symbol. */
S_YYEOF = 0, /* "end of file" */
S_YYerror = 1, /* error */
S_YYUNDEF = 2, /* "invalid token" */
S_EQ = 3, /* "=" */
...
S_input = 14, /* input */
S_line = 15, /* line */
S_exp = 16, /* exp */
};
...
/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
First, the terminals, then, starting at \a yyntokens_, nonterminals. */
private static immutable string[] yytname_ =
[
"\"end of file\"", "error", "\"invalid token\"", "\"=\"", "\"+\"",
"\"-\"", "\"*\"", "\"/\"", "\"(\"", "\")\"", "\"end of line\"",
"\"number\"", "UNARY", "$accept", "input", "line", "exp", null
];
...
So to get a symbol kind, one runs `yytname_[yykind]`.
Is there a way to attach this conversion to string to SymbolKind? In Java
for instance, we have:
public enum SymbolKind
{
S_YYEOF(0), /* "end of file" */
S_YYerror(1), /* error */
S_YYUNDEF(2), /* "invalid token" */
...
S_input(16), /* input */
S_line(17), /* line */
S_exp(18); /* exp */
private final int yycode_;
SymbolKind (int n) {
this.yycode_ = n;
}
...
/* YYNAMES_[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
First, the terminals, then, starting at \a YYNTOKENS_, nonterminals. */
private static final String[] yynames_ = yynames_init();
private static final String[] yynames_init()
{
return new String[]
{
i18n("end of file"), i18n("error"), i18n("invalid token"), "!", "+", "-", "*",
"/", "^", "(", ")", "=", i18n("end of line"), i18n("number"), "NEG",
"$accept", "input", "line", "exp", null
};
}
/* The user-facing name of this symbol. */
public final String getName() {
return yynames_[yycode_];
}
};
which allows to write more naturally `yykind.getName()` rather than
`yytname_[yykind]`. Is there something comparable in (idiomatic) D?
** Change the return value of yylex
Historically people were allowed to return any int from the scanner (which
is convenient and allows `return '+'` from the scanner). Akim tends to see

View File

@@ -204,13 +204,72 @@ m4_define([b4_symbol_enum],
# to use a signed type, which matters for yytoken.
m4_define([b4_declare_symbol_enum],
[[ /* Symbol kinds. */
public enum SymbolKind
struct SymbolKind
{
enum
{
]b4_symbol(-2, kind_base)[ = -2, /* No symbol. */
]b4_symbol_foreach([b4_symbol_enum])dnl
[ };
]])])
[ }
private int yycode_;
alias yycode_ this;
this(int code)
{
yycode_ = code;
}
/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
First, the terminals, then, starting at \a YYNTOKENS_, nonterminals. */
static immutable string[] yytname_ = @{
]b4_tname[
@};
/* Return YYSTR after stripping away unnecessary quotes and
backslashes, so that it's suitable for yyerror. The heuristic is
that double-quoting is unnecessary unless the string contains an
apostrophe, a comma, or backslash (other than backslash-backslash).
YYSTR is taken from yytname. */
final void toString(W)(W sink) const
if (isOutputRange!(W, char))
{
string yystr = yytname_[yycode_];
if (yystr[0] == '"')
{
string yyr;
strip_quotes:
for (int i = 1; i < yystr.length; i++)
switch (yystr[i])
{
case '\'':
case ',':
break strip_quotes;
case '\\':
if (yystr[++i] != '\\')
break strip_quotes;
goto default;
default:
yyr ~= yystr[i];
break;
case '"':
put(sink, yyr);
return;
}
}
else if (yystr == "$end")
{
put(sink, "end of input");
return;
}
put(sink, yystr);
}
}
]])
# b4-case(ID, CODE, [COMMENTS])

View File

@@ -361,41 +361,6 @@ b4_user_union_members
return YYNEWSTATE;
}
/* Return YYSTR after stripping away unnecessary quotes and
backslashes, so that it's suitable for yyerror. The heuristic is
that double-quoting is unnecessary unless the string contains an
apostrophe, a comma, or backslash (other than backslash-backslash).
YYSTR is taken from yytname. */
private final string yytnamerr_ (string yystr)
{
if (yystr[0] == '"')
{
string yyr;
strip_quotes:
for (int i = 1; i < yystr.length; i++)
switch (yystr[i])
{
case '\'':
case ',':
break strip_quotes;
case '\\':
if (yystr[++i] != '\\')
break strip_quotes;
goto default;
default:
yyr ~= yystr[i];
break;
case '"':
return yyr;
}
}
else if (yystr == "$end")
return "end of input";
return yystr;
}
]b4_parse_trace_if([[
/*--------------------------------.
| Print this symbol on YYOUTPUT. |
@@ -408,12 +373,12 @@ b4_locations_if([, ref ]b4_location_type[ yylocationp])[)
if (0 < yydebug)
{
string message = s ~ (yykind < yyntokens_ ? " token " : " nterm ")
~ yytname_[yykind] ~ " ("]b4_locations_if([
~ format("%s", yykind) ~ " ("]b4_locations_if([
~ yylocationp.toString() ~ ": "])[;
static if (__traits(compiles, message ~= yyvaluep.toString ()))
message ~= yyvaluep.toString ();
static if (__traits(compiles, message ~= yyvaluep.toString()))
message ~= yyvaluep.toString();
else
message ~= format ("%s", &yyvaluep);
message ~= format("%s", &yyvaluep);
message ~= ")";
yycdebugln (message);
}
@@ -732,7 +697,7 @@ m4_popdef([b4_at_dollar])])dnl
// FIXME: This method of building the message is not compatible
// with internationalization.
string res = "syntax error, unexpected ";
res ~= yytnamerr_ (yytname_[tok]);
res ~= format!"%s"(tok);
int yyn = yypact_[yystate];
if (!yy_pact_value_is_default_ (yyn))
{
@@ -757,7 +722,7 @@ m4_popdef([b4_at_dollar])])dnl
&& !yy_table_value_is_error_ (yytable_[x + yyn]))
{
res ~= count++ == 0 ? ", expecting " : " or ";
res ~= yytnamerr_ (yytname_[x]);
res ~= format!"%s"(SymbolKind(x));
}
}
}
@@ -795,13 +760,6 @@ m4_popdef([b4_at_dollar])])dnl
]b4_parser_tables_define[
/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
First, the terminals, then, starting at \a yyntokens_, nonterminals. */
private static immutable string[] yytname_ =
@{
]b4_tname[
@};
]b4_parse_trace_if([[
/* YYRLINE[YYN] -- Source line where rule number YYN was defined. */
private static immutable ]b4_int_type_for([b4_rline])[[] yyrline_ =
@@ -831,11 +789,10 @@ m4_popdef([b4_at_dollar])])dnl
}
]])[
private static SymbolKind yytranslate_ (int t)
private static auto yytranslate_ (int t)
{
]b4_api_token_raw_if(
[[ import std.conv : to;
return to!SymbolKind (t);]],
[[ return SymbolKind(t);]],
[[ /* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */
immutable ]b4_int_type_for([b4_translate])[[] translate_table =
@{
@@ -848,10 +805,7 @@ m4_popdef([b4_at_dollar])])dnl
if (t <= 0)
return ]b4_symbol(0, kind)[;
else if (t <= code_max)
{
import std.conv : to;
return to!SymbolKind (translate_table[t]);
}
return SymbolKind(translate_table[t]);
else
return ]b4_symbol(2, kind)[;]])[
}