doc: java: SymbolKind, etc.

Why didn't I think about this before???  symbolName should be a method
of SymbolKind.

* data/skeletons/lalr1.java (YYParser::yysymbolName): Move as...
* data/skeletons/java.m4 (SymbolKind::getName): this.
Make the table a static final table, not a local variable.
Adjust dependencies.
* doc/bison.texi (Java Parser Interface): Document i18n.
(Java Parser Context Interface): Document SymbolKind.
* examples/java/calc/Calc.y, tests/local.at: Adjust.
This commit is contained in:
Akim Demaille
2020-04-13 16:12:36 +02:00
parent 9a33570493
commit 258c2c967f
6 changed files with 151 additions and 121 deletions

25
TODO
View File

@@ -1,8 +1,21 @@
* Bison 3.6 * Bison 3.6
** Questions
*** Java
- Should i18n be part of the Lexer? Currently it's a static method of
Lexer.
- is there a migration path that would allow using TokenKinds in
yylex?
*** D
- is there a way to attach yysymbol_name to the enum itself? As we did
in Java.
- It would be better to have TokenKind as return value. Can we use
reflection to support both output types?
** Documentation ** Documentation
- yyexpected_tokens/expected_tokens/expectedTokens in all the languages.
- YYERRCODE, YYUNDEF, YYEOF - YYERRCODE, YYUNDEF, YYEOF
- i18n in Java
- symbol.type_get should be kind_get, and it's not documented. - symbol.type_get should be kind_get, and it's not documented.
- YYERRCODE and "end of file" and translation - YYERRCODE and "end of file" and translation
@@ -11,9 +24,6 @@ You can explicitly specify the numeric code for a token type...
The token numbered as 0. The token numbered as 0.
Therefore each time the scanner returns an (external) token number,
it must be mapped to the (internal) symbol number.
** Java: EOF ** Java: EOF
We should be able to redefine EOF like we do in C. We should be able to redefine EOF like we do in C.
@@ -120,11 +130,6 @@ https://www.cs.tufts.edu/~nr/cs257/archive/clinton-jefferey/lr-error-messages.pd
https://research.swtch.com/yyerror https://research.swtch.com/yyerror
http://gallium.inria.fr/~fpottier/publis/fpottier-reachability-cc2016.pdf http://gallium.inria.fr/~fpottier/publis/fpottier-reachability-cc2016.pdf
* D
** yylex
It would be better to have TokenKind as return value. Can we use reflexion
to support both output types?
* Modernization * Modernization
Fix data/skeletons/yacc.c so that it defines YYPTRDIFF_T properly for modern Fix data/skeletons/yacc.c so that it defines YYPTRDIFF_T properly for modern
and older C++ compilers. Currently the code defaults to defining it to and older C++ compilers. Currently the code defaults to defining it to

View File

@@ -174,10 +174,10 @@ m4_define([b4_declare_symbol_enum],
{ {
]b4_symbol_foreach([b4_symbol_enum])[ ]b4_symbol_foreach([b4_symbol_enum])[
private final int code_; private final int yycode_;
SymbolKind (int n) { SymbolKind (int n) {
this.code_ = n; this.yycode_ = n;
} }
private static final SymbolKind[] values_ = { private static final SymbolKind[] values_ = {
@@ -185,13 +185,66 @@ m4_define([b4_declare_symbol_enum],
], b4_symbol_numbers)[ ], b4_symbol_numbers)[
}; };
static final SymbolKind get (int code) { static final SymbolKind get(int code) {
return values_[code]; return values_[code];
} }
public final int getCode () { public final int getCode() {
return this.code_; return this.yycode_;
} }
]b4_parse_error_bmatch(
[simple\|verbose],
[[ /* Return YYSTR after stripping away unnecessary quotes and
backslashes, so that it's suitable for yyerror. The heuristic is
that double-quoting is unnecessary unless the string contains an
apostrophe, a comma, or backslash (other than backslash-backslash).
YYSTR is taken from yytname. */
private static String yytnamerr_(String yystr)
{
if (yystr.charAt (0) == '"')
{
StringBuffer yyr = new StringBuffer();
strip_quotes: for (int i = 1; i < yystr.length(); i++)
switch (yystr.charAt(i))
{
case '\'':
case ',':
break strip_quotes;
case '\\':
if (yystr.charAt(++i) != '\\')
break strip_quotes;
/* Fall through. */
default:
yyr.append(yystr.charAt(i));
break;
case '"':
return yyr.toString();
}
}
return yystr;
}
/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
First, the terminals, then, starting at \a YYNTOKENS_, nonterminals. */
]b4_typed_parser_table_define([String], [tname], [b4_tname])[
/* The user-facing name of this symbol. */
public final String getName() {
return yytnamerr_(yytname_[yycode_]);
}
]],
[custom\|detailed],
[[ /* YYNAMES_[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
First, the terminals, then, starting at \a YYNTOKENS_, nonterminals. */
]b4_typed_parser_table_define([String], [names], [b4_symbol_names])[
/* The user-facing name of this symbol. */
public final String getName() {
return yynames_[yycode_];
}]])[
}; };
]])]) ]])])

View File

@@ -507,7 +507,7 @@ import java.text.MessageFormat;
b4_locations_if([, Object yylocationp])[) b4_locations_if([, Object yylocationp])[)
{ {
yycdebug (s + (yykind.getCode () < YYNTOKENS_ ? " token " : " nterm ") yycdebug (s + (yykind.getCode () < YYNTOKENS_ ? " token " : " nterm ")
+ yysymbolName (yykind) + " ("]b4_locations_if([ + yykind.getName() + " ("]b4_locations_if([
+ yylocationp + ": "])[ + yylocationp + ": "])[
+ (yyvaluep == null ? "(null)" : yyvaluep.toString ()) + ")"); + (yyvaluep == null ? "(null)" : yyvaluep.toString ()) + ")");
}]])[ }]])[
@@ -880,7 +880,7 @@ b4_dollar_popdef[]dnl
/** /**
* The symbol kind of the lookahead token. * The symbol kind of the lookahead token.
*/ */
public SymbolKind getToken () public final SymbolKind getToken ()
{ {
return yytoken; return yytoken;
} }
@@ -890,7 +890,7 @@ b4_dollar_popdef[]dnl
/** /**
* The location of the lookahead. * The location of the lookahead.
*/ */
public ]b4_location_type[ getLocation () public final ]b4_location_type[ getLocation ()
{ {
return yylocation; return yylocation;
} }
@@ -937,15 +937,6 @@ b4_dollar_popdef[]dnl
} }
return yycount - yyoffset; return yycount - yyoffset;
} }
/**
* The user-facing name of the symbol whose (internal) number is
* YYSYMBOL. No bounds checking.
*/
static String yysymbolName (SymbolKind yysymbol)
{
return ]b4_parser_class[.yysymbolName (yysymbol);
}
} }
]b4_parse_error_bmatch( ]b4_parse_error_bmatch(
@@ -1005,7 +996,7 @@ b4_dollar_popdef[]dnl
int yycount = yysyntaxErrorArguments (yyctx, yyarg, argmax); int yycount = yysyntaxErrorArguments (yyctx, yyarg, argmax);
String[] yystr = new String[yycount]; String[] yystr = new String[yycount];
for (int yyi = 0; yyi < yycount; ++yyi) for (int yyi = 0; yyi < yycount; ++yyi)
yystr[yyi] = yysymbolName (yyarg[yyi]); yystr[yyi] = yyarg[yyi].getName();
String yyformat; String yyformat;
switch (yycount) switch (yycount)
{ {
@@ -1049,63 +1040,6 @@ b4_dollar_popdef[]dnl
]b4_parser_tables_define[ ]b4_parser_tables_define[
]b4_parse_error_bmatch(
[simple\|verbose],
[[ /* Return YYSTR after stripping away unnecessary quotes and
backslashes, so that it's suitable for yyerror. The heuristic is
that double-quoting is unnecessary unless the string contains an
apostrophe, a comma, or backslash (other than backslash-backslash).
YYSTR is taken from yytname. */
private static String yytnamerr_ (String yystr)
{
if (yystr.charAt (0) == '"')
{
StringBuffer yyr = new StringBuffer ();
strip_quotes: for (int i = 1; i < yystr.length (); i++)
switch (yystr.charAt (i))
{
case '\'':
case ',':
break strip_quotes;
case '\\':
if (yystr.charAt(++i) != '\\')
break strip_quotes;
/* Fall through. */
default:
yyr.append (yystr.charAt (i));
break;
case '"':
return yyr.toString ();
}
}
return yystr;
}
/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
First, the terminals, then, starting at \a YYNTOKENS_, nonterminals. */
]b4_typed_parser_table_define([String], [tname], [b4_tname])[
/* The user-facing name of the symbol whose (internal) number is
YYSYMBOL. No bounds checking. */
static String yysymbolName (SymbolKind yysymbol)
{
return yytnamerr_ (yytname_[yysymbol.getCode ()]);
}
]],
[custom\|detailed],
[[ /* The user-facing name of the symbol whose (internal) number is
YYSYMBOL. No bounds checking. */
static String yysymbolName (SymbolKind yysymbol)
{
String[] yy_sname =
{
]b4_symbol_names[
};
return yy_sname[yysymbol.getCode ()];
}]])[
]b4_parse_trace_if([[ ]b4_parse_trace_if([[
]b4_integral_parser_table_define([rline], [b4_rline], ]b4_integral_parser_table_define([rline], [b4_rline],
[[YYRLINE[YYN] -- Source line where rule number YYN was defined.]])[ [[YYRLINE[YYN] -- Source line where rule number YYN was defined.]])[

View File

@@ -13125,6 +13125,22 @@ or nonzero, full tracing.
Identify the Bison version and skeleton used to generate this parser. Identify the Bison version and skeleton used to generate this parser.
@end deftypecv @end deftypecv
If you enabled token internationalization (@pxref{Token I18n}), you must
provide the parser with the following function:
@deftypecv {Static Method} {YYParser} {String} {i18n} (@code{String} @var{s})
Return the translation of @var{s} in the user's language. As an example:
@example
%code @{
static ResourceBundle myResources
= ResourceBundle.getBundle("domain-name");
static final String i18n(String s) @{
return myResources.getString(s);
@}
@}
@end example
@end deftypecv
@node Java Parser Context Interface @node Java Parser Context Interface
@subsection Java Parser Context Interface @subsection Java Parser Context Interface
@@ -13132,9 +13148,35 @@ Identify the Bison version and skeleton used to generate this parser.
The parser context provides information to build error reports when you The parser context provides information to build error reports when you
invoke @samp{%define parse.error custom}. invoke @samp{%define parse.error custom}.
@defcv {Type} {YYParser} {SymbolKind}
An enum that includes all the grammar symbols, tokens and nonterminals. Its
enumerators are forged from the symbol names:
@example
public enum SymbolKind
@{
S_YYEOF(0), /* "end of file" */
S_YYERROR(1), /* error */
S_YYUNDEF(2), /* "invalid token" */
S_BANG(3), /* "!" */
S_PLUS(4), /* "+" */
S_MINUS(5), /* "-" */
[...]
S_NUM(13), /* "number" */
S_NEG(14), /* NEG */
S_YYACCEPT(15), /* $accept */
S_input(16), /* input */
S_line(17); /* line */
@};
@end example
@end defcv
@deftypemethod {YYParser.SymbolKind} {String} getName ()
The name of this symbol, possibly translated.
@end deftypemethod
@deftypemethod {YYParser.Context} {YYParser.SymbolKind} getToken () @deftypemethod {YYParser.Context} {YYParser.SymbolKind} getToken ()
The kind of the lookahead. Maybe return @code{null} when there is no The kind of the lookahead. Return @code{null} iff there is no lookahead.
lookahead.
@end deftypemethod @end deftypemethod
@deftypemethod {YYParser.Context} {YYParser.Location} getLocation () @deftypemethod {YYParser.Context} {YYParser.Location} getLocation ()
@@ -13143,14 +13185,12 @@ The location of the lookahead.
@deftypemethod {YYParser.Context} {int} getExpectedTokens (@code{YYParser.SymbolKind[]} @var{argv}, @code{int} @var{argc}) @deftypemethod {YYParser.Context} {int} getExpectedTokens (@code{YYParser.SymbolKind[]} @var{argv}, @code{int} @var{argc})
Fill @var{argv} with the expected tokens, which never includes Fill @var{argv} with the expected tokens, which never includes
@code{YYSYMBOL_YYEMPTY}, @code{YYSYMBOL_YYERROR}, or @code{SymbolKind.S_YYERROR}, or @code{SymbolKind.S_YYUNDEF}.
@code{YYSYMBOL_YYUNDEF}.
Never put more than @var{argc} elements into @var{argv}, and on success Never put more than @var{argc} elements into @var{argv}, and on success
return the effective number of tokens stored in @var{argv}. Return 0 if return the effective number of tokens stored in @var{argv}. Return 0 if
there are more than @var{argc} expected tokens, yet fill @var{argv} up to there are more than @var{argc} expected tokens, yet fill @var{argv} up to
@var{argc}. When LAC is enabled, may return a negative number on errors, @var{argc}.
such as @code{YYENOMEM} on memory exhaustion.
If @var{argv} is null, return the size needed to store all the possible If @var{argv} is null, return the size needed to store all the possible
values, which is always less than @code{YYNTOKENS}. values, which is always less than @code{YYNTOKENS}.
@@ -13227,28 +13267,28 @@ Declarations}), then the parser no longer passes syntax error messages to
Whether it uses @code{yyerror} is up to the user. Whether it uses @code{yyerror} is up to the user.
Here is a typical example of a reporting function. Here is an example of a reporting function (@pxref{Java Parser Context
Interface}).
@example @example
public void yyreportSyntaxError (YYParser.Context ctx) public void reportSyntaxError(YYParser.Context ctx) @{
@{ System.err.print(ctx.getLocation() + ": syntax error");
System.err.print (ctx.getLocation () + ": syntax error");
// Report the expected tokens. // Report the expected tokens.
@{ @{
final int TOKENMAX = 5; final int TOKENMAX = 5;
YYParser.SymbolKind[] arg = new YYParser.SymbolKind[TOKENMAX]; YYParser.SymbolKind[] arg = new YYParser.SymbolKind[TOKENMAX];
int n = ctx.getExpectedTokens (arg, TOKENMAX); int n = ctx.getExpectedTokens(arg, TOKENMAX);
for (int i = 0; i < n; ++i) for (int i = 0; i < n; ++i)
System.err.print ((i == 0 ? ": expected " : " or ") System.err.print((i == 0 ? ": expected " : " or ")
+ ctx.yysymbolName (arg[i])); + arg[i].getName());
@} @}
// Report the unexpected token which triggered the error. // Report the unexpected token which triggered the error.
@{ @{
YYParser.SymbolKind lookahead = ctx.getToken (); YYParser.SymbolKind lookahead = ctx.getToken();
if (lookahead != null) if (lookahead != null)
System.err.print (" before " + ctx.yysymbolName (lookahead)); System.err.print(" before " + lookahead.getName());
@} @}
System.err.println (""); System.err.println("");
@} @}
@end example @end example
@end deftypemethod @end deftypemethod

View File

@@ -121,12 +121,12 @@ class CalcLexer implements Calc.Lexer {
int n = ctx.getExpectedTokens(arg, TOKENMAX); int n = ctx.getExpectedTokens(arg, TOKENMAX);
for (int i = 0; i < n; ++i) for (int i = 0; i < n; ++i)
System.err.print((i == 0 ? ": expected " : " or ") System.err.print((i == 0 ? ": expected " : " or ")
+ ctx.yysymbolName(arg[i])); + arg[i].getName());
} }
{ {
Calc.SymbolKind lookahead = ctx.getToken(); Calc.SymbolKind lookahead = ctx.getToken();
if (lookahead != null) if (lookahead != null)
System.err.print(" before " + ctx.yysymbolName(lookahead)); System.err.print(" before " + lookahead.getName());
} }
System.err.println(""); System.err.println("");
} }

View File

@@ -958,13 +958,13 @@ class PositionReader extends BufferedReader {
# FIXME: We should not hard-code "Calc". # FIXME: We should not hard-code "Calc".
m4_define([AT_YYERROR_DEFINE(java)], m4_define([AT_YYERROR_DEFINE(java)],
[AT_LOCATION_IF([[public void yyerror (Calc.Location l, String m) [AT_LOCATION_IF([[public void yyerror (Calc.Location l, String m)
{]m4_bmatch(m4_defn([AT_PARSE_PARAMS]), [nerrs],[[ {]m4_bmatch(m4_defn([AT_PARSE_PARAMS]), [nerrs], [[
++global_nerrs; ++global_nerrs;
++*nerrs;]])[ ++*nerrs;]])[
if (l == null) if (l == null)
System.err.println (m); System.err.println(m);
else else
System.err.println (l + ": " + m); System.err.println(l + ": " + m);
} }
]], [[ ]], [[
public void yyerror (String m) public void yyerror (String m)
@@ -976,27 +976,25 @@ m4_define([AT_YYERROR_DEFINE(java)],
]])[ ]])[
]AT_ERROR_CUSTOM_IF([[ ]AT_ERROR_CUSTOM_IF([[
public void reportSyntaxError (Calc.Context ctx) public void reportSyntaxError(Calc.Context ctx) {
{ System.err.print(]AT_LOCATION_IF([[ctx.getLocation() + ": "
System.err.print (]AT_LOCATION_IF([[ctx.getLocation () + ": "]] + ]])["syntax error");
+ )["syntax error");
{ {
Calc.SymbolKind token = ctx.getToken (); Calc.SymbolKind token = ctx.getToken();
if (token != null) if (token != null)
System.err.print (" on token @<:@" + ctx.yysymbolName (token) + "@:>@"); System.err.print(" on token @<:@" + token.getName() + "@:>@");
} }
{ {
Calc.SymbolKind[] arg = new Calc.SymbolKind[ctx.NTOKENS]; Calc.SymbolKind[] arg = new Calc.SymbolKind[ctx.NTOKENS];
int n = ctx.getExpectedTokens (arg, ctx.NTOKENS); int n = ctx.getExpectedTokens(arg, ctx.NTOKENS);
if (0 < n) if (0 < n) {
{ System.err.print(" (expected:");
System.err.print (" (expected:"); for (int i = 0; i < n; ++i)
for (int i = 0; i < n; ++i) System.err.print(" @<:@" + arg[i].getName() + "@:>@");
System.err.print (" @<:@" + ctx.yysymbolName (arg[i]) + "@:>@"); System.err.print(")");
System.err.print (")"); }
}
} }
System.err.println (""); System.err.println("");
} }
]]) ]])
]) ])