doc: java: SymbolKind, etc.

Why didn't I think about this before???  symbolName should be a method
of SymbolKind.

* data/skeletons/lalr1.java (YYParser::yysymbolName): Move as...
* data/skeletons/java.m4 (SymbolKind::getName): this.
Make the table a static final table, not a local variable.
Adjust dependencies.
* doc/bison.texi (Java Parser Interface): Document i18n.
(Java Parser Context Interface): Document SymbolKind.
* examples/java/calc/Calc.y, tests/local.at: Adjust.
This commit is contained in:
Akim Demaille
2020-04-13 16:12:36 +02:00
parent 9a33570493
commit 258c2c967f
6 changed files with 151 additions and 121 deletions

25
TODO
View File

@@ -1,8 +1,21 @@
* Bison 3.6
** Questions
*** Java
- Should i18n be part of the Lexer? Currently it's a static method of
Lexer.
- is there a migration path that would allow the use of TokenKinds in
yylex?
*** D
- is there a way to attach yysymbol_name to the enum itself? As we did
in Java.
- It would be better to have TokenKind as return value. Can we use
reflection to support both output types?
** Documentation
- yyexpected_tokens/expected_tokens/expectedTokens in all the languages.
- YYERRCODE, YYUNDEF, YYEOF
- i18n in Java
- symbol.type_get should be kind_get, and it's not documented.
- YYERRCODE and "end of file" and translation
@@ -11,9 +24,6 @@ You can explicitly specify the numeric code for a token type...
The token numbered as 0.
Therefore each time the scanner returns an (external) token number,
it must be mapped to the (internal) symbol number.
** Java: EOF
We should be able to redefine EOF like we do in C.
@@ -120,11 +130,6 @@ https://www.cs.tufts.edu/~nr/cs257/archive/clinton-jefferey/lr-error-messages.pd
https://research.swtch.com/yyerror
http://gallium.inria.fr/~fpottier/publis/fpottier-reachability-cc2016.pdf
* D
** yylex
It would be better to have TokenKind as return value. Can we use reflexion
to support both output types?
* Modernization
Fix data/skeletons/yacc.c so that it defines YYPTRDIFF_T properly for modern
and older C++ compilers. Currently the code defaults to defining it to

View File

@@ -174,10 +174,10 @@ m4_define([b4_declare_symbol_enum],
{
]b4_symbol_foreach([b4_symbol_enum])[
private final int code_;
private final int yycode_;
SymbolKind (int n) {
this.code_ = n;
this.yycode_ = n;
}
private static final SymbolKind[] values_ = {
@@ -185,13 +185,66 @@ m4_define([b4_declare_symbol_enum],
], b4_symbol_numbers)[
};
static final SymbolKind get (int code) {
static final SymbolKind get(int code) {
return values_[code];
}
public final int getCode () {
return this.code_;
public final int getCode() {
return this.yycode_;
}
]b4_parse_error_bmatch(
[simple\|verbose],
[[ /* Return YYSTR after stripping away unnecessary quotes and
backslashes, so that it's suitable for yyerror. The heuristic is
that double-quoting is unnecessary unless the string contains an
apostrophe, a comma, or backslash (other than backslash-backslash).
YYSTR is taken from yytname. */
private static String yytnamerr_(String yystr)
{
/* Only a name that begins with a double quote is a candidate for
   stripping; anything else is returned as is at the bottom. */
if (yystr.charAt (0) == '"')
{
/* Accumulates the unquoted, unescaped form of the name. */
StringBuffer yyr = new StringBuffer();
/* Start at index 1 to skip the opening double quote. */
strip_quotes: for (int i = 1; i < yystr.length(); i++)
switch (yystr.charAt(i))
{
/* An apostrophe or a comma: abandon stripping, so that the
   original quoted string is returned. */
case '\'':
case ',':
break strip_quotes;
case '\\':
/* Step over the backslash.  Only an escaped backslash is
   accepted; any other escape abandons stripping. */
if (yystr.charAt(++i) != '\\')
break strip_quotes;
/* Fall through. */
default:
/* Keep the current character (after an escape, i already
   designates the escaped backslash). */
yyr.append(yystr.charAt(i));
break;
/* The closing double quote: stripping succeeded, return what
   was accumulated. */
case '"':
return yyr.toString();
}
}
/* Reached when YYSTR is not quoted, when stripping was abandoned,
   or when the loop ended without meeting a closing double quote. */
return yystr;
}
/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
First, the terminals, then, starting at \a YYNTOKENS_, nonterminals. */
]b4_typed_parser_table_define([String], [tname], [b4_tname])[
/* The user-facing name of this symbol. */
public final String getName() {
return yytnamerr_(yytname_[yycode_]);
}
]],
[custom\|detailed],
[[ /* YYNAMES_[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
First, the terminals, then, starting at \a YYNTOKENS_, nonterminals. */
]b4_typed_parser_table_define([String], [names], [b4_symbol_names])[
/* The user-facing name of this symbol. */
public final String getName() {
return yynames_[yycode_];
}]])[
};
]])])

View File

@@ -507,7 +507,7 @@ import java.text.MessageFormat;
b4_locations_if([, Object yylocationp])[)
{
yycdebug (s + (yykind.getCode () < YYNTOKENS_ ? " token " : " nterm ")
+ yysymbolName (yykind) + " ("]b4_locations_if([
+ yykind.getName() + " ("]b4_locations_if([
+ yylocationp + ": "])[
+ (yyvaluep == null ? "(null)" : yyvaluep.toString ()) + ")");
}]])[
@@ -880,7 +880,7 @@ b4_dollar_popdef[]dnl
/**
* The symbol kind of the lookahead token.
*/
public SymbolKind getToken ()
public final SymbolKind getToken ()
{
return yytoken;
}
@@ -890,7 +890,7 @@ b4_dollar_popdef[]dnl
/**
* The location of the lookahead.
*/
public ]b4_location_type[ getLocation ()
public final ]b4_location_type[ getLocation ()
{
return yylocation;
}
@@ -937,15 +937,6 @@ b4_dollar_popdef[]dnl
}
return yycount - yyoffset;
}
/**
* The user-facing name of the symbol whose (internal) number is
* YYSYMBOL. No bounds checking.
*/
static String yysymbolName (SymbolKind yysymbol)
{
return ]b4_parser_class[.yysymbolName (yysymbol);
}
}
]b4_parse_error_bmatch(
@@ -1005,7 +996,7 @@ b4_dollar_popdef[]dnl
int yycount = yysyntaxErrorArguments (yyctx, yyarg, argmax);
String[] yystr = new String[yycount];
for (int yyi = 0; yyi < yycount; ++yyi)
yystr[yyi] = yysymbolName (yyarg[yyi]);
yystr[yyi] = yyarg[yyi].getName();
String yyformat;
switch (yycount)
{
@@ -1049,63 +1040,6 @@ b4_dollar_popdef[]dnl
]b4_parser_tables_define[
]b4_parse_error_bmatch(
[simple\|verbose],
[[ /* Return YYSTR after stripping away unnecessary quotes and
backslashes, so that it's suitable for yyerror. The heuristic is
that double-quoting is unnecessary unless the string contains an
apostrophe, a comma, or backslash (other than backslash-backslash).
YYSTR is taken from yytname. */
private static String yytnamerr_ (String yystr)
{
if (yystr.charAt (0) == '"')
{
StringBuffer yyr = new StringBuffer ();
strip_quotes: for (int i = 1; i < yystr.length (); i++)
switch (yystr.charAt (i))
{
case '\'':
case ',':
break strip_quotes;
case '\\':
if (yystr.charAt(++i) != '\\')
break strip_quotes;
/* Fall through. */
default:
yyr.append (yystr.charAt (i));
break;
case '"':
return yyr.toString ();
}
}
return yystr;
}
/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
First, the terminals, then, starting at \a YYNTOKENS_, nonterminals. */
]b4_typed_parser_table_define([String], [tname], [b4_tname])[
/* The user-facing name of the symbol whose (internal) number is
YYSYMBOL. No bounds checking. */
static String yysymbolName (SymbolKind yysymbol)
{
return yytnamerr_ (yytname_[yysymbol.getCode ()]);
}
]],
[custom\|detailed],
[[ /* The user-facing name of the symbol whose (internal) number is
YYSYMBOL. No bounds checking. */
static String yysymbolName (SymbolKind yysymbol)
{
String[] yy_sname =
{
]b4_symbol_names[
};
return yy_sname[yysymbol.getCode ()];
}]])[
]b4_parse_trace_if([[
]b4_integral_parser_table_define([rline], [b4_rline],
[[YYRLINE[YYN] -- Source line where rule number YYN was defined.]])[

View File

@@ -13125,6 +13125,22 @@ or nonzero, full tracing.
Identify the Bison version and skeleton used to generate this parser.
@end deftypecv
If you enabled token internationalization (@pxref{Token I18n}), you must
provide the parser with the following function:
@deftypecv {Static Method} {YYParser} {String} {i18n} (@code{String} @var{s})
Return the translation of @var{s} in the user's language. As an example:
@example
%code @{
static ResourceBundle myResources
= ResourceBundle.getBundle("domain-name");
static final String i18n(String s) @{
return myResources.getString(s);
@}
@}
@end example
@end deftypecv
@node Java Parser Context Interface
@subsection Java Parser Context Interface
@@ -13132,9 +13148,35 @@ Identify the Bison version and skeleton used to generate this parser.
The parser context provides information to build error reports when you
invoke @samp{%define parse.error custom}.
@defcv {Type} {YYParser} {SymbolKind}
An enum that includes all the grammar symbols, tokens and nonterminals. Its
enumerators are forged from the symbol names:
@example
public enum SymbolKind
@{
S_YYEOF(0), /* "end of file" */
S_YYERROR(1), /* error */
S_YYUNDEF(2), /* "invalid token" */
S_BANG(3), /* "!" */
S_PLUS(4), /* "+" */
S_MINUS(5), /* "-" */
[...]
S_NUM(13), /* "number" */
S_NEG(14), /* NEG */
S_YYACCEPT(15), /* $accept */
S_input(16), /* input */
S_line(17); /* line */
@};
@end example
@end defcv
@deftypemethod {YYParser.SymbolKind} {String} getName ()
The name of this symbol, possibly translated.
@end deftypemethod
@deftypemethod {YYParser.Context} {YYParser.SymbolKind} getToken ()
The kind of the lookahead. Maybe return @code{null} when there is no
lookahead.
The kind of the lookahead. Return @code{null} iff there is no lookahead.
@end deftypemethod
@deftypemethod {YYParser.Context} {YYParser.Location} getLocation ()
@@ -13143,14 +13185,12 @@ The location of the lookahead.
@deftypemethod {YYParser.Context} {int} getExpectedTokens (@code{YYParser.SymbolKind[]} @var{argv}, @code{int} @var{argc})
Fill @var{argv} with the expected tokens, which never includes
@code{YYSYMBOL_YYEMPTY}, @code{YYSYMBOL_YYERROR}, or
@code{YYSYMBOL_YYUNDEF}.
@code{SymbolKind.S_YYERROR}, or @code{SymbolKind.S_YYUNDEF}.
Never put more than @var{argc} elements into @var{argv}, and on success
return the effective number of tokens stored in @var{argv}. Return 0 if
there are more than @var{argc} expected tokens, yet fill @var{argv} up to
@var{argc}. When LAC is enabled, may return a negative number on errors,
such as @code{YYENOMEM} on memory exhaustion.
@var{argc}.
If @var{argv} is null, return the size needed to store all the possible
values, which is always less than @code{YYNTOKENS}.
@@ -13227,28 +13267,28 @@ Declarations}), then the parser no longer passes syntax error messages to
Whether it uses @code{yyerror} is up to the user.
Here is a typical example of a reporting function.
Here is an example of a reporting function (@pxref{Java Parser Context
Interface}).
@example
public void yyreportSyntaxError (YYParser.Context ctx)
@{
System.err.print (ctx.getLocation () + ": syntax error");
public void reportSyntaxError(YYParser.Context ctx) @{
System.err.print(ctx.getLocation() + ": syntax error");
// Report the expected tokens.
@{
final int TOKENMAX = 5;
YYParser.SymbolKind[] arg = new YYParser.SymbolKind[TOKENMAX];
int n = ctx.getExpectedTokens (arg, TOKENMAX);
int n = ctx.getExpectedTokens(arg, TOKENMAX);
for (int i = 0; i < n; ++i)
System.err.print ((i == 0 ? ": expected " : " or ")
+ ctx.yysymbolName (arg[i]));
System.err.print((i == 0 ? ": expected " : " or ")
+ arg[i].getName());
@}
// Report the unexpected token which triggered the error.
@{
YYParser.SymbolKind lookahead = ctx.getToken ();
YYParser.SymbolKind lookahead = ctx.getToken();
if (lookahead != null)
System.err.print (" before " + ctx.yysymbolName (lookahead));
System.err.print(" before " + lookahead.getName());
@}
System.err.println ("");
System.err.println("");
@}
@end example
@end deftypemethod

View File

@@ -121,12 +121,12 @@ class CalcLexer implements Calc.Lexer {
int n = ctx.getExpectedTokens(arg, TOKENMAX);
for (int i = 0; i < n; ++i)
System.err.print((i == 0 ? ": expected " : " or ")
+ ctx.yysymbolName(arg[i]));
+ arg[i].getName());
}
{
Calc.SymbolKind lookahead = ctx.getToken();
if (lookahead != null)
System.err.print(" before " + ctx.yysymbolName(lookahead));
System.err.print(" before " + lookahead.getName());
}
System.err.println("");
}

View File

@@ -958,13 +958,13 @@ class PositionReader extends BufferedReader {
# FIXME: We should not hard-code "Calc".
m4_define([AT_YYERROR_DEFINE(java)],
[AT_LOCATION_IF([[public void yyerror (Calc.Location l, String m)
{]m4_bmatch(m4_defn([AT_PARSE_PARAMS]), [nerrs],[[
{]m4_bmatch(m4_defn([AT_PARSE_PARAMS]), [nerrs], [[
++global_nerrs;
++*nerrs;]])[
if (l == null)
System.err.println (m);
System.err.println(m);
else
System.err.println (l + ": " + m);
System.err.println(l + ": " + m);
}
]], [[
public void yyerror (String m)
@@ -976,27 +976,25 @@ m4_define([AT_YYERROR_DEFINE(java)],
]])[
]AT_ERROR_CUSTOM_IF([[
public void reportSyntaxError (Calc.Context ctx)
{
System.err.print (]AT_LOCATION_IF([[ctx.getLocation () + ": "]]
+ )["syntax error");
public void reportSyntaxError(Calc.Context ctx) {
System.err.print(]AT_LOCATION_IF([[ctx.getLocation() + ": "
+ ]])["syntax error");
{
Calc.SymbolKind token = ctx.getToken ();
Calc.SymbolKind token = ctx.getToken();
if (token != null)
System.err.print (" on token @<:@" + ctx.yysymbolName (token) + "@:>@");
System.err.print(" on token @<:@" + token.getName() + "@:>@");
}
{
Calc.SymbolKind[] arg = new Calc.SymbolKind[ctx.NTOKENS];
int n = ctx.getExpectedTokens (arg, ctx.NTOKENS);
if (0 < n)
{
System.err.print (" (expected:");
for (int i = 0; i < n; ++i)
System.err.print (" @<:@" + ctx.yysymbolName (arg[i]) + "@:>@");
System.err.print (")");
}
int n = ctx.getExpectedTokens(arg, ctx.NTOKENS);
if (0 < n) {
System.err.print(" (expected:");
for (int i = 0; i < n; ++i)
System.err.print(" @<:@" + arg[i].getName() + "@:>@");
System.err.print(")");
}
}
System.err.println ("");
System.err.println("");
}
]])
])