d: add the custom error message feature

Parser.Context class returns a const YYLocation, so Lexer's method
yyerror() needs to receive the location as a const parameter.

Internal error reporting flow is changed to be similar to that of
the other skeletons. Before, case YYERRLAB was calling yyerror()
with the result of yysyntax_error() as the string parameter. As the
custom error message lets the user decide if they want to use
yyerror() or not, this flow needed to be changed. Now, case YYERRLAB
calls yyreportSyntaxError(). In case of a custom message, it hands
the error context to the user-defined syntax_error(). Otherwise, it
builds the error message using yysyntaxErrorArguments() (or uses the
plain "syntax error" string) and passes it to yyerror().

In tests/calc.at, the tokens need to be reordered by precedence, so
that the D program outputs the expected tokens in the same order as
the other parsers.

* data/skeletons/lalr1.d: Add the custom error message feature.
* doc/bison.texi: Document it.
* examples/d/calc/calc.y: Adjust.
* tests/calc.at, tests/local.at: Test it.
This commit is contained in:
Adela Vais
2020-10-24 16:43:46 +03:00
committed by Akim Demaille
parent 4252134ba4
commit dc15b62a7c
5 changed files with 159 additions and 26 deletions

View File

@@ -77,7 +77,15 @@ public interface Lexer
* @@param loc The location of the element to which the
* error message is related]])[
* @@param s The string for the error message. */
void yyerror (]b4_locations_if([b4_location_type[ loc, ]])[string s);
void yyerror (]b4_locations_if([[const ]b4_location_type[ loc, ]])[string s);
]b4_parse_error_bmatch([custom], [[
/**
* Build and emit a "syntax error" message in a user-defined way.
*
* @@param ctx The context of the error.
*/
void syntax_error(]b4_parser_class[.Context ctx);
]])[
}
]b4_locations_if([b4_position_type_if([[
@@ -276,7 +284,7 @@ b4_user_union_members
return yylexer.yylex ();
}
protected final void yyerror (]b4_locations_if(ref [b4_location_type[ loc, ]])[string s) {
protected final void yyerror (]b4_locations_if([[const ]b4_location_type[ loc, ]])[string s) {
yylexer.yyerror (]b4_locations_if([loc, ])[s);
}
@@ -560,7 +568,7 @@ m4_popdef([b4_at_dollar])])dnl
++yynerrs_;
if (yychar == TokenKind.]b4_symbol(empty, id)[)
yytoken = ]b4_symbol(empty, kind)[;
yyerror (]b4_locations_if([yylloc, ])[yysyntax_error(new Context(yystack, yytoken]b4_locations_if([[, yylloc]])[)));
yyreportSyntaxError(new Context(yystack, yytoken]b4_locations_if([[, yylloc]])[));
}
]b4_locations_if([
yyerrloc = yylloc;])[
@@ -664,8 +672,11 @@ m4_popdef([b4_at_dollar])])dnl
}
// Generate an error message.
private final string yysyntax_error(Context yyctx)
{]b4_parse_error_case([verbose], [[
private final void yyreportSyntaxError(Context yyctx)
{]b4_parse_error_bmatch(
[custom], [[
yylexer.syntax_error(yyctx);]],
[detailed\|verbose], [[
/* There are many possibilities here to consider:
- Assume YYFAIL is not used. It's too flawed to consider.
See
@@ -701,24 +712,67 @@ m4_popdef([b4_at_dollar])])dnl
{
// FIXME: This method of building the message is not compatible
// with internationalization.
string res = "syntax error, unexpected ";
res ~= format!"%s"(yyctx.getToken);
immutable int argmax = 5;
SymbolKind[] yyarg = new SymbolKind[argmax];
int yycount = yyctx.getExpectedTokens(yyarg, argmax);
if (yycount < argmax)
int yycount = yysyntaxErrorArguments(yyctx, yyarg, argmax);
string res = "syntax error, unexpected ";
res ~= format!"%s"(yyarg[0]);
if (yycount < argmax + 1)
{
for (int yyi = 0; yyi < yycount; yyi++)
for (int yyi = 1; yyi < yycount; yyi++)
{
res ~= yyi == 0 ? ", expecting " : " or ";
res ~= yyi == 1 ? ", expecting " : " or ";
res ~= format!"%s"(SymbolKind(yyarg[yyi]));
}
}
return res;
}]])[
return "syntax error";
yyerror(]b4_locations_if([yyctx.getLocation(), ])[res);
}]],
[[simple]], [[
yyerror(]b4_locations_if([yyctx.getLocation(), ])["syntax error");]])[
}
]b4_parse_error_bmatch(
[detailed\|verbose], [[
/**
 * Collect the symbols to mention in a syntax error message.
 *
 * When the context holds a lookahead, the unexpected token is stored
 * in the first slot of yyarg and the expected tokens are then requested
 * from the context. Otherwise nothing is stored and 0 is returned.
 *
 * @@param yyctx The context of the error.
 * @@param yyarg Destination array. When it is null the unexpected
 *              token is counted but not stored.
 * @@param yyargn Limit forwarded to getExpectedTokens (presumably the
 *               capacity of yyarg; confirm against Context).
 * @@return 0 when there is no lookahead, otherwise 1 plus the value
 *          returned by getExpectedTokens.
 */
private int yysyntaxErrorArguments(Context yyctx, SymbolKind[] yyarg, int yyargn) {
/* There are many possibilities here to consider:
- If this state is a consistent state with a default action,
then the only way this function was invoked is if the
default action is an error action. In that case, don't
check for expected tokens because there are none.
- The only way there can be no lookahead present (in tok) is
if this state is a consistent state with a default action.
Thus, detecting the absence of a lookahead is sufficient to
determine that there is no unexpected or expected token to
report. In that case, just report a simple "syntax error".
- Don't assume there isn't a lookahead just because this
state is a consistent state with a default action. There
might have been a previous inconsistent state, consistent
state with a non-default action, or user semantic action
that manipulated yychar. (However, yychar is currently out
of scope during semantic actions.)
- Of course, the expected token list depends on states to
have correct lookahead information, and it depends on the
parser not to perform extra reductions after fetching a
lookahead from the scanner and before detecting a syntax
error. Thus, state merging (from LALR or IELR) and default
reductions corrupt the expected token list. However, the
list is correct for canonical LR with one exception: it
will still contain any token that will not be accepted due
to an error action in a later state.
*/
int yycount = 0;
// Only a context with a lookahead yields arguments; without one the
// count stays at 0.
if (yyctx.getToken() != ]b4_symbol(empty, kind)[)
{
if (yyarg !is null)
yyarg[yycount] = yyctx.getToken();
yycount += 1;
// The second argument is 1, presumably the offset that keeps the
// expected tokens after the unexpected one (confirm against Context).
yycount += yyctx.getExpectedTokens(yyarg, 1, yyargn);
}
return yycount;
}
]])[
/**
* Information needed to get the list of expected tokens and to forge
* a syntax error diagnostic.