diff --git a/data/skeletons/lalr1.d b/data/skeletons/lalr1.d index 2334e8c6..c85c712c 100644 --- a/data/skeletons/lalr1.d +++ b/data/skeletons/lalr1.d @@ -77,7 +77,15 @@ public interface Lexer * @@param loc The location of the element to which the * error message is related]])[ * @@param s The string for the error message. */ - void yyerror (]b4_locations_if([b4_location_type[ loc, ]])[string s); + void yyerror (]b4_locations_if([[const ]b4_location_type[ loc, ]])[string s); +]b4_parse_error_bmatch([custom], [[ + /** + * Build and emit a "syntax error" message in a user-defined way. + * + * @@param ctx The context of the error. + */ + void syntax_error(]b4_parser_class[.Context ctx); +]])[ } ]b4_locations_if([b4_position_type_if([[ @@ -276,7 +284,7 @@ b4_user_union_members return yylexer.yylex (); } - protected final void yyerror (]b4_locations_if(ref [b4_location_type[ loc, ]])[string s) { + protected final void yyerror (]b4_locations_if([[const ]b4_location_type[ loc, ]])[string s) { yylexer.yyerror (]b4_locations_if([loc, ])[s); } @@ -560,7 +568,7 @@ m4_popdef([b4_at_dollar])])dnl ++yynerrs_; if (yychar == TokenKind.]b4_symbol(empty, id)[) yytoken = ]b4_symbol(empty, kind)[; - yyerror (]b4_locations_if([yylloc, ])[yysyntax_error(new Context(yystack, yytoken]b4_locations_if([[, yylloc]])[))); + yyreportSyntaxError(new Context(yystack, yytoken]b4_locations_if([[, yylloc]])[)); } ]b4_locations_if([ yyerrloc = yylloc;])[ @@ -664,8 +672,11 @@ m4_popdef([b4_at_dollar])])dnl } // Generate an error message. - private final string yysyntax_error(Context yyctx) - {]b4_parse_error_case([verbose], [[ + private final void yyreportSyntaxError(Context yyctx) + {]b4_parse_error_bmatch( +[custom], [[ + yylexer.syntax_error(yyctx);]], +[detailed\|verbose], [[ /* There are many possibilities here to consider: - Assume YYFAIL is not used. It's too flawed to consider. 
See @@ -701,24 +712,67 @@ m4_popdef([b4_at_dollar])])dnl { // FIXME: This method of building the message is not compatible // with internationalization. - string res = "syntax error, unexpected "; - res ~= format!"%s"(yyctx.getToken); immutable int argmax = 5; SymbolKind[] yyarg = new SymbolKind[argmax]; - int yycount = yyctx.getExpectedTokens(yyarg, argmax); - if (yycount < argmax) + int yycount = yysyntaxErrorArguments(yyctx, yyarg, argmax); + string res = "syntax error, unexpected "; + res ~= format!"%s"(yyarg[0]); + if (yycount < argmax + 1) { - for (int yyi = 0; yyi < yycount; yyi++) + for (int yyi = 1; yyi < yycount; yyi++) { - res ~= yyi == 0 ? ", expecting " : " or "; + res ~= yyi == 1 ? ", expecting " : " or "; res ~= format!"%s"(SymbolKind(yyarg[yyi])); } } - return res; - }]])[ - return "syntax error"; + yyerror(]b4_locations_if([yyctx.getLocation(), ])[res); + }]], +[[simple]], [[ + yyerror(]b4_locations_if([yyctx.getLocation(), ])["syntax error");]])[ } +]b4_parse_error_bmatch( +[detailed\|verbose], [[ + private int yysyntaxErrorArguments(Context yyctx, SymbolKind[] yyarg, int yyargn) { + /* There are many possibilities here to consider: + - If this state is a consistent state with a default action, + then the only way this function was invoked is if the + default action is an error action. In that case, don't + check for expected tokens because there are none. + - The only way there can be no lookahead present (in tok) is + if this state is a consistent state with a default action. + Thus, detecting the absence of a lookahead is sufficient to + determine that there is no unexpected or expected token to + report. In that case, just report a simple "syntax error". + - Don't assume there isn't a lookahead just because this + state is a consistent state with a default action. There + might have been a previous inconsistent state, consistent + state with a non-default action, or user semantic action + that manipulated yychar. 
(However, yychar is currently out + of scope during semantic actions.) + - Of course, the expected token list depends on states to + have correct lookahead information, and it depends on the + parser not to perform extra reductions after fetching a + lookahead from the scanner and before detecting a syntax + error. Thus, state merging (from LALR or IELR) and default + reductions corrupt the expected token list. However, the + list is correct for canonical LR with one exception: it + will still contain any token that will not be accepted due + to an error action in a later state. + */ + int yycount = 0; + if (yyctx.getToken() != ]b4_symbol(empty, kind)[) + { + if (yyarg !is null) + yyarg[yycount] = yyctx.getToken(); + yycount += 1; + yycount += yyctx.getExpectedTokens(yyarg, 1, yyargn); + } + return yycount; + } +]])[ + + /** * Information needed to get the list of expected tokens and to forge * a syntax error diagnostic. diff --git a/doc/bison.texi b/doc/bison.texi index 0a5da8f4..4a1b090c 100644 --- a/doc/bison.texi +++ b/doc/bison.texi @@ -14026,6 +14026,42 @@ They should return new objects for each call, to avoid that all the symbol share the same Position boundaries. @end deftypemethod +@deftypemethod {Lexer} {void} syntax_error(@code{YYParser.Context} @var{ctx}) +If you invoke @samp{%define parse.error custom} (@pxref{Bison +Declarations}), then the parser no longer passes syntax error messages to +@code{yyerror}, rather it delegates that task to the user by calling the +@code{syntax_error} function. + +Whether it uses @code{yyerror} is up to the user. + +Here is an example of a reporting function (@pxref{D Parser Context +Interface}). + +@example +public void syntax_error(YYParser.Context ctx) +@{ + stderr.write(ctx.getLocation(), ": syntax error"); + // Report the expected tokens.
+ @{ + immutable int TOKENMAX = 5; + YYParser.SymbolKind[] arg = new YYParser.SymbolKind[TOKENMAX]; + int n = ctx.getExpectedTokens(arg, TOKENMAX); + if (n < TOKENMAX) + for (int i = 0; i < n; ++i) + stderr.write((i == 0 ? ": expected " : " or "), arg[i]); + @} + // Report the unexpected token which triggered the error. + @{ + YYParser.SymbolKind lookahead = ctx.getToken(); + stderr.writeln(" before ", lookahead); + @} +@} +@end example + +@noindent +This implementation is inappropriate for internationalization, see +the @file{c/bistromathic} example for a better alternative. +@end deftypemethod @node D Action Features @subsection Special Features for Use in D Actions @@ -14049,7 +14085,6 @@ errors. This is useful primarily in error rules. @xref{Error Recovery}. @end deffn - @node Java Parsers @section Java Parsers diff --git a/examples/d/calc/calc.y b/examples/d/calc/calc.y index c3a0e9fd..bee3fc94 100644 --- a/examples/d/calc/calc.y +++ b/examples/d/calc/calc.y @@ -101,7 +101,7 @@ if (isInputRange!R && is(ElementType!R : dchar)) // Should be a local in main, shared with %parse-param. 
int exit_status = 0; - void yyerror(YYLocation loc, string s) + void yyerror(const YYLocation loc, string s) { exit_status = 1; stderr.writeln(loc.toString(), ": ", s); diff --git a/tests/calc.at b/tests/calc.at index 139cc5c5..fd42a40b 100644 --- a/tests/calc.at +++ b/tests/calc.at @@ -364,7 +364,7 @@ void location_print (FILE *o, Span s); }]])[ /* Bison Declarations */ -%token CALC_EOF 0 ]AT_TOKEN_TRANSLATE_IF([_("end of input")], ["end of input"])[ +%token CALC_EOF 0 ]AT_TOKEN_TRANSLATE_IF([_("end of file")], ["end of input"])[ %token <]AT_VALUE_UNION_IF([int], [ival])[> NUM "number" %type <]AT_VALUE_UNION_IF([int], [ival])[> exp @@ -666,20 +666,20 @@ m4_define([_AT_DATA_CALC_Y(d)], %printer { fprintf (yyo, "%d", $$); } ; /* Bison Declarations */ -%token EOF 0 ]AT_TOKEN_TRANSLATE_IF([_("end of input")], ["end of input"])[ +%token EOF 0 ]AT_TOKEN_TRANSLATE_IF([_("end of file")], ["end of input"])[ %token NUM "number" %type exp -%token PLUS "+" +%token EQUAL "=" MINUS "-" + PLUS "+" STAR "*" SLASH "/" + POW "^" + EOL "\n" LPAR "(" RPAR ")" - EQUAL "=" - POW "^" NOT "!" 
- EOL "\n" %nonassoc "=" /* comparison */ %left "-" "+" @@ -736,7 +736,6 @@ power (int base, int exponent) return res; } -]AT_YYERROR_DEFINE[ ]AT_CALC_YYLEX AT_CALC_MAIN]) ])# _AT_DATA_CALC_Y(d) @@ -861,7 +860,7 @@ m4_define([_AT_DATA_CALC_Y(java)], } /* Bison Declarations */ -%token CALC_EOF 0 ]AT_TOKEN_TRANSLATE_IF([_("end of input")], ["end of input"])[ +%token CALC_EOF 0 ]AT_TOKEN_TRANSLATE_IF([_("end of file")], ["end of input"])[ %token NUM "number" %type exp @@ -1452,6 +1451,11 @@ AT_CHECK_CALC_LALR1_D([%define parse.error verbose %define api.prefix {calc} %ve AT_CHECK_CALC_LALR1_D([%debug]) +AT_CHECK_CALC_LALR1_D([%define parse.error custom]) +AT_CHECK_CALC_LALR1_D([%locations %define parse.error custom]) +AT_CHECK_CALC_LALR1_D([%locations %define parse.error detailed]) +AT_CHECK_CALC_LALR1_D([%locations %define parse.error simple]) +AT_CHECK_CALC_LALR1_D([%locations %define parse.error verbose]) AT_CHECK_CALC_LALR1_D([%define parse.error verbose %debug %verbose]) AT_CHECK_CALC_LALR1_D([%define parse.error verbose %debug %define api.token.prefix {TOK_} %verbose]) AT_CHECK_CALC_LALR1_D([%define parse.error verbose %debug %define api.symbol.prefix {SYMB_} %verbose]) diff --git a/tests/local.at b/tests/local.at index 22fae30a..e1f37ba2 100644 --- a/tests/local.at +++ b/tests/local.at @@ -868,10 +868,50 @@ m4_define([AT_YYERROR_DECLARE_EXTERN(d)], []) m4_define([AT_YYERROR_DEFINE(d)], [[/* An error reporting function. 
*/ -public void yyerror (]AT_LOCATION_IF([[YYLocation l, ]])[string m) +public void yyerror (]AT_LOCATION_IF([[const YYLocation l, ]])[string m) { stderr.writeln (]AT_LOCATION_IF([[l, ": ", ]])[m); -}]]) +} +]AT_ERROR_CUSTOM_IF([[ +// In the case of D, there are no single quotes around the symbols +// so they need to be added here +public string transformToken(]AT_API_PREFIX[Parser.SymbolKind token) +{ + string res; + foreach (i; format("%s", token)) + { + if (i == '\"') + res ~= '\''; + else + res ~= i; + } + if (res.length == 1) + return '\'' ~ res ~ '\''; + else + return res; +} + +public void syntax_error(]AT_API_PREFIX[Parser.Context ctx) +{ + stderr.write(]AT_LOCATION_IF([[ctx.getLocation(), ": ",]])["syntax error"); + { + ]AT_API_PREFIX[Parser.SymbolKind token = ctx.getToken(); + stderr.write(" on token @<:@", transformToken(token), "@:>@"); + } + { + immutable int argmax = 7; + ]AT_API_PREFIX[Parser.SymbolKind[] arg = new ]AT_API_PREFIX[Parser.SymbolKind[argmax]; + int n = ctx.getExpectedTokens(arg, argmax); + if (0 < n) + { + stderr.write(" (expected:"); + for (int i = 0; i < n; ++i) + stderr.write(" @<:@", transformToken(arg[i]), "@:>@"); + stderr.writeln(")"); + } + } +} +]])[]]) m4_define([AT_MAIN_DEFINE(d)],