From 99efa3536911fe72af311380a729b3d66ac40e40 Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Wed, 29 Apr 2020 08:23:55 +0200 Subject: [PATCH] doc: document YYEOF, YYUNDEF and YYerror * doc/bison.texi (Special Tokens): New. * examples/c/bistromathic/parse.y: Formatting changes. --- NEWS | 5 ++++ TODO | 45 ++++++++++++++------------------- doc/bison.texi | 45 +++++++++++++++++++++++++++++++++ examples/c/bistromathic/parse.y | 6 ++--- 4 files changed, 72 insertions(+), 29 deletions(-) diff --git a/NEWS b/NEWS index 26342ab7..2d6ede09 100644 --- a/NEWS +++ b/NEWS @@ -16,6 +16,11 @@ GNU Bison NEWS error-recovery mode without triggering an additional error message. See the bistromathic for an example. +*** The bistromathic features internationalization + + Its way to build the error message is more general and is easy to use in + other projects. + * Noteworthy changes in release 3.5.90 (2020-04-18) [beta] ** Backward incompatible changes diff --git a/TODO b/TODO index ea1f2d28..23cab933 100644 --- a/TODO +++ b/TODO @@ -18,14 +18,6 @@ - It would be better to have TokenKind as return value. Can we use reflection to support both output types? -** Documentation -- YYerror, YYUNDEF, YYEOF -- YYerror and translation - -** yypcontext_expected_tokens -Beware that returning 0 is unclear: does it mean there are no possible -lookahead, or that there are too many? - ** YYerror https://git.savannah.gnu.org/gitweb/?p=gettext.git;a=blob;f=gettext-runtime/intl/plural.y;h=a712255af4f2f739c93336d4ff6556d932a426a5;hb=HEAD @@ -39,24 +31,6 @@ Have an example with a push parser. Use autocompletion in that case. *** calc.at Stop hard-coding "Calc". Adjust local.at (look for FIXME). -** doc -I feel it's ugly to use the GNU style to declare functions in the doc. It -generates tons of white space in the page, and may contribute to bad page -breaks. - -** consistency -token vs terminal, variable vs non terminal. 
- -** api.token.raw -The YYUNDEFTOK could be assigned a semantic value so that yyerror could be -used to report invalid lexemes. - -** push parsers -Consider deprecating impure push parsers. They add a lot of complexity, for -a bad feature. On the other hand, that would make it much harder to sit -push parsers on top of pull parser. Which is currently not relevant, since -push parsers are measurably slower. - * Bison 3.7 ** Counter example generation See https://github.com/akimd/bison/pull/15. @@ -79,6 +53,25 @@ would probably create many conflicts in Vincent's work (see previous point). Maybe we should check for m4_ and b4_ leaking out of the m4 processing, as Autoconf does. It would have caught overquotation issues. +** doc +I feel it's ugly to use the GNU style to declare functions in the doc. It +generates tons of white space in the page, and may contribute to bad page +breaks. + +** consistency +token vs terminal, variable vs non terminal. + +** api.token.raw +The YYUNDEFTOK could be assigned a semantic value so that yyerror could be +used to report invalid lexemes. + +** push parsers +Consider deprecating impure push parsers. They add a lot of complexity, for +a bad feature. On the other hand, that would make it much harder to sit +push parsers on top of pull parser. Which is currently not relevant, since +push parsers are measurably slower. + + * Bison 3.8 ** Unit rules / Injection rules (Akim Demaille) Maybe we could expand unit rules (or "injections", see diff --git a/doc/bison.texi b/doc/bison.texi index f7a70b73..d3e6de20 100644 --- a/doc/bison.texi +++ b/doc/bison.texi @@ -314,6 +314,7 @@ Parser C-Language Interface The Lexical Analyzer Function @code{yylex} * Calling Convention:: How @code{yyparse} calls @code{yylex}. +* Special Tokens:: Signaling end-of-file and errors to the parser. * Tokens from Literals:: Finding token kinds from string aliases. * Token Values:: How @code{yylex} must return the semantic value of the token it has read. 
@@ -7115,6 +7116,7 @@ that need it. @xref{Invocation}. @menu * Calling Convention:: How @code{yyparse} calls @code{yylex}. +* Special Tokens:: Signaling end-of-file and errors to the parser. * Tokens from Literals:: Finding token kinds from string aliases. * Token Values:: How @code{yylex} must return the semantic value of the token it has read. @@ -7168,6 +7170,49 @@ This interface has been designed so that the output from the @code{lex} utility can be used without change as the definition of @code{yylex}. +@node Special Tokens +@subsection Special Tokens + +In addition to the user-defined tokens, Bison generates a few special tokens +that @code{yylex} may return. + +The @code{YYEOF} token denotes the end of file, and signals to the parser +that there is nothing left afterwards. @xref{Calling Convention}, for an +example. + +Returning @code{YYUNDEF} tells the parser that some lexical error was found. +It will emit an error message about an ``invalid token'', and enter +error-recovery (@pxref{Error Recovery}). Returning an unknown token kind +results in the exact same behavior. + +Returning @code{YYerror} requires the parser to enter error-recovery +@emph{without} emitting an error message. This way the lexical analyzer can +produce accurate error messages about the invalid input (something the +parser cannot do), and yet benefit from the error-recovery features of the +parser. 
+ +@example +int +yylex (void) +@{ + @dots{} + switch (c) + @{ + @dots{} + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + @dots{} + return TOK_NUM; + @dots{} + case EOF: + return YYEOF; + default: + yyerror ("syntax error: invalid character: %c", c); + return YYerror; + @} +@} +@end example + @node Tokens from Literals @subsection Finding Tokens by String Literals diff --git a/examples/c/bistromathic/parse.y b/examples/c/bistromathic/parse.y index 925e06f5..ffed690e 100644 --- a/examples/c/bistromathic/parse.y +++ b/examples/c/bistromathic/parse.y @@ -267,9 +267,9 @@ yylex (const char **line, YYSTYPE *yylval, YYLTYPE *yylloc) { int nchars = 0; sscanf (*line - 1, "%lf%n", &yylval->TOK_NUM, &nchars); - *line += nchars - 1; - yylloc->last_column += nchars - 1; - return TOK_NUM; + *line += nchars - 1; + yylloc->last_column += nchars - 1; + return TOK_NUM; } // Identifiers.