d: put YYEMPTY in the TokenKind

* data/skeletons/d.m4, data/skeletons/lalr1.d (b4_token_enums): Rename YYTokenType as TokenKind. Define YYEMPTY.
* examples/d/calc.y, tests/calc.at, tests/scanner.at: Adjust.
2026-07-23 11:00:33 +00:00 · 2020-04-13 16:49:54 +02:00
parent 3877b7210e
commit 71e3f6d4da
7 changed files with 39 additions and 39 deletions
@@ -120,6 +120,11 @@ https://www.cs.tufts.edu/~nr/cs257/archive/clinton-jefferey/lr-error-messages.pd
 https://research.swtch.com/yyerror
 http://gallium.inria.fr/~fpottier/publis/fpottier-reachability-cc2016.pdf

+* D
+** yylex
+It would be better to have TokenKind as the return value.  Can we use
+reflection to support both output types?
+
 * Modernization
 Fix data/skeletons/yacc.c so that it defines YYPTRDIFF_T properly for modern
 and older C++ compilers.  Currently the code defaults to defining it to
@@ -264,7 +269,7 @@ It would be a very nice source of inspiration for the other languages.

 Valentin Tolmer is working on this.

-** yychar == yyempty_
+** yychar == YYEMPTY
 The code in yyerrlab reads:

      if (yychar <= YYEOF)
@@ -27,7 +27,7 @@ public interface Lexer
   * to the next token and prepares to return the semantic value
   * and beginning/ending positions of the token.
   * @return the token identifier corresponding to the next token. */
-  YYTokenType yylex ();
+  TokenKind yylex ();

  /**
   * Entry point for error reporting.  Emits an error
@@ -39,7 +39,7 @@ public interface Lexer
   void yyerror (YYLocation loc, string s);
 }

- semantic types are handled by D usions (same as for C/C++ parsers)
+- semantic types are handled by D unions (same as for C/C++ parsers)

 - the following (non-standard) %defines are supported:

@@ -164,11 +164,9 @@ m4_define([b4_token_enum],
 # Output the definition of the tokens as enums.
 m4_define([b4_token_enums],
 [/* Token kinds.  */
-public enum YYTokenType {
-
-  /** Token returned by the scanner to signal the end of its input.  */
-  EOF = 0,
-b4_symbol_foreach([b4_token_enum])
+public enum TokenKind {
+  ]b4_symbol_kind([-2])[ = -2,
+b4_symbol_foreach([b4_token_enum])dnl
 }
 ])

@@ -429,7 +429,7 @@ b4_locations_if([, ref ]b4_location_type[ yylocationp])[)
  public bool parse ()
  {
    /// Lookahead and lookahead in internal form.
-    int yychar = yyempty_;
+    int yychar = TokenKind.YYEMPTY;
    SymbolKind yytoken = SymbolKind.]b4_symbol_prefix[YYEMPTY;

    /* State.  */
@@ -493,7 +493,7 @@ m4_popdef([b4_at_dollar])])dnl
        }

        /* Read a lookahead token.  */
-        if (yychar == yyempty_)
+        if (yychar == TokenKind.YYEMPTY)
        {]b4_parse_trace_if([[
          yycdebugln ("Reading a token");]])[
          yychar = yylex ();]b4_locations_if([[
@@ -532,7 +532,7 @@ m4_popdef([b4_at_dollar])])dnl
          yy_symbol_print ("Shifting", yytoken, yylval]b4_locations_if([, yylloc])[);]])[

          /* Discard the token being shifted.  */
-          yychar = yyempty_;
+          yychar = TokenKind.YYEMPTY;

          /* Count tokens shifted since error; after three, turn off error
           * status.  */
@@ -573,7 +573,7 @@ m4_popdef([b4_at_dollar])])dnl
        if (yyerrstatus_ == 0)
        {
          ++yynerrs_;
-          if (yychar == yyempty_)
+          if (yychar == TokenKind.YYEMPTY)
            yytoken = SymbolKind.]b4_symbol_prefix[YYEMPTY;
          yyerror (]b4_locations_if([yylloc, ])[yysyntax_error (yystate, yytoken));
        }
@@ -584,14 +584,14 @@ m4_popdef([b4_at_dollar])])dnl
          /* If just tried and failed to reuse lookahead token after an
           * error, discard it.  */

-          if (yychar <= YYTokenType.EOF)
+          if (yychar <= TokenKind.]b4_symbol(0, [id])[)
          {
            /* Return failure if at end of input.  */
-            if (yychar == YYTokenType.EOF)
+            if (yychar == TokenKind.]b4_symbol(0, [id])[)
             return false;
          }
          else
-            yychar = yyempty_;
+            yychar = TokenKind.YYEMPTY;
        }

        /* Else will try to reuse lookahead token after shifting the error
@@ -841,7 +841,6 @@ m4_popdef([b4_at_dollar])])dnl

  private static immutable int yylast_ = ]b4_last[;
  private static immutable int yynnts_ = ]b4_nterms_number[;
-  private static immutable int yyempty_ = -2;
  private static immutable int yyfinal_ = ]b4_final_state_number[;
  private static immutable int yyntokens_ = ]b4_tokens_number[;

@@ -99,16 +99,15 @@ class CalcLexer(R) : Lexer
    while (!input.empty && input.front != '\n' && isWhite (input.front))
      input.popFront;

-    // Handle EOF.
    if (input.empty)
-      return YYTokenType.EOF;
+      return TokenKind.YYEOF;

    // Numbers.
    if (input.front.isNumber)
      {
        import std.conv : parse;
        semanticVal_.ival = input.parse!int;
-        return YYTokenType.NUM;
+        return TokenKind.NUM;
      }

    // Individual characters
@@ -116,16 +115,15 @@ class CalcLexer(R) : Lexer
    input.popFront;
    switch (ch)
      {
-      case EOF: return YYTokenType.EOF;
-      case '=': return YYTokenType.EQ;
-      case '+': return YYTokenType.PLUS;
-      case '-': return YYTokenType.MINUS;
-      case '*': return YYTokenType.STAR;
-      case '/': return YYTokenType.SLASH;
-      case '(': return YYTokenType.LPAR;
-      case ')': return YYTokenType.RPAR;
-      case '\n': return YYTokenType.EOL;
-      default:  assert(0);
+      case '=':  return TokenKind.EQ;
+      case '+':  return TokenKind.PLUS;
+      case '-':  return TokenKind.MINUS;
+      case '*':  return TokenKind.STAR;
+      case '/':  return TokenKind.SLASH;
+      case '(':  return TokenKind.LPAR;
+      case ')':  return TokenKind.RPAR;
+      case '\n': return TokenKind.EOL;
+      default: assert(0);
      }
  }
 }
@@ -306,13 +306,13 @@ class CalcLexer(R) : Lexer

    // Handle EOF.
    if (input.empty)
-      return YYTokenType.EOF;
+      return TokenKind.CALC_EOF;

    // Numbers.
    if (input.front.isNumber)
      {
        semanticVal_.ival = parseInt;
-        return YYTokenType.NUM;
+        return TokenKind.NUM;
      }

    // Individual characters
@@ -126,7 +126,7 @@ class YYLexer(R) : Lexer
    import std.uni : isNumber;
    // Handle EOF.
    if (input.empty)
-      return YYTokenType.EOF;
+      return TokenKind.END;

    auto c = input.front;
    input.popFront;
@@ -136,13 +136,13 @@ class YYLexer(R) : Lexer
    {
    case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
      semanticVal_.val = c - '0';
-      return YYTokenType.NUM;
-    case '+': return YYTokenType.PLUS;
-    case '-': return YYTokenType.MINUS;
-    case '*': return YYTokenType.STAR;
-    case '/': return YYTokenType.SLASH;
-    case '(': return YYTokenType.LPAR;
-    case ')': return YYTokenType.RPAR;
+      return TokenKind.NUM;
+    case '+': return TokenKind.PLUS;
+    case '-': return TokenKind.MINUS;
+    case '*': return TokenKind.STAR;
+    case '/': return TokenKind.SLASH;
+    case '(': return TokenKind.LPAR;
+    case ')': return TokenKind.RPAR;
    default: assert(0);
    }
  }