skeletons: use "end of file" instead of "$end"

The name "$end" is nice in the reports: in particular, it keeps pointed rules (aka items) from becoming too long, and it also helps keep them "standard". But it is bad in error messages, where we should report "end of file" (or maybe "end of input"; this is debatable). So, unless the user has already defined an alias for the end token herself, make it "end of file". It should even be translated if the user already translated some tokens, so that there is now no strong reason to redefine the $end token. * src/output.c (prepare_symbol_names): Issue "end of file" instead of "$end". * data/skeletons/lalr1.java (yytnamerr_): Remove the renaming hack. * build-aux/update-test: Accept files with names containing a "+", such as c++.at. * tests/actions.at, tests/c++.at, tests/conflicts.at, * tests/glr-regression.at, tests/regression.at, tests/skeletons.at: Adjust.
2026-04-23 18:19:38 +00:00 · 2020-04-12 09:54:46 +02:00
parent a555b41990
commit 72c9fa4510
13 changed files with 53 additions and 50 deletions
@@ -6,6 +6,7 @@
 - i18n in Java
 - symbol.type_get should be kind_get, and it's not documented.
 - token code instead of token user number etc.
+- YYERRCODE and "end of file" and translation

 ** User token number, internal symbol number, external token number, etc.
 There is some confusion over these terms, which is even a problem for
@@ -43,10 +44,6 @@ I feel it's ugly to use the GNU style to declare functions in the doc.  It
 generates tons of white space in the page, and may contribute to bad page
 breaks.

-** improve syntax errors (UTF-8, internationalization)
-While at it, we should stop using "$end" by default, in favor of "end of
-file", or "end of input", whatever.  See how lalr1.java does that.
-
 ** consistency
 token vs terminal, variable vs non terminal.

@@ -102,7 +102,7 @@ def update(at_file, logfile):
 def process(logfile):
    log = contents(logfile)
    # Look for the file to update.
-    m = re.search(r'^\d+\. ([-\w]+\.at):\d+: ', log, re.MULTILINE)
+    m = re.search(r'^\d+\. ([\-\+\w]+\.at):\d+: ', log, re.MULTILINE)
    if not m:
        trace("no diff found:", logfile)
        return
@@ -1080,9 +1080,6 @@ b4_dollar_popdef[]dnl
              return yyr.toString ();
            }
      }
-    else if (yystr.equals ("$end"))
-      return "end of input";
-
    return yystr;
  }

@@ -29,9 +29,9 @@ run -noerr 0 9 -p
 cat >input <<EOF
 (1+2) *
 EOF
-run 1 'err: 1.8-2.0: syntax error, unexpected end-of-line, expecting ( or number'
+run 1 'err: 1.8-2.0: syntax error, unexpected end of line, expecting ( or number'

 cat >input <<EOF
 1 / (2 - 2)
 EOF
-run 1 'err: 1.1-11: error: division by zero"
+run 1 'err: 1.1-11: error: division by zero'
@@ -49,8 +49,7 @@
  SLASH  "/"
  LPAREN "("
  RPAREN ")"
-  EOL    "end-of-line"
-  EOF 0  "end-of-file"
+  EOL    "end of line"
 ;

 %token <int> NUM "number"
@@ -54,6 +54,6 @@

 .        yyerror (yylloc, nerrs, "syntax error, invalid character"); continue;

-<<EOF>>  return TOK_EOF;
+<<EOF>>  return TOK_YYEOF;
 %%
 /* Epilogue (C code). */
@@ -192,6 +192,9 @@ xescape_trigraphs (const char *src)
 static void
 prepare_symbol_names (char const *muscle_name)
 {
+  const bool eof_is_user_defined
+    = !endtoken->alias || STRNEQ (endtoken->alias->tag, "$end");
+
  /* We assume that the table will be output starting at column 2. */
  const bool quote = STREQ (muscle_name, "tname");
  bool has_translations = false;
@@ -201,10 +204,16 @@ prepare_symbol_names (char const *muscle_name)
  set_quoting_flags (qo, QA_SPLIT_TRIGRAPHS);
  for (int i = 0; i < nsyms; i++)
    {
+      /* Use "end of file" rather than "$end".  But keep "$end" in the
+         reports, it's shorter and more consistent.  */
+      const char *tag
+        = !eof_is_user_defined && symbols[i]->content == endtoken->content
+        ? "\"end of file\""
+        : symbols[i]->tag;
      char *cp
-        = symbols[i]->tag[0] == '"' && !quote
-        ? xescape_trigraphs (symbols[i]->tag)
-        : quotearg_alloc (symbols[i]->tag, -1, qo);
+        = tag[0] == '"' && !quote
+        ? xescape_trigraphs (tag)
+        : quotearg_alloc (tag, -1, qo);
      /* Width of the next token, including the two quotes, the
         comma and the space.  */
      int width
@@ -1146,7 +1146,7 @@ Entering state 6
 Stack now 0 1 3 5 6
 Reading a token
 Now at end of input.
-1.5: syntax error, unexpected $end, expecting 'e'
+1.5: syntax error, unexpected end of file, expecting 'e'
 Error: popping token 'd' (1.4: <> printer for 'd' @ 4)
 Stack now 0 1 3 5
 Error: popping token 'c' (1.3: 'b'/'c' printer for 'c' @ 3)
@@ -1155,7 +1155,7 @@ Error: popping token 'b' (1.2: 'b'/'c' printer for 'b' @ 2)
 Stack now 0 1
 Error: popping token 'a' (1.1: <> printer for 'a' @ 1)
 Stack now 0
-Cleanup: discarding lookahead token $end (1.5: )
+Cleanup: discarding lookahead token "end of file" (1.5: )
 Stack now 0
 ]])

@@ -1275,7 +1275,7 @@ Entering state 8
 Stack now 0 1 3 5 6 7 8
 Reading a token
 Now at end of input.
-syntax error, unexpected $end, expecting 'g'
+syntax error, unexpected end of file, expecting 'g'
 Error: popping token 'f' (<*>/<field2>/e printer)
 Stack now 0 1 3 5 6 7
 Error: popping token 'e' (<*>/<field2>/e printer)
@@ -1288,7 +1288,7 @@ Error: popping token 'b' (<field1> printer)
 Stack now 0 1
 Error: popping token 'a' (<*>/<field2>/e printer)
 Stack now 0
-Cleanup: discarding lookahead token $end ()
+Cleanup: discarding lookahead token "end of file" ()
 Stack now 0
 ]])

@@ -1511,7 +1511,7 @@ Entering state 3
 Stack now 0 1 3
 Reading a token
 Now at end of input.
-Cleanup: discarding lookahead token $end ()
+Cleanup: discarding lookahead token "end of file" ()
 Stack now 0 1 3
 Cleanup: popping token error ()
 Cleanup: popping token 'a' ('a')
@@ -1685,7 +1685,7 @@ DESTROY 2
 Stack now 0 2
 Error: popping nterm $@1 (: )
 Stack now 0
-Cleanup: discarding lookahead token $end (: )
+Cleanup: discarding lookahead token "end of file" (: )
 Stack now 0
 ]])

@@ -1330,7 +1330,7 @@ AT_PARSER_CHECK([[input aaaae]], [[2]], [[]],
 ]])

 AT_PARSER_CHECK([[input aaaaE]], [[2]], [[]],
-[[exception caught: syntax error, unexpected $end, expecting 'a'
+[[exception caught: syntax error, unexpected end of file, expecting 'a'
 ]])

 AT_PARSER_CHECK([[input aaaaT]], [[1]])
@@ -346,7 +346,7 @@ m4_pushdef([AT_NONASSOC_AND_EOF_CHECK],
 [AT_BISON_CHECK([$1[ -o input.c input.y]])
 AT_COMPILE([input])

-m4_pushdef([AT_EXPECTING], [m4_if($2, [correct], [[, expecting $end]])])
+m4_pushdef([AT_EXPECTING], [m4_if($2, [correct], [[, expecting end of file]])])

 AT_PARSER_CHECK([input '0<0'])
 AT_PARSER_CHECK([input '0<0<0'], [1], [],
@@ -509,50 +509,50 @@ m4_pushdef([AT_PREVIOUS_STATE_INPUT], [[a]])
 AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr]],
                           [AT_PREVIOUS_STATE_GRAMMAR],
                           [AT_PREVIOUS_STATE_INPUT],
-                           [[$end]], [[none]])
+                           [[end of file]], [[none]])
 AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr
                             %glr-parser]],
                           [AT_PREVIOUS_STATE_GRAMMAR],
                           [AT_PREVIOUS_STATE_INPUT],
-                           [[$end]], [[none]])
+                           [[end of file]], [[none]])
 AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr
                             %language "c++"]],
                           [AT_PREVIOUS_STATE_GRAMMAR],
                           [AT_PREVIOUS_STATE_INPUT],
-                           [[$end]], [[none]])
+                           [[end of file]], [[none]])
 AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr
                             %language "java"]],
                           [AT_PREVIOUS_STATE_GRAMMAR],
                           [AT_PREVIOUS_STATE_INPUT],
-                           [[end of input]], [[none]])
+                           [[end of file]], [[none]])

 # Even canonical LR doesn't foresee the error for 'a'!
 AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr
                             %define lr.default-reduction consistent]],
                           [AT_PREVIOUS_STATE_GRAMMAR],
                           [AT_PREVIOUS_STATE_INPUT],
-                           [[$end]], [[ab]])
+                           [[end of file]], [[ab]])
 AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr
                             %define lr.default-reduction accepting]],
                           [AT_PREVIOUS_STATE_GRAMMAR],
                           [AT_PREVIOUS_STATE_INPUT],
-                           [[$end]], [[ab]])
+                           [[end of file]], [[ab]])
 AT_CONSISTENT_ERRORS_CHECK([[%define lr.type canonical-lr]],
                           [AT_PREVIOUS_STATE_GRAMMAR],
                           [AT_PREVIOUS_STATE_INPUT],
-                           [[$end]], [[ab]])
+                           [[end of file]], [[ab]])

 # Only LAC gets it right.  In C.
 AT_CONSISTENT_ERRORS_CHECK([[%define lr.type canonical-lr
                             %define parse.lac full]],
                           [AT_PREVIOUS_STATE_GRAMMAR],
                           [AT_PREVIOUS_STATE_INPUT],
-                           [[$end]], [[b]])
+                           [[end of file]], [[b]])
 AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr
                             %define parse.lac full]],
                           [AT_PREVIOUS_STATE_GRAMMAR],
                           [AT_PREVIOUS_STATE_INPUT],
-                           [[$end]], [[b]])
+                           [[end of file]], [[b]])

 # Only LAC gets it right.  In C++.
 AT_CONSISTENT_ERRORS_CHECK([[%language "c++"
@@ -560,13 +560,13 @@ AT_CONSISTENT_ERRORS_CHECK([[%language "c++"
                             %define parse.lac full]],
                           [AT_PREVIOUS_STATE_GRAMMAR],
                           [AT_PREVIOUS_STATE_INPUT],
-                           [[$end]], [[b]])
+                           [[end of file]], [[b]])
 AT_CONSISTENT_ERRORS_CHECK([[%language "c++"
                             %define lr.type ielr
                             %define parse.lac full]],
                           [AT_PREVIOUS_STATE_GRAMMAR],
                           [AT_PREVIOUS_STATE_INPUT],
-                           [[$end]], [[b]])
+                           [[end of file]], [[b]])

 m4_popdef([AT_PREVIOUS_STATE_GRAMMAR])
 m4_popdef([AT_PREVIOUS_STATE_INPUT])
@@ -638,11 +638,11 @@ AT_CONSISTENT_ERRORS_CHECK([[%define lr.default-reduction consistent]],
 AT_CONSISTENT_ERRORS_CHECK([[%define lr.default-reduction accepting]],
                           [AT_USER_ACTION_GRAMMAR],
                           [AT_USER_ACTION_INPUT],
-                           [[$end]], [[a]])
+                           [[end of file]], [[a]])
 AT_CONSISTENT_ERRORS_CHECK([[%define lr.type canonical-lr]],
                           [AT_USER_ACTION_GRAMMAR],
                           [AT_USER_ACTION_INPUT],
-                           [[$end]], [[a]])
+                           [[end of file]], [[a]])

 AT_CONSISTENT_ERRORS_CHECK([[%define parse.lac full]],
                           [AT_USER_ACTION_GRAMMAR],
@@ -652,7 +652,7 @@ AT_CONSISTENT_ERRORS_CHECK([[%define parse.lac full
                             %define lr.default-reduction accepting]],
                           [AT_USER_ACTION_GRAMMAR],
                           [AT_USER_ACTION_INPUT],
-                           [[$end]], [[none]])
+                           [[end of file]], [[none]])

 m4_popdef([AT_USER_ACTION_GRAMMAR])
 m4_popdef([AT_USER_ACTION_INPUT])
@@ -1742,7 +1742,7 @@ Stack 1 Entering state 2
 Now at end of input.
 Removing dead stacks.
 Rename stack 1 -> 0.
-On stack 0, shifting token $end ()
+On stack 0, shifting token "end of file" ()
 Stack 0 now in state #5
 Ambiguity detected.
 Option 1,
@@ -1760,7 +1760,7 @@ Option 2,
    d <empty>

 syntax is ambiguous
-Cleanup: popping token $end ()
+Cleanup: popping token "end of file" ()
 Cleanup: popping unresolved nterm start ()
 Cleanup: popping nterm d ()
 Cleanup: popping token 'c' ()
@@ -701,8 +701,9 @@ static const yytype_int8 yyrline[] =
 };
 static const char *const yytname[] =
 {
-  "$end", "error", "$undefined", "\"if\"", "\"const\"", "\"then\"",
-  "\"else\"", "$accept", "statement", "struct_stat", "if", "else", YY_NULLPTR
+  "\"end of file\"", "error", "$undefined", "\"if\"", "\"const\"",
+  "\"then\"", "\"else\"", "$accept", "statement", "struct_stat", "if",
+  "else", YY_NULLPTR
 };
 static const yytype_int16 yytoknum[] =
 {
@@ -967,7 +968,7 @@ Entering state 1
 Stack now 0 1
 Reading a token
 Next token is token 'a' (PRINTER)
-syntax error, unexpected 'a', expecting $end
+syntax error, unexpected 'a', expecting end of file
 Error: popping nterm start ()
 Stack now 0
 Cleanup: discarding lookahead token 'a' (PRINTER)
@@ -1177,7 +1178,7 @@ AT_BISON_CHECK([[-o input.c input.y]])
 AT_COMPILE([[input]])
 AT_PARSER_CHECK([[input]], [[1]], [],
 [[syntax error, unexpected 'a', expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B
-syntax error, unexpected $end, expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B
+syntax error, unexpected end of file, expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B
 ]])

 AT_CLEANUP
@@ -1445,10 +1446,10 @@ Entering state 0
 Stack now 0
 Reading a token
 Now at end of input.
-LAC: initial context established for $end
-LAC: checking lookahead $end: R2 G3 R2 G5 R2 G6 R2 G7 R2 G8 R2 G9 R2 G10 R2 G11 R2 (max size exceeded)
+LAC: initial context established for "end of file"
+LAC: checking lookahead "end of file": R2 G3 R2 G5 R2 G6 R2 G7 R2 G8 R2 G9 R2 G10 R2 G11 R2 (max size exceeded)
 memory exhausted
-Cleanup: discarding lookahead token $end ()
+Cleanup: discarding lookahead token "end of file" ()
 Stack now 0
 ]])

@@ -1464,7 +1465,7 @@ Next token is token $undefined ()
 LAC: initial context established for $undefined
 LAC: checking lookahead $undefined: Always Err
 Constructing syntax error message
-LAC: checking lookahead $end: R2 G3 R2 G5 R2 G6 R2 G7 R2 G8 R2 G9 R2 G10 R2 G11 R2 (max size exceeded)
+LAC: checking lookahead "end of file": R2 G3 R2 G5 R2 G6 R2 G7 R2 G8 R2 G9 R2 G10 R2 G11 R2 (max size exceeded)
 syntax error
 memory exhausted
 Cleanup: discarding lookahead token $undefined ()
@@ -120,13 +120,13 @@ AT_GRAM])
 AT_BISON_CHECK([[--skeleton=yacc.c -o input-cmd-line.c input-cmd-line.y]])
 AT_COMPILE([[input-cmd-line]])
 AT_PARSER_CHECK([[input-cmd-line]], [[1]], [],
-[[syntax error, unexpected 'a', expecting $end
+[[syntax error, unexpected 'a', expecting end of file
 ]])

 AT_BISON_CHECK([[-o input-gram.c input-gram.y]])
 AT_COMPILE([[input-gram]])
 AT_PARSER_CHECK([[input-gram]], [[1]], [],
-[[syntax error, unexpected 'a', expecting $end
+[[syntax error, unexpected 'a', expecting end of file
 ]])

 m4_popdef([AT_GRAM])