skeletons: use "end of file" instead of "$end"

The name "$end" is nice in the report: in particular, it keeps
pointed-rules (aka items) from getting too long.  It also helps keep
them "standard".

But it is bad in error messages: we should report "end of file" (or
maybe "end of input", this is debatable).  So, unless the user already
defined the alias for the end token herself, make it "end of file".
It should even be translated if the user already translated some
tokens, so that there is now no strong reason to redefine the $end
token.

* src/output.c (prepare_symbol_names): Issue "end of file" instead of
"$end".

* data/skeletons/lalr1.java (yytnamerr_): Remove the renaming hack.

* build-aux/update-test: Accept files with names containing a "+",
such as c++.at.
* tests/actions.at, tests/c++.at, tests/conflicts.at,
* tests/glr-regression.at, tests/regression.at, tests/skeletons.at:
Adjust.
This commit is contained in:
Akim Demaille
2020-04-12 09:54:46 +02:00
parent a555b41990
commit 72c9fa4510
13 changed files with 53 additions and 50 deletions

5
TODO
View File

@@ -6,6 +6,7 @@
- i18n in Java - i18n in Java
- symbol.type_get should be kind_get, and it's not documented. - symbol.type_get should be kind_get, and it's not documented.
- token code instead of token user number etc. - token code instead of token user number etc.
- YYERRCODE and "end of file" and translation
** User token number, internal symbol number, external token number, etc. ** User token number, internal symbol number, external token number, etc.
There is some confusion over these terms, which is even a problem for There is some confusion over these terms, which is even a problem for
@@ -43,10 +44,6 @@ I feel it's ugly to use the GNU style to declare functions in the doc. It
generates tons of white space in the page, and may contribute to bad page generates tons of white space in the page, and may contribute to bad page
breaks. breaks.
** improve syntax errors (UTF-8, internationalization)
While at it, we should stop using "$end" by default, in favor of "end of
file", or "end of input", whatever. See how lalr1.java does that.
** consistency ** consistency
token vs terminal, variable vs non terminal. token vs terminal, variable vs non terminal.

View File

@@ -102,7 +102,7 @@ def update(at_file, logfile):
def process(logfile): def process(logfile):
log = contents(logfile) log = contents(logfile)
# Look for the file to update. # Look for the file to update.
m = re.search(r'^\d+\. ([-\w]+\.at):\d+: ', log, re.MULTILINE) m = re.search(r'^\d+\. ([\-\+\w]+\.at):\d+: ', log, re.MULTILINE)
if not m: if not m:
trace("no diff found:", logfile) trace("no diff found:", logfile)
return return

View File

@@ -1080,9 +1080,6 @@ b4_dollar_popdef[]dnl
return yyr.toString (); return yyr.toString ();
} }
} }
else if (yystr.equals ("$end"))
return "end of input";
return yystr; return yystr;
} }

View File

@@ -29,9 +29,9 @@ run -noerr 0 9 -p
cat >input <<EOF cat >input <<EOF
(1+2) * (1+2) *
EOF EOF
run 1 'err: 1.8-2.0: syntax error, unexpected end-of-line, expecting ( or number' run 1 'err: 1.8-2.0: syntax error, unexpected end of line, expecting ( or number'
cat >input <<EOF cat >input <<EOF
1 / (2 - 2) 1 / (2 - 2)
EOF EOF
run 1 'err: 1.1-11: error: division by zero" run 1 'err: 1.1-11: error: division by zero'

View File

@@ -49,8 +49,7 @@
SLASH "/" SLASH "/"
LPAREN "(" LPAREN "("
RPAREN ")" RPAREN ")"
EOL "end-of-line" EOL "end of line"
EOF 0 "end-of-file"
; ;
%token <int> NUM "number" %token <int> NUM "number"

View File

@@ -54,6 +54,6 @@
. yyerror (yylloc, nerrs, "syntax error, invalid character"); continue; . yyerror (yylloc, nerrs, "syntax error, invalid character"); continue;
<<EOF>> return TOK_EOF; <<EOF>> return TOK_YYEOF;
%% %%
/* Epilogue (C code). */ /* Epilogue (C code). */

View File

@@ -192,6 +192,9 @@ xescape_trigraphs (const char *src)
static void static void
prepare_symbol_names (char const *muscle_name) prepare_symbol_names (char const *muscle_name)
{ {
const bool eof_is_user_defined
= !endtoken->alias || STRNEQ (endtoken->alias->tag, "$end");
/* We assume that the table will be output starting at column 2. */ /* We assume that the table will be output starting at column 2. */
const bool quote = STREQ (muscle_name, "tname"); const bool quote = STREQ (muscle_name, "tname");
bool has_translations = false; bool has_translations = false;
@@ -201,10 +204,16 @@ prepare_symbol_names (char const *muscle_name)
set_quoting_flags (qo, QA_SPLIT_TRIGRAPHS); set_quoting_flags (qo, QA_SPLIT_TRIGRAPHS);
for (int i = 0; i < nsyms; i++) for (int i = 0; i < nsyms; i++)
{ {
/* Use "end of file" rather than "$end". But keep "$end" in the
reports, it's shorter and more consistent. */
const char *tag
= !eof_is_user_defined && symbols[i]->content == endtoken->content
? "\"end of file\""
: symbols[i]->tag;
char *cp char *cp
= symbols[i]->tag[0] == '"' && !quote = tag[0] == '"' && !quote
? xescape_trigraphs (symbols[i]->tag) ? xescape_trigraphs (tag)
: quotearg_alloc (symbols[i]->tag, -1, qo); : quotearg_alloc (tag, -1, qo);
/* Width of the next token, including the two quotes, the /* Width of the next token, including the two quotes, the
comma and the space. */ comma and the space. */
int width int width

View File

@@ -1146,7 +1146,7 @@ Entering state 6
Stack now 0 1 3 5 6 Stack now 0 1 3 5 6
Reading a token Reading a token
Now at end of input. Now at end of input.
1.5: syntax error, unexpected $end, expecting 'e' 1.5: syntax error, unexpected end of file, expecting 'e'
Error: popping token 'd' (1.4: <> printer for 'd' @ 4) Error: popping token 'd' (1.4: <> printer for 'd' @ 4)
Stack now 0 1 3 5 Stack now 0 1 3 5
Error: popping token 'c' (1.3: 'b'/'c' printer for 'c' @ 3) Error: popping token 'c' (1.3: 'b'/'c' printer for 'c' @ 3)
@@ -1155,7 +1155,7 @@ Error: popping token 'b' (1.2: 'b'/'c' printer for 'b' @ 2)
Stack now 0 1 Stack now 0 1
Error: popping token 'a' (1.1: <> printer for 'a' @ 1) Error: popping token 'a' (1.1: <> printer for 'a' @ 1)
Stack now 0 Stack now 0
Cleanup: discarding lookahead token $end (1.5: ) Cleanup: discarding lookahead token "end of file" (1.5: )
Stack now 0 Stack now 0
]]) ]])
@@ -1275,7 +1275,7 @@ Entering state 8
Stack now 0 1 3 5 6 7 8 Stack now 0 1 3 5 6 7 8
Reading a token Reading a token
Now at end of input. Now at end of input.
syntax error, unexpected $end, expecting 'g' syntax error, unexpected end of file, expecting 'g'
Error: popping token 'f' (<*>/<field2>/e printer) Error: popping token 'f' (<*>/<field2>/e printer)
Stack now 0 1 3 5 6 7 Stack now 0 1 3 5 6 7
Error: popping token 'e' (<*>/<field2>/e printer) Error: popping token 'e' (<*>/<field2>/e printer)
@@ -1288,7 +1288,7 @@ Error: popping token 'b' (<field1> printer)
Stack now 0 1 Stack now 0 1
Error: popping token 'a' (<*>/<field2>/e printer) Error: popping token 'a' (<*>/<field2>/e printer)
Stack now 0 Stack now 0
Cleanup: discarding lookahead token $end () Cleanup: discarding lookahead token "end of file" ()
Stack now 0 Stack now 0
]]) ]])
@@ -1511,7 +1511,7 @@ Entering state 3
Stack now 0 1 3 Stack now 0 1 3
Reading a token Reading a token
Now at end of input. Now at end of input.
Cleanup: discarding lookahead token $end () Cleanup: discarding lookahead token "end of file" ()
Stack now 0 1 3 Stack now 0 1 3
Cleanup: popping token error () Cleanup: popping token error ()
Cleanup: popping token 'a' ('a') Cleanup: popping token 'a' ('a')
@@ -1685,7 +1685,7 @@ DESTROY 2
Stack now 0 2 Stack now 0 2
Error: popping nterm $@1 (: ) Error: popping nterm $@1 (: )
Stack now 0 Stack now 0
Cleanup: discarding lookahead token $end (: ) Cleanup: discarding lookahead token "end of file" (: )
Stack now 0 Stack now 0
]]) ]])

View File

@@ -1330,7 +1330,7 @@ AT_PARSER_CHECK([[input aaaae]], [[2]], [[]],
]]) ]])
AT_PARSER_CHECK([[input aaaaE]], [[2]], [[]], AT_PARSER_CHECK([[input aaaaE]], [[2]], [[]],
[[exception caught: syntax error, unexpected $end, expecting 'a' [[exception caught: syntax error, unexpected end of file, expecting 'a'
]]) ]])
AT_PARSER_CHECK([[input aaaaT]], [[1]]) AT_PARSER_CHECK([[input aaaaT]], [[1]])

View File

@@ -346,7 +346,7 @@ m4_pushdef([AT_NONASSOC_AND_EOF_CHECK],
[AT_BISON_CHECK([$1[ -o input.c input.y]]) [AT_BISON_CHECK([$1[ -o input.c input.y]])
AT_COMPILE([input]) AT_COMPILE([input])
m4_pushdef([AT_EXPECTING], [m4_if($2, [correct], [[, expecting $end]])]) m4_pushdef([AT_EXPECTING], [m4_if($2, [correct], [[, expecting end of file]])])
AT_PARSER_CHECK([input '0<0']) AT_PARSER_CHECK([input '0<0'])
AT_PARSER_CHECK([input '0<0<0'], [1], [], AT_PARSER_CHECK([input '0<0<0'], [1], [],
@@ -509,50 +509,50 @@ m4_pushdef([AT_PREVIOUS_STATE_INPUT], [[a]])
AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr]], AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr]],
[AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_GRAMMAR],
[AT_PREVIOUS_STATE_INPUT], [AT_PREVIOUS_STATE_INPUT],
[[$end]], [[none]]) [[end of file]], [[none]])
AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr
%glr-parser]], %glr-parser]],
[AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_GRAMMAR],
[AT_PREVIOUS_STATE_INPUT], [AT_PREVIOUS_STATE_INPUT],
[[$end]], [[none]]) [[end of file]], [[none]])
AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr
%language "c++"]], %language "c++"]],
[AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_GRAMMAR],
[AT_PREVIOUS_STATE_INPUT], [AT_PREVIOUS_STATE_INPUT],
[[$end]], [[none]]) [[end of file]], [[none]])
AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr
%language "java"]], %language "java"]],
[AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_GRAMMAR],
[AT_PREVIOUS_STATE_INPUT], [AT_PREVIOUS_STATE_INPUT],
[[end of input]], [[none]]) [[end of file]], [[none]])
# Even canonical LR doesn't foresee the error for 'a'! # Even canonical LR doesn't foresee the error for 'a'!
AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr
%define lr.default-reduction consistent]], %define lr.default-reduction consistent]],
[AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_GRAMMAR],
[AT_PREVIOUS_STATE_INPUT], [AT_PREVIOUS_STATE_INPUT],
[[$end]], [[ab]]) [[end of file]], [[ab]])
AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr
%define lr.default-reduction accepting]], %define lr.default-reduction accepting]],
[AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_GRAMMAR],
[AT_PREVIOUS_STATE_INPUT], [AT_PREVIOUS_STATE_INPUT],
[[$end]], [[ab]]) [[end of file]], [[ab]])
AT_CONSISTENT_ERRORS_CHECK([[%define lr.type canonical-lr]], AT_CONSISTENT_ERRORS_CHECK([[%define lr.type canonical-lr]],
[AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_GRAMMAR],
[AT_PREVIOUS_STATE_INPUT], [AT_PREVIOUS_STATE_INPUT],
[[$end]], [[ab]]) [[end of file]], [[ab]])
# Only LAC gets it right. In C. # Only LAC gets it right. In C.
AT_CONSISTENT_ERRORS_CHECK([[%define lr.type canonical-lr AT_CONSISTENT_ERRORS_CHECK([[%define lr.type canonical-lr
%define parse.lac full]], %define parse.lac full]],
[AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_GRAMMAR],
[AT_PREVIOUS_STATE_INPUT], [AT_PREVIOUS_STATE_INPUT],
[[$end]], [[b]]) [[end of file]], [[b]])
AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr
%define parse.lac full]], %define parse.lac full]],
[AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_GRAMMAR],
[AT_PREVIOUS_STATE_INPUT], [AT_PREVIOUS_STATE_INPUT],
[[$end]], [[b]]) [[end of file]], [[b]])
# Only LAC gets it right. In C++. # Only LAC gets it right. In C++.
AT_CONSISTENT_ERRORS_CHECK([[%language "c++" AT_CONSISTENT_ERRORS_CHECK([[%language "c++"
@@ -560,13 +560,13 @@ AT_CONSISTENT_ERRORS_CHECK([[%language "c++"
%define parse.lac full]], %define parse.lac full]],
[AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_GRAMMAR],
[AT_PREVIOUS_STATE_INPUT], [AT_PREVIOUS_STATE_INPUT],
[[$end]], [[b]]) [[end of file]], [[b]])
AT_CONSISTENT_ERRORS_CHECK([[%language "c++" AT_CONSISTENT_ERRORS_CHECK([[%language "c++"
%define lr.type ielr %define lr.type ielr
%define parse.lac full]], %define parse.lac full]],
[AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_GRAMMAR],
[AT_PREVIOUS_STATE_INPUT], [AT_PREVIOUS_STATE_INPUT],
[[$end]], [[b]]) [[end of file]], [[b]])
m4_popdef([AT_PREVIOUS_STATE_GRAMMAR]) m4_popdef([AT_PREVIOUS_STATE_GRAMMAR])
m4_popdef([AT_PREVIOUS_STATE_INPUT]) m4_popdef([AT_PREVIOUS_STATE_INPUT])
@@ -638,11 +638,11 @@ AT_CONSISTENT_ERRORS_CHECK([[%define lr.default-reduction consistent]],
AT_CONSISTENT_ERRORS_CHECK([[%define lr.default-reduction accepting]], AT_CONSISTENT_ERRORS_CHECK([[%define lr.default-reduction accepting]],
[AT_USER_ACTION_GRAMMAR], [AT_USER_ACTION_GRAMMAR],
[AT_USER_ACTION_INPUT], [AT_USER_ACTION_INPUT],
[[$end]], [[a]]) [[end of file]], [[a]])
AT_CONSISTENT_ERRORS_CHECK([[%define lr.type canonical-lr]], AT_CONSISTENT_ERRORS_CHECK([[%define lr.type canonical-lr]],
[AT_USER_ACTION_GRAMMAR], [AT_USER_ACTION_GRAMMAR],
[AT_USER_ACTION_INPUT], [AT_USER_ACTION_INPUT],
[[$end]], [[a]]) [[end of file]], [[a]])
AT_CONSISTENT_ERRORS_CHECK([[%define parse.lac full]], AT_CONSISTENT_ERRORS_CHECK([[%define parse.lac full]],
[AT_USER_ACTION_GRAMMAR], [AT_USER_ACTION_GRAMMAR],
@@ -652,7 +652,7 @@ AT_CONSISTENT_ERRORS_CHECK([[%define parse.lac full
%define lr.default-reduction accepting]], %define lr.default-reduction accepting]],
[AT_USER_ACTION_GRAMMAR], [AT_USER_ACTION_GRAMMAR],
[AT_USER_ACTION_INPUT], [AT_USER_ACTION_INPUT],
[[$end]], [[none]]) [[end of file]], [[none]])
m4_popdef([AT_USER_ACTION_GRAMMAR]) m4_popdef([AT_USER_ACTION_GRAMMAR])
m4_popdef([AT_USER_ACTION_INPUT]) m4_popdef([AT_USER_ACTION_INPUT])

View File

@@ -1742,7 +1742,7 @@ Stack 1 Entering state 2
Now at end of input. Now at end of input.
Removing dead stacks. Removing dead stacks.
Rename stack 1 -> 0. Rename stack 1 -> 0.
On stack 0, shifting token $end () On stack 0, shifting token "end of file" ()
Stack 0 now in state #5 Stack 0 now in state #5
Ambiguity detected. Ambiguity detected.
Option 1, Option 1,
@@ -1760,7 +1760,7 @@ Option 2,
d <empty> d <empty>
syntax is ambiguous syntax is ambiguous
Cleanup: popping token $end () Cleanup: popping token "end of file" ()
Cleanup: popping unresolved nterm start () Cleanup: popping unresolved nterm start ()
Cleanup: popping nterm d () Cleanup: popping nterm d ()
Cleanup: popping token 'c' () Cleanup: popping token 'c' ()

View File

@@ -701,8 +701,9 @@ static const yytype_int8 yyrline[] =
}; };
static const char *const yytname[] = static const char *const yytname[] =
{ {
"$end", "error", "$undefined", "\"if\"", "\"const\"", "\"then\"", "\"end of file\"", "error", "$undefined", "\"if\"", "\"const\"",
"\"else\"", "$accept", "statement", "struct_stat", "if", "else", YY_NULLPTR "\"then\"", "\"else\"", "$accept", "statement", "struct_stat", "if",
"else", YY_NULLPTR
}; };
static const yytype_int16 yytoknum[] = static const yytype_int16 yytoknum[] =
{ {
@@ -967,7 +968,7 @@ Entering state 1
Stack now 0 1 Stack now 0 1
Reading a token Reading a token
Next token is token 'a' (PRINTER) Next token is token 'a' (PRINTER)
syntax error, unexpected 'a', expecting $end syntax error, unexpected 'a', expecting end of file
Error: popping nterm start () Error: popping nterm start ()
Stack now 0 Stack now 0
Cleanup: discarding lookahead token 'a' (PRINTER) Cleanup: discarding lookahead token 'a' (PRINTER)
@@ -1177,7 +1178,7 @@ AT_BISON_CHECK([[-o input.c input.y]])
AT_COMPILE([[input]]) AT_COMPILE([[input]])
AT_PARSER_CHECK([[input]], [[1]], [], AT_PARSER_CHECK([[input]], [[1]], [],
[[syntax error, unexpected 'a', expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B [[syntax error, unexpected 'a', expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B
syntax error, unexpected $end, expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B syntax error, unexpected end of file, expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B
]]) ]])
AT_CLEANUP AT_CLEANUP
@@ -1445,10 +1446,10 @@ Entering state 0
Stack now 0 Stack now 0
Reading a token Reading a token
Now at end of input. Now at end of input.
LAC: initial context established for $end LAC: initial context established for "end of file"
LAC: checking lookahead $end: R2 G3 R2 G5 R2 G6 R2 G7 R2 G8 R2 G9 R2 G10 R2 G11 R2 (max size exceeded) LAC: checking lookahead "end of file": R2 G3 R2 G5 R2 G6 R2 G7 R2 G8 R2 G9 R2 G10 R2 G11 R2 (max size exceeded)
memory exhausted memory exhausted
Cleanup: discarding lookahead token $end () Cleanup: discarding lookahead token "end of file" ()
Stack now 0 Stack now 0
]]) ]])
@@ -1464,7 +1465,7 @@ Next token is token $undefined ()
LAC: initial context established for $undefined LAC: initial context established for $undefined
LAC: checking lookahead $undefined: Always Err LAC: checking lookahead $undefined: Always Err
Constructing syntax error message Constructing syntax error message
LAC: checking lookahead $end: R2 G3 R2 G5 R2 G6 R2 G7 R2 G8 R2 G9 R2 G10 R2 G11 R2 (max size exceeded) LAC: checking lookahead "end of file": R2 G3 R2 G5 R2 G6 R2 G7 R2 G8 R2 G9 R2 G10 R2 G11 R2 (max size exceeded)
syntax error syntax error
memory exhausted memory exhausted
Cleanup: discarding lookahead token $undefined () Cleanup: discarding lookahead token $undefined ()

View File

@@ -120,13 +120,13 @@ AT_GRAM])
AT_BISON_CHECK([[--skeleton=yacc.c -o input-cmd-line.c input-cmd-line.y]]) AT_BISON_CHECK([[--skeleton=yacc.c -o input-cmd-line.c input-cmd-line.y]])
AT_COMPILE([[input-cmd-line]]) AT_COMPILE([[input-cmd-line]])
AT_PARSER_CHECK([[input-cmd-line]], [[1]], [], AT_PARSER_CHECK([[input-cmd-line]], [[1]], [],
[[syntax error, unexpected 'a', expecting $end [[syntax error, unexpected 'a', expecting end of file
]]) ]])
AT_BISON_CHECK([[-o input-gram.c input-gram.y]]) AT_BISON_CHECK([[-o input-gram.c input-gram.y]])
AT_COMPILE([[input-gram]]) AT_COMPILE([[input-gram]])
AT_PARSER_CHECK([[input-gram]], [[1]], [], AT_PARSER_CHECK([[input-gram]], [[1]], [],
[[syntax error, unexpected 'a', expecting $end [[syntax error, unexpected 'a', expecting end of file
]]) ]])
m4_popdef([AT_GRAM]) m4_popdef([AT_GRAM])