m4: catch suspicions of unevaluated macros

Check in m4's output if there are sequences such as m4_foo or b4_foo,
which are probably resulting from incorrect m4 processing.

It actually already is useful:

- it caught b4_lac_if leaking from glr.c, where LAC is not
  supported, hence b4_lac_if is not defined.

- it also caught references to location.hh in position.hh when
  location.hh does not exist.

- while making "Code injection" robust to these new warnings (it is
  its very purpose to let b4_canary pass unevaluated), I saw that it
  did not check lalr1.d, and when adding lalr1.d, it revealed it did
  underquote occurrences of token value types.

* src/scan-skel.l (macro): New abbreviation.
Use it.
* data/skeletons/glr.c: Don't use b4_lac_if, we don't have it.
* data/skeletons/location.cc: Don't generate position.hh when we don't
generate location.hh.
* data/skeletons/d.m4 (b4_basic_symbol_constructor_define): Fix
underquotation.
* data/skeletons/bison.m4 (b4_canary): New.
* tests/input.at (Code injection): Use it, and check lalr1.d too.
This commit is contained in:
Akim Demaille
2021-08-07 09:30:24 +02:00
parent 6118406c3e
commit 80db1029e6
7 changed files with 57 additions and 40 deletions
-4
View File
@@ -108,10 +108,6 @@ enough.
*** calc.at
Stop hard-coding "Calc". Adjust local.at (look for FIXME).
** A dev warning for b4_
Maybe we should check for m4_ and b4_ leaking out of the m4 processing, as
Autoconf does. It would have caught over-quotation issues.
** doc
I feel it's ugly to use the GNU style to declare functions in the doc. It
generates tons of white space in the page, and may contribute to bad page
+7
View File
@@ -261,6 +261,13 @@ m4_define([b4_fatal_at],
[b4_error([[fatal]], $@)dnl
m4_exit(1)])
# b4_canary(MSG)
# --------------
# Issue a warning on stderr and in the output. Used in the test suite
# to catch spurious m4 evaluations.
m4_define([b4_canary],
[m4_errprintn([dead canary: $1])DEAD CANARY($1)])
## ------------ ##
## Data Types. ##
+2 -2
View File
@@ -586,7 +586,7 @@ alias ACCEPT = ]b4_parser_class[.YYACCEPT;]])[]])[
# Create Symbol struct constructors for all the visible types.
m4_define([b4_basic_symbol_constructor_define],
[b4_token_visible_if([$1],
[ this(TokenKind token]b4_symbol_if([$1], [has_type],
[[ this(TokenKind token]b4_symbol_if([$1], [has_type],
[[, ]b4_union_if([], [[typeof(YYSemanticType.]])b4_symbol([$1], [type])dnl
[]b4_union_if([], [[) ]])[ val]])[]b4_locations_if([[, Location loc]])[)
{
@@ -601,7 +601,7 @@ m4_define([b4_basic_symbol_constructor_define],
value_.]b4_symbol([$1], [type])[ = val;]])])[]b4_locations_if([
location_ = loc;])[
}
])])
]])])
# b4_symbol_type_define
+2 -7
View File
@@ -2204,12 +2204,7 @@ yy_syntax_error_arguments (const yyGLRStack* yystackp,
- Don't assume there isn't a lookahead just because this state is a
consistent state with a default action. There might have been a
previous inconsistent state, consistent state with a non-default
action, or user semantic action that manipulated yychar.]b4_lac_if([[
In the first two cases, it might appear that the current syntax
error should have been detected in the previous state when yy_lac
was invoked. However, at that time, there might have been a
different syntax error that discarded a different initial context
during error recovery, leaving behind the current lookahead.]], [[
action, or user semantic action that manipulated yychar.
- Of course, the expected token list depends on states to have
correct lookahead information, and it depends on the parser not
to perform extra reductions after fetching a lookahead from the
@@ -2217,7 +2212,7 @@ yy_syntax_error_arguments (const yyGLRStack* yystackp,
(from LALR or IELR) and default reductions corrupt the expected
token list. However, the list is correct for canonical LR with
one exception: it will still contain any token that will not be
accepted due to an error action in a later state.]])[
accepted due to an error action in a later state.
*/
if (yytoken != ]b4_symbol(empty, kind)[)
{
+10 -7
View File
@@ -19,13 +19,6 @@ m4_pushdef([b4_copyright_years],
[2002-2015, 2018-2021])
# b4_position_file
# ----------------
# Name of the file containing the position class, if we want this file.
b4_header_if([b4_required_version_if([30200], [],
[m4_define([b4_position_file], [position.hh])])])])
# b4_location_file
# ----------------
# Name of the file containing the position/location class,
@@ -54,6 +47,16 @@ m4_ifdef([b4_location_file],
])
# b4_position_file
# ----------------
# Name of the file containing the position class, if we want this file.
b4_header_if(
[b4_required_version_if(
[30200], [],
[m4_ifdef([b4_location_file],
[m4_define([b4_position_file], [position.hh])])])])
# b4_location_define
# ------------------
+18 -1
View File
@@ -53,6 +53,9 @@ static void fail_for_invalid_at (char const *at);
static void output_mapped_file (char const *name);
%}
/* Identifiers of our M4 macros. */
macro [bm]4_[a-zA-Z_0-9]*
%x SC_AT_DIRECTIVE_ARGS
%x SC_AT_DIRECTIVE_SKIP_WS
@@ -87,7 +90,21 @@ static void output_mapped_file (char const *name);
/* This pattern must not match more than the previous @ patterns. */
@[^@{}''(\n]* fail_for_invalid_at (yytext);
\n out_lineno++; ECHO;
[^@\n]+ ECHO;
[^bm@\n]+ ECHO;
/* If there are still identifiers that look like macros, such as
b4_synbol, this is probably an error, say a typo in M4, or
overquotation. */
{macro} {
location loc = empty_loc;
loc.start.file = map_file_name (out_name);
loc.start.line = out_lineno;
loc.end = loc.start;
complain (&loc, Wother,
"suspicious sequence in the output: %s", yytext);
ECHO;
}
. ECHO;
<INITIAL><<EOF>> {
if (out_name)
+18 -19
View File
@@ -2889,26 +2889,26 @@ m4_pattern_allow([^m4_errprintn$])
# Try to have MACRO be run by bison.
m4_pushdef([AT_TEST],
[AT_DATA([[input.y]],
[[%type <$1(DEAD %type)> exp
%token <$1(DEAD %token)> a
[[%type <$1(%type)> exp
%token <$1(%token)> a
%token b
%initial-action
{
$$;
$<$1(DEAD %initial-action)>$
$<$1(%initial-action)>$
};
%printer
{
$$
$<$1(DEAD %printer)>$
$<$1(%printer)>$
} <> <*>;
%lex-param
{
$1(DEAD %lex-param)
$1(%lex-param)
};
%parse-param
{
$1(DEAD %parse-param)
$1(%parse-param)
};
%%
exp:
@@ -2916,25 +2916,24 @@ exp:
{
$$;
$][1;
$<$1(DEAD action 1)>$
$<$1(DEAD action 2)>1
$<$1(DEAD action 3)>name
$<$1(DEAD action 4)>0
$<$1(action 1)>$
$<$1(action 2)>1
$<$1(action 3)>name
$<$1(action 4)>0
;
};
]])
# FIXME: Provide a means to iterate over all the skeletons.
AT_BISON_CHECK([[-d input.y]])
AT_BISON_CHECK([[-d -S glr.c input.y]])
AT_BISON_CHECK([[-d -S lalr1.cc input.y]])
AT_BISON_CHECK([[-d -S glr.cc input.y]])
AT_BISON_CHECK([[-d -S glr2.cc input.y]])
AT_BISON_CHECK([[ -S lalr1.java input.y]])
# Disable -Wother to avoid the warnings about the suspicious presence
# of `b4_canary` in the output.
m4_foreach([b4_skel],
[[yacc.c], [glr.c], [lalr1.cc], [glr.cc], [glr2.cc], [lalr1.d], [lalr1.java]],
[AT_BISON_CHECK([[-Wno-other -S ]b4_skel[ ]m4_bmatch(b4_skel, [.*\.java$], [], [-d])[ input.y]])
])
])
AT_TEST([m4_errprintn])
AT_TEST([@:>@m4_errprintn])
AT_TEST([b4_canary])
AT_TEST([@:>@b4_canary])
m4_popdef([AT_TEST])