m4: catch suspicions of unevaluated macros

Check in m4's output if there are sequences such as m4_foo or b4_foo,
which are probably resulting from incorrect m4 processing.

It actually already is useful:

- it caught a b4_lac_if leaking from glr.c, where LAC is not
  supported, hence b4_lac_if is not defined.

- it also caught references to location.hh in position.hh when
  location.hh does not exist.

- while making "Code injection" robust to these new warnings (it is
  its very purpose to let b4_canary pass unevaluated), I saw that it
  did not check lalr1.d, and when adding lalr1.d, it revealed it did
  underquote occurrences of token value types.

* src/scan-skel.l (macro): New abbreviation.
Use it.
* data/skeletons/glr.c: Don't use b4_lac_if, we don't have it.
* data/skeletons/location.cc: Don't generate position.hh when we don't
generate location.hh.
* data/skeletons/d.m4 (b4_basic_symbol_constructor_define): Fix
underquotation.
* data/skeletons/bison.m4 (b4_canary): New.
* tests/input.at (Code injection): Use it, and check lalr1.d too.
This commit is contained in:
Akim Demaille
2021-08-07 09:30:24 +02:00
parent 6118406c3e
commit 80db1029e6
7 changed files with 57 additions and 40 deletions

4
TODO
View File

@@ -108,10 +108,6 @@ enough.
*** calc.at
Stop hard-coding "Calc". Adjust local.at (look for FIXME).
** A dev warning for b4_
Maybe we should check for m4_ and b4_ leaking out of the m4 processing, as
Autoconf does. It would have caught over-quotation issues.
** doc
I feel it's ugly to use the GNU style to declare functions in the doc. It
generates tons of white space in the page, and may contribute to bad page

View File

@@ -261,6 +261,13 @@ m4_define([b4_fatal_at],
[b4_error([[fatal]], $@)dnl
m4_exit(1)])
# b4_canary(MSG)
# --------------
# Issue a warning on stderr and in the output. Used in the test suite
# to catch spurious m4 evaluations.
#
# The text "dead canary: MSG" is handed to m4_errprintn, and the macro
# expands to "DEAD CANARY(MSG)", so an unwanted evaluation is visible
# both in the diagnostics and in the generated text.
m4_define([b4_canary],
[m4_errprintn([dead canary: $1])DEAD CANARY($1)])
## ------------ ##
## Data Types. ##

View File

@@ -586,7 +586,7 @@ alias ACCEPT = ]b4_parser_class[.YYACCEPT;]])[]])[
# Create Symbol struct constructors for all the visible types.
m4_define([b4_basic_symbol_constructor_define],
[b4_token_visible_if([$1],
[ this(TokenKind token]b4_symbol_if([$1], [has_type],
[[ this(TokenKind token]b4_symbol_if([$1], [has_type],
[[, ]b4_union_if([], [[typeof(YYSemanticType.]])b4_symbol([$1], [type])dnl
[]b4_union_if([], [[) ]])[ val]])[]b4_locations_if([[, Location loc]])[)
{
@@ -601,7 +601,7 @@ m4_define([b4_basic_symbol_constructor_define],
value_.]b4_symbol([$1], [type])[ = val;]])])[]b4_locations_if([
location_ = loc;])[
}
])])
]])])
# b4_symbol_type_define

View File

@@ -2204,12 +2204,7 @@ yy_syntax_error_arguments (const yyGLRStack* yystackp,
- Don't assume there isn't a lookahead just because this state is a
consistent state with a default action. There might have been a
previous inconsistent state, consistent state with a non-default
action, or user semantic action that manipulated yychar.]b4_lac_if([[
In the first two cases, it might appear that the current syntax
error should have been detected in the previous state when yy_lac
was invoked. However, at that time, there might have been a
different syntax error that discarded a different initial context
during error recovery, leaving behind the current lookahead.]], [[
action, or user semantic action that manipulated yychar.
- Of course, the expected token list depends on states to have
correct lookahead information, and it depends on the parser not
to perform extra reductions after fetching a lookahead from the
@@ -2217,7 +2212,7 @@ yy_syntax_error_arguments (const yyGLRStack* yystackp,
(from LALR or IELR) and default reductions corrupt the expected
token list. However, the list is correct for canonical LR with
one exception: it will still contain any token that will not be
accepted due to an error action in a later state.]])[
accepted due to an error action in a later state.
*/
if (yytoken != ]b4_symbol(empty, kind)[)
{

View File

@@ -19,13 +19,6 @@ m4_pushdef([b4_copyright_years],
[2002-2015, 2018-2021])
# b4_position_file
# ----------------
# Name of the file containing the position class, if we want this file.
b4_header_if([b4_required_version_if([30200], [],
[m4_define([b4_position_file], [position.hh])])])])
# b4_location_file
# ----------------
# Name of the file containing the position/location class,
@@ -54,6 +47,16 @@ m4_ifdef([b4_location_file],
])
# b4_position_file
# ----------------
# Name of the file containing the position class, if we want this file.
#
# The definition (position.hh) is made only within b4_header_if, in the
# third argument of b4_required_version_if for version 30200, and only
# when b4_location_file is itself defined.  Otherwise b4_position_file
# is left undefined here.
b4_header_if(
[b4_required_version_if(
[30200], [],
[m4_ifdef([b4_location_file],
[m4_define([b4_position_file], [position.hh])])])])
# b4_location_define
# ------------------

View File

@@ -53,6 +53,9 @@ static void fail_for_invalid_at (char const *at);
static void output_mapped_file (char const *name);
%}
/* Identifiers of our M4 macros: the prefix "b4_" or "m4_" followed by
   any number (possibly zero) of letters, digits and underscores. */
macro [bm]4_[a-zA-Z_0-9]*
%x SC_AT_DIRECTIVE_ARGS
%x SC_AT_DIRECTIVE_SKIP_WS
@@ -87,7 +90,21 @@ static void output_mapped_file (char const *name);
/* This pattern must not match more than the previous @ patterns. */
@[^@{}''(\n]* fail_for_invalid_at (yytext);
\n out_lineno++; ECHO;
[^@\n]+ ECHO;
[^bm@\n]+ ECHO;
/* If there are still identifiers that look like macros, such as
b4_synbol, this is probably an error, say a typo in M4, or
overquotation. Report it as a warning in the Wother category,
located at the current line of the file being output. */
{macro} {
/* Location whose start and end are both the current output line. */
location loc = empty_loc;
loc.start.file = map_file_name (out_name);
loc.start.line = out_lineno;
loc.end = loc.start;
complain (&loc, Wother,
"suspicious sequence in the output: %s", yytext);
/* This is only a warning: ECHO still follows, as in the neighboring rules. */
ECHO;
}
. ECHO;
<INITIAL><<EOF>> {
if (out_name)

View File

@@ -2889,26 +2889,26 @@ m4_pattern_allow([^m4_errprintn$])
# Try to have MACRO be run by bison.
m4_pushdef([AT_TEST],
[AT_DATA([[input.y]],
[[%type <$1(DEAD %type)> exp
%token <$1(DEAD %token)> a
[[%type <$1(%type)> exp
%token <$1(%token)> a
%token b
%initial-action
{
$$;
$<$1(DEAD %initial-action)>$
$<$1(%initial-action)>$
};
%printer
{
$$
$<$1(DEAD %printer)>$
$<$1(%printer)>$
} <> <*>;
%lex-param
{
$1(DEAD %lex-param)
$1(%lex-param)
};
%parse-param
{
$1(DEAD %parse-param)
$1(%parse-param)
};
%%
exp:
@@ -2916,25 +2916,24 @@ exp:
{
$$;
$][1;
$<$1(DEAD action 1)>$
$<$1(DEAD action 2)>1
$<$1(DEAD action 3)>name
$<$1(DEAD action 4)>0
$<$1(action 1)>$
$<$1(action 2)>1
$<$1(action 3)>name
$<$1(action 4)>0
;
};
]])
# FIXME: Provide a means to iterate over all the skeletons.
AT_BISON_CHECK([[-d input.y]])
AT_BISON_CHECK([[-d -S glr.c input.y]])
AT_BISON_CHECK([[-d -S lalr1.cc input.y]])
AT_BISON_CHECK([[-d -S glr.cc input.y]])
AT_BISON_CHECK([[-d -S glr2.cc input.y]])
AT_BISON_CHECK([[ -S lalr1.java input.y]])
# Disable -Wother to avoid the warnings about the suspicious presence
# of `b4_canary` in the output.
m4_foreach([b4_skel],
[[yacc.c], [glr.c], [lalr1.cc], [glr.cc], [glr2.cc], [lalr1.d], [lalr1.java]],
[AT_BISON_CHECK([[-Wno-other -S ]b4_skel[ ]m4_bmatch(b4_skel, [.*\.java$], [], [-d])[ input.y]])
])
])
AT_TEST([m4_errprintn])
AT_TEST([@:>@m4_errprintn])
AT_TEST([b4_canary])
AT_TEST([@:>@b4_canary])
m4_popdef([AT_TEST])