yacc.c: escape trigraphs in detailed parse.error

* src/output.c (escape_trigraphs, xescape_trigraphs): New.
(prepare_symbol_names): Use xescape_trigraphs.
* tests/regression.at: Check the handling of trigraphs with
parse.error = detailed.
This commit is contained in:
Akim Demaille
2020-01-18 12:28:33 +01:00
parent adac9a17f0
commit d9df62bfcd
2 changed files with 67 additions and 6 deletions

View File

@@ -132,6 +132,56 @@ string_output (FILE *out, char const *string)
}
/* Store in BUFFER a copy of SRC where trigraphs are escaped, and
   return the size needed to hold the complete result (including the
   final NUL).

   A trigraph is two question marks followed by one of the characters
   ! ' ( ) - / < = >.  It is escaped by emitting a pair of double
   quotes between the two question marks, so that when the result is
   used as the text of a C string literal the literal is closed and
   reopened there, and the compiler no longer sees a trigraph.

   At most BUFFERSIZE bytes are written.  When BUFFERSIZE is 0, BUFFER
   is never accessed (it may be NULL) and only the needed size is
   computed.  When BUFFERSIZE is positive but smaller than the needed
   size, the output is truncated yet still NUL-terminated; the caller
   can detect truncation by comparing the return value with
   BUFFERSIZE.  */
static ptrdiff_t
escape_trigraphs (char *buffer, ptrdiff_t buffersize, const char *src)
{
  /* Append C to the result: always count it, but write it only while
     there is room left in BUFFER.  */
#define STORE(c)                                \
  do                                            \
    {                                           \
      if (res < buffersize)                     \
        buffer[res] = (c);                      \
      ++res;                                    \
    }                                           \
  while (0)
  ptrdiff_t res = 0;
  for (ptrdiff_t i = 0, len = strlen (src); i < len; ++i)
    {
      if (i + 2 < len
          && src[i] == '?' && src[i+1] == '?')
        {
          switch (src[i+2])
            {
            case '!': case '\'':
            case '(': case ')': case '-': case '/':
            case '<': case '=': case '>':
              /* Consume both question marks here; the third character
                 of the trigraph is copied verbatim by the next
                 iteration.  */
              i += 1;
              STORE ('?');
              STORE ('"');
              STORE ('"');
              STORE ('?');
              continue;
            default:
              /* Not a trigraph: copy the first question mark as is.  */
              break;
            }
        }
      STORE (src[i]);
    }
  STORE ('\0');
#undef STORE
  /* RES is now the full size needed.  If BUFFER was too small, the
     final NUL was not stored: force termination in the last byte we
     are allowed to write.  */
  if (0 < buffersize && buffersize < res)
    buffer[buffersize - 1] = '\0';
  return res;
}
/* Like xstrdup, but the returned copy of SRC has its trigraphs
   escaped (see escape_trigraphs).  */
static char *
xescape_trigraphs (const char *src)
{
  /* A first pass with an empty buffer only measures the result
     (final NUL included); the second pass fills the allocation.  */
  ptrdiff_t needed = escape_trigraphs (NULL, 0, src);
  char *res = xcharalloc (needed);
  escape_trigraphs (res, needed, src);
  return res;
}
/* Generate the b4_<MUSCLE_NAME> (e.g., b4_tname) table with the
symbol names (aka tags). */
@@ -148,7 +198,7 @@ prepare_symbol_names (char const *muscle_name)
{
char *cp =
symbols[i]->tag[0] == '"' && !quote
? xstrdup (symbols[i]->tag)
? xescape_trigraphs (symbols[i]->tag)
: quotearg_alloc (symbols[i]->tag, -1, qo);
/* Width of the next token, including the two quotes, the
comma and the space. */

View File

@@ -366,17 +366,17 @@ AT_CLEANUP
## Token definitions. ##
## ------------------- ##
m4_pushdef([AT_TEST],
[AT_SETUP([Token definitions: $1])
AT_SETUP([Token definitions])
AT_BISON_OPTION_PUSHDEFS
AT_BISON_OPTION_PUSHDEFS([$1])
AT_DATA_GRAMMAR([input.y],
[%{
]AT_YYERROR_DECLARE[
]AT_YYLEX_DECLARE[
%}
[%define parse.error verbose
[$1
%token MYEOF 0 "end of file"
%token 'a' "a" // Bison managed, when fed with '%token 'f' "f"' to #define 'f'!
%token B_TOKEN "b"
@@ -391,7 +391,6 @@ exp: "a" "\\\'\?\"\a\b\f\n\r\t\v\001\201\x001\x000081??!";
]AT_YYLEX_DEFINE([{ SPECIAL }])[
]AT_MAIN_DEFINE[
]])
AT_BISON_OPTION_POPDEFS
# Checking the warning message guarantees that the trigraph "??!" isn't
# unnecessarily escaped here even though it would need to be if encoded in a
@@ -411,6 +410,7 @@ input.y:22.16-63: warning: symbol "\\'?\"\a\b\f\n\r\t\v\001\201\001\201??!" used
AT_COMPILE([input])
AT_ERROR_VERBOSE_IF([
# Checking the error message here guarantees that yytname, which does contain
# C-string literals, does have the trigraph escaped correctly. Thus, the
# symbol name reported by the parser is exactly the same as that reported by
@@ -419,9 +419,20 @@ AT_DATA([experr],
[[syntax error, unexpected "\\'?\"\a\b\f\n\r\t\v\001\201\001\201??!", expecting a
]])
AT_PARSER_CHECK([input], 1, [], [experr])
])
# We don't check the error message in "detailed" parse.error, since
# the special characters are no longer escaped, and it produces
# invalid UTF-8.
AT_BISON_OPTION_POPDEFS
AT_CLEANUP
])
AT_TEST([%define parse.error detailed])
AT_TEST([%define parse.error verbose])
m4_popdef([AT_TEST])
## -------------------- ##
## Characters Escapes. ##