Revamp to fix POSIX incompatibilities, to count columns correctly, and
to check for invalid inputs.

Use mbsnwidth to count columns correctly.  Account for tabs, too.
Include mbswidth.h.
(YY_USER_ACTION): Invoke extend_location rather than LOCATION_COLUMNS.
(extend_location): New function.
(YY_LINES): Remove.

Handle CRLF in C code rather than in Lex code.
(YY_INPUT): New macro.
(no_cr_read): New function.

Scan UCNs, even though we don't fully handle them yet.
(convert_ucn_to_byte): New function.

Handle backslash-newline correctly in C code.
(SC_LINE_COMMENT, SC_YACC_COMMENT): New states.
(eols, blanks): Remove.  YY_USER_ACTION now counts newlines etc.;
all uses changed.
(tag, splice): New EREs.  Do not allow NUL or newline in tags.
Use {splice} wherever C allows backslash-newline.
YY_STEP after space, newline, vertical-tab.
("/*"): BEGIN SC_YACC_COMMENT, not yy_push_state (SC_COMMENT).

(letter, id): Don't assume ASCII; e.g., spell out a-z.

({int}, handle_action_dollar, handle_action_at): Check for integer
overflow.

(YY_STEP): Omit trailing semicolon, so that it's more like C.

(<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>): Allow \0 and \00
as well as \000.  Check for UCHAR_MAX, not 255.
Allow \x with an arbitrary positive number of digits, as in C.
Check for overflow here.
Allow \? and UCNs, for compatibility with C.

(handle_symbol_code_dollar): Use quote_n slot 1 to avoid collision
with quote slot used by complain_at.
This commit is contained in:
Paul Eggert
2002-11-03 08:42:32 +00:00
parent 565a33db8f
commit d8d3f94a99

View File

@@ -24,6 +24,7 @@
%{ %{
#include "system.h" #include "system.h"
#include "mbswidth.h"
#include "complain.h" #include "complain.h"
#include "quote.h" #include "quote.h"
#include "getargs.h" #include "getargs.h"
@@ -39,9 +40,95 @@ do { \
if (yycontrol) {;}; \ if (yycontrol) {;}; \
} while (0) } while (0)
#define YY_USER_ACTION LOCATION_COLUMNS (*yylloc, yyleng); #define YY_USER_ACTION extend_location (yylloc, yytext, yyleng);
#define YY_LINES LOCATION_LINES (*yylloc, yyleng); #define YY_STEP LOCATION_STEP (*yylloc)
#define YY_STEP LOCATION_STEP (*yylloc);
#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
/* Fill BUF with up to SIZE bytes read from FP and return the number
   of bytes BUF then holds.  Every '\r' is turned into '\n', and a
   '\n' directly after a '\r' is dropped, so both "\r\n" and a lone
   '\r' come out as a single '\n'.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  char *dst;        /* Where the next output byte is stored.  */
  char const *src;  /* Next unread input byte.  */
  char const *end;  /* One past the last byte that fread delivered.  */

  if (nread == 0)
    return nread;

  /* Nothing to rewrite unless the chunk contains a carriage return.  */
  dst = memchr (buf, '\r', nread);
  if (dst == NULL)
    return nread;

  src = dst + 1;
  end = buf + nread;

  for (;;)
    {
      /* DST marks a '\r'; it becomes '\n'.  */
      *dst++ = '\n';

      if (src != end)
        {
          /* Swallow the '\n' of a "\r\n" pair.  */
          if (*src == '\n')
            src++;
        }
      else
        {
          /* The '\r' was the last byte of this chunk, so peek at the
             stream to see whether a '\n' follows.  Any other byte is
             pushed back; stop if the push-back fails.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            return dst - buf;
        }

      /* Compact the bytes up to the next '\r' (or the end).  */
      while (src != end && *src != '\r')
        *dst++ = *src++;

      if (src == end)
        return dst - buf;

      /* Step over the '\r'; the top of the loop stores its '\n'.  */
      src++;
    }
}
/* Move the end position of *LOC (last_line, last_column) past the
   SIZE bytes starting at TOKEN.  A newline starts a new line at
   column 1.  A tab first adds the width of the text before it and
   then advances to the next column of the form 8*k + 1.  Other bytes
   are measured in runs with mbsnwidth.  A '\r' aborts, since
   no_cr_read is expected to have removed them all.  */

static void
extend_location (location_t *loc, char const *token, int size)
{
  int cur_line = loc->last_line;
  int cur_column = loc->last_column;
  char const *run = token;          /* Start of the text not yet measured.  */
  char const *end = token + size;
  char const *cp = token;

  while (cp < end)
    {
      char ch = *cp;

      if (ch == '\r')
        {
          /* \r shouldn't survive no_cr_read.  */
          abort ();
        }
      else if (ch == '\n')
        {
          cur_line++;
          cur_column = 1;
          run = cp + 1;
        }
      else if (ch == '\t')
        {
          /* Count the pending run, then jump to the next tab stop.  */
          cur_column += mbsnwidth (run, cp - run, 0);
          cur_column += 8 - ((cur_column - 1) & 7);
          run = cp + 1;
        }

      cp++;
    }

  loc->last_line = cur_line;
  loc->last_column = cur_column + mbsnwidth (run, cp - run, 0);
}
/* STRING_OBSTACK -- Used to store all the characters that we need to /* STRING_OBSTACK -- Used to store all the characters that we need to
keep (to construct ID, STRINGS etc.). Use the following macros to keep (to construct ID, STRINGS etc.). Use the following macros to
@@ -91,17 +178,26 @@ static void handle_dollar (braced_code_t code_kind,
char *cp, location_t location); char *cp, location_t location);
static void handle_at (braced_code_t code_kind, static void handle_at (braced_code_t code_kind,
char *cp, location_t location); char *cp, location_t location);
static int convert_ucn_to_byte (char const *hex_text);
%} %}
%x SC_COMMENT %x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
%x SC_STRING SC_CHARACTER %x SC_STRING SC_CHARACTER
%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER %x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
%x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE %x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
id [.a-zA-Z_][.a-zA-Z_0-9]* letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
id {letter}({letter}|[0-9])*
int [0-9]+ int [0-9]+
eols (\n|\r|\n\r|\r\n)+
blanks [ \t\f]+ /* POSIX says that a tag must be both an id and a C union member, but
historically almost any character is allowed in a tag. We disallow
NUL and newline, as this simplifies our implementation. */
tag [^\0\n>]+
/* Zero or more instances of backslash-newline. Following GCC, allow
white space between the backslash and the newline. */
splice (\\[ \f\t\v]*\n)*
%% %%
%{ %{
@@ -136,7 +232,7 @@ blanks [ \t\f]+
"%nterm" return PERCENT_NTERM; "%nterm" return PERCENT_NTERM;
"%output" return PERCENT_OUTPUT; "%output" return PERCENT_OUTPUT;
"%parse-param" return PERCENT_PARSE_PARAM; "%parse-param" return PERCENT_PARSE_PARAM;
"%prec" { rule_length--; return PERCENT_PREC; } "%prec" rule_length--; return PERCENT_PREC;
"%printer" return PERCENT_PRINTER; "%printer" return PERCENT_PRINTER;
"%pure"[-_]"parser" return PERCENT_PURE_PARSER; "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
"%right" return PERCENT_RIGHT; "%right" return PERCENT_RIGHT;
@@ -152,20 +248,31 @@ blanks [ \t\f]+
"%yacc" return PERCENT_YACC; "%yacc" return PERCENT_YACC;
"=" return EQUAL; "=" return EQUAL;
":" { rule_length = 0; return COLON; } ":" rule_length = 0; return COLON;
"|" { rule_length = 0; return PIPE; } "|" rule_length = 0; return PIPE;
"," return COMMA; "," return COMMA;
";" return SEMICOLON; ";" return SEMICOLON;
{eols} YY_LINES; YY_STEP; [ \f\n\t\v]+ YY_STEP;
{blanks} YY_STEP;
{id} { {id} {
yylval->symbol = symbol_get (yytext, *yylloc); yylval->symbol = symbol_get (yytext, *yylloc);
rule_length++; rule_length++;
return ID; return ID;
} }
{int} yylval->integer = strtol (yytext, 0, 10); return INT; {int} {
unsigned long num;
errno = 0;
num = strtoul (yytext, 0, 10);
if (INT_MAX < num || errno)
{
complain_at (*yylloc, _("%s is invalid"), yytext);
num = INT_MAX;
}
yylval->integer = num;
return INT;
}
/* Characters. We don't check there is only one. */ /* Characters. We don't check there is only one. */
"'" YY_OBS_GROW; yy_push_state (SC_ESCAPED_CHARACTER); "'" YY_OBS_GROW; yy_push_state (SC_ESCAPED_CHARACTER);
@@ -174,7 +281,7 @@ blanks [ \t\f]+
"\"" YY_OBS_GROW; yy_push_state (SC_ESCAPED_STRING); "\"" YY_OBS_GROW; yy_push_state (SC_ESCAPED_STRING);
/* Comments. */ /* Comments. */
"/*" yy_push_state (SC_COMMENT); "/*" BEGIN SC_YACC_COMMENT;
"//".* YY_STEP; "//".* YY_STEP;
/* Prologue. */ /* Prologue. */
@@ -184,7 +291,7 @@ blanks [ \t\f]+
"{" YY_OBS_GROW; ++braces_level; yy_push_state (SC_BRACED_CODE); "{" YY_OBS_GROW; ++braces_level; yy_push_state (SC_BRACED_CODE);
/* A type. */ /* A type. */
"<"[^>]+">" { "<"{tag}">" {
obstack_grow (&string_obstack, yytext + 1, yyleng - 2); obstack_grow (&string_obstack, yytext + 1, yyleng - 2);
YY_OBS_FINISH; YY_OBS_FINISH;
yylval->string = last_string; yylval->string = last_string;
@@ -206,41 +313,48 @@ blanks [ \t\f]+
} }
/*------------------------------------------------------------. /*-------------------------------------------------------------------.
| Whatever the start condition (but those which correspond to | | Whatever the start condition (but those which correspond to |
| entity `swallowed' by Bison: SC_ESCAPED_STRING and | | entities `swallowed' by Bison: SC_YACC_COMMENT, SC_ESCAPED_STRING, |
| SC_ESCAPED_CHARACTER), no M4 character must escape as is. | | and SC_ESCAPED_CHARACTER), no M4 character must escape as is. |
`------------------------------------------------------------*/ `-------------------------------------------------------------------*/
<SC_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE> <SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
{ {
\[ if (YY_START != SC_COMMENT) obstack_sgrow (&string_obstack, "@<:@"); \[ obstack_sgrow (&string_obstack, "@<:@");
\] if (YY_START != SC_COMMENT) obstack_sgrow (&string_obstack, "@:>@"); \] obstack_sgrow (&string_obstack, "@:>@");
} }
/*---------------------------------------------------------------.
| Scanning a Yacc comment. The initial `/ *' is already eaten. |
`---------------------------------------------------------------*/
/*-----------------------------------------------------------. <SC_YACC_COMMENT>
| Scanning a C comment. The initial `/ *' is already eaten. | {
`-----------------------------------------------------------*/ "*/" {
YY_STEP;
BEGIN INITIAL;
}
[^*]+|"*" ;
<<EOF>> {
LOCATION_PRINT (stderr, *yylloc);
fprintf (stderr, _(": unexpected end of file in a comment\n"));
BEGIN INITIAL;
}
}
/*------------------------------------------------------------.
| Scanning a C comment. The initial `/ *' is already eaten. |
`------------------------------------------------------------*/
<SC_COMMENT> <SC_COMMENT>
{ {
"*/" { /* End of the comment. */ "*"{splice}"/" YY_OBS_GROW; yy_pop_state ();
if (yy_top_state () == INITIAL) [^*\[\]]+|"*" YY_OBS_GROW;
{
YY_STEP;
}
else
{
YY_OBS_GROW;
}
yy_pop_state ();
}
[^\[\]*\n\r]+ if (yy_top_state () != INITIAL) YY_OBS_GROW;
{eols} if (yy_top_state () != INITIAL) YY_OBS_GROW; YY_LINES;
. /* Stray `*'. */if (yy_top_state () != INITIAL) YY_OBS_GROW;
<<EOF>> { <<EOF>> {
LOCATION_PRINT (stderr, *yylloc); LOCATION_PRINT (stderr, *yylloc);
@@ -250,6 +364,18 @@ blanks [ \t\f]+
} }
/*--------------------------------------------------------------.
| Scanning a line comment. The initial `//' is already eaten. |
`--------------------------------------------------------------*/
<SC_LINE_COMMENT>
{
"\n" YY_OBS_GROW; yy_pop_state ();
([^\n\[\]]|{splice})+ YY_OBS_GROW;
<<EOF>> yy_pop_state ();
}
/*----------------------------------------------------------------. /*----------------------------------------------------------------.
| Scanning a C string, including its escapes. The initial `"' is | | Scanning a C string, including its escapes. The initial `"' is |
| already eaten. | | already eaten. |
@@ -267,9 +393,7 @@ blanks [ \t\f]+
return STRING; return STRING;
} }
[^\"\n\r\\]+ YY_OBS_GROW; [^\"\\]+ YY_OBS_GROW;
{eols} obstack_1grow (&string_obstack, '\n'); YY_LINES;
<<EOF>> { <<EOF>> {
LOCATION_PRINT (stderr, *yylloc); LOCATION_PRINT (stderr, *yylloc);
@@ -305,9 +429,7 @@ blanks [ \t\f]+
} }
} }
[^\n\r\\] YY_OBS_GROW; [^'\\]+ YY_OBS_GROW;
{eols} obstack_1grow (&string_obstack, '\n'); YY_LINES;
<<EOF>> { <<EOF>> {
LOCATION_PRINT (stderr, *yylloc); LOCATION_PRINT (stderr, *yylloc);
@@ -327,9 +449,9 @@ blanks [ \t\f]+
<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER> <SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
{ {
\\[0-7]{3} { \\[0-7]{1,3} {
long c = strtol (yytext + 1, 0, 8); unsigned long c = strtoul (yytext + 1, 0, 8);
if (c > 255) if (UCHAR_MAX < c)
{ {
LOCATION_PRINT (stderr, *yylloc); LOCATION_PRINT (stderr, *yylloc);
fprintf (stderr, _(": invalid escape: %s\n"), quote (yytext)); fprintf (stderr, _(": invalid escape: %s\n"), quote (yytext));
@@ -339,8 +461,18 @@ blanks [ \t\f]+
obstack_1grow (&string_obstack, c); obstack_1grow (&string_obstack, c);
} }
\\x[0-9a-fA-F]{2} { \\x[0-9a-fA-F]+ {
obstack_1grow (&string_obstack, strtol (yytext + 2, 0, 16)); unsigned long c;
errno = 0;
c = strtoul (yytext + 2, 0, 16);
if (UCHAR_MAX < c || errno)
{
LOCATION_PRINT (stderr, *yylloc);
fprintf (stderr, _(": invalid escape: %s\n"), quote (yytext));
YY_STEP;
}
else
obstack_1grow (&string_obstack, c);
} }
\\a obstack_1grow (&string_obstack, '\a'); \\a obstack_1grow (&string_obstack, '\a');
@@ -350,7 +482,18 @@ blanks [ \t\f]+
\\r obstack_1grow (&string_obstack, '\r'); \\r obstack_1grow (&string_obstack, '\r');
\\t obstack_1grow (&string_obstack, '\t'); \\t obstack_1grow (&string_obstack, '\t');
\\v obstack_1grow (&string_obstack, '\v'); \\v obstack_1grow (&string_obstack, '\v');
\\[\\""''] obstack_1grow (&string_obstack, yytext[1]); \\[\"'?\\] obstack_1grow (&string_obstack, yytext[1]);
\\(u|U[0-9a-fA-F]{4})[0-9a-fA-F]{4} {
int c = convert_ucn_to_byte (yytext);
if (c < 0)
{
LOCATION_PRINT (stderr, *yylloc);
fprintf (stderr, _(": invalid escape: %s\n"), quote (yytext));
YY_STEP;
}
else
obstack_1grow (&string_obstack, c);
}
\\(.|\n) { \\(.|\n) {
LOCATION_PRINT (stderr, *yylloc); LOCATION_PRINT (stderr, *yylloc);
fprintf (stderr, _(": unrecognized escape: %s\n"), quote (yytext)); fprintf (stderr, _(": unrecognized escape: %s\n"), quote (yytext));
@@ -374,13 +517,12 @@ blanks [ \t\f]+
yy_pop_state (); yy_pop_state ();
} }
[^\[\]\'\n\r\\]+ YY_OBS_GROW; [^'\[\]\\]+ YY_OBS_GROW;
\\(.|\n) YY_OBS_GROW; \\{splice}[^\[\]] YY_OBS_GROW;
/* FLex wants this rule, in case of a `\<<EOF>>'. */ {splice} YY_OBS_GROW;
/* Needed for `\<<EOF>>', `\\<<newline>>[', and `\\<<newline>>]'. */
\\ YY_OBS_GROW; \\ YY_OBS_GROW;
{eols} YY_OBS_GROW; YY_LINES;
<<EOF>> { <<EOF>> {
LOCATION_PRINT (stderr, *yylloc); LOCATION_PRINT (stderr, *yylloc);
fprintf (stderr, _(": unexpected end of file in a character\n")); fprintf (stderr, _(": unexpected end of file in a character\n"));
@@ -403,13 +545,12 @@ blanks [ \t\f]+
yy_pop_state (); yy_pop_state ();
} }
[^\[\]\"\n\r\\]+ YY_OBS_GROW; [^\"\[\]\\]+ YY_OBS_GROW;
\\(.|\n) YY_OBS_GROW; \\{splice}[^\[\]] YY_OBS_GROW;
/* FLex wants this rule, in case of a `\<<EOF>>'. */ {splice} YY_OBS_GROW;
/* Needed for `\<<EOF>>', `\\<<newline>>[', and `\\<<newline>>]'. */
\\ YY_OBS_GROW; \\ YY_OBS_GROW;
{eols} YY_OBS_GROW; YY_LINES;
<<EOF>> { <<EOF>> {
LOCATION_PRINT (stderr, *yylloc); LOCATION_PRINT (stderr, *yylloc);
fprintf (stderr, _(": unexpected end of file in a string\n")); fprintf (stderr, _(": unexpected end of file in a string\n"));
@@ -432,8 +573,8 @@ blanks [ \t\f]+
"\"" YY_OBS_GROW; yy_push_state (SC_STRING); "\"" YY_OBS_GROW; yy_push_state (SC_STRING);
/* Comments. */ /* Comments. */
"/*" YY_OBS_GROW; yy_push_state (SC_COMMENT); "/"{splice}"*" YY_OBS_GROW; yy_push_state (SC_COMMENT);
"//".* YY_OBS_GROW; "/"{splice}"/" YY_OBS_GROW; yy_push_state (SC_LINE_COMMENT);
/* Not comments. */ /* Not comments. */
"/" YY_OBS_GROW; "/" YY_OBS_GROW;
@@ -461,15 +602,14 @@ blanks [ \t\f]+
"{" YY_OBS_GROW; braces_level++; "{" YY_OBS_GROW; braces_level++;
"$"("<"[^>]+">")?(-?[0-9]+|"$") { handle_dollar (current_braced_code, "$"("<"{tag}">")?(-?[0-9]+|"$") { handle_dollar (current_braced_code,
yytext, *yylloc); } yytext, *yylloc); }
"@"(-?[0-9]+|"$") { handle_at (current_braced_code, "@"(-?[0-9]+|"$") { handle_at (current_braced_code,
yytext, *yylloc); } yytext, *yylloc); }
[^$@\[\]/\'\"\{\}\n\r]+ YY_OBS_GROW; [^$@\[\]/'\"\{\}]+ YY_OBS_GROW;
{eols} YY_OBS_GROW; YY_LINES;
/* A lose $, or /, or etc. */ /* A stray $, or /, or etc. */
. YY_OBS_GROW; . YY_OBS_GROW;
<<EOF>> { <<EOF>> {
@@ -497,9 +637,8 @@ blanks [ \t\f]+
return PROLOGUE; return PROLOGUE;
} }
[^%\[\]/\'\"\n\r]+ YY_OBS_GROW; [^%\[\]/'\"]+ YY_OBS_GROW;
"%" YY_OBS_GROW; "%" YY_OBS_GROW;
{eols} YY_OBS_GROW; YY_LINES;
<<EOF>> { <<EOF>> {
LOCATION_PRINT (stderr, *yylloc); LOCATION_PRINT (stderr, *yylloc);
@@ -514,12 +653,12 @@ blanks [ \t\f]+
/*---------------------------------------------------------------. /*---------------------------------------------------------------.
| Scanning the epilogue (everything after the second "%%", which | | Scanning the epilogue (everything after the second "%%", which |
| has already been eaten. | | has already been eaten). |
`---------------------------------------------------------------*/ `---------------------------------------------------------------*/
<SC_EPILOGUE> <SC_EPILOGUE>
{ {
([^\[\]]|{eols})+ YY_OBS_GROW; [^\[\]]+ YY_OBS_GROW;
<<EOF>> { <<EOF>> {
yy_pop_state (); yy_pop_state ();
@@ -568,14 +707,15 @@ handle_action_dollar (char *text, location_t location)
obstack_fgrow1 (&string_obstack, obstack_fgrow1 (&string_obstack,
"]b4_lhs_value([%s])[", type_name); "]b4_lhs_value([%s])[", type_name);
} }
else if (('0' <= *cp && *cp <= '9') || *cp == '-') else
{ {
int n = strtol (cp, &cp, 10); long num;
errno = 0;
num = strtol (cp, 0, 10);
if (n > rule_length) if (INT_MIN <= num && num <= rule_length && ! errno)
complain_at (location, _("invalid value: %s%d"), "$", n);
else
{ {
int n = num;
if (!type_name && n > 0) if (!type_name && n > 0)
type_name = symbol_list_n_type_name_get (current_rule, location, type_name = symbol_list_n_type_name_get (current_rule, location,
n); n);
@@ -588,16 +728,14 @@ handle_action_dollar (char *text, location_t location)
"]b4_rhs_value([%d], [%d], [%s])[", "]b4_rhs_value([%d], [%d], [%s])[",
rule_length, n, type_name); rule_length, n, type_name);
} }
} else
else complain_at (location, _("invalid value: %s"), text);
{
complain_at (location, _("%s is invalid"), quote (text));
} }
} }
/*---------------------------------------------------------------. /*---------------------------------------------------------------.
| TEXT is expexted tp be $$ in some code associated to a symbol: | | TEXT is expected to be $$ in some code associated to a symbol: |
| destructor or printer. | | destructor or printer. |
`---------------------------------------------------------------*/ `---------------------------------------------------------------*/
@@ -608,7 +746,7 @@ handle_symbol_code_dollar (char *text, location_t location)
if (*cp == '$') if (*cp == '$')
obstack_sgrow (&string_obstack, "]b4_dollar_dollar["); obstack_sgrow (&string_obstack, "]b4_dollar_dollar[");
else else
complain_at (location, _("%s is invalid"), quote (text)); complain_at (location, _("%s is invalid"), quote_n (1, text));
} }
@@ -650,25 +788,26 @@ handle_action_at (char *text, location_t location)
{ {
obstack_sgrow (&string_obstack, "]b4_lhs_location["); obstack_sgrow (&string_obstack, "]b4_lhs_location[");
} }
else if (('0' <= *cp && *cp <= '9') || *cp == '-')
{
int n = strtol (cp, &cp, 10);
if (n > rule_length)
complain_at (location, _("invalid value: %s%d"), "@", n);
else
obstack_fgrow2 (&string_obstack, "]b4_rhs_location([%d], [%d])[",
rule_length, n);
}
else else
{ {
complain_at (location, _("%s is invalid"), quote (text)); long num;
errno = 0;
num = strtol (cp, 0, 10);
if (INT_MIN <= num && num <= rule_length && ! errno)
{
int n = num;
obstack_fgrow2 (&string_obstack, "]b4_rhs_location([%d], [%d])[",
rule_length, n);
}
else
complain_at (location, _("invalid value: %s"), text);
} }
} }
/*---------------------------------------------------------------. /*---------------------------------------------------------------.
| TEXT is expexted tp be @$ in some code associated to a symbol: | | TEXT is expected to be @$ in some code associated to a symbol: |
| destructor or printer. | | destructor or printer. |
`---------------------------------------------------------------*/ `---------------------------------------------------------------*/
@@ -679,7 +818,7 @@ handle_symbol_code_at (char *text, location_t location)
if (*cp == '$') if (*cp == '$')
obstack_sgrow (&string_obstack, "]b4_at_dollar["); obstack_sgrow (&string_obstack, "]b4_at_dollar[");
else else
complain_at (location, _("%s is invalid"), quote (text)); complain_at (location, _("%s is invalid"), quote_n (1, text));
} }
@@ -706,6 +845,62 @@ handle_at (braced_code_t braced_code_kind,
} }
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
|                                                                   |
| UCN is the full escape text as matched by the scanner, i.e. a     |
| backslash, then `u' or `U', then hexadecimal digits.              |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  /* Skip the two-byte `\u' or `\U' prefix; the rest is hexadecimal.  */
  unsigned long code = strtoul (ucn + 2, 0, 16);

  /* Kept as an int so that -1 is never routed through an unsigned
     type on its way to the caller.  */
  int result;

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  /* CODE is at most UCHAR_MAX here, so it fits in an int.  */
  result = (int) code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
	'\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
	'\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	 ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
	 '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
	 '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
	 '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
	 '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
	 'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
	 'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
	 'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
	 '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
	 'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
	 'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
	 'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Codes beyond the table, and table entries of -1, have no
       single-byte equivalent here.  */
    result = code < sizeof table ? table[code] : -1;
  }
#endif

  return result;
}
/*-------------------------. /*-------------------------.
| Initialize the scanner. | | Initialize the scanner. |
`-------------------------*/ `-------------------------*/