Add support for hex token numbers.

This commit is contained in:
Paul Eggert
2004-03-08 20:49:34 +00:00
parent 006d217ddd
commit 1452af69b4
4 changed files with 63 additions and 19 deletions

View File

@@ -1,3 +1,18 @@
2004-03-08 Paul Eggert <eggert@gnu.org>
Add support for hex token numbers. Suggested by Odd Arild Olsen in
<http://mail.gnu.org/archive/html/bison-patches/2004-03/msg00000.html>.
* NEWS: Document hexadecimal tokens, no NUL bytes, %destructor
in lalr1.cc.
* doc/bison.texinfo (Token Decl): Add hexadecimal token numbers.
* src/scan-gram.l (scan_integer): New function.
({int}): Use it.
(0[xX][0-9abcdefABCDEF]+): New pattern, to support hex numbers.
(<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>, \\x[0-9abcdefABCDEF]+,
handle_action_dollar, handle_action_at, convert_ucn_to_byte):
Say "long int", not "long", for uniformity with GNU style.
2004-02-25 Paul Eggert <eggert@twinsun.com>
* tests/local.at (AT_COMPILE, AT_COMPILE_CXX): Ignore stdout from

15
NEWS
View File

@@ -1,7 +1,18 @@
Bison News Bison News
---------- ----------
Changes in version 1.875c: Changes in version 1.875d:
* NUL bytes are no longer allowed in Bison string literals, unfortunately.
* %token numbers can now be hexadecimal integers, e.g., `%token FOO 0x12d'.
This is a GNU extension.
* Experimental %destructor support has been added to lalr1.cc.
Changes in version 1.875c, 2003-08-25:
(Just bug fixes.)
Changes in version 1.875b, 2003-06-17: Changes in version 1.875b, 2003-06-17:
@@ -551,7 +562,7 @@ End:
----- -----
Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
Free Software Foundation, Inc. Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler. This file is part of Bison, the GNU Compiler Compiler.

View File

@@ -44,7 +44,7 @@ This manual is for @acronym{GNU} Bison (version @value{VERSION},
@value{UPDATED}), the @acronym{GNU} parser generator. @value{UPDATED}), the @acronym{GNU} parser generator.
Copyright @copyright{} 1988, 1989, 1990, 1991, 1992, 1993, 1995, 1998, Copyright @copyright{} 1988, 1989, 1990, 1991, 1992, 1993, 1995, 1998,
1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc. 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
@quotation @quotation
Permission is granted to copy, distribute and/or modify this document Permission is granted to copy, distribute and/or modify this document
@@ -3309,10 +3309,12 @@ associativity and precedence. @xref{Precedence Decl, ,Operator
Precedence}. Precedence}.
You can explicitly specify the numeric code for a token type by appending You can explicitly specify the numeric code for a token type by appending
an integer value in the field immediately following the token name: a decimal or hexadecimal integer value in the field immediately
following the token name:
@example @example
%token NUM 300 %token NUM 300
%token XNUM 0x12d // a GNU extension
@end example @end example
@noindent @noindent

View File

@@ -1,6 +1,6 @@
/* Bison Grammar Scanner -*- C -*- /* Bison Grammar Scanner -*- C -*-
Copyright (C) 2002, 2003 Free Software Foundation, Inc. Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler. This file is part of Bison, the GNU Compiler Compiler.
@@ -101,6 +101,7 @@ static int rule_length;
static void handle_dollar (int token_type, char *cp, location loc); static void handle_dollar (int token_type, char *cp, location loc);
static void handle_at (int token_type, char *cp, location loc); static void handle_at (int token_type, char *cp, location loc);
static void handle_syncline (char *args); static void handle_syncline (char *args);
static unsigned long int scan_integer (char const *p, int base, location loc);
static int convert_ucn_to_byte (char const *hex_text); static int convert_ucn_to_byte (char const *hex_text);
static void unexpected_eof (boundary, char const *); static void unexpected_eof (boundary, char const *);
@@ -235,15 +236,11 @@ splice (\\[ \f\t\v]*\n)*
} }
{int} { {int} {
unsigned long num; val->integer = scan_integer (yytext, 10, *loc);
set_errno (0); return INT;
num = strtoul (yytext, 0, 10); }
if (INT_MAX < num || get_errno ()) 0[xX][0-9abcdefABCDEF]+ {
{ val->integer = scan_integer (yytext, 16, *loc);
complain_at (*loc, _("integer out of range: %s"), quote (yytext));
num = INT_MAX;
}
val->integer = num;
return INT; return INT;
} }
@@ -411,7 +408,7 @@ splice (\\[ \f\t\v]*\n)*
<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER> <SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
{ {
\\[0-7]{1,3} { \\[0-7]{1,3} {
unsigned long c = strtoul (yytext + 1, 0, 8); unsigned long int c = strtoul (yytext + 1, 0, 8);
if (UCHAR_MAX < c) if (UCHAR_MAX < c)
complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext)); complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
else if (! c) else if (! c)
@@ -421,7 +418,7 @@ splice (\\[ \f\t\v]*\n)*
} }
\\x[0-9abcdefABCDEF]+ { \\x[0-9abcdefABCDEF]+ {
unsigned long c; unsigned long int c;
set_errno (0); set_errno (0);
c = strtoul (yytext + 2, 0, 16); c = strtoul (yytext + 2, 0, 16);
if (UCHAR_MAX < c || get_errno ()) if (UCHAR_MAX < c || get_errno ())
@@ -790,7 +787,7 @@ handle_action_dollar (char *text, location loc)
} }
else else
{ {
long num; long int num;
set_errno (0); set_errno (0);
num = strtol (cp, 0, 10); num = strtol (cp, 0, 10);
@@ -869,7 +866,7 @@ handle_action_at (char *text, location loc)
obstack_sgrow (&obstack_for_string, "]b4_lhs_location["); obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
else else
{ {
long num; long int num;
set_errno (0); set_errno (0);
num = strtol (cp, 0, 10); num = strtol (cp, 0, 10);
@@ -919,6 +916,25 @@ handle_at (int token_type, char *text, location loc)
} }
/*-------------------------------------------------------------------.
| Convert the text NUMBER to an integer using radix BASE.  If the    |
| value exceeds INT_MAX, or get_errno reports an error after the     |
| conversion, complain at location LOC and return INT_MAX instead.   |
`-------------------------------------------------------------------*/
static unsigned long int
scan_integer (char const *number, int base, location loc)
{
  unsigned long int value;

  /* Clear errno first so that a nonzero value afterwards can only
     come from the strtoul call below.  */
  set_errno (0);
  value = strtoul (number, 0, base);

  /* Common case: the value fits in an int and no error was flagged.  */
  if (value <= INT_MAX && ! get_errno ())
    return value;

  /* Otherwise diagnose the problem and clamp the result to INT_MAX.  */
  complain_at (loc, _("integer out of range: %s"), quote (number));
  return INT_MAX;
}
/*------------------------------------------------------------------. /*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character, | | Convert universal character name UCN to a single-byte character, |
| and return that character. Return -1 if UCN does not correspond | | and return that character. Return -1 if UCN does not correspond |
@@ -928,7 +944,7 @@ handle_at (int token_type, char *text, location loc)
static int static int
convert_ucn_to_byte (char const *ucn) convert_ucn_to_byte (char const *ucn)
{ {
unsigned long code = strtoul (ucn + 2, 0, 16); unsigned long int code = strtoul (ucn + 2, 0, 16);
/* FIXME: Currently we assume Unicode-compatible unibyte characters /* FIXME: Currently we assume Unicode-compatible unibyte characters
on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes). On on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes). On