Add support for hex token numbers.

This commit is contained in:
Paul Eggert
2004-03-08 20:49:34 +00:00
parent 006d217ddd
commit 1452af69b4
4 changed files with 63 additions and 19 deletions

View File

@@ -1,3 +1,18 @@
2004-03-08 Paul Eggert <eggert@gnu.org>
Add support for hex token numbers. Suggested by Odd Arild Olsen in
<http://mail.gnu.org/archive/html/bison-patches/2004-03/msg00000.html>.
* NEWS: Document hexadecimal tokens, no NUL bytes, %destructor
in lalr1.cc.
* doc/bison.texinfo (Token Decl): Add hexadecimal token numbers.
* src/scan-gram.l (scan_integer): New function.
({int}): Use it.
(0[xX][0-9abcdefABCDEF]+): New pattern, to support hex numbers.
(<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>, \\x[0-9abcdefABCDEF]+,
handle_action_dollar, handle_action_at, convert_ucn_to_byte):
Say "long int", not "long", for uniformity with GNU style.
2004-02-25 Paul Eggert <eggert@twinsun.com>
* tests/local.at (AT_COMPILE, AT_COMPILE_CXX): Ignore stdout from

15
NEWS
View File

@@ -1,7 +1,18 @@
Bison News
----------
Changes in version 1.875c:
Changes in version 1.875d:
* NUL bytes are no longer allowed in Bison string literals, unfortunately.
* %token numbers can now be hexadecimal integers, e.g., `%token FOO 0x12d'.
This is a GNU extension.
* Experimental %destructor support has been added to lalr1.cc.
Changes in version 1.875c, 2003-08-25:
(Just bug fixes.)
Changes in version 1.875b, 2003-06-17:
@@ -551,7 +562,7 @@ End:
-----
Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.

View File

@@ -44,7 +44,7 @@ This manual is for @acronym{GNU} Bison (version @value{VERSION},
@value{UPDATED}), the @acronym{GNU} parser generator.
Copyright @copyright{} 1988, 1989, 1990, 1991, 1992, 1993, 1995, 1998,
1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
@quotation
Permission is granted to copy, distribute and/or modify this document
@@ -3309,10 +3309,12 @@ associativity and precedence. @xref{Precedence Decl, ,Operator
Precedence}.
You can explicitly specify the numeric code for a token type by appending
an integer value in the field immediately following the token name:
a decimal or hexadecimal integer value in the field immediately
following the token name:
@example
%token NUM 300
%token XNUM 0x12d // a GNU extension
@end example
@noindent

View File

@@ -1,6 +1,6 @@
/* Bison Grammar Scanner -*- C -*-
Copyright (C) 2002, 2003 Free Software Foundation, Inc.
Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
@@ -101,6 +101,7 @@ static int rule_length;
static void handle_dollar (int token_type, char *cp, location loc);
static void handle_at (int token_type, char *cp, location loc);
static void handle_syncline (char *args);
static unsigned long int scan_integer (char const *p, int base, location loc);
static int convert_ucn_to_byte (char const *hex_text);
static void unexpected_eof (boundary, char const *);
@@ -235,15 +236,11 @@ splice (\\[ \f\t\v]*\n)*
}
{int} {
unsigned long num;
set_errno (0);
num = strtoul (yytext, 0, 10);
if (INT_MAX < num || get_errno ())
{
complain_at (*loc, _("integer out of range: %s"), quote (yytext));
num = INT_MAX;
}
val->integer = num;
val->integer = scan_integer (yytext, 10, *loc);
return INT;
}
0[xX][0-9abcdefABCDEF]+ {
/* Hexadecimal integer: "0x" or "0X" followed by one or more hex
   digits.  Convert the matched text in base 16 with scan_integer
   and hand it back as an INT token.  */
val->integer = scan_integer (yytext, 16, *loc);
return INT;
}
@@ -411,7 +408,7 @@ splice (\\[ \f\t\v]*\n)*
<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
{
\\[0-7]{1,3} {
unsigned long c = strtoul (yytext + 1, 0, 8);
unsigned long int c = strtoul (yytext + 1, 0, 8);
if (UCHAR_MAX < c)
complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
else if (! c)
@@ -421,7 +418,7 @@ splice (\\[ \f\t\v]*\n)*
}
\\x[0-9abcdefABCDEF]+ {
unsigned long c;
unsigned long int c;
set_errno (0);
c = strtoul (yytext + 2, 0, 16);
if (UCHAR_MAX < c || get_errno ())
@@ -790,7 +787,7 @@ handle_action_dollar (char *text, location loc)
}
else
{
long num;
long int num;
set_errno (0);
num = strtol (cp, 0, 10);
@@ -869,7 +866,7 @@ handle_action_at (char *text, location loc)
obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
else
{
long num;
long int num;
set_errno (0);
num = strtol (cp, 0, 10);
@@ -919,6 +916,25 @@ handle_at (int token_type, char *text, location loc)
}
/*---------------------------------------------------------------.
| Convert the text NUMBER, written in base BASE, to an integer.  |
| If the value exceeds INT_MAX, or get_errno () is nonzero after |
| the conversion, complain at LOC and return INT_MAX instead.    |
`---------------------------------------------------------------*/
static unsigned long int
scan_integer (char const *number, int base, location loc)
{
  unsigned long int value;

  set_errno (0);
  value = strtoul (number, 0, base);

  /* In range and no conversion error: hand the value back as is.  */
  if (value <= INT_MAX && ! get_errno ())
    return value;

  complain_at (loc, _("integer out of range: %s"), quote (number));
  return INT_MAX;
}
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character, |
| and return that character. Return -1 if UCN does not correspond |
@@ -928,7 +944,7 @@ handle_at (int token_type, char *text, location loc)
static int
convert_ucn_to_byte (char const *ucn)
{
unsigned long code = strtoul (ucn + 2, 0, 16);
unsigned long int code = strtoul (ucn + 2, 0, 16);
/* FIXME: Currently we assume Unicode-compatible unibyte characters
on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes). On