Get rid of the ad hoc handling of token_buffer in the scanner: use
the obstacks.
* src/lex.c (token_obstack): New.
(init_lex): Initialize it.  No longer call...
(grow_token_buffer): this.  Remove it.
Adjust all the places which used it to use the obstack.
This commit is contained in:
Akim Demaille
2001-01-19 18:26:32 +00:00
parent 511e79b3d4
commit f17bcd1f66
3 changed files with 99 additions and 118 deletions

View File

@@ -1,3 +1,13 @@
2001-01-19 Akim Demaille <akim@epita.fr>
Get rid of the ad hoc handling of token_buffer in the scanner: use
the obstacks.
* src/lex.c (token_obstack): New.
(init_lex): Initialize it. No longer call...
(grow_token_buffer): this. Remove it.
Adjust all the places which used it to use the obstack.
2001-01-19 Akim Demaille <akim@epita.fr> 2001-01-19 Akim Demaille <akim@epita.fr>
* src/lex.h: Rename all the tokens: * src/lex.h: Rename all the tokens:

183
src/lex.c
View File

@@ -30,10 +30,8 @@
#include "quote.h" #include "quote.h"
/* Buffer for storing the current token. */ /* Buffer for storing the current token. */
char *token_buffer; struct obstack token_obstack;
char *token_buffer = NULL;
/* Allocated size of token_buffer, not including space for terminator. */
static int maxtoken;
bucket *symval; bucket *symval;
int numval; int numval;
@@ -45,22 +43,11 @@ static bucket *unlexed_symval; /* by the next call to lex */
void void
init_lex (void) init_lex (void)
{ {
maxtoken = 100; obstack_init (&token_obstack);
token_buffer = XCALLOC (char, maxtoken + 1);
unlexed = -1; unlexed = -1;
} }
static char *
grow_token_buffer (char *p)
{
int offset = p - token_buffer;
maxtoken *= 2;
token_buffer = XREALLOC (token_buffer, char, maxtoken + 1);
return token_buffer + offset;
}
int int
skip_white_space (void) skip_white_space (void)
{ {
@@ -148,16 +135,22 @@ xgetc (FILE *f)
/*------------------------------------------------------------------. /*------------------------------------------------------------------.
| Read one literal character from finput. Process \ escapes. | | Read one literal character from finput. Process \ escapes. |
| Append the normalized string version of the char to *PP. Assign | | Append the normalized string version of the char to OUT. Assign |
| the character code to *PCODE. Return 1 unless the character is an | | the character code to *PCODE. Return 1 unless the character is an |
| unescaped `term' or \n report error for \n | | unescaped `term' or \n report error for \n. |
`------------------------------------------------------------------*/ `------------------------------------------------------------------*/
/* FIXME: We could directly work in the obstack, but that would make
it more difficult to move to quotearg some day. So for the time
being, I prefer have literalchar behave like quotearg, and change
my mind later if I was wrong. */
static int static int
literalchar (char **pp, int *pcode, char term) literalchar (struct obstack *out, int *pcode, char term)
{ {
int c; int c;
char *p; char buf[4096];
char *cp;
int code; int code;
int wasquote = 0; int wasquote = 0;
@@ -249,68 +242,71 @@ literalchar (char **pp, int *pcode, char term)
} }
} /* has \ */ } /* has \ */
/* now fill token_buffer with the canonical name for this character /* now fill BUF with the canonical name for this character as a
as a literal token. Do not use what the user typed, literal token. Do not use what the user typed, so that `\012'
so that `\012' and `\n' can be interchangeable. */ and `\n' can be interchangeable. */
p = *pp; cp = buf;
if (code == term && wasquote) if (code == term && wasquote)
*p++ = code; *cp++ = code;
else if (code == '\\') else if (code == '\\')
{ {
*p++ = '\\'; *cp++ = '\\';
*p++ = '\\'; *cp++ = '\\';
} }
else if (code == '\'') else if (code == '\'')
{ {
*p++ = '\\'; *cp++ = '\\';
*p++ = '\''; *cp++ = '\'';
} }
else if (code == '\"') else if (code == '\"')
{ {
*p++ = '\\'; *cp++ = '\\';
*p++ = '\"'; *cp++ = '\"';
} }
else if (code >= 040 && code < 0177) else if (code >= 040 && code < 0177)
*p++ = code; *cp++ = code;
else if (code == '\t') else if (code == '\t')
{ {
*p++ = '\\'; *cp++ = '\\';
*p++ = 't'; *cp++ = 't';
} }
else if (code == '\n') else if (code == '\n')
{ {
*p++ = '\\'; *cp++ = '\\';
*p++ = 'n'; *cp++ = 'n';
} }
else if (code == '\r') else if (code == '\r')
{ {
*p++ = '\\'; *cp++ = '\\';
*p++ = 'r'; *cp++ = 'r';
} }
else if (code == '\v') else if (code == '\v')
{ {
*p++ = '\\'; *cp++ = '\\';
*p++ = 'v'; *cp++ = 'v';
} }
else if (code == '\b') else if (code == '\b')
{ {
*p++ = '\\'; *cp++ = '\\';
*p++ = 'b'; *cp++ = 'b';
} }
else if (code == '\f') else if (code == '\f')
{ {
*p++ = '\\'; *cp++ = '\\';
*p++ = 'f'; *cp++ = 'f';
} }
else else
{ {
*p++ = '\\'; *cp++ = '\\';
*p++ = code / 0100 + '0'; *cp++ = code / 0100 + '0';
*p++ = ((code / 010) & 07) + '0'; *cp++ = ((code / 010) & 07) + '0';
*p++ = (code & 07) + '0'; *cp++ = (code & 07) + '0';
} }
*pp = p; *cp = '\0';
if (out)
obstack_sgrow (out, buf);
*pcode = code; *pcode = code;
return !wasquote; return !wasquote;
} }
@@ -331,7 +327,6 @@ unlex (int token)
void void
read_type_name (FILE *fin) read_type_name (FILE *fin)
{ {
char *p = token_buffer;
int c = getc (fin); int c = getc (fin);
while (c != '>') while (c != '>')
@@ -345,13 +340,11 @@ read_type_name (FILE *fin)
break; break;
} }
if (p == token_buffer + maxtoken) obstack_1grow (&token_obstack, c);
p = grow_token_buffer (p);
*p++ = c;
c = getc (fin); c = getc (fin);
} }
*p = 0; obstack_1grow (&token_obstack, '\0');
token_buffer = obstack_finish (&token_obstack);
} }
@@ -359,7 +352,9 @@ token_t
lex (void) lex (void)
{ {
int c; int c;
char *p;
/* Just to make sure. */
token_buffer = NULL;
if (unlexed >= 0) if (unlexed >= 0)
{ {
@@ -370,14 +365,11 @@ lex (void)
} }
c = skip_white_space (); c = skip_white_space ();
/* for error messages (token buffer always valid) */
*token_buffer = c;
token_buffer[1] = 0;
switch (c) switch (c)
{ {
case EOF: case EOF:
strcpy (token_buffer, "EOF"); token_buffer = "EOF";
return tok_eof; return tok_eof;
case 'A': case 'B': case 'C': case 'D': case 'E': case 'A': case 'B': case 'C': case 'D': case 'E':
@@ -394,17 +386,13 @@ lex (void)
case 'z': case 'z':
case '.': case '_': case '.': case '_':
p = token_buffer;
while (isalnum (c) || c == '_' || c == '.') while (isalnum (c) || c == '_' || c == '.')
{ {
if (p == token_buffer + maxtoken) obstack_1grow (&token_obstack, c);
p = grow_token_buffer (p);
*p++ = c;
c = getc (finput); c = getc (finput);
} }
obstack_1grow (&token_obstack, '\0');
*p = 0; token_buffer = obstack_finish (&token_obstack);
ungetc (c, finput); ungetc (c, finput);
symval = getsym (token_buffer); symval = getsym (token_buffer);
return tok_identifier; return tok_identifier;
@@ -414,17 +402,14 @@ lex (void)
{ {
numval = 0; numval = 0;
p = token_buffer;
while (isdigit (c)) while (isdigit (c))
{ {
if (p == token_buffer + maxtoken) obstack_1grow (&token_obstack, c);
p = grow_token_buffer (p);
*p++ = c;
numval = numval * 10 + c - '0'; numval = numval * 10 + c - '0';
c = getc (finput); c = getc (finput);
} }
*p = 0; obstack_1grow (&token_obstack, '\0');
token_buffer = obstack_finish (&token_obstack);
ungetc (c, finput); ungetc (c, finput);
return tok_number; return tok_number;
} }
@@ -435,25 +420,21 @@ lex (void)
translations = -1; translations = -1;
{ {
int code, discode; int code, discode;
char discard[10], *dp;
p = token_buffer; obstack_1grow (&token_obstack, '\'');
*p++ = '\''; literalchar (&token_obstack, &code, '\'');
literalchar (&p, &code, '\'');
c = getc (finput); c = getc (finput);
if (c != '\'') if (c != '\'')
{ {
complain (_("use \"...\" for multi-character literal tokens")); complain (_("use \"...\" for multi-character literal tokens"));
while (1) while (1)
{ if (!literalchar (0, &discode, '\''))
dp = discard; break;
if (!literalchar (&dp, &discode, '\''))
break;
}
} }
*p++ = '\''; obstack_1grow (&token_obstack, '\'');
*p = 0; obstack_1grow (&token_obstack, '\0');
token_buffer = obstack_finish (&token_obstack);
symval = getsym (token_buffer); symval = getsym (token_buffer);
symval->class = token_sym; symval->class = token_sym;
if (!symval->user_token_number) if (!symval->user_token_number)
@@ -467,15 +448,13 @@ lex (void)
translations = -1; translations = -1;
{ {
int code; /* ignored here */ int code; /* ignored here */
p = token_buffer;
*p++ = '\"'; obstack_1grow (&token_obstack, '\"');
/* Read up to and including ". */ /* Read up to and including ". */
while (literalchar (&p, &code, '\"')) while (literalchar (&token_obstack, &code, '\"'))
{ /* nothing */;
if (p >= token_buffer + maxtoken - 4) obstack_1grow (&token_obstack, '\0');
p = grow_token_buffer (p); token_buffer = obstack_finish (&token_obstack);
}
*p = 0;
symval = getsym (token_buffer); symval = getsym (token_buffer);
symval->class = token_sym; symval->class = token_sym;
@@ -509,7 +488,7 @@ lex (void)
if (c == '{') if (c == '{')
{ {
strcpy (token_buffer, "={"); token_buffer = "={";
return tok_left_curly; return tok_left_curly;
} }
else else
@@ -589,14 +568,9 @@ int
parse_percent_token (void) parse_percent_token (void)
{ {
int c; int c;
char *p;
struct percent_table_struct *tx; struct percent_table_struct *tx;
p = token_buffer;
c = getc (finput); c = getc (finput);
*p++ = '%';
*p++ = c; /* for error msg */
*p = 0;
switch (c) switch (c)
{ {
@@ -621,25 +595,22 @@ parse_percent_token (void)
case '=': case '=':
return tok_prec; return tok_prec;
} }
if (!isalpha (c)) if (!isalpha (c))
return tok_illegal; return tok_illegal;
p = token_buffer; obstack_1grow (&token_obstack, '%');
*p++ = '%';
while (isalpha (c) || c == '_' || c == '-') while (isalpha (c) || c == '_' || c == '-')
{ {
if (p == token_buffer + maxtoken)
p = grow_token_buffer (p);
if (c == '-') if (c == '-')
c = '_'; c = '_';
*p++ = c; obstack_1grow (&token_obstack, c);
c = getc (finput); c = getc (finput);
} }
ungetc (c, finput); ungetc (c, finput);
obstack_1grow (&token_obstack, '\0');
*p = 0; token_buffer = obstack_finish (&token_obstack);
/* table lookup % directive */ /* table lookup % directive */
for (tx = percent_table; tx->name; tx++) for (tx = percent_table; tx->name; tx++)

View File

@@ -466,7 +466,7 @@ copy_definition (void)
static void static void
parse_token_decl (symbol_class what_is, symbol_class what_is_not) parse_token_decl (symbol_class what_is, symbol_class what_is_not)
{ {
int token = 0; token_t token = 0;
char *typename = 0; char *typename = 0;
/* The symbol being defined. */ /* The symbol being defined. */
@@ -593,7 +593,7 @@ parse_type_decl (void)
for (;;) for (;;)
{ {
int t; token_t t;
int tmp_char = ungetc (skip_white_space (), finput); int tmp_char = ungetc (skip_white_space (), finput);
if (tmp_char == '%') if (tmp_char == '%')
@@ -643,7 +643,7 @@ parse_assoc_decl (associativity assoc)
for (;;) for (;;)
{ {
int t; token_t t;
int tmp_char = ungetc (skip_white_space (), finput); int tmp_char = ungetc (skip_white_space (), finput);
if (tmp_char == '%') if (tmp_char == '%')
@@ -821,7 +821,7 @@ parse_expect_decl (void)
static void static void
parse_thong_decl (void) parse_thong_decl (void)
{ {
int token; token_t token;
struct bucket *symbol; struct bucket *symbol;
char *typename = 0; char *typename = 0;
int usrtoknum; int usrtoknum;
@@ -1198,12 +1198,12 @@ static int
get_type (void) get_type (void)
{ {
int k; int k;
int t; token_t token;
char *name; char *name;
t = lex (); token = lex ();
if (t != tok_typename) if (token != tok_typename)
{ {
complain (_("invalid %s declaration"), "%type"); complain (_("invalid %s declaration"), "%type");
return t; return t;
@@ -1213,9 +1213,9 @@ get_type (void)
for (;;) for (;;)
{ {
t = lex (); token = lex ();
switch (t) switch (token)
{ {
case tok_semicolon: case tok_semicolon:
return lex (); return lex ();
@@ -1232,7 +1232,7 @@ get_type (void)
break; break;
default: default:
return t; return token;
} }
} }
} }
@@ -1253,7 +1253,7 @@ get_type (void)
static void static void
readgram (void) readgram (void)
{ {
int t; token_t t;
bucket *lhs = NULL; bucket *lhs = NULL;
symbol_list *p; symbol_list *p;
symbol_list *p1; symbol_list *p1;
@@ -1352,7 +1352,7 @@ readgram (void)
if (t == tok_identifier) if (t == tok_identifier)
{ {
bucket *ssave; bucket *ssave;
int t1; token_t t1;
ssave = symval; ssave = symval;
t1 = lex (); t1 = lex ();