rgbds/src/asm/lexer.c

/*
 * This file is part of RGBDS.
 *
 * Copyright (c) 1997-2019, Carsten Sorensen and RGBDS contributors.
 *
 * SPDX-License-Identifier: MIT
 */

#include <assert.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <ctype.h>

#include "asm/asm.h"
#include "asm/constexpr.h"
#include "asm/fstack.h"
#include "asm/lexer.h"
#include "asm/main.h"
#include "asm/rpn.h"

#include "extern/err.h"

#include "asmy.h"

/*
 * One keyword/operator entry of the tLexHash hash table.
 * Entries that fall into the same bucket are chained through pNext.
 */
struct sLexString {
	char *tzName;		/* Heap copy of the name, converted to upper case */
	uint32_t nToken;	/* Token value returned when the name is matched */
	uint32_t nNameLength;	/* Length of the name, terminator excluded */
	struct sLexString *pNext; /* Next entry of the bucket, NULL at the end */
};

/* Shorthands for the fields of the buffer currently being lexed */
#define pLexBufferRealStart	(pCurrentBuffer->pBufferRealStart)
#define pLexBuffer		(pCurrentBuffer->pBuffer)
#define AtLineStart		(pCurrentBuffer->oAtLineStart)

/*
 * Number of spare bytes allocated in front of each buffer's contents;
 * the yyunput* functions write into that space.
 */
#define SAFETYMARGIN		1024

/* Size in bytes of the UTF-8 byte order mark */
#define BOM_SIZE 3

/* "Float" token descriptors; lex_FloatAlloc() hands out bit N for slot N */
struct sLexFloat tLexFloat[32];
/* Hash table of keywords and operators, filled by lex_AddStrings() */
struct sLexString *tLexHash[LEXHASHSIZE];
/* Buffer the lexer is currently reading from */
YY_BUFFER_STATE pCurrentBuffer;
uint32_t nLexMaxLength; // max length of all keywords and operators

/*
 * Per-character bit masks, indexed by character value: a bit is set if the
 * "float" token type owning that bit accepts the character as its second
 * character, its first character, or any later character, respectively.
 */
uint32_t tFloatingSecondChar[256];
uint32_t tFloatingFirstChar[256];
uint32_t tFloatingChars[256];
/* Number of slots of tLexFloat handed out so far */
uint32_t nFloating;
/* State that yylex() dispatches on */
enum eLexerState lexerstate = LEX_STATE_NORMAL;

/* Innermost string expansion in progress (linked to its parents), or NULL */
struct sStringExpansionPos *pCurrentStringExpansion;
/* Number of string expansions currently in progress */
static unsigned int nNbStringExpansions;

/* UTF-8 byte order mark */
static const unsigned char bom[BOM_SIZE] = { 0xEF, 0xBB, 0xBF };

/*
 * Converts the NUL-terminated string `s` to upper case, in place.
 *
 * Each byte is handed to toupper() as an unsigned char: passing it a negative
 * value other than EOF (which is what a byte >= 0x80 is where `char` is
 * signed) is undefined behavior.
 */
void upperstring(char *s)
{
	for (; *s; s++)
		*s = (char)toupper((unsigned char)*s);
}

/*
 * Converts the NUL-terminated string `s` to lower case, in place.
 *
 * Each byte is handed to tolower() as an unsigned char: passing it a negative
 * value other than EOF (which is what a byte >= 0x80 is where `char` is
 * signed) is undefined behavior.
 */
void lowerstring(char *s)
{
	for (; *s; s++)
		*s = (char)tolower((unsigned char)*s);
}

/*
 * Moves the read position of the current buffer forward by `count` bytes.
 * No bounds check is performed here.
 */
void yyskipbytes(uint32_t count)
{
	pLexBuffer += count;
}

/*
 * Moves the read position of the current buffer back by `count` bytes,
 * leaving the buffer's contents untouched.
 * No bounds check is performed here.
 */
void yyunputbytes(uint32_t count)
{
	pLexBuffer -= count;
}

/*
 * Pushes the character `c` back in front of the read position of the current
 * buffer, so that it is the next character read.
 * Raises a fatal error if the read position is already at (or before) the
 * very start of the buffer's allocation.
 */
void yyunput(char c)
{
	if (pLexBuffer <= pLexBufferRealStart)
		fatalerror("Buffer safety margin exceeded");

	pLexBuffer--;
	*pLexBuffer = c;
}

/*
 * Pushes the NUL-terminated string `s` (terminator excluded) back in front of
 * the read position of the current buffer, so that it is read next.
 * Raises a fatal error if there is not enough room before the read position.
 */
void yyunputstr(const char *s)
{
	/* Kept as a size_t: narrowing it could turn a huge length negative */
	size_t len = strlen(s);

	/*
	 * It would be undefined behavior to subtract `len` from pLexBuffer and
	 * potentially have it point outside of pLexBufferRealStart's buffer,
	 * this is why the check is done this way.
	 * Refer to https://github.com/rednex/rgbds/pull/411#discussion_r319779797
	 */
	if (pLexBuffer < pLexBufferRealStart
	 || (size_t)(pLexBuffer - pLexBufferRealStart) < len)
		fatalerror("Buffer safety margin exceeded");

	pLexBuffer -= len;

	memcpy(pLexBuffer, s, len);
}

/*
 * Marks that a new string expansion with name `tzName` ends here
 * Enforces recursion depth
 */
void lex_BeginStringExpansion(const char *tzName)
{
	if (++nNbStringExpansions > nMaxRecursionDepth)
		fatalerror("Recursion limit (%d) exceeded", nMaxRecursionDepth);

	struct sStringExpansionPos *pNewStringExpansion =
		malloc(sizeof(*pNewStringExpansion));
	char *tzNewExpansionName = strdup(tzName);

	if (!pNewStringExpansion || !tzNewExpansionName)
		fatalerror("Could not allocate memory to expand '%s'",
			   tzName);

	pNewStringExpansion->tzName = tzNewExpansionName;
	pNewStringExpansion->pBuffer = pLexBufferRealStart;
	pNewStringExpansion->pBufferPos = pLexBuffer;
	pNewStringExpansion->pParent = pCurrentStringExpansion;

	pCurrentStringExpansion = pNewStringExpansion;
}

/*
 * Makes `buf` the current buffer, i.e. the one the lexer reads from.
 */
void yy_switch_to_buffer(YY_BUFFER_STATE buf)
{
	pCurrentBuffer = buf;
}

/*
 * Sets the lexer state, which selects the routine yylex() uses to read the
 * next token.
 */
void yy_set_state(enum eLexerState i)
{
	lexerstate = i;
}

/*
 * Frees `buf` along with its contents.
 */
void yy_delete_buffer(YY_BUFFER_STATE buf)
{
	/* The allocation begins SAFETYMARGIN bytes before the contents */
	free(buf->pBufferStart - SAFETYMARGIN);
	free(buf);
}

/*
 * Appends the character `c` to the contents of `buf`, whose contents can hold
 * `capacity` bytes.
 *
 * The following invariants are expected on entry and hold on return:
 * 1. nBufferSize < capacity
 * 2. The buffer is terminated with 0
 * 3. nBufferSize is the size without the terminator
 */
static void yy_buffer_append(YY_BUFFER_STATE buf, uint32_t capacity, char c)
{
	char *tail = buf->pBufferStart + buf->nBufferSize;

	assert(*tail == 0);
	assert(buf->nBufferSize + 1 < capacity);

	/* Overwrite the terminator, then put a new one right after */
	tail[0] = c;
	tail[1] = 0;
	buf->nBufferSize++;
}

YY_BUFFER_STATE yy_scan_bytes(char *mem, uint32_t size)
{
	YY_BUFFER_STATE pBuffer = malloc(sizeof(struct yy_buffer_state));

	if (pBuffer == NULL)
		fatalerror("%s: Out of memory!", __func__);

	pBuffer->pBufferRealStart = malloc(size + 1 + SAFETYMARGIN);

	if (pBuffer->pBufferRealStart == NULL)
		fatalerror("%s: Out of memory for buffer!", __func__);

	pBuffer->pBufferStart = pBuffer->pBufferRealStart + SAFETYMARGIN;
	pBuffer->pBuffer = pBuffer->pBufferRealStart + SAFETYMARGIN;
	memcpy(pBuffer->pBuffer, mem, size);
	pBuffer->nBufferSize = size;
	pBuffer->oAtLineStart = 1;
	pBuffer->pBuffer[size] = 0;

	return pBuffer;
}

/*
 * Reads the whole of stream `f` into a newly allocated buffer, and prepares
 * its contents for the lexer:
 *  - a leading UTF-8 byte order mark is skipped;
 *  - CR, CR LF and LF CR line endings are turned into LF (two-character
 *    endings become " \n", so the contents keep their length);
 *  - comments are overwritten with spaces;
 *  - a newline is appended if the contents do not end with one, and another
 *    one if the last line ends with a backslash.
 * SAFETYMARGIN spare bytes are kept in front of the contents.
 * Raises a fatal error if memory cannot be allocated, if reading fails, or if
 * the input contains a NUL character.
 */
YY_BUFFER_STATE yy_create_buffer(FILE *f)
{
	YY_BUFFER_STATE pBuffer = malloc(sizeof(struct yy_buffer_state));

	if (pBuffer == NULL)
		fatalerror("%s: Out of memory!", __func__);

	size_t size = 0, capacity = 0;
	bool sizeKnown = false;
	char *buf = NULL;

	/*
	 * Check if we can get the file size without implementation-defined
	 * behavior:
	 *
	 * From ftell(3p):
	 * [On error], ftell() and ftello() shall return -1, and set errno to
	 * indicate the error.
	 *
	 * The ftell() and ftello() functions shall fail if: [...]
	 * ESPIPE The file descriptor underlying stream is associated with a
	 * pipe, FIFO, or socket.
	 *
	 * From fseek(3p):
	 * The behavior of fseek() on devices which are incapable of seeking
	 * is implementation-defined.
	 */
	if (ftell(f) != -1) {
		/* Only trust the end offset if seeking to it succeeded */
		if (fseek(f, 0, SEEK_END) == 0) {
			long fileSize = ftell(f);

			if (fileSize != -1) {
				capacity = fileSize;
				sizeKnown = true;
			}
		}
		rewind(f);
	}

	// If the size could not be obtained, start with an arbitrary capacity
	if (!sizeKnown)
		capacity = 4096;
	// Handle 0-byte files gracefully
	else if (capacity == 0)
		capacity = 1;

	while (!feof(f)) {
		if (buf == NULL || size >= capacity) {
			if (buf)
				capacity *= 2;
			/* Give extra room for 2 newlines and terminator */
			buf = realloc(buf, capacity + SAFETYMARGIN + 3);

			if (buf == NULL)
				fatalerror("%s: Out of memory for buffer!",
					   __func__);
		}

		char *bufpos = buf + SAFETYMARGIN + size;
		size_t read_count = fread(bufpos, 1, capacity - size, f);

		if (read_count == 0 && !feof(f))
			fatalerror("%s: fread error", __func__);

		size += read_count;
	}

	pBuffer->pBufferRealStart = buf;
	pBuffer->pBufferStart = buf + SAFETYMARGIN;
	pBuffer->pBuffer = buf + SAFETYMARGIN;
	pBuffer->pBuffer[size] = 0;
	pBuffer->nBufferSize = size;

	/* This is added here to make the buffer scaling above easy to express,
	 * while taking the newline space into account
	 * for the `yy_buffer_append`s below.
	 */
	capacity += 3;

	/* Skip UTF-8 byte order mark. */
	if (pBuffer->nBufferSize >= BOM_SIZE
	 && !memcmp(pBuffer->pBuffer, bom, BOM_SIZE))
		pBuffer->pBuffer += BOM_SIZE;

	/* Convert all line endings to LF and spaces */

	char *mem = pBuffer->pBuffer;
	int32_t lineCount = 0;

	while (*mem) {
		if ((mem[0] == '\\') && (mem[1] == '\"' || mem[1] == '\\')) {
			mem += 2;
		} else {
			/* LF CR and CR LF */
			if (((mem[0] == '\n') && (mem[1] == '\r'))
			 || ((mem[0] == '\r') && (mem[1] == '\n'))) {
				*mem++ = ' ';
				*mem++ = '\n';
				lineCount++;
			/* LF and CR */
			} else if ((mem[0] == '\n') || (mem[0] == '\r')) {
				*mem++ = '\n';
				lineCount++;
			} else {
				mem++;
			}
		}
	}

	/*
	 * The scan above stops at the first NUL, so stopping anywhere before
	 * the end of the data means that the input itself contains one.
	 * The end is measured from pBufferStart, not from pBuffer: the latter
	 * may have been moved past a BOM, whose bytes `size` still counts.
	 */
	if (mem != pBuffer->pBufferStart + size) {
		nLineNo = lineCount + 1;
		fatalerror("Found null character");
	}

	/* Remove comments */

	mem = pBuffer->pBuffer;
	bool instring = false;

	while (*mem) {
		if (*mem == '\"')
			instring = !instring;

		if ((mem[0] == '\\') && (mem[1] == '\"' || mem[1] == '\\')) {
			mem += 2;
		} else if (instring) {
			mem++;
		} else {
			/* Comments that start with ; anywhere in a line */
			if (*mem == ';') {
				while (!((*mem == '\n') || (*mem == '\0')))
					*mem++ = ' ';
			/* Comments that start with * at the start of a line */
			} else if ((mem[0] == '\n') && (mem[1] == '*')) {
				mem++;
				while (!((*mem == '\n') || (*mem == '\0')))
					*mem++ = ' ';
			} else {
				mem++;
			}
		}
	}

	/* Add newline if file doesn't end with one */
	if (size == 0 || pBuffer->pBufferStart[size - 1] != '\n')
		yy_buffer_append(pBuffer, capacity, '\n');

	/* Add newline if \ will eat the last newline */
	if (pBuffer->nBufferSize >= 2) {
		size_t pos = pBuffer->nBufferSize - 2;

		/* Skip spaces */
		while (pos > 0 && pBuffer->pBufferStart[pos] == ' ')
			pos--;

		if (pBuffer->pBufferStart[pos] == '\\')
			yy_buffer_append(pBuffer, capacity, '\n');
	}

	pBuffer->oAtLineStart = 1;
	return pBuffer;
}

/*
 * Registers a new "float" token type described by `token`.
 * Returns the ID of the new type, which is a single bit: bit N for the Nth
 * type registered. That ID is what the lex_Float*Range() functions expect.
 * Exits with an error once every slot of tLexFloat is taken, as there would
 * be neither a slot nor a bit of the 32-bit mask left for the new type.
 */
uint32_t lex_FloatAlloc(const struct sLexFloat *token)
{
	const uint32_t nbSlots = sizeof(tLexFloat) / sizeof(tLexFloat[0]);

	if (nFloating >= nbSlots)
		errx(1, "%s: Too many floating token types (max: %u)",
		     __func__, (unsigned int)nbSlots);

	/* Shift an unsigned 1: `1 << 31` would overflow a signed int */
	uint32_t id = UINT32_C(1) << nFloating;

	tLexFloat[nFloating] = *token;
	nFloating++;

	return id;
}

/*
 * Validates a range of characters: both ends must be non-zero ASCII
 * characters (1 to 127), and the start must not be greater than the end.
 * Exits with an error message if the range is invalid.
 */
void lex_CheckCharacterRange(uint16_t start, uint16_t end)
{
	if (start <= end && start >= 1 && end <= 127)
		return;

	errx(1, "Invalid character range (start: %u, end: %u)",
	     start, end);
}

/*
 * Makes the token type(s) `id` stop accepting the characters `start` to `end`
 * (both included) past their second character.
 * The range is validated by lex_CheckCharacterRange().
 */
void lex_FloatDeleteRange(uint32_t id, uint16_t start, uint16_t end)
{
	lex_CheckCharacterRange(start, end);

	for (uint32_t c = start; c <= end; c++)
		tFloatingChars[c] &= ~id;
}

/*
 * Makes the token type(s) `id` accept the characters `start` to `end` (both
 * included) past their second character.
 * The range is validated by lex_CheckCharacterRange().
 */
void lex_FloatAddRange(uint32_t id, uint16_t start, uint16_t end)
{
	lex_CheckCharacterRange(start, end);

	for (uint32_t c = start; c <= end; c++)
		tFloatingChars[c] |= id;
}

/*
 * Makes the token type(s) `id` stop accepting the characters `start` to `end`
 * (both included) as their first character.
 * The range is validated by lex_CheckCharacterRange().
 */
void lex_FloatDeleteFirstRange(uint32_t id, uint16_t start, uint16_t end)
{
	lex_CheckCharacterRange(start, end);

	for (uint32_t c = start; c <= end; c++)
		tFloatingFirstChar[c] &= ~id;
}

/*
 * Makes the token type(s) `id` accept the characters `start` to `end` (both
 * included) as their first character.
 * The range is validated by lex_CheckCharacterRange().
 */
void lex_FloatAddFirstRange(uint32_t id, uint16_t start, uint16_t end)
{
	lex_CheckCharacterRange(start, end);

	for (uint32_t c = start; c <= end; c++)
		tFloatingFirstChar[c] |= id;
}

/*
 * Makes the token type(s) `id` stop accepting the characters `start` to `end`
 * (both included) as their second character.
 * The range is validated by lex_CheckCharacterRange().
 */
void lex_FloatDeleteSecondRange(uint32_t id, uint16_t start, uint16_t end)
{
	lex_CheckCharacterRange(start, end);

	for (uint32_t c = start; c <= end; c++)
		tFloatingSecondChar[c] &= ~id;
}

/*
 * Makes the token type(s) `id` accept the characters `start` to `end` (both
 * included) as their second character.
 * The range is validated by lex_CheckCharacterRange().
 */
void lex_FloatAddSecondRange(uint32_t id, uint16_t start, uint16_t end)
{
	lex_CheckCharacterRange(start, end);

	for (uint32_t c = start; c <= end; c++)
		tFloatingSecondChar[c] |= id;
}

static struct sLexFloat *lexgetfloat(uint32_t nFloatMask)
{
	if (nFloatMask == 0)
		fatalerror("Internal error in %s", __func__);

	int32_t i = 0;

	while ((nFloatMask & 1) == 0) {
		nFloatMask >>= 1;
		i++;
	}

	return &tLexFloat[i];
}

/*
 * Computes the index of the tLexHash bucket that the NUL-terminated name `s`
 * belongs to. Letter case is ignored: characters are hashed in upper case.
 *
 * Each byte is handed to toupper() as an unsigned char: passing it a negative
 * value other than EOF (which is what a byte >= 0x80 is where `char` is
 * signed) is undefined behavior.
 */
static uint32_t lexcalchash(const char *s)
{
	uint32_t hash = 0;

	for (; *s; s++)
		hash = (hash * 283) ^ (uint32_t)toupper((unsigned char)*s);

	return hash % LEXHASHSIZE;
}

/*
 * Resets the lexer's tables and counters: the keyword hash table and the
 * "floating" character tables are emptied, and no string expansion is in
 * progress anymore.
 */
void lex_Init(void)
{
	for (uint32_t bucket = 0; bucket < LEXHASHSIZE; bucket++)
		tLexHash[bucket] = NULL;

	memset(tFloatingFirstChar, 0, sizeof(tFloatingFirstChar));
	memset(tFloatingSecondChar, 0, sizeof(tFloatingSecondChar));
	memset(tFloatingChars, 0, sizeof(tFloatingChars));

	nFloating = 0;
	nLexMaxLength = 0;

	nNbStringExpansions = 0;
	pCurrentStringExpansion = NULL;
}

void lex_AddStrings(const struct sLexInitString *lex)
{
	while (lex->tzName) {
		struct sLexString **ppHash;
		uint32_t hash;

		ppHash = &tLexHash[hash = lexcalchash(lex->tzName)];
		while (*ppHash)
			ppHash = &((*ppHash)->pNext);

		*ppHash = malloc(sizeof(struct sLexString));
		if (*ppHash == NULL)
			fatalerror("Out of memory!");

		(*ppHash)->tzName = (char *)strdup(lex->tzName);
		if ((*ppHash)->tzName == NULL)
			fatalerror("Out of memory!");

		(*ppHash)->nNameLength = strlen(lex->tzName);
		(*ppHash)->nToken = lex->nToken;
		(*ppHash)->pNext = NULL;

		upperstring((*ppHash)->tzName);

		if ((*ppHash)->nNameLength > nLexMaxLength)
			nLexMaxLength = (*ppHash)->nNameLength;

		lex += 1;
	}
}

/*
 * Computes the "float" mask and "float" length at the current position of the
 * buffer, and stores them in `*pnFloatMask` and `*pnFloatLen`.
 * "Float" refers to the token type of a token that is not a keyword.
 * Each token type has its own three character classes (floatingFirstChar,
 * floatingSecondChar, and floatingChars), which amount to a simple regular
 * expression of the form
 * /[floatingFirstChar]([floatingSecondChar][floatingChars]*)?/.
 * All token types are matched at once using bit masks; the bits left set in
 * the float mask are those of the token types with the longest match, whose
 * length is the float length. Without any match, both are 0.
 */
void yylex_GetFloatMaskAndFloatLen(uint32_t *pnFloatMask, uint32_t *pnFloatLen)
{
	/*
	 * Note that '\0' should always have a bit mask of 0 in the "floating"
	 * tables, so it doesn't need to be checked for separately.
	 */

	const char *cursor = pLexBuffer;
	/* Token types that matched everything before `cursor` */
	uint32_t matched = 0;
	/* Token types that also match the character at `cursor` */
	uint32_t candidates = tFloatingFirstChar[(uint8_t)*cursor];

	if (candidates != 0) {
		matched = candidates;
		cursor++;
		candidates &= tFloatingSecondChar[(uint8_t)*cursor];

		for (; candidates != 0;
		     candidates &= tFloatingChars[(uint8_t)*cursor]) {
			matched = candidates;
			cursor++;
		}
	}

	*pnFloatMask = matched;
	*pnFloatLen = (uint32_t)(cursor - pLexBuffer);
}

/*
 * Gets the longest keyword/operator from the current position in the buffer.
 * Returns its entry of the tLexHash table, or NULL if nothing matches.
 * Letter case is ignored.
 *
 * The hash of every prefix (up to nLexMaxLength characters long) is computed
 * incrementally, the same way lexcalchash() does, and looked up in the table.
 */
struct sLexString *yylex_GetLongestFixed(void)
{
	struct sLexString *pLongestFixed = NULL;
	const char *s = pLexBuffer;
	uint32_t hash = 0;

	for (uint32_t length = 1; length <= nLexMaxLength && *s;
	     length++, s++) {
		/*
		 * The buffer holds arbitrary bytes, and toupper() must not be
		 * given a negative value (other than EOF): that is undefined
		 * behavior. Hence the conversion to unsigned char.
		 */
		hash = (hash * 283) ^ (uint32_t)toupper((unsigned char)*s);

		struct sLexString *lex = tLexHash[hash % LEXHASHSIZE];

		for (; lex; lex = lex->pNext) {
			if (lex->nNameLength == length
			 && strncasecmp(pLexBuffer, lex->tzName, length) == 0) {
				/* Longer prefixes are tried later on */
				pLongestFixed = lex;
				break;
			}
		}
	}

	return pLongestFixed;
}

/*
 * Copies the macro argument designated by the escape character `c` to `dest`,
 * without a terminator: '1' to '9' designate arguments 1 to 9, and '@'
 * designates argument -1 as understood by sym_FindMacroArg().
 * Returns the number of characters copied, or 0 if `c` is none of the above
 * (in which case nothing is copied).
 * Raises a fatal error if the argument is not defined, or if it is longer
 * than `maxLength` characters.
 */
size_t CopyMacroArg(char *dest, size_t maxLength, char c)
{
	int32_t argNum;

	if (c >= '1' && c <= '9')
		argNum = c - '0';
	else if (c == '@')
		argNum = -1;
	else
		return 0;

	char *s = sym_FindMacroArg(argNum);

	if (s == NULL)
		fatalerror("Macro argument not defined");

	size_t i = 0;

	while (s[i] != 0) {
		if (i >= maxLength)
			fatalerror("Macro argument too long to fit buffer");

		dest[i] = s[i];
		i++;
	}

	return i;
}

/*
 * Stores the character `c` at index `index` of the string buffer `s`.
 * Raises a fatal error if `index` is MAXSTRLEN or more.
 */
static inline void yylex_StringWriteChar(char *s, size_t index, char c)
{
	if (!(index < MAXSTRLEN))
		fatalerror("String too long");

	*(s + index) = c;
}

/*
 * Stores the character `c` at index `index` of the symbol name buffer `s`.
 * Raises a fatal error if `index` is MAXSYMLEN or more.
 */
static inline void yylex_SymbolWriteChar(char *s, size_t index, char c)
{
	if (!(index < MAXSYMLEN))
		fatalerror("Symbol too long");

	*(s + index) = c;
}

/*
 * Trims white space (spaces and tabs) at the end of a string, by overwriting
 * it with 0s.
 * The index parameter is the index of the 0 at the end of the string.
 *
 * The index is kept as a size_t all along, rather than narrowed to a signed
 * 32-bit integer, so it cannot be truncated.
 */
void yylex_TrimEnd(char *s, size_t index)
{
	while (index > 0 && (s[index - 1] == ' ' || s[index - 1] == '\t'))
		s[--index] = 0;
}

/*
 * Reads a bracketed symbol from the buffer, starting right after its opening
 * '{', and writes the value of the symbol as a string to `dest`, starting at
 * index `index`. The closing '}' is consumed.
 *
 * Between the brackets, backslash escapes designating macro arguments are
 * expanded into the symbol's name, and a single character followed by a colon
 * (e.g. "{x:symbol}") selects how the value is printed.
 *
 * Returns the length reported by symvaluetostring().
 * Raises a fatal error on an invalid escape or print type, if the name is
 * too long, or if the closing '}' is missing.
 */
size_t yylex_ReadBracketedSymbol(char *dest, size_t index)
{
	char sym[MAXSYMLEN + 1];
	char ch;
	size_t i = 0;
	size_t length, maxLength;
	const char *mode = NULL;

	/*
	 * The scan also stops at the buffer's terminating 0: going past it
	 * would read outside of the buffer's contents. This case is then
	 * reported as a missing '}' below, like a premature end of line is.
	 */
	for (ch = *pLexBuffer;
	     ch != '}' && ch != '"' && ch != '\n' && ch != '\0';
	     ch = *(++pLexBuffer)) {
		if (ch == '\\') {
			ch = *(++pLexBuffer);
			maxLength = MAXSYMLEN - i;
			length = CopyMacroArg(&sym[i], maxLength, ch);

			if (length != 0)
				i += length;
			else
				fatalerror("Illegal character escape '%c'", ch);
		} else if (ch == ':' && !mode) { /* Only grab 1st colon */
			/* Use a whitelist of modes, which does prevent the
			 * use of some features such as precision,
			 * but also avoids a security flaw
			 */
			const char *acceptedModes = "bxXd";
			/* Binary isn't natively supported,
			 * so it's handled differently
			 */
			static const char * const formatSpecifiers[] = {
				"", "%x", "%X", "%d"
			};
			const char *designatedMode;

			/* Prevent reading out of bounds! */
			if (i != 1)
				fatalerror("Print types are exactly 1 character long");

			designatedMode = strchr(acceptedModes, sym[i - 1]);
			if (!designatedMode)
				fatalerror("Illegal print type '%c'",
					   sym[i - 1]);
			mode = formatSpecifiers[designatedMode - acceptedModes];
			/* Begin writing the symbol again */
			i = 0;
		} else {
			yylex_SymbolWriteChar(sym, i++, ch);
		}
	}

	/* Properly terminate the string */
	yylex_SymbolWriteChar(sym, i, 0);

	/* It's assumed we're writing to a T_STRING */
	maxLength = MAXSTRLEN - index;
	length = symvaluetostring(&dest[index], maxLength, sym, mode);

	if (*pLexBuffer == '}')
		pLexBuffer++;
	else
		fatalerror("Missing }");

	return length;
}

/*
 * Reads a quoted string from the buffer, starting right after its opening
 * quote, into yylval.tzString (which gets terminated). The closing quote is
 * consumed.
 *
 * Backslash escapes are processed: \n, \t, \\, \", \, \{ and \} stand for a
 * character, and the others are handed to CopyMacroArg() to be expanded as
 * macro arguments. An unescaped '{' begins a bracketed symbol, whose value is
 * inserted in the string.
 *
 * Raises a fatal error on an invalid escape, if the string is too long, or if
 * it is not closed before the end of the line.
 */
static void yylex_ReadQuotedString(void)
{
	size_t index = 0;
	size_t length, maxLength;

	/*
	 * The scan also stops at the buffer's terminating 0: going past it
	 * would read outside of the buffer's contents. This case is then
	 * reported as an unterminated string below.
	 */
	while (*pLexBuffer != '"' && *pLexBuffer != '\n'
	    && *pLexBuffer != '\0') {
		char ch = *pLexBuffer++;

		if (ch == '\\') {
			ch = *pLexBuffer++;

			switch (ch) {
			case 'n':
				ch = '\n';
				break;
			case 't':
				ch = '\t';
				break;
			case '\\':
				ch = '\\';
				break;
			case '"':
				ch = '"';
				break;
			case ',':
				ch = ',';
				break;
			case '{':
				ch = '{';
				break;
			case '}':
				ch = '}';
				break;
			default:
				maxLength = MAXSTRLEN - index;
				length = CopyMacroArg(&yylval.tzString[index],
						      maxLength, ch);

				if (length != 0)
					index += length;
				else
					fatalerror("Illegal character escape '%c'",
						   ch);

				/* The expansion is already in the string */
				ch = 0;
				break;
			}
		} else if (ch == '{') {
			// Get bracketed symbol within string.
			index += yylex_ReadBracketedSymbol(yylval.tzString,
							   index);
			ch = 0;
		}

		/* A 0 means that there is no character left to be written */
		if (ch)
			yylex_StringWriteChar(yylval.tzString, index++, ch);
	}

	yylex_StringWriteChar(yylval.tzString, index, 0);

	if (*pLexBuffer == '"')
		pLexBuffer++;
	else
		fatalerror("Unterminated string");
}

/*
 * Reads and returns the next token while the lexer is in LEX_STATE_NORMAL.
 *
 * Spaces and tabs are skipped, and so are line continuations (a backslash
 * followed by optional spaces/tabs and a newline). Then, the longest "float"
 * match (see yylex_GetFloatMaskAndFloatLen) is compared with the longest
 * keyword/operator (see yylex_GetLongestFixed) at the current position; the
 * former only wins if it is strictly longer.
 * If neither matches, a quoted string or a bracketed symbol is read and
 * returned as a T_STRING; anything else is returned as a single character.
 *
 * A T_ID found right at the start of a line (with nothing skipped before it)
 * is returned as a T_LABEL instead.
 */
static uint32_t yylex_NORMAL(void)
{
	struct sLexString *pLongestFixed = NULL;
	uint32_t nFloatMask, nFloatLen;
	/* Whether the token begins right at the start of a line */
	uint32_t linestart = AtLineStart;

	AtLineStart = 0;

scanagain:
	while (*pLexBuffer == ' ' || *pLexBuffer == '\t') {
		linestart = 0;
		pLexBuffer++;
	}

	if (*pLexBuffer == 0) {
		// Reached the end of a file, macro, or rept.
		if (yywrap() == 0) {
			/* Start over, using the (current) buffer's flag */
			linestart = AtLineStart;
			AtLineStart = 0;
			goto scanagain;
		}
	}

	/* Check for line continuation character */
	if (*pLexBuffer == '\\') {
		/*
		 * Look for line continuation character after a series of
		 * spaces. This is also useful for files that use Windows line
		 * endings: "\r\n" is replaced by " \n" before the lexer has the
		 * opportunity to see it.
		 */
		if (pLexBuffer[1] == ' ' || pLexBuffer[1] == '\t') {
			pLexBuffer += 2;
			while (1) {
				if (*pLexBuffer == ' ' || *pLexBuffer == '\t') {
					pLexBuffer++;
				} else if (*pLexBuffer == '\n') {
					pLexBuffer++;
					nLineNo += 1;
					goto scanagain;
				} else {
					errx(1, "Expected a new line after the continuation character.");
				}
			}
		}

		/* Line continuation character */
		if (pLexBuffer[1] == '\n') {
			pLexBuffer += 2;
			nLineNo += 1;
			goto scanagain;
		}

		/*
		 * If there isn't a newline character or a space, ignore the
		 * character '\'. It will eventually be handled by other
		 * functions like PutMacroArg().
		 */
	}

	/*
	 * Try to match an identifier, macro argument (e.g. \1),
	 * or numeric literal.
	 */
	yylex_GetFloatMaskAndFloatLen(&nFloatMask, &nFloatLen);

	/* Try to match a keyword or operator. */
	pLongestFixed = yylex_GetLongestFixed();

	if (nFloatLen == 0 && pLongestFixed == NULL) {
		/*
		 * No keyword, identifier, operator, or numerical literal
		 * matches.
		 */

		if (*pLexBuffer == '"') {
			pLexBuffer++;
			yylex_ReadQuotedString();
			return T_STRING;
		} else if (*pLexBuffer == '{') {
			pLexBuffer++;
			size_t len = yylex_ReadBracketedSymbol(yylval.tzString,
							       0);
			yylval.tzString[len] = 0;
			return T_STRING;
		}

		/*
		 * It's not a keyword, operator, identifier, macro argument,
		 * numeric literal, string, or bracketed symbol, so just return
		 * the ASCII character.
		 */
		unsigned char ch = *pLexBuffer++;

		if (ch == '\n')
			AtLineStart = 1;

		/*
		 * Check for invalid unprintable characters.
		 * They may not be readily apparent in a text editor,
		 * so this is useful for identifying encoding problems.
		 */
		if (ch != 0
		 && ch != '\n'
		 && !(ch >= 0x20 && ch <= 0x7E))
			fatalerror("Found garbage character: 0x%02X", ch);

		return ch;
	}

	if (pLongestFixed == NULL || nFloatLen > pLongestFixed->nNameLength) {
		/*
		 * Longest match was an identifier, macro argument, or numeric
		 * literal.
		 */
		struct sLexFloat *token = lexgetfloat(nFloatMask);

		if (token->Callback) {
			int32_t done = token->Callback(pLexBuffer, nFloatLen);

			/* The callback returned 0: scan again from the top */
			if (!done)
				goto scanagain;
		}

		if (token->nToken == T_ID && linestart)
			return T_LABEL;
		else
			return token->nToken;
	}

	/* Longest match was a keyword or operator. */
	pLexBuffer += pLongestFixed->nNameLength;
	yylval.nConstValue = pLongestFixed->nToken;
	return pLongestFixed->nToken;
}

/*
 * Reads and returns the next token while the lexer is in
 * LEX_STATE_MACROARGS.
 *
 * After skipping leading spaces and tabs, characters are gathered in
 * yylval.tzString until an unescaped comma or a newline, which is left in the
 * buffer. Backslash escapes, line continuations and bracketed symbols are
 * processed along the way.
 * If anything was gathered, it is terminated and T_STRING is returned (with
 * trailing spaces and tabs trimmed if a newline follows). Otherwise, the
 * comma or newline is consumed and returned.
 */
static uint32_t yylex_MACROARGS(void)
{
	size_t index = 0;
	size_t length, maxLength;

	while ((*pLexBuffer == ' ') || (*pLexBuffer == '\t'))
		pLexBuffer++;

	/*
	 * NOTE(review): this loop does not check for the buffer's terminating
	 * 0, so it appears to rely on a newline always coming first - confirm
	 */
	while ((*pLexBuffer != ',') && (*pLexBuffer != '\n')) {
		char ch = *pLexBuffer++;

		if (ch == '\\') {
			ch = *pLexBuffer++;

			switch (ch) {
			case 'n':
				ch = '\n';
				break;
			case 't':
				ch = '\t';
				break;
			case '\\':
				ch = '\\';
				break;
			case '"':
				ch = '\"';
				break;
			case ',':
				ch = ',';
				break;
			case '{':
				ch = '{';
				break;
			case '}':
				ch = '}';
				break;
			case ' ':
			case '\t':
				/*
				 * Look for line continuation character after a
				 * series of spaces. This is also useful for
				 * files that use Windows line endings: "\r\n"
				 * is replaced by " \n" before the lexer has the
				 * opportunity to see it.
				 */
				while (1) {
					if (*pLexBuffer == ' '
					 || *pLexBuffer == '\t') {
						pLexBuffer++;
					} else if (*pLexBuffer == '\n') {
						pLexBuffer++;
						nLineNo += 1;
						ch = 0;
						break;
					} else {
						errx(1, "Expected a new line after the continuation character.");
					}
				}
				break;
			case '\n':
				/* Line continuation character */
				nLineNo += 1;
				ch = 0;
				break;
			default:
				/* Any other escape must be a macro argument */
				maxLength = MAXSTRLEN - index;
				length = CopyMacroArg(&yylval.tzString[index],
						      maxLength, ch);

				if (length != 0)
					index += length;
				else
					fatalerror("Illegal character escape '%c'",
						   ch);

				ch = 0;
				break;
			}
		} else if (ch == '{') {
			index += yylex_ReadBracketedSymbol(yylval.tzString,
							   index);
			ch = 0;
		}
		/* A 0 means that there is no character left to be written */
		if (ch)
			yylex_StringWriteChar(yylval.tzString, index++, ch);
	}

	if (index) {
		yylex_StringWriteChar(yylval.tzString, index, 0);

		/* trim trailing white space at the end of the line */
		if (*pLexBuffer == '\n')
			yylex_TrimEnd(yylval.tzString, index);

		return T_STRING;
	} else if (*pLexBuffer == '\n') {
		pLexBuffer++;
		AtLineStart = 1;
		return '\n';
	} else if (*pLexBuffer == ',') {
		pLexBuffer++;
		return ',';
	}

	fatalerror("Internal error in %s", __func__);
}

int yylex(void)
{
	int returnedChar;
	switch (lexerstate) {
	case LEX_STATE_NORMAL:
		returnedChar = yylex_NORMAL();
		break;
	case LEX_STATE_MACROARGS:
		returnedChar = yylex_MACROARGS();
		break;
	default:
		fatalerror("%s: Internal error.", __func__);
	}

	/* Check if string expansions were fully read */
	while (pCurrentStringExpansion
	    && pCurrentStringExpansion->pBuffer == pLexBufferRealStart
	    && pCurrentStringExpansion->pBufferPos <= pLexBuffer) {
		struct sStringExpansionPos *pParent =
			pCurrentStringExpansion->pParent;
		free(pCurrentStringExpansion->tzName);
		free(pCurrentStringExpansion);

		pCurrentStringExpansion = pParent;
		nNbStringExpansions--;
	}

	return returnedChar;
}