From 2bf31870a7b9be374e24c20aaac13985b5e33f67 Mon Sep 17 00:00:00 2001 From: YamaArashi Date: Fri, 22 Aug 2014 21:44:18 -0700 Subject: [PATCH] Cleaned up lexer - separated the lexer into multiple functions so it is more readable - fixed issue with long label names in macro arguments - added error checking code to prevent buffer overflows --- include/asm/lexer.h | 7 +- src/asm/lexer.c | 949 +++++++++++++++++++++----------------------- src/asm/yaccprt1.y | 40 +- 3 files changed, 490 insertions(+), 506 deletions(-) diff --git a/include/asm/lexer.h b/include/asm/lexer.h index 27fe475c..5b6db4a2 100644 --- a/include/asm/lexer.h +++ b/include/asm/lexer.h @@ -5,7 +5,8 @@ #include "asm/types.h" -#define LEXHASHSIZE 512 +#define LEXHASHSIZE (1 << 11) +#define MAXSTRLEN 255 struct sLexInitString { char *tzName; @@ -18,7 +19,9 @@ struct sLexFloat { }; struct yy_buffer_state { - char *pBufferStart; + char *pBufferRealStart; // actual starting address + char *pBufferStart; // address where the data is initially written + // after the "safety margin" char *pBuffer; ULONG nBufferSize; ULONG oAtLineStart; diff --git a/src/asm/lexer.c b/src/asm/lexer.c index ead1ee49..a4f43572 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -18,18 +18,18 @@ struct sLexString { ULONG nNameLength; struct sLexString *pNext; }; -#define pLexBuffer (pCurrentBuffer->pBuffer) -#define nLexBufferLeng (pCurrentBuffer->nBufferSize) +#define pLexBufferRealStart (pCurrentBuffer->pBufferRealStart) +#define pLexBuffer (pCurrentBuffer->pBuffer) +#define AtLineStart (pCurrentBuffer->oAtLineStart) #define SAFETYMARGIN 1024 -extern ULONG symvaluetostring(char *dest, char *s); +extern size_t symvaluetostring(char *dest, size_t maxLength, char *sym); struct sLexFloat tLexFloat[32]; struct sLexString *tLexHash[LEXHASHSIZE]; YY_BUFFER_STATE pCurrentBuffer; -ULONG yyleng; -ULONG nLexMaxLeng; +ULONG nLexMaxLength; // max length of all keywords and operators ULONG tFloatingSecondChar[256]; ULONG tFloatingFirstChar[256]; @@ -37,8 +37,6 @@ ULONG tFloatingChars[256]; ULONG nFloating; enum eLexerState lexerstate = LEX_STATE_NORMAL; -#define AtLineStart pCurrentBuffer->oAtLineStart - #ifdef __GNUC__ void strupr(char *s) @@ -73,18 +71,24 @@ yyunputbytes(ULONG count) void yyunput(char c) { + if (pLexBuffer <= pLexBufferRealStart) + fatalerror("Buffer safety margin exceeded"); + *(--pLexBuffer) = c; } void yyunputstr(char *s) { - SLONG i; + int i, len; - i = strlen(s) - 1; + len = strlen(s); - while (i >= 0) - yyunput(s[i--]); + if (pLexBuffer - len < pLexBufferRealStart) + fatalerror("Buffer safety margin exceeded"); + + for (i = len - 1; i >= 0; i--) + *(--pLexBuffer) = s[i]; } void @@ -114,10 +118,10 @@ yy_scan_bytes(char *mem, ULONG size) if ((pBuffer = (YY_BUFFER_STATE) malloc(sizeof(struct yy_buffer_state))) != NULL) { - if ((pBuffer->pBuffer = pBuffer->pBufferStart = + if ((pBuffer->pBufferRealStart = (char *) malloc(size + 1 + SAFETYMARGIN)) != NULL) { - pBuffer->pBuffer += SAFETYMARGIN; - pBuffer->pBufferStart += SAFETYMARGIN; + pBuffer->pBufferStart = pBuffer->pBufferRealStart + SAFETYMARGIN; + pBuffer->pBuffer = pBuffer->pBufferRealStart + SAFETYMARGIN; memcpy(pBuffer->pBuffer, mem, size); pBuffer->nBufferSize = size; pBuffer->oAtLineStart = 1; @@ -135,21 +139,20 @@ yy_create_buffer(FILE * f) YY_BUFFER_STATE pBuffer; if ((pBuffer = - (YY_BUFFER_STATE) malloc(sizeof(struct yy_buffer_state))) != - NULL) { + (YY_BUFFER_STATE) malloc(sizeof(struct yy_buffer_state))) != NULL) { ULONG size; fseek(f, 0, SEEK_END); size = ftell(f); fseek(f, 0, SEEK_SET); - if ((pBuffer->pBuffer = pBuffer->pBufferStart = + if ((pBuffer->pBufferRealStart = (char *) malloc(size + 2 + SAFETYMARGIN)) != NULL) { char *mem; ULONG instring = 0; - pBuffer->pBuffer += SAFETYMARGIN; - pBuffer->pBufferStart += SAFETYMARGIN; + pBuffer->pBufferStart = pBuffer->pBufferRealStart + SAFETYMARGIN; + pBuffer->pBuffer = pBuffer->pBufferRealStart + SAFETYMARGIN; size = fread(pBuffer->pBuffer, sizeof(UBYTE), size, f); @@ -167,24 +170,19 @@ yy_create_buffer(FILE * f) mem += 1; } else { if ((mem[0] == 10 && mem[1] == 13) - || (mem[0] == 13 && mem[1] == 10)) { + || (mem[0] == 13 && mem[1] == 10)) { mem[0] = ' '; mem[1] = '\n'; mem += 2; } else if (mem[0] == 10 || mem[0] == 13) { mem[0] = '\n'; mem += 1; - } else if (mem[0] == '\n' - && mem[1] == '*') { + } else if (mem[0] == '\n' && mem[1] == '*') { mem += 1; - while (! - (*mem == '\n' - || *mem == '\0')) + while (!(*mem == '\n' || *mem == '\0')) *mem++ = ' '; } else if (*mem == ';') { - while (! - (*mem == '\n' - || *mem == '\0')) + while (!(*mem == '\n' || *mem == '\0')) *mem++ = ' '; } else mem += 1; @@ -199,17 +197,33 @@ yy_create_buffer(FILE * f) return (NULL); } -ULONG -lex_FloatAlloc(struct sLexFloat * tok) +ULONG +lex_FloatAlloc(struct sLexFloat *token) { - tLexFloat[nFloating] = (*tok); + tLexFloat[nFloating] = *token; return (1 << (nFloating++)); } +/* + * Make sure that only non-zero ASCII characters are used. Also, check if the + * start is greater than the end of the range. + */ +void +lex_CheckCharacterRange(UWORD start, UWORD end) +{ + if (start > end || start < 1 || end > 127) { + fprintf(stderr, "Invalid character range (start: %u, end: %u)\n", + start, end); + exit(1); + } +} + void lex_FloatDeleteRange(ULONG id, UWORD start, UWORD end) { + lex_CheckCharacterRange(start, end); + while (start <= end) { tFloatingChars[start] &= ~id; start += 1; @@ -219,6 +233,8 @@ lex_FloatDeleteRange(ULONG id, UWORD start, UWORD end) void lex_FloatAddRange(ULONG id, UWORD start, UWORD end) { + lex_CheckCharacterRange(start, end); + while (start <= end) { tFloatingChars[start] |= id; start += 1; @@ -228,6 +244,8 @@ lex_FloatAddRange(ULONG id, UWORD start, UWORD end) void lex_FloatDeleteFirstRange(ULONG id, UWORD start, UWORD end) { + lex_CheckCharacterRange(start, end); + while (start <= end) { tFloatingFirstChar[start] &= ~id; start += 1; @@ -237,6 +255,8 @@ lex_FloatDeleteFirstRange(ULONG id, UWORD start, UWORD end) void lex_FloatAddFirstRange(ULONG id, UWORD start, UWORD end) { + lex_CheckCharacterRange(start, end); + while (start <= end) { tFloatingFirstChar[start] |= id; start += 1; @@ -246,6 +266,8 @@ lex_FloatAddFirstRange(ULONG id, UWORD start, UWORD end) void lex_FloatDeleteSecondRange(ULONG id, UWORD start, UWORD end) { + lex_CheckCharacterRange(start, end); + while (start <= end) { tFloatingSecondChar[start] &= ~id; start += 1; @@ -255,6 +277,8 @@ lex_FloatDeleteSecondRange(ULONG id, UWORD start, UWORD end) void lex_FloatAddSecondRange(ULONG id, UWORD start, UWORD end) { + lex_CheckCharacterRange(start, end); + while (start <= end) { tFloatingSecondChar[start] |= id; start += 1; @@ -262,32 +286,32 @@ lex_FloatAddSecondRange(ULONG id, UWORD start, UWORD end) } struct sLexFloat * -lexgetfloat(ULONG id) +lexgetfloat(ULONG nFloatMask) { - ULONG r = 0, mask = 1; - - if (id == 0) - return (NULL); - - while ((id & mask) == 0) { - mask <<= 1; - r += 1; + if (nFloatMask == 0) { + fatalerror("Internal error in lexgetfloat"); } - return (&tLexFloat[r]); + int i = 0; + + while ((nFloatMask & 1) == 0) { + nFloatMask >>= 1; + i++; + } + + return (&tLexFloat[i]); } ULONG lexcalchash(char *s) { - ULONG r = 0; + ULONG hash = 0; while (*s) { - r = ((r << 1) + (toupper(*s))) % LEXHASHSIZE; - s += 1; + hash = (hash * 283) ^ toupper(*s++); } - return (r); + return (hash % LEXHASHSIZE); } void @@ -295,17 +319,17 @@ lex_Init(void) { ULONG i; - for (i = 0; i < LEXHASHSIZE; i += 1) { + for (i = 0; i < LEXHASHSIZE; i++) { tLexHash[i] = NULL; } - for (i = 0; i < 256; i += 1) { + for (i = 0; i < 256; i++) { tFloatingFirstChar[i] = 0; tFloatingSecondChar[i] = 0; tFloatingChars[i] = 0; } - nLexMaxLeng = 0; + nLexMaxLength = 0; nFloating = 0; } @@ -333,8 +357,8 @@ lex_AddStrings(struct sLexInitString * lex) strupr((*ppHash)->tzName); - if ((*ppHash)->nNameLength > nLexMaxLeng) - nLexMaxLeng = (*ppHash)->nNameLength; + if ((*ppHash)->nNameLength > nLexMaxLength) + nLexMaxLength = (*ppHash)->nNameLength; } else fatalerror("Out of memory!"); @@ -345,458 +369,391 @@ lex_AddStrings(struct sLexInitString * lex) } } +/* + * Gets the "float" mask and "float" length. + * "Float" refers to the token type of a token that is not a keyword. + * The character classes floatingFirstChar, floatingSecondChar, and + * floatingChars are defined separately for each token type. + * It uses bit masks to match against a set of simple regular expressions + * of the form /[floatingFirstChar]([floatingSecondChar][floatingChars]*)?/. + * The token types with the longest match from the current position in the + * buffer will have their bits set in the float mask. + */ +void +yylex_GetFloatMaskAndFloatLen(ULONG *pnFloatMask, ULONG *pnFloatLen) +{ + // Note that '\0' should always have a bit mask of 0 in the "floating" + // tables, so it doesn't need to be checked for separately. + + char *s = pLexBuffer; + ULONG nOldFloatMask = 0; + ULONG nFloatMask = tFloatingFirstChar[(int)*s]; + + if (nFloatMask != 0) { + s++; + nOldFloatMask = nFloatMask; + nFloatMask &= tFloatingSecondChar[(int)*s]; + + while (nFloatMask != 0) { + s++; + nOldFloatMask = nFloatMask; + nFloatMask &= tFloatingChars[(int)*s]; + } + } + + *pnFloatMask = nOldFloatMask; + *pnFloatLen = (ULONG)(s - pLexBuffer); +} + +/* + * Gets the longest keyword/operator from the current position in the buffer. + */ +struct sLexString * +yylex_GetLongestFixed() +{ + struct sLexString *pLongestFixed = NULL; + char *s = pLexBuffer; + ULONG hash = 0; + ULONG length = 0; + + while (length < nLexMaxLength && *s) { + hash = (hash * 283) ^ toupper(*s); + s++; + length++; + + struct sLexString *lex = tLexHash[hash % LEXHASHSIZE]; + + while (lex) { + if (lex->nNameLength == length + && strncasecmp(pLexBuffer, lex->tzName, length) == 0) { + pLongestFixed = lex; + break; + } + lex = lex->pNext; + } + } + + return pLongestFixed; +} + +size_t +CopyMacroArg(char *dest, size_t maxLength, char c) +{ + int i; + char *s; + int argNum; + + switch (c) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + argNum = c - '0'; + break; + case '@': + argNum = -1; + break; + default: + return 0; + } + + if ((s = sym_FindMacroArg(argNum)) == NULL) + fatalerror("Macro argument not defined"); + + for (i = 0; s[i] != 0; i++) { + if (i >= maxLength) { + fatalerror("Macro argument too long to fit buffer"); + } + dest[i] = s[i]; + } + + return i; +} + +static inline void +yylex_StringWriteChar(char *s, size_t index, char c) +{ + if (index >= MAXSTRLEN) { + fatalerror("String too long"); + } + + s[index] = c; +} + +static inline void +yylex_SymbolWriteChar(char *s, size_t index, char c) +{ + if (index >= MAXSYMLEN) { + fatalerror("Symbol too long"); + } + + s[index] = c; +} + +/* + * Trims white space at the end of a string. + * The index parameter is the index of the 0 at the end of the string. + */ +void yylex_TrimEnd(char *s, size_t index) +{ + int i; + + for (i = (int)index - 1; i >= 0 && (s[i] == ' ' || s[i] == '\t'); i--) + s[i] = 0; +} + +size_t +yylex_ReadBracketedSymbol(char *dest, size_t index) +{ + char sym[MAXSYMLEN + 1]; + char ch; + size_t i = 0; + size_t length, maxLength; + + for (ch = *pLexBuffer; + ch != '}' && ch != '"' && ch != '\n'; + ch = *(++pLexBuffer)) { + if (ch == '\\') { + ch = *(++pLexBuffer); + maxLength = MAXSYMLEN - i; + length = CopyMacroArg(&sym[i], maxLength, ch); + + if (length != 0) + i += length; + else + fatalerror("Illegal character escape '%c'", ch); + } else + yylex_SymbolWriteChar(sym, i++, ch); + } + + yylex_SymbolWriteChar(sym, i, 0); + + maxLength = MAXSTRLEN - index; // it's assumed we're writing to a T_STRING + length = symvaluetostring(&dest[index], maxLength, sym); + + if (*pLexBuffer == '}') + pLexBuffer++; + else + yyerror("Missing }"); + + return length; +} + +void +yylex_ReadQuotedString() +{ + size_t index = 0; + size_t length, maxLength; + + while (*pLexBuffer != '"' && *pLexBuffer != '\n') { + char ch = *pLexBuffer++; + + if (ch == '\\') { + ch = *pLexBuffer++; + + switch (ch) { + case 'n': + ch = '\n'; + break; + case 't': + ch = '\t'; + break; + case '\\': + ch = '\\'; + break; + case '"': + ch = '"'; + break; + default: + maxLength = MAXSTRLEN - index; + length = CopyMacroArg(&yylval.tzString[index], maxLength, ch); + + if (length != 0) + index += length; + else + fatalerror("Illegal character escape '%c'", ch); + + ch = 0; + break; + } + } else if (ch == '{') { + // Get bracketed symbol within string. + index += yylex_ReadBracketedSymbol(yylval.tzString, index); + ch = 0; + } + + if (ch) + yylex_StringWriteChar(yylval.tzString, index++, ch); + } + + yylex_StringWriteChar(yylval.tzString, index, 0); + + if (*pLexBuffer == '"') + pLexBuffer++; + else + yyerror("Unterminated string"); +} + +ULONG +yylex_NORMAL() +{ + struct sLexString *pLongestFixed = NULL; + ULONG nFloatMask, nFloatLen; + ULONG linestart = AtLineStart; + + AtLineStart = 0; + +scanagain: + while (*pLexBuffer == ' ' || *pLexBuffer == '\t') { + linestart = 0; + pLexBuffer++; + } + + if (*pLexBuffer == 0) { + // Reached the end of a file, macro, or rept. + if (yywrap() == 0) { + linestart = AtLineStart; + AtLineStart = 0; + goto scanagain; + } + } + + // Try to match an identifier, macro argument (e.g. \1), + // or numeric literal. + yylex_GetFloatMaskAndFloatLen(&nFloatMask, &nFloatLen); + + // Try to match a keyword or operator. + pLongestFixed = yylex_GetLongestFixed(); + + if (nFloatLen == 0 && pLongestFixed == NULL) { + // No keyword, identifier, operator, or numerical literal matches. + + if (*pLexBuffer == '"') { + pLexBuffer++; + yylex_ReadQuotedString(); + return T_STRING; + } else if (*pLexBuffer == '{') { + pLexBuffer++; + yylex_ReadBracketedSymbol(yylval.tzString, 0); + return T_STRING; + } else { + // It's not a keyword, operator, identifier, macro argument, + // numeric literal, string, or bracketed symbol, so just return + // the ASCII character. + if (*pLexBuffer == '\n') + AtLineStart = 1; + + return *pLexBuffer++; + } + } + + if (pLongestFixed == NULL || nFloatLen > pLongestFixed->nNameLength) { + // Longest match was an identifier, macro argument, or numeric literal. + struct sLexFloat *token = lexgetfloat(nFloatMask); + + if (token->Callback) { + int done = token->Callback(pLexBuffer, nFloatLen); + if (!done) + goto scanagain; + } + + pLexBuffer += nFloatLen; + + if (token->nToken == T_ID && linestart) { + return T_LABEL; + } else { + return token->nToken; + } + } + + // Longest match was a keyword or operator. + pLexBuffer += pLongestFixed->nNameLength; + return pLongestFixed->nToken; +} + +ULONG +yylex_MACROARGS() +{ + size_t index = 0; + size_t length, maxLength; + + while (*pLexBuffer == ' ' || *pLexBuffer == '\t') { + pLexBuffer++; + } + + while (*pLexBuffer != ',' && (*pLexBuffer != '\n')) { + char ch = *pLexBuffer++; + + if (ch == '\\') { + ch = *pLexBuffer++; + + switch (ch) { + case 'n': + ch = '\n'; + break; + case 't': + ch = '\t'; + break; + case '\\': + ch = '\\'; + break; + default: + maxLength = MAXSTRLEN - index; + length = CopyMacroArg(&yylval.tzString[index], maxLength, ch); + + if (length != 0) + index += length; + else + fatalerror("Illegal character escape '%c'", ch); + + ch = 0; + break; + } + } else if (ch == '{') { + index += yylex_ReadBracketedSymbol(yylval.tzString, index); + ch = 0; + } + if (ch) + yylex_StringWriteChar(yylval.tzString, index++, ch); + } + + if (index) { + yylex_StringWriteChar(yylval.tzString, index, 0); + + // trim trailing white space at the end of the line + if (*pLexBuffer == '\n') + yylex_TrimEnd(yylval.tzString, index); + + return T_STRING; + } else if (*pLexBuffer == '\n') { + pLexBuffer++; + AtLineStart = 1; + return '\n'; + } else if (*pLexBuffer == ',') { + pLexBuffer++; + return ','; + } + + fatalerror("Internal error in yylex_MACROARGS"); + return 0; +} + ULONG yylex(void) { - ULONG hash, maxlen; - char *s; - struct sLexString *pLongestFixed = NULL; - ULONG nFloatMask, nOldFloatMask, nFloatLen; - ULONG linestart = AtLineStart; - switch (lexerstate) { case LEX_STATE_NORMAL: - AtLineStart = 0; - -scanagain: - - while (*pLexBuffer == ' ' || *pLexBuffer == '\t') { - linestart = 0; - pLexBuffer += 1; - } - - if (*pLexBuffer == 0) { - if (yywrap() == 0) { - linestart = AtLineStart; - AtLineStart = 0; - goto scanagain; - } - } - s = pLexBuffer; - nOldFloatMask = nFloatLen = 0; - nFloatMask = tFloatingFirstChar[(int) *s++]; - while (nFloatMask && nFloatLen < nLexBufferLeng) { - nFloatLen += 1; - nOldFloatMask = nFloatMask; - if (nFloatLen == 1) - nFloatMask &= tFloatingSecondChar[(int) *s++]; - else - nFloatMask &= tFloatingChars[(int) *s++]; - } - - maxlen = nLexBufferLeng; - if (nLexMaxLeng < maxlen) - maxlen = nLexMaxLeng; - - yyleng = 0; - hash = 0; - s = pLexBuffer; - while (yyleng < nLexMaxLeng) { - /* XXX: Kludge warning! The dereference of s below - * may go beyond the end of the buffer. We use the - * following test to stop that from happening, - * without really understanding what the rest of - * the code is doing. This may not be the correct - * fix! */ - if (!*s) - break; - - yyleng += 1; - hash = ((hash << 1) + (toupper(*s))) % LEXHASHSIZE; - s += 1; - if (tLexHash[hash]) { - struct sLexString *lex; - - lex = tLexHash[hash]; - while (lex) { - if (lex->nNameLength == yyleng) { - if (strncasecmp - (pLexBuffer, lex->tzName, - yyleng) == 0) { - pLongestFixed = lex; - } - } - lex = lex->pNext; - } - } - } - - if (nFloatLen == 0 && pLongestFixed == NULL) { - if (*pLexBuffer == '"') { - ULONG index = 0; - - pLexBuffer += 1; - while ((*pLexBuffer != '"') - && (*pLexBuffer != '\n')) { - char ch, *marg; - - if ((ch = *pLexBuffer++) == '\\') { - switch (ch = (*pLexBuffer++)) { - case 'n': - ch = '\n'; - break; - case 't': - ch = '\t'; - break; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if ((marg = - sym_FindMacroArg(ch - - - '0')) - != NULL) { - while (*marg) - yylval. - tzString - [index++] - = - *marg++; - ch = 0; - } - break; - case '@': - if ((marg = - sym_FindMacroArg - (-1)) != NULL) { - while (*marg) - yylval. - tzString - [index++] - = - *marg++; - ch = 0; - } - break; - } - } else if (ch == '{') { - char sym[MAXSYMLEN]; - int i = 0; - - while ((*pLexBuffer != '}') - && (*pLexBuffer != '"') - && (*pLexBuffer != - '\n')) { - if ((ch = - *pLexBuffer++) == - '\\') { - switch (ch = - (*pLexBuffer++)) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if ((marg = sym_FindMacroArg(ch - '0')) != NULL) { - while - (*marg) - sym[i++] = *marg++; - ch = 0; - } - break; - case '@': - if ((marg = sym_FindMacroArg(-1)) != NULL) { - while - (*marg) - sym[i++] = *marg++; - ch = 0; - } - break; - } - } else - sym[i++] = ch; - } - - sym[i] = 0; - index += - symvaluetostring(&yylval. - tzString - [index], - sym); - if (*pLexBuffer == '}') - pLexBuffer += 1; - else - yyerror("Missing }"); - ch = 0; - } - if (ch) - yylval.tzString[index++] = ch; - } - - yylval.tzString[index++] = 0; - - if (*pLexBuffer == '\n') - yyerror("Unterminated string"); - else - pLexBuffer += 1; - - return (T_STRING); - } else if (*pLexBuffer == '{') { - char sym[MAXSYMLEN], ch, *marg; - int i = 0; - - pLexBuffer += 1; - - while ((*pLexBuffer != '}') - && (*pLexBuffer != '\n')) { - if ((ch = *pLexBuffer++) == '\\') { - switch (ch = (*pLexBuffer++)) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if ((marg = - sym_FindMacroArg(ch - - - '0')) - != NULL) { - while (*marg) - sym[i++] - = - *marg++; - ch = 0; - } - break; - case '@': - if ((marg = - sym_FindMacroArg - (-1)) != NULL) { - while (*marg) - sym[i++] - = - *marg++; - ch = 0; - } - break; - } - } else - sym[i++] = ch; - } - sym[i] = 0; - symvaluetostring(yylval.tzString, sym); - if (*pLexBuffer == '}') - pLexBuffer += 1; - else - yyerror("Missing }"); - - return (T_STRING); - } else { - if (*pLexBuffer == '\n') - AtLineStart = 1; - - yyleng = 1; - return (*pLexBuffer++); - } - } - if (nFloatLen == 0) { - yyleng = pLongestFixed->nNameLength; - pLexBuffer += yyleng; - return (pLongestFixed->nToken); - } - if (pLongestFixed == NULL) { - struct sLexFloat *tok; - - tok = lexgetfloat(nOldFloatMask); - yyleng = nFloatLen; - if (tok->Callback) { - if (tok->Callback(pLexBuffer, yyleng) == 0) - goto scanagain; - } - if (tok->nToken == T_ID && linestart) { - pLexBuffer += yyleng; - return (T_LABEL); - } else { - pLexBuffer += yyleng; - return (tok->nToken); - } - } - if (nFloatLen > pLongestFixed->nNameLength) { - struct sLexFloat *tok; - - tok = lexgetfloat(nOldFloatMask); - yyleng = nFloatLen; - if (tok->Callback) { - if (tok->Callback(pLexBuffer, yyleng) == 0) - goto scanagain; - } - if (tok->nToken == T_ID && linestart) { - pLexBuffer += yyleng; - return (T_LABEL); - } else { - pLexBuffer += yyleng; - return (tok->nToken); - } - } else { - yyleng = pLongestFixed->nNameLength; - pLexBuffer += yyleng; - return (pLongestFixed->nToken); - } - break; - + return yylex_NORMAL(); case LEX_STATE_MACROARGS: - { - ULONG index = 0; - - while (*pLexBuffer == ' ' || *pLexBuffer == '\t') { - linestart = 0; - pLexBuffer += 1; - } - - while ((*pLexBuffer != ',') - && (*pLexBuffer != '\n')) { - char ch, *marg; - - if ((ch = *pLexBuffer++) == '\\') { - switch (ch = (*pLexBuffer++)) { - case 'n': - ch = '\n'; - break; - case 't': - ch = '\t'; - break; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if ((marg = - sym_FindMacroArg(ch - - '0')) != - NULL) { - while (*marg) - yylval. - tzString - [index++] = - *marg++; - ch = 0; - } - break; - case '@': - if ((marg = - sym_FindMacroArg(-1)) != - NULL) { - while (*marg) - yylval. - tzString - [index++] = - *marg++; - ch = 0; - } - break; - } - } else if (ch == '{') { - char sym[MAXSYMLEN]; - int i = 0; - - while ((*pLexBuffer != '}') - && (*pLexBuffer != '"') - && (*pLexBuffer != '\n')) { - if ((ch = - *pLexBuffer++) == '\\') { - switch (ch = - (*pLexBuffer++)) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if ((marg = - sym_FindMacroArg - (ch - - '0')) != - NULL) { - while - (*marg) - sym[i++] = *marg++; - ch = 0; - } - break; - case '@': - if ((marg = - sym_FindMacroArg - (-1)) != - NULL) { - while - (*marg) - sym[i++] = *marg++; - ch = 0; - } - break; - } - } else - sym[i++] = ch; - } - sym[i] = 0; - index += - symvaluetostring(&yylval. - tzString[index], - sym); - if (*pLexBuffer == '}') - pLexBuffer += 1; - else - yyerror("Missing }"); - ch = 0; - } - if (ch) - yylval.tzString[index++] = ch; - } - - if (index) { - yyleng = index; - yylval.tzString[index] = 0; - if (*pLexBuffer == '\n') { - while (yylval.tzString[--index] == ' ') { - yylval.tzString[index] = 0; - yyleng -= 1; - } - } - return (T_STRING); - } else if (*pLexBuffer == '\n') { - pLexBuffer += 1; - AtLineStart = 1; - yyleng = 1; - return ('\n'); - } else if (*pLexBuffer == ',') { - pLexBuffer += 1; - yyleng = 1; - return (','); - } else { - yyerror("INTERNAL ERROR IN YYLEX"); - return (0); - } - } - - break; + return yylex_MACROARGS(); } - yyerror("INTERNAL ERROR IN YYLEX"); - return (0); + fatalerror("Internal error in yylex"); + return 0; } diff --git a/src/asm/yaccprt1.y b/src/asm/yaccprt1.y index ea99ff52..8464d370 100644 --- a/src/asm/yaccprt1.y +++ b/src/asm/yaccprt1.y @@ -21,14 +21,38 @@ extern bool haltnop; char *tzNewMacro; ULONG ulNewMacroSize; -ULONG symvaluetostring( char *dest, char *sym ) +size_t symvaluetostring(char *dest, size_t maxLength, char *sym) { - if( sym_isString(sym) ) - strcpy( dest, sym_GetStringValue(sym) ); - else - sprintf( dest, "$%lX", sym_GetConstantValue(sym) ); + size_t length; - return( strlen(dest) ); + if (sym_isString(sym)) { + char *src = sym_GetStringValue(sym); + size_t i; + + for (i = 0; src[i] != 0; i++) { + if (i >= maxLength) { + fatalerror("Symbol value too long to fit buffer"); + } + dest[i] = src[i]; + } + + length = i; + } else { + ULONG value = sym_GetConstantValue(sym); + int fullLength = snprintf(dest, maxLength + 1, "$%lX", value); + + if (fullLength < 0) { + fatalerror("snprintf encoding error"); + } else { + length = (size_t)fullLength; + + if (length > maxLength) { + fatalerror("Symbol value too long to fit buffer"); + } + } + } + + return length; } ULONG str2int( char *s ) @@ -335,8 +359,8 @@ void if_skip_to_endc( void ) %union { - char tzSym[MAXSYMLEN+1]; - char tzString[256]; + char tzSym[MAXSYMLEN + 1]; + char tzString[MAXSTRLEN + 1]; struct Expression sVal; SLONG nConstValue; }