From 1decf5d0d45e8ec6a8a0fc0d4ba640dcc1f2fb13 Mon Sep 17 00:00:00 2001 From: dbrotz <43593771+dbrotz@users.noreply.github.com> Date: Sun, 16 Jun 2019 15:50:56 -0700 Subject: [PATCH 1/2] Fix out of bounds array access in lexer If the type char is signed, then in the function yylex_GetFloatMaskAndFloatLen(), *s can have a negative value and be converted to a negative int32_t which is then used as an array index. It should be converted to uint8_t instead to ensure that the value is in the bounds of the tFloatingFirstChar, tFloatingSecondChar, and tFloatingChars arrays. --- src/asm/lexer.c | 6 +++--- test/asm/garbage_char.asm | 1 + test/asm/garbage_char.out | 3 +++ test/asm/garbage_char.out.pipe | 3 +++ 4 files changed, 10 insertions(+), 3 deletions(-) create mode 100644 test/asm/garbage_char.asm create mode 100644 test/asm/garbage_char.out create mode 100644 test/asm/garbage_char.out.pipe diff --git a/src/asm/lexer.c b/src/asm/lexer.c index a98fef81..e47032d3 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -471,17 +471,17 @@ void yylex_GetFloatMaskAndFloatLen(uint32_t *pnFloatMask, uint32_t *pnFloatLen) char *s = pLexBuffer; uint32_t nOldFloatMask = 0; - uint32_t nFloatMask = tFloatingFirstChar[(int32_t)*s]; + uint32_t nFloatMask = tFloatingFirstChar[(uint8_t)*s]; if (nFloatMask != 0) { s++; nOldFloatMask = nFloatMask; - nFloatMask &= tFloatingSecondChar[(int32_t)*s]; + nFloatMask &= tFloatingSecondChar[(uint8_t)*s]; while (nFloatMask != 0) { s++; nOldFloatMask = nFloatMask; - nFloatMask &= tFloatingChars[(int32_t)*s]; + nFloatMask &= tFloatingChars[(uint8_t)*s]; } } diff --git a/test/asm/garbage_char.asm b/test/asm/garbage_char.asm new file mode 100644 index 00000000..ca5f132c --- /dev/null +++ b/test/asm/garbage_char.asm @@ -0,0 +1 @@ +xÿ \ No newline at end of file diff --git a/test/asm/garbage_char.out b/test/asm/garbage_char.out new file mode 100644 index 00000000..a05e4960 --- /dev/null +++ b/test/asm/garbage_char.out @@ -0,0 +1,3 @@ +ERROR: garbage_char.asm(1): + syntax error +error: Assembly aborted (1 errors)! diff --git a/test/asm/garbage_char.out.pipe b/test/asm/garbage_char.out.pipe new file mode 100644 index 00000000..1ba12ddc --- /dev/null +++ b/test/asm/garbage_char.out.pipe @@ -0,0 +1,3 @@ +ERROR: -(1): + syntax error +error: Assembly aborted (1 errors)! From 484d15dbb279ca2fca490e278fd81404561b08f7 Mon Sep 17 00:00:00 2001 From: dbrotz <43593771+dbrotz@users.noreply.github.com> Date: Sat, 29 Jun 2019 12:50:41 -0700 Subject: [PATCH 2/2] Handle unprintable characters more gracefully * Skip UTF-8 byte order mark at beginning of file * Error on other unexpected unprintable characters --- src/asm/lexer.c | 38 +++++++++++++++++++++++++++------- test/asm/garbage_char.out | 3 +-- test/asm/garbage_char.out.pipe | 3 +-- 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index e47032d3..5baa0877 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -38,6 +38,8 @@ struct sLexString { #define SAFETYMARGIN 1024 +#define BOM_SIZE 3 + struct sLexFloat tLexFloat[32]; struct sLexString *tLexHash[LEXHASHSIZE]; YY_BUFFER_STATE pCurrentBuffer; @@ -49,6 +51,9 @@ uint32_t tFloatingChars[256]; uint32_t nFloating; enum eLexerState lexerstate = LEX_STATE_NORMAL; +/* UTF-8 byte order mark */ +static const unsigned char bom[BOM_SIZE] = { 0xEF, 0xBB, 0xBF }; + void upperstring(char *s) { while (*s) { @@ -120,11 +125,11 @@ void yy_delete_buffer(YY_BUFFER_STATE buf) */ static void yy_buffer_append(YY_BUFFER_STATE buf, uint32_t capacity, char c) { - assert(buf->pBuffer[buf->nBufferSize] == 0); + assert(buf->pBufferStart[buf->nBufferSize] == 0); assert(buf->nBufferSize + 1 < capacity); - buf->pBuffer[buf->nBufferSize++] = c; - buf->pBuffer[buf->nBufferSize] = 0; + buf->pBufferStart[buf->nBufferSize++] = c; + buf->pBufferStart[buf->nBufferSize] = 0; } YY_BUFFER_STATE yy_scan_bytes(char *mem, uint32_t size) @@ -221,6 +226,11 @@ YY_BUFFER_STATE yy_create_buffer(FILE *f) */ capacity += 3; + /* Skip UTF-8 byte order mark. */ + if (pBuffer->nBufferSize >= BOM_SIZE + && !memcmp(pBuffer->pBuffer, bom, BOM_SIZE)) + pBuffer->pBuffer += BOM_SIZE; + /* Convert all line endings to LF and spaces */ char *mem = pBuffer->pBuffer; @@ -281,7 +291,7 @@ YY_BUFFER_STATE yy_create_buffer(FILE *f) } /* Add newline if file doesn't end with one */ - if (size == 0 || pBuffer->pBuffer[size - 1] != '\n') + if (size == 0 || pBuffer->pBufferStart[size - 1] != '\n') yy_buffer_append(pBuffer, capacity, '\n'); /* Add newline if \ will eat the last newline */ @@ -289,10 +299,10 @@ YY_BUFFER_STATE yy_create_buffer(FILE *f) size_t pos = pBuffer->nBufferSize - 2; /* Skip spaces */ - while (pos > 0 && pBuffer->pBuffer[pos] == ' ') + while (pos > 0 && pBuffer->pBufferStart[pos] == ' ') pos--; - if (pBuffer->pBuffer[pos] == '\\') + if (pBuffer->pBufferStart[pos] == '\\') yy_buffer_append(pBuffer, capacity, '\n'); } @@ -783,10 +793,22 @@ scanagain: * numeric literal, string, or bracketed symbol, so just return * the ASCII character. */ - if (*pLexBuffer == '\n') + unsigned char ch = *pLexBuffer++; + + if (ch == '\n') AtLineStart = 1; - return *pLexBuffer++; + /* + * Check for invalid unprintable characters. + * They may not be readily apparent in a text editor, + * so this is useful for identifying encoding problems. + */ + if (ch != 0 + && ch != '\n' + && !(ch >= 0x20 && ch <= 0x7E)) + fatalerror("Found garbage character: 0x%02X", ch); + + return ch; } if (pLongestFixed == NULL || nFloatLen > pLongestFixed->nNameLength) { diff --git a/test/asm/garbage_char.out b/test/asm/garbage_char.out index a05e4960..b2a30cb7 100644 --- a/test/asm/garbage_char.out +++ b/test/asm/garbage_char.out @@ -1,3 +1,2 @@ ERROR: garbage_char.asm(1): - syntax error -error: Assembly aborted (1 errors)! + Found garbage character: 0xFF diff --git a/test/asm/garbage_char.out.pipe b/test/asm/garbage_char.out.pipe index 1ba12ddc..43643660 100644 --- a/test/asm/garbage_char.out.pipe +++ b/test/asm/garbage_char.out.pipe @@ -1,3 +1,2 @@ ERROR: -(1): - syntax error -error: Assembly aborted (1 errors)! + Found garbage character: 0xFF