From 1decf5d0d45e8ec6a8a0fc0d4ba640dcc1f2fb13 Mon Sep 17 00:00:00 2001
From: dbrotz <43593771+dbrotz@users.noreply.github.com>
Date: Sun, 16 Jun 2019 15:50:56 -0700
Subject: [PATCH 1/2] Fix out of bounds array access in lexer

If the type char is signed, then in the function
yylex_GetFloatMaskAndFloatLen(), *s can have a negative value and be
converted to a negative int32_t, which is then used as an array index.
It should be converted to uint8_t instead to ensure that the value is
within the bounds of the tFloatingFirstChar, tFloatingSecondChar, and
tFloatingChars arrays.

---
 src/asm/lexer.c                | 6 +++---
 test/asm/garbage_char.asm      | 1 +
 test/asm/garbage_char.out      | 3 +++
 test/asm/garbage_char.out.pipe | 3 +++
 4 files changed, 10 insertions(+), 3 deletions(-)
 create mode 100644 test/asm/garbage_char.asm
 create mode 100644 test/asm/garbage_char.out
 create mode 100644 test/asm/garbage_char.out.pipe

diff --git a/src/asm/lexer.c b/src/asm/lexer.c
index a98fef81..e47032d3 100644
--- a/src/asm/lexer.c
+++ b/src/asm/lexer.c
@@ -471,17 +471,17 @@ void yylex_GetFloatMaskAndFloatLen(uint32_t *pnFloatMask, uint32_t *pnFloatLen)
 
 	char *s = pLexBuffer;
 	uint32_t nOldFloatMask = 0;
-	uint32_t nFloatMask = tFloatingFirstChar[(int32_t)*s];
+	uint32_t nFloatMask = tFloatingFirstChar[(uint8_t)*s];
 
 	if (nFloatMask != 0) {
 		s++;
 		nOldFloatMask = nFloatMask;
-		nFloatMask &= tFloatingSecondChar[(int32_t)*s];
+		nFloatMask &= tFloatingSecondChar[(uint8_t)*s];
 
 		while (nFloatMask != 0) {
 			s++;
 			nOldFloatMask = nFloatMask;
-			nFloatMask &= tFloatingChars[(int32_t)*s];
+			nFloatMask &= tFloatingChars[(uint8_t)*s];
 		}
 	}
 
diff --git a/test/asm/garbage_char.asm b/test/asm/garbage_char.asm
new file mode 100644
index 00000000..ca5f132c
--- /dev/null
+++ b/test/asm/garbage_char.asm
@@ -0,0 +1 @@
+xÿ
\ No newline at end of file
diff --git a/test/asm/garbage_char.out b/test/asm/garbage_char.out
new file mode 100644
index 00000000..a05e4960
--- /dev/null
+++ b/test/asm/garbage_char.out
@@ -0,0 +1,3 @@
+ERROR: garbage_char.asm(1):
+    syntax error
+error: Assembly aborted (1 errors)!
diff --git a/test/asm/garbage_char.out.pipe b/test/asm/garbage_char.out.pipe
new file mode 100644
index 00000000..1ba12ddc
--- /dev/null
+++ b/test/asm/garbage_char.out.pipe
@@ -0,0 +1,3 @@
+ERROR: -(1):
+    syntax error
+error: Assembly aborted (1 errors)!

From 484d15dbb279ca2fca490e278fd81404561b08f7 Mon Sep 17 00:00:00 2001
From: dbrotz <43593771+dbrotz@users.noreply.github.com>
Date: Sat, 29 Jun 2019 12:50:41 -0700
Subject: [PATCH 2/2] Handle unprintable characters more gracefully

* Skip UTF-8 byte order mark at beginning of file
* Error on other unexpected unprintable characters

---
 src/asm/lexer.c                | 38 +++++++++++++++++++++++++++-------
 test/asm/garbage_char.out      |  3 +--
 test/asm/garbage_char.out.pipe |  3 +--
 3 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/src/asm/lexer.c b/src/asm/lexer.c
index e47032d3..5baa0877 100644
--- a/src/asm/lexer.c
+++ b/src/asm/lexer.c
@@ -38,6 +38,8 @@ struct sLexString {
 
 #define SAFETYMARGIN		1024
 
+#define BOM_SIZE 3
+
 struct sLexFloat tLexFloat[32];
 struct sLexString *tLexHash[LEXHASHSIZE];
 YY_BUFFER_STATE pCurrentBuffer;
@@ -49,6 +51,9 @@ uint32_t tFloatingChars[256];
 uint32_t nFloating;
 enum eLexerState lexerstate = LEX_STATE_NORMAL;
 
+/* UTF-8 byte order mark */
+static const unsigned char bom[BOM_SIZE] = { 0xEF, 0xBB, 0xBF };
+
 void upperstring(char *s)
 {
 	while (*s) {
@@ -120,11 +125,11 @@ void yy_delete_buffer(YY_BUFFER_STATE buf)
  */
 static void yy_buffer_append(YY_BUFFER_STATE buf, uint32_t capacity, char c)
 {
-	assert(buf->pBuffer[buf->nBufferSize] == 0);
+	assert(buf->pBufferStart[buf->nBufferSize] == 0);
 	assert(buf->nBufferSize + 1 < capacity);
 
-	buf->pBuffer[buf->nBufferSize++] = c;
-	buf->pBuffer[buf->nBufferSize] = 0;
+	buf->pBufferStart[buf->nBufferSize++] = c;
+	buf->pBufferStart[buf->nBufferSize] = 0;
 }
 
 YY_BUFFER_STATE yy_scan_bytes(char *mem, uint32_t size)
@@ -221,6 +226,11 @@ YY_BUFFER_STATE yy_create_buffer(FILE *f)
 	 */
 	capacity += 3;
 
+	/* Skip UTF-8 byte order mark. */
+	if (pBuffer->nBufferSize >= BOM_SIZE
+	 && !memcmp(pBuffer->pBuffer, bom, BOM_SIZE))
+		pBuffer->pBuffer += BOM_SIZE;
+
 	/* Convert all line endings to LF and spaces */
 
 	char *mem = pBuffer->pBuffer;
@@ -281,7 +291,7 @@ YY_BUFFER_STATE yy_create_buffer(FILE *f)
 	}
 
 	/* Add newline if file doesn't end with one */
-	if (size == 0 || pBuffer->pBuffer[size - 1] != '\n')
+	if (size == 0 || pBuffer->pBufferStart[size - 1] != '\n')
 		yy_buffer_append(pBuffer, capacity, '\n');
 
 	/* Add newline if \ will eat the last newline */
@@ -289,10 +299,10 @@ YY_BUFFER_STATE yy_create_buffer(FILE *f)
 		size_t pos = pBuffer->nBufferSize - 2;
 
 		/* Skip spaces */
-		while (pos > 0 && pBuffer->pBuffer[pos] == ' ')
+		while (pos > 0 && pBuffer->pBufferStart[pos] == ' ')
 			pos--;
 
-		if (pBuffer->pBuffer[pos] == '\\')
+		if (pBuffer->pBufferStart[pos] == '\\')
 			yy_buffer_append(pBuffer, capacity, '\n');
 	}
 
@@ -783,10 +793,22 @@ scanagain:
 		 * numeric literal, string, or bracketed symbol, so just return
 		 * the ASCII character.
 		 */
-		if (*pLexBuffer == '\n')
+		unsigned char ch = *pLexBuffer++;
+
+		if (ch == '\n')
 			AtLineStart = 1;
 
-		return *pLexBuffer++;
+		/*
+		 * Check for invalid unprintable characters.
+		 * They may not be readily apparent in a text editor,
+		 * so this is useful for identifying encoding problems.
+		 */
+		if (ch != 0
+		 && ch != '\n'
+		 && !(ch >= 0x20 && ch <= 0x7E))
+			fatalerror("Found garbage character: 0x%02X", ch);
+
+		return ch;
 	}
 
 	if (pLongestFixed == NULL || nFloatLen > pLongestFixed->nNameLength) {
diff --git a/test/asm/garbage_char.out b/test/asm/garbage_char.out
index a05e4960..b2a30cb7 100644
--- a/test/asm/garbage_char.out
+++ b/test/asm/garbage_char.out
@@ -1,3 +1,2 @@
 ERROR: garbage_char.asm(1):
-    syntax error
-error: Assembly aborted (1 errors)!
+    Found garbage character: 0xFF
diff --git a/test/asm/garbage_char.out.pipe b/test/asm/garbage_char.out.pipe
index 1ba12ddc..43643660 100644
--- a/test/asm/garbage_char.out.pipe
+++ b/test/asm/garbage_char.out.pipe
@@ -1,3 +1,2 @@
 ERROR: -(1):
-    syntax error
-error: Assembly aborted (1 errors)!
+    Found garbage character: 0xFF