From db6793f444993dd80a17163e0d76e2b2b9882260 Mon Sep 17 00:00:00 2001
From: Rangi <35663410+Rangi42@users.noreply.github.com>
Date: Thu, 14 Aug 2025 10:10:59 -0400
Subject: [PATCH] Don't count single quote `'` as garbage (#1801)

Also copy the "blank space" (space or tab) vs "whitespace" (space,
tab, or newline) convention from `<ctype.h>`
---
 Makefile                      |  3 ++-
 include/util.hpp              |  3 ++-
 src/CMakeLists.txt            |  1 +
 src/asm/lexer.cpp             | 38 ++++++++++++++++++++++-------------
 src/asm/main.cpp              |  2 +-
 src/asm/opt.cpp               |  6 +++---
 src/diagnostics.cpp           |  2 +-
 src/fix/mbc.cpp               | 15 +++++++-------
 src/gfx/main.cpp              | 32 ++++++++++++++---------------
 src/gfx/pal_spec.cpp          | 16 +++++++--------
 src/link/lexer.cpp            |  4 ++--
 src/link/main.cpp             | 32 ++++++++++++++++-------------
 src/util.cpp                  | 10 ++++++---
 test/asm/garbage_sequence.asm |  2 ++
 test/asm/garbage_sequence.err |  4 +++-
 15 files changed, 98 insertions(+), 72 deletions(-)

diff --git a/Makefile b/Makefile
index 77f00688..3d64ed20 100644
--- a/Makefile
+++ b/Makefile
@@ -109,7 +109,8 @@ rgbfix_obj := \
 	${common_obj} \
 	src/fix/main.o \
 	src/fix/mbc.o \
-	src/fix/warning.o
+	src/fix/warning.o \
+	src/util.o
 
 rgbgfx_obj := \
 	${common_obj} \
diff --git a/include/util.hpp b/include/util.hpp
index a26dd393..929a2f77 100644
--- a/include/util.hpp
+++ b/include/util.hpp
@@ -12,8 +12,9 @@
 
 #include "helpers.hpp"
 
-bool isWhitespace(int c);
 bool isNewline(int c);
+bool isBlankSpace(int c);
+bool isWhitespace(int c);
 bool isPrintable(int c);
 
 bool startsIdentifier(int c);
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 57359c22..cfc9b692 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -84,6 +84,7 @@ set(rgbfix_src
     "fix/main.cpp"
     "fix/mbc.cpp"
     "fix/warning.cpp"
+    "util.cpp"
     )
 
 set(rgbgfx_src
diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp
index b2593cf0..fa7e6db2 100644
--- a/src/asm/lexer.cpp
+++ b/src/asm/lexer.cpp
@@ -898,7 +898,7 @@ static void discardComment() {
 
 static void discardLineContinuation() {
 	for (;;) {
-		if (int c = peek(); isWhitespace(c)) {
+		if (int c = peek(); isBlankSpace(c)) {
 			shiftChar();
 		} else if (isNewline(c)) {
 			shiftChar();
@@ -1561,8 +1561,18 @@ static Token yylex_SKIP_TO_ENDC(); // Forward declaration for `yylex_NORMAL`
 
 // Must stay in sync with the `switch` in `yylex_NORMAL`!
 static bool isGarbageCharacter(int c) {
-	return c != EOF && !continuesIdentifier(c)
-	       && (c == '\0' || !strchr("; \t~[](),+-*/|^=!<>:&%`\"\r\n\\", c));
+	// Whitespace characters are not garbage, even the non-"printable" ones
+	if (isWhitespace(c)) {
+		return false;
+	}
+	// Printable characters which are nevertheless garbage: braces should have been interpolated,
+	// and question mark is unused
+	if (c == '{' || c == '}' || c == '?') {
+		return true;
+	}
+	// All other printable characters are not garbage (i.e. `yylex_NORMAL` handles them), and
+	// all other nonprintable characters are garbage (including '\0' and EOF)
+	return !isPrintable(c);
 }
 
 static void reportGarbageCharacters(int c) {
@@ -1594,7 +1604,7 @@ static Token yylex_NORMAL() {
 		int c = bumpChar();
 
 		switch (c) {
-			// Ignore whitespace and comments
+			// Ignore blank space and comments
 
 		case ';':
 			discardComment();
@@ -1949,18 +1959,18 @@ static Token yylex_RAW() {
 	size_t parenDepth = 0;
 	int c;
 
-	// Trim left whitespace (stops at a block comment)
+	// Trim left spaces (stops at a block comment)
 	for (;;) {
 		c = peek();
-		if (isWhitespace(c)) {
+		if (isBlankSpace(c)) {
 			shiftChar();
 		} else if (c == '\\') {
 			c = nextChar();
 			// If not a line continuation, handle as a normal char
-			if (!isWhitespace(c) && !isNewline(c)) {
+			if (!isWhitespace(c)) {
 				goto backslash;
 			}
-			// Line continuations count as "whitespace"
+			// Line continuations count as "space"
 			discardLineContinuation();
 		} else {
 			break;
@@ -2083,8 +2093,8 @@ append:
 	}
 
 finish: // Can't `break` out of a nested `for`-`switch`
-	// Trim right whitespace
-	auto rightPos = std::find_if_not(str.rbegin(), str.rend(), isWhitespace);
+	// Trim right blank space
+	auto rightPos = std::find_if_not(str.rbegin(), str.rend(), isBlankSpace);
 	str.resize(rightPos.base() - str.begin());
 
 	// Returning COMMAs to the parser would mean that two consecutive commas
@@ -2120,7 +2130,7 @@ finish: // Can't `break` out of a nested `for`-`switch`
 static int skipPastEOL() {
 	if (lexerState->atLineStart) {
 		lexerState->atLineStart = false;
-		return skipChars(isWhitespace);
+		return skipChars(isBlankSpace);
 	}
 
 	for (;;) {
@@ -2129,7 +2139,7 @@ static int skipPastEOL() {
 		} else if (isNewline(c)) {
 			handleCRLF(c);
 			nextLine();
-			return skipChars(isWhitespace);
+			return skipChars(isBlankSpace);
 		} else if (c == '\\') {
 			// Unconditionally skip the next char, including line continuations
 			c = bumpChar();
@@ -2323,7 +2333,7 @@ Capture lexer_CaptureRept() {
 		nextLine();
 
 		// We're at line start, so attempt to match a `REPT`, `FOR`, or `ENDR` token
-		if (int c = skipChars(isWhitespace); startsIdentifier(c)) {
+		if (int c = skipChars(isBlankSpace); startsIdentifier(c)) {
 			shiftChar();
 			switch (readIdentifier(c, false).type) {
 			case T_(POP_REPT):
@@ -2365,7 +2375,7 @@ Capture lexer_CaptureMacro() {
 		nextLine();
 
 		// We're at line start, so attempt to match an `ENDM` token
-		if (int c = skipChars(isWhitespace); startsIdentifier(c)) {
+		if (int c = skipChars(isBlankSpace); startsIdentifier(c)) {
 			shiftChar();
 			if (readIdentifier(c, false).type == T_(POP_ENDM)) {
 				endCapture(capture);
diff --git a/src/asm/main.cpp b/src/asm/main.cpp
index 213ed5a1..a358e68c 100644
--- a/src/asm/main.cpp
+++ b/src/asm/main.cpp
@@ -250,7 +250,7 @@ static std::vector<StateFeature> parseStateFeatures(char *str) {
 		if (next) {
 			*next++ = '\0';
 		}
-		// Trim whitespace from the beginning of `feature`...
+		// Trim blank spaces from the beginning of `feature`...
 		feature += strspn(feature, " \t");
 		// ...and from the end
 		if (char *end = strpbrk(feature, " \t"); end) {
diff --git a/src/asm/opt.cpp b/src/asm/opt.cpp
index 40015407..5201be3d 100644
--- a/src/asm/opt.cpp
+++ b/src/asm/opt.cpp
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: MIT
 
-#include <ctype.h>
 #include <errno.h>
 #include <stack>
 #include <stdio.h>
@@ -8,6 +7,7 @@
 #include <string.h>
 
 #include "helpers.hpp" // assume
+#include "util.hpp"    // isBlankSpace
 
 #include "asm/fixpoint.hpp"
 #include "asm/fstack.hpp"
@@ -116,8 +116,8 @@ void opt_Parse(char const *s) {
 
 	case 'r': {
 		++s; // Skip 'r'
-		while (isblank(*s)) {
-			++s; // Skip leading whitespace
+		while (isBlankSpace(*s)) {
+			++s; // Skip leading blank spaces
 		}
 
 		if (s[0] == '\0') {
diff --git a/src/diagnostics.cpp b/src/diagnostics.cpp
index 5b15e8f3..bbdfcefe 100644
--- a/src/diagnostics.cpp
+++ b/src/diagnostics.cpp
@@ -55,7 +55,7 @@ std::pair<WarningState, std::optional<uint32_t>> getInitialWarningState(std::str
 	}
 
 	// Is the rest of the string a decimal number?
-	// We want to avoid `strtoul`'s whitespace and sign, so we parse manually
+	// We want to avoid `strtoul`'s whitespace and sign handling, so we parse manually
 	char const *ptr = flag.c_str() + equals + 1;
 	uint32_t param = 0;
 	bool overflowed = false;
diff --git a/src/fix/mbc.cpp b/src/fix/mbc.cpp
index 10a4ccf8..f1f6ad91 100644
--- a/src/fix/mbc.cpp
+++ b/src/fix/mbc.cpp
@@ -8,6 +8,7 @@
 
 #include "helpers.hpp"  // unreachable_
 #include "platform.hpp" // strcasecmp
+#include "util.hpp"     // isBlankSpace
 
 #include "fix/warning.hpp"
 
@@ -93,14 +94,14 @@ bool mbc_HasRAM(MbcType type) {
 	return search != mbcData.end() && search->second.second;
 }
 
-static void skipWhitespace(char const *&ptr) {
-	while (*ptr == ' ' || *ptr == '\t') {
+static void skipBlankSpace(char const *&ptr) {
+	while (isBlankSpace(*ptr)) {
 		++ptr;
 	}
 }
 
 static void skipMBCSpace(char const *&ptr) {
-	while (*ptr == ' ' || *ptr == '\t' || *ptr == '_') {
+	while (isBlankSpace(*ptr) || *ptr == '_') {
 		++ptr;
 	}
 }
@@ -166,7 +167,7 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor)
 	uint16_t mbc;
 	char const *ptr = name;
 
-	skipWhitespace(ptr); // Trim off leading whitespace
+	skipBlankSpace(ptr); // Trim off leading blank space
 
 #define tryReadSlice(expected) \
 	do { \
@@ -318,7 +319,7 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor)
 	// clang-format on
 
 	for (;;) {
-		skipWhitespace(ptr); // Trim off trailing whitespace
+		skipBlankSpace(ptr); // Trim off trailing blank space
 
 		// If done, start processing "features"
 		if (!*ptr) {
@@ -501,9 +502,9 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor)
 		break;
 	}
 
-	skipWhitespace(ptr); // Trim off trailing whitespace
+	skipBlankSpace(ptr); // Trim off trailing blank space
 
-	// If there is still something past the whitespace, error out
+	// If there is still something left, error out
 	if (*ptr) {
 		fatalUnknownMBC(fullName);
 	}
diff --git a/src/gfx/main.cpp b/src/gfx/main.cpp
index 4795a4b2..42c9406c 100644
--- a/src/gfx/main.cpp
+++ b/src/gfx/main.cpp
@@ -186,7 +186,7 @@ static uint16_t parseNumber(char *&string, char const *errPrefix, uint16_t errVa
 	return number;
 }
 
-static void skipWhitespace(char *&arg) {
+static void skipBlankSpace(char *&arg) {
 	arg += strspn(arg, " \t");
 }
 
@@ -214,8 +214,8 @@ static std::vector<size_t> readAtFile(std::string const &path, std::vector<char>
 	for (std::vector<size_t> argvOfs;;) {
 		int c = file->sbumpc();
 
-		// First, discard any leading whitespace
-		while (isWhitespace(c)) {
+		// First, discard any leading blank space
+		while (isBlankSpace(c)) {
 			c = file->sbumpc();
 		}
 
@@ -239,13 +239,13 @@ static std::vector<size_t> readAtFile(std::string const &path, std::vector<char>
 
 			// Read one argument (until the next whitespace char).
 			// We know there is one because we already have its first character in `c`.
-			for (; c != EOF && !isNewline(c) && !isWhitespace(c); c = file->sbumpc()) {
+			for (; c != EOF && !isWhitespace(c); c = file->sbumpc()) {
 				argPool.push_back(c);
 			}
 			argPool.push_back('\0');
 
-			// Discard whitespace until the next argument (candidate)
-			while (isWhitespace(c)) {
+			// Discard blank space until the next argument (candidate)
+			while (isBlankSpace(c)) {
 				c = file->sbumpc();
 			}
 		} while (c != EOF && !isNewline(c)); // End if we reached EOL
@@ -285,7 +285,7 @@ static char *parseArgv(int argc, char *argv[]) {
 				options.baseTileIDs[1] = 0;
 				break;
 			}
-			skipWhitespace(arg);
+			skipBlankSpace(arg);
 			if (*arg != ',') {
 				error(
 				    "Base tile IDs must be one or two comma-separated numbers, not \"%s\"",
@@ -294,7 +294,7 @@ static char *parseArgv(int argc, char *argv[]) {
 				break;
 			}
 			++arg; // Skip comma
-			skipWhitespace(arg);
+			skipBlankSpace(arg);
 			number = parseNumber(arg, "Bank 1 base tile ID", 0);
 			if (number >= 256) {
 				error("Bank 1 base tile ID must be below 256");
@@ -356,23 +356,23 @@ static char *parseArgv(int argc, char *argv[]) {
 				error("Input slice left coordinate is out of range!");
 				break;
 			}
-			skipWhitespace(arg);
+			skipBlankSpace(arg);
 			if (*arg != ',') {
 				error("Missing comma after left coordinate in \"%s\"", musl_optarg);
 				break;
 			}
 			++arg;
-			skipWhitespace(arg);
+			skipBlankSpace(arg);
 			options.inputSlice.top = parseNumber(arg, "Input slice upper coordinate");
-			skipWhitespace(arg);
+			skipBlankSpace(arg);
 			if (*arg != ':') {
 				error("Missing colon after upper coordinate in \"%s\"", musl_optarg);
 				break;
 			}
 			++arg;
-			skipWhitespace(arg);
+			skipBlankSpace(arg);
 			options.inputSlice.width = parseNumber(arg, "Input slice width");
-			skipWhitespace(arg);
+			skipBlankSpace(arg);
 			if (options.inputSlice.width == 0) {
 				error("Input slice width may not be 0!");
 			}
@@ -381,7 +381,7 @@ static char *parseArgv(int argc, char *argv[]) {
 				break;
 			}
 			++arg;
-			skipWhitespace(arg);
+			skipBlankSpace(arg);
 			options.inputSlice.height = parseNumber(arg, "Input slice height");
 			if (options.inputSlice.height == 0) {
 				error("Input slice height may not be 0!");
@@ -416,7 +416,7 @@ static char *parseArgv(int argc, char *argv[]) {
 				options.maxNbTiles[1] = 0;
 				break;
 			}
-			skipWhitespace(arg);
+			skipBlankSpace(arg);
 			if (*arg != ',') {
 				error(
 				    "Bank capacity must be one or two comma-separated numbers, not \"%s\"",
@@ -425,7 +425,7 @@ static char *parseArgv(int argc, char *argv[]) {
 				break;
 			}
 			++arg; // Skip comma
-			skipWhitespace(arg);
+			skipBlankSpace(arg);
 			options.maxNbTiles[1] = parseNumber(arg, "Number of tiles in bank 1", 256);
 			if (options.maxNbTiles[1] > 256) {
 				error("Bank 1 cannot contain more than 256 tiles");
diff --git a/src/gfx/pal_spec.cpp b/src/gfx/pal_spec.cpp
index a387293a..98b6963c 100644
--- a/src/gfx/pal_spec.cpp
+++ b/src/gfx/pal_spec.cpp
@@ -31,7 +31,7 @@ using namespace std::string_view_literals;
 static char const *hexDigits = "0123456789ABCDEFabcdef";
 
 template<typename Str> // Should be std::string or std::string_view
-static void skipWhitespace(Str const &str, size_t &pos) {
+static void skipBlankSpace(Str const &str, size_t &pos) {
 	pos = std::min(str.find_first_not_of(" \t"sv, pos), str.length());
 }
 
@@ -120,8 +120,8 @@ void parseInlinePalSpec(char const * const rawArg) {
 			n = pos;
 		}
 
-		// Skip whitespace, if any
-		skipWhitespace(arg, n);
+		// Skip trailing space, if any
+		skipBlankSpace(arg, n);
 
 		// Skip comma/semicolon, or end
 		if (n == arg.length()) {
@@ -134,7 +134,7 @@ void parseInlinePalSpec(char const * const rawArg) {
 			++nbColors;
 
 			// A trailing comma may be followed by a semicolon
-			skipWhitespace(arg, n);
+			skipBlankSpace(arg, n);
 			if (n == arg.length()) {
 				break;
 			} else if (arg[n] != ';' && arg[n] != ':') {
@@ -149,7 +149,7 @@ void parseInlinePalSpec(char const * const rawArg) {
 		case ':':
 		case ';':
 			++n;
-			skipWhitespace(arg, n);
+			skipBlankSpace(arg, n);
 
 			nbColors = 0; // Start a new palette
 			// Avoid creating a spurious empty palette
@@ -253,7 +253,7 @@ static std::optional<Rgba> parseColor(std::string const &str, size_t &n, uint16_
 		error("Failed to parse color #%d (\"%s\"): invalid red component", i + 1, str.c_str());
 		return std::nullopt;
 	}
-	skipWhitespace(str, n);
+	skipBlankSpace(str, n);
 	if (n == str.length()) {
 		error("Failed to parse color #%d (\"%s\"): missing green component", i + 1, str.c_str());
 		return std::nullopt;
@@ -263,7 +263,7 @@ static std::optional<Rgba> parseColor(std::string const &str, size_t &n, uint16_
 		error("Failed to parse color #%d (\"%s\"): invalid green component", i + 1, str.c_str());
 		return std::nullopt;
 	}
-	skipWhitespace(str, n);
+	skipBlankSpace(str, n);
 	if (n == str.length()) {
 		error("Failed to parse color #%d (\"%s\"): missing blue component", i + 1, str.c_str());
 		return std::nullopt;
@@ -356,7 +356,7 @@ static void parseGPLFile(char const *filename, std::filebuf &file) {
 		}
 
 		size_t n = 0;
-		skipWhitespace(line, n);
+		skipBlankSpace(line, n);
 		// Skip empty lines, or lines that contain just a comment.
 		if (line.length() == n || line[n] == '#') {
 			continue;
diff --git a/src/link/lexer.cpp b/src/link/lexer.cpp
index 68e6c810..22bd2a58 100644
--- a/src/link/lexer.cpp
+++ b/src/link/lexer.cpp
@@ -260,8 +260,8 @@ yy::parser::symbol_type yylex() {
 	LexerStackEntry &context = lexerStack.back();
 	int c = context.file.sbumpc();
 
-	// First, skip leading whitespace.
-	while (isWhitespace(c)) {
+	// First, skip leading blank space.
+	while (isBlankSpace(c)) {
 		c = context.file.sbumpc();
 	}
 	// Then, skip a comment if applicable.
diff --git a/src/link/main.cpp b/src/link/main.cpp
index d02496e5..89a79b2f 100644
--- a/src/link/main.cpp
+++ b/src/link/main.cpp
@@ -182,6 +182,10 @@ static void verboseOutputConfig(int argc, char *argv[]) {
 }
 // LCOV_EXCL_STOP
 
+static size_t skipBlankSpace(char const *str) {
+	return strspn(str, " \t");
+}
+
 static void parseScrambleSpec(char *spec) {
 	// clang-format off: vertically align nested initializers
 	static UpperMap<std::pair<uint16_t *, uint16_t>> scrambleSpecs{
@@ -191,19 +195,19 @@ static void parseScrambleSpec(char *spec) {
 	};
 	// clang-format on
 
-	// Skip leading whitespace before the regions.
-	spec += strspn(spec, " \t");
+	// Skip leading blank space before the regions.
+	spec += skipBlankSpace(spec);
 
 	// The argument to `-S` should be a comma-separated list of regions, allowing a trailing comma.
 	// Each region name is optionally followed by an '=' and a region size.
 	while (*spec) {
 		char *regionName = spec;
 
-		// The region name continues (skipping any whitespace) until a ',' (next region),
+		// The region name continues (skipping any blank space) until a ',' (next region),
 		// '=' (region size), or the end of the string.
 		size_t regionNameLen = strcspn(regionName, "=, \t");
-		// Skip trailing whitespace after the region name.
-		size_t regionNameSkipLen = regionNameLen + strspn(regionName + regionNameLen, " \t");
+		// Skip trailing blank space after the region name.
+		size_t regionNameSkipLen = regionNameLen + skipBlankSpace(regionName + regionNameLen);
 		spec = regionName + regionNameSkipLen;
 
 		if (*spec != '=' && *spec != ',' && *spec != '\0') {
@@ -215,14 +219,14 @@ static void parseScrambleSpec(char *spec) {
 		// The '=' region size limit is optional.
 		if (*spec == '=') {
 			regionSize = spec + 1; // Skip the '='
-			// Skip leading whitespace before the region size.
-			regionSize += strspn(regionSize, " \t");
+			// Skip leading blank space before the region size.
+			regionSize += skipBlankSpace(regionSize);
 
-			// The region size continues (skipping any whitespace) until a ',' (next region)
+			// The region size continues (skipping any blank space) until a ',' (next region)
 			// or the end of the string.
 			regionSizeLen = strcspn(regionSize, ", \t");
-			// Skip trailing whitespace after the region size.
-			size_t regionSizeSkipLen = regionSizeLen + strspn(regionSize + regionSizeLen, " \t");
+			// Skip trailing blank space after the region size.
+			size_t regionSizeSkipLen = regionSizeLen + skipBlankSpace(regionSize + regionSizeLen);
 			spec = regionSize + regionSizeSkipLen;
 
 			if (*spec != ',' && *spec != '\0') {
@@ -234,9 +238,9 @@ static void parseScrambleSpec(char *spec) {
 		if (*spec == ',') {
 			++spec;
 		}
-		// Skip trailing whitespace after the region.
+		// Skip trailing blank space after the region.
 		// `spec` will be the next region name, or the end of the string.
-		spec += strspn(spec, " \t");
+		spec += skipBlankSpace(spec);
 
 		// Terminate the `regionName` and `regionSize` strings.
 		regionName[regionNameLen] = '\0';
@@ -245,8 +249,8 @@ static void parseScrambleSpec(char *spec) {
 		}
 
 		// Check for an empty region name or limit.
-		// Note that by skipping leading whitespace before the loop, and skipping a trailing comma
-		// and whitespace before the next iteration, we guarantee that the region name will not be
+		// Note that by skipping leading blank space before the loop, and skipping a trailing comma
+		// and blank space before the next iteration, we guarantee that the region name will not be
 		// empty if it is present at all.
 		if (*regionName == '\0') {
 			fatal("Empty region name in spec for option '-S'");
diff --git a/src/util.cpp b/src/util.cpp
index 3618445d..f6049529 100644
--- a/src/util.cpp
+++ b/src/util.cpp
@@ -5,12 +5,16 @@
 #include <stdint.h>
 #include <stdio.h>
 
-bool isWhitespace(int c) {
+bool isNewline(int c) {
+	return c == '\r' || c == '\n';
+}
+
+bool isBlankSpace(int c) {
 	return c == ' ' || c == '\t';
 }
 
-bool isNewline(int c) {
-	return c == '\r' || c == '\n';
+bool isWhitespace(int c) {
+	return isBlankSpace(c) || isNewline(c);
 }
 
 bool isPrintable(int c) {
diff --git a/test/asm/garbage_sequence.asm b/test/asm/garbage_sequence.asm
index c88ecd4a..69cda417 100644
--- a/test/asm/garbage_sequence.asm
+++ b/test/asm/garbage_sequence.asm
@@ -1,3 +1,5 @@
 assert 1 +# 1 == 2
 assert 2 ?�* 2 == 4
 assert 3 **?�?##?? 3 == 27
+charmap "x", 4
+assert 4 <<#�'x' == 64
diff --git a/test/asm/garbage_sequence.err b/test/asm/garbage_sequence.err
index 68deb0ba..b3588648 100644
--- a/test/asm/garbage_sequence.err
+++ b/test/asm/garbage_sequence.err
@@ -8,4 +8,6 @@ error: Invalid character '#'
     at garbage_sequence.asm(3)
 error: Invalid characters '#', '?', '?'
     at garbage_sequence.asm(3)
-Assembly aborted with 5 errors!
+error: Invalid characters '#', 0xFF (is the file UTF-8?)
+    at garbage_sequence.asm(5)
+Assembly aborted with 6 errors!