diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index cc999fed..bcdbcae6 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -1657,6 +1657,12 @@ static void appendStringLiteral(std::string &str, bool raw) { static Token yylex_SKIP_TO_ENDC(); // forward declaration for yylex_NORMAL +// Must stay in sync with the `switch` in `yylex_NORMAL`! +static bool isGarbageCharacter(int c) { + return c != EOF && !continuesIdentifier(c) + && (c == '\0' || !strchr("; \t~[](),+-*/|^=!<>:&%`\"\r\n\\", c)); +} + static Token yylex_NORMAL() { for (;;) { int c = nextChar(); @@ -1982,8 +1988,19 @@ static Token yylex_NORMAL() { // Do not report weird characters when capturing, it'll be done later if (!lexerState->capturing) { - // TODO: try to group reportings - error("Unknown character %s\n", printChar(c)); + assume(isGarbageCharacter(c) || c == '#'); + if (isGarbageCharacter(peek())) { + // At least two characters are garbage; group them into one error report + std::string garbage = printChar(c); + while (isGarbageCharacter(peek())) { + c = nextChar(); + garbage += ", "; + garbage += printChar(c); + } + error("Unknown characters %s\n", garbage.c_str()); + } else { + error("Unknown character %s\n", printChar(c)); + } } } lexerState->atLineStart = false; diff --git a/test/asm/garbage_sequence.asm b/test/asm/garbage_sequence.asm new file mode 100644 index 00000000..d9be77da --- /dev/null +++ b/test/asm/garbage_sequence.asm @@ -0,0 +1,3 @@ +assert 1 +# 1 == 2 +assert 2 '?* 2 == 4 +assert 3 **?''?##?? 3 == 27 diff --git a/test/asm/garbage_sequence.err b/test/asm/garbage_sequence.err new file mode 100644 index 00000000..3add6895 --- /dev/null +++ b/test/asm/garbage_sequence.err @@ -0,0 +1,11 @@ +error: garbage_sequence.asm(1): + Unknown character '#' +error: garbage_sequence.asm(2): + Unknown characters ''', '?' +error: garbage_sequence.asm(3): + Unknown characters '?', ''', ''', '?' +error: garbage_sequence.asm(3): + Unknown character '#' +error: garbage_sequence.asm(3): + Unknown characters '#', '?', '?' +error: Assembly aborted (5 errors)! diff --git a/test/asm/invalid-utf-8.err b/test/asm/invalid-utf-8.err index 4eed4726..0dd97036 100644 --- a/test/asm/invalid-utf-8.err +++ b/test/asm/invalid-utf-8.err @@ -1,5 +1,3 @@ error: invalid-utf-8.asm(6) -> invalid-utf-8.asm::m(4): - Unknown character 0xCF -error: invalid-utf-8.asm(6) -> invalid-utf-8.asm::m(4): - Unknown character 0xD3 -error: Assembly aborted (2 errors)! + Unknown characters 0xCF, 0xD3 +error: Assembly aborted (1 error)!