Group sequences of garbage characters (#1672)

This commit is contained in:
Rangi
2025-04-30 23:31:41 -04:00
committed by GitHub
parent e0a8eb8aff
commit e45b9625ca
4 changed files with 35 additions and 6 deletions

View File

@@ -1657,6 +1657,12 @@ static void appendStringLiteral(std::string &str, bool raw) {
static Token yylex_SKIP_TO_ENDC(); // forward declaration for yylex_NORMAL
// Returns true when `c` is not EOF, cannot continue an identifier, and is not
// one of the characters listed below.
// Must stay in sync with the `switch` in `yylex_NORMAL`!
static bool isGarbageCharacter(int c) {
	// End of input and identifier characters are never garbage
	if (c == EOF || continuesIdentifier(c)) {
		return false;
	}

	// `strchr` also matches the string's terminator, so NUL has to be decided
	// before the table lookup
	if (c == '\0') {
		return true;
	}

	// Any character missing from this table is garbage
	static char const recognizedChars[] = "; \t~[](),+-*/|^=!<>:&%`\"\r\n\\";
	return strchr(recognizedChars, c) == nullptr;
}
static Token yylex_NORMAL() {
for (;;) {
int c = nextChar();
@@ -1982,10 +1988,21 @@ static Token yylex_NORMAL() {
// Do not report weird characters when capturing, it'll be done later
if (!lexerState->capturing) {
// TODO: try to group reportings
assume(isGarbageCharacter(c) || c == '#');
if (isGarbageCharacter(peek())) {
// At least two characters are garbage; group them into one error report
std::string garbage = printChar(c);
while (isGarbageCharacter(peek())) {
c = nextChar();
garbage += ", ";
garbage += printChar(c);
}
error("Unknown characters %s\n", garbage.c_str());
} else {
error("Unknown character %s\n", printChar(c));
}
}
}
lexerState->atLineStart = false;
}
}

View File

@@ -0,0 +1,3 @@
assert 1 +# 1 == 2
assert 2 '?* 2 == 4
assert 3 **?''?##?? 3 == 27

View File

@@ -0,0 +1,11 @@
error: garbage_sequence.asm(1):
Unknown character '#'
error: garbage_sequence.asm(2):
Unknown characters ''', '?'
error: garbage_sequence.asm(3):
Unknown characters '?', ''', ''', '?'
error: garbage_sequence.asm(3):
Unknown character '#'
error: garbage_sequence.asm(3):
Unknown characters '#', '?', '?'
error: Assembly aborted (5 errors)!

View File

@@ -1,5 +1,3 @@
error: invalid-utf-8.asm(6) -> invalid-utf-8.asm::m(4):
Unknown character 0xCF
error: invalid-utf-8.asm(6) -> invalid-utf-8.asm::m(4):
Unknown character 0xD3
error: Assembly aborted (2 errors)!
Unknown characters 0xCF, 0xD3
error: Assembly aborted (1 error)!