Do not build up a std::string when skipping leading keywords

This commit is contained in:
Rangi
2026-05-27 19:37:15 -04:00
committed by Rangi
parent 673c62414f
commit 31e1d2ec87
+57 -45
View File
@@ -2058,44 +2058,50 @@ finish: // Can't `break` out of a nested `for`-`switch`
return Token(T_(YYEOF)); return Token(T_(YYEOF));
} }
static Token skipToLeadingKeyword( static Token skipToLeadingKeywordFast(Procedure<> auto shiftFast) {
InvocableR<int> auto peekFn, // This is essentially `skipToLeadingKeyword` with `peek` and `shiftChar` replaced,
Procedure<> auto shiftFn, // as well as anything that calls them like `nextChar` or `handleCRLF`.
Procedure<> auto nextLineFn, char const *ptr = lexerState->content.ptr.get();
Procedure<> auto finalizeFn auto peekFast = [&]() {
) { return lexerState->offset < lexerState->content.size ? ptr[lexerState->offset] : EOF;
};
for (;;) { for (;;) {
int c = peekFn(); int c = peekFast();
if (lexerState->atLineStart) { if (lexerState->atLineStart) {
lexerState->atLineStart = false; lexerState->atLineStart = false;
while (isBlankSpace(c)) { while (isBlankSpace(c)) {
shiftFn(); shiftFast();
c = peekFn(); c = peekFast();
} }
if (c == EOF) { if (c == EOF) {
return Token(T_(YYEOF)); return Token(T_(YYEOF));
} else if (isLetter(c)) { } else if (isLetter(c)) {
std::string builder(1, c); size_t start = lexerState->offset;
shiftFn(); shiftFast();
for (c = peekFn(); continuesIdentifier(c); c = peekFn()) { for (c = peekFast(); continuesIdentifier(c); c = peekFast()) {
builder += c; shiftFast();
shiftFn();
} }
if (auto search = keywords.find(builder); search != keywords.end()) { std::string_view leading{ptr + start, ptr + lexerState->offset};
finalizeFn(); if (auto search = keywords.find(leading); search != keywords.end()) {
// When this branch returns a token, there has been one more call to `peekFast`
// than to `shiftFast`. Unlike `peek` and `shiftChar`, the optimized functions
// do not update `lexerState->expansionScanDistance`, so it must be incremented
// if it was previously zero.
if (lexerState->expansionScanDistance == 0) {
++lexerState->expansionScanDistance;
}
return Token(search->second); return Token(search->second);
} }
} }
} }
shiftFn(); shiftFast();
if (c == EOF) { if (c == EOF) {
return Token(T_(YYEOF)); return Token(T_(YYEOF));
} else if (isNewline(c)) { } else if (isNewline(c)) {
// Like `handleCRLF` but calling generic `shiftFn` if (c == '\r' && peekFast() == '\n') {
if (c == '\r' && peekFn() == '\n') { shiftFast();
shiftFn();
} }
nextLineFn(); ++lexerState->lineNo;
lexerState->atLineStart = true; lexerState->atLineStart = true;
} }
} }
@@ -2120,36 +2126,42 @@ static Token skipToLeadingKeyword() {
if (lexerState->expansionStack.empty()) { if (lexerState->expansionStack.empty()) {
// Optimize the common case (no ongoing expansions) to avoid // Optimize the common case (no ongoing expansions) to avoid
// the bookkeeping of `peek` and `shiftChar`. // the bookkeeping of `peek` and `shiftChar`.
char const *ptr = lexerState->content.ptr.get();
auto quickPeek = [&]() {
return lexerState->offset < lexerState->content.size ? ptr[lexerState->offset] : EOF;
};
auto quickNextLine = []() { ++lexerState->lineNo; };
auto quickFinalize = []() {
// When `skipToLeadingKeyword` returns a token, there has been one more
// call to `quickPeek` than to `quickShiftChar` (or `quickCaptureShiftChar`). Unlike `peek` and `shiftChar`,
// the optimized functions do not update `lexerState->expansionScanDistance`,
// so it must be incremented if it was previously zero.
if (lexerState->expansionScanDistance == 0) {
++lexerState->expansionScanDistance;
}
};
if (lexerState->capturing) { if (lexerState->capturing) {
assume(lexerState->captureBuf == nullptr); assume(lexerState->captureBuf == nullptr);
auto quickCaptureShiftChar = [&]() { return skipToLeadingKeywordFast([&]() {
++lexerState->offset; ++lexerState->offset;
++lexerState->captureSize; ++lexerState->captureSize;
}; });
return skipToLeadingKeyword(
quickPeek, quickCaptureShiftChar, quickNextLine, quickFinalize
);
} else { } else {
auto quickShiftChar = [&]() { ++lexerState->offset; }; return skipToLeadingKeywordFast([&]() { ++lexerState->offset; });
return skipToLeadingKeyword(quickPeek, quickShiftChar, quickNextLine, quickFinalize); }
}
for (;;) {
int c = peek();
if (lexerState->atLineStart) {
lexerState->atLineStart = false;
c = skipChars(isBlankSpace);
if (c == EOF) {
return Token(T_(YYEOF));
} else if (isLetter(c)) {
std::string builder(1, c);
for (c = nextChar(); continuesIdentifier(c); c = nextChar()) {
builder += c;
}
if (auto search = keywords.find(builder); search != keywords.end()) {
return Token(search->second);
}
}
}
shiftChar();
if (c == EOF) {
return Token(T_(YYEOF));
} else if (isNewline(c)) {
handleCRLF(c);
nextLine();
lexerState->atLineStart = true;
} }
} else {
auto finalize = []() {};
return skipToLeadingKeyword(peek, shiftChar, nextLine, finalize);
} }
} }