Shorten lexing of simple tokens

This commit is contained in:
Rangi42
2025-08-20 22:37:03 -04:00
parent 3d155d5695
commit fcfc931867

View File

@@ -1592,6 +1592,26 @@ static void reportGarbageCharacters(int c) {
} }
} }
static Token oneOrTwo(int c, int longer, int shorter) {
if (peek() == c) {
shiftChar();
return Token(longer);
}
return Token(shorter);
}
static Token oneOrTwo(int c1, int longer1, int c2, int longer2, int shorter) {
if (int c = peek(); c == c1) {
shiftChar();
return Token(longer1);
} else if (c == c2) {
shiftChar();
return Token(longer2);
} else {
return Token(shorter);
}
}
static Token yylex_NORMAL() { static Token yylex_NORMAL() {
if (int nextToken = lexerState->nextToken; nextToken) { if (int nextToken = lexerState->nextToken; nextToken) {
lexerState->nextToken = 0; lexerState->nextToken = 0;
@@ -1607,6 +1627,7 @@ static Token yylex_NORMAL() {
case ';': case ';':
discardComment(); discardComment();
[[fallthrough]]; [[fallthrough]];
case ' ': case ' ':
case '\t': case '\t':
continue; continue;
@@ -1626,19 +1647,17 @@ static Token yylex_NORMAL() {
case '(': case '(':
return Token(T_(LPAREN)); return Token(T_(LPAREN));
case ')': case ')':
return Token(T_(RPAREN)); return Token(T_(RPAREN));
case ',': case ',':
return Token(T_(COMMA)); return Token(T_(COMMA));
// Handle ambiguous 1- or 2-char tokens // Handle ambiguous 1- or 2-char tokens
case '[': // Either [ or [[ case '[': // Either [ or [[
if (peek() == '[') { return oneOrTwo('[', T_(LBRACKS), T_(LBRACK));
shiftChar();
return Token(T_(LBRACKS));
}
return Token(T_(LBRACK));
case ']': // Either ] or ]] case ']': // Either ] or ]]
if (peek() == ']') { if (peek() == ']') {
@@ -1651,135 +1670,57 @@ static Token yylex_NORMAL() {
return Token(T_(RBRACK)); return Token(T_(RBRACK));
case '+': // Either +=, ADD, or CAT case '+': // Either +=, ADD, or CAT
switch (peek()) { return oneOrTwo('=', T_(POP_ADDEQ), '+', T_(OP_CAT), T_(OP_ADD));
case '=':
shiftChar();
return Token(T_(POP_ADDEQ));
case '+':
shiftChar();
return Token(T_(OP_CAT));
default:
return Token(T_(OP_ADD));
}
case '-': // Either -= or SUB case '-': // Either -= or SUB
if (peek() == '=') { return oneOrTwo('=', T_(POP_SUBEQ), T_(OP_SUB));
shiftChar();
return Token(T_(POP_SUBEQ));
}
return Token(T_(OP_SUB));
case '*': // Either *=, MUL, or EXP case '*': // Either *=, MUL, or EXP
switch (peek()) { return oneOrTwo('=', T_(POP_MULEQ), '*', T_(OP_EXP), T_(OP_MUL));
case '=':
shiftChar();
return Token(T_(POP_MULEQ));
case '*':
shiftChar();
return Token(T_(OP_EXP));
default:
return Token(T_(OP_MUL));
}
case '/': // Either /=, DIV, or a block comment case '/': // Either /=, DIV, or a block comment
switch (peek()) { if (peek() == '*') {
case '=':
shiftChar();
return Token(T_(POP_DIVEQ));
case '*':
shiftChar(); shiftChar();
discardBlockComment(); discardBlockComment();
continue; continue;
default:
return Token(T_(OP_DIV));
} }
return oneOrTwo('=', T_(POP_DIVEQ), T_(OP_DIV));
case '|': // Either |=, binary OR, or logical OR case '|': // Either |=, binary OR, or logical OR
switch (peek()) { return oneOrTwo('=', T_(POP_OREQ), '|', T_(OP_LOGICOR), T_(OP_OR));
case '=':
shiftChar();
return Token(T_(POP_OREQ));
case '|':
shiftChar();
return Token(T_(OP_LOGICOR));
default:
return Token(T_(OP_OR));
}
case '^': // Either ^= or XOR case '^': // Either ^= or XOR
if (peek() == '=') { return oneOrTwo('=', T_(POP_XOREQ), T_(OP_XOR));
shiftChar();
return Token(T_(POP_XOREQ));
}
return Token(T_(OP_XOR));
case '=': // Either assignment or EQ case '=': // Either assignment or EQ
if (peek() == '=') { return oneOrTwo('=', T_(OP_LOGICEQU), T_(POP_EQUAL));
shiftChar();
return Token(T_(OP_LOGICEQU));
}
return Token(T_(POP_EQUAL));
case '!': // Either a NEQ or negation case '!': // Either a NEQ or negation
if (peek() == '=') { return oneOrTwo('=', T_(OP_LOGICNE), T_(OP_LOGICNOT));
shiftChar();
return Token(T_(OP_LOGICNE));
}
return Token(T_(OP_LOGICNOT));
// Handle ambiguous 1-, 2-, or 3-char tokens // Handle ambiguous 1-, 2-, or 3-char tokens
case '<': // Either <<=, LT, LTE, or left shift case '<': // Either <<=, LT, LTE, or left shift
switch (peek()) { if (peek() == '<') {
case '=':
shiftChar(); shiftChar();
return Token(T_(OP_LOGICLE)); return oneOrTwo('=', T_(POP_SHLEQ), T_(OP_SHL));
case '<':
shiftChar();
if (peek() == '=') {
shiftChar();
return Token(T_(POP_SHLEQ));
}
return Token(T_(OP_SHL));
default:
return Token(T_(OP_LOGICLT));
} }
return oneOrTwo('=', T_(OP_LOGICLE), T_(OP_LOGICLT));
case '>': // Either >>=, GT, GTE, or either kind of right shift case '>': // Either >>=, GT, GTE, or either kind of right shift
switch (peek()) { if (peek() == '>') {
case '=':
shiftChar(); shiftChar();
return Token(T_(OP_LOGICGE)); return oneOrTwo('=', T_(POP_SHREQ), '>', T_(OP_USHR), T_(OP_SHR));
case '>':
shiftChar();
switch (peek()) {
case '=':
shiftChar();
return Token(T_(POP_SHREQ));
case '>':
shiftChar();
return Token(T_(OP_USHR));
default:
return Token(T_(OP_SHR));
}
default:
return Token(T_(OP_LOGICGT));
} }
return oneOrTwo('=', T_(OP_LOGICGE), T_(OP_LOGICGT));
case ':': // Either :, ::, or an anonymous label ref case ':': // Either :, ::, or an anonymous label ref
c = peek(); c = peek();
switch (c) { if (c == '+' || c == '-') {
case ':':
shiftChar();
return Token(T_(DOUBLE_COLON));
case '+':
case '-': {
std::string symName = readAnonLabelRef(c); std::string symName = readAnonLabelRef(c);
return Token(T_(ANON), symName); return Token(T_(ANON), symName);
} }
default: return oneOrTwo(':', T_(DOUBLE_COLON), T_(COLON));
return Token(T_(COLON));
}
// Handle numbers // Handle numbers
@@ -1822,27 +1763,17 @@ static Token yylex_NORMAL() {
case '&': // Either &=, binary AND, logical AND, or an octal constant case '&': // Either &=, binary AND, logical AND, or an octal constant
c = peek(); c = peek();
if (c == '=') { if (isOctDigit(c)) {
shiftChar();
return Token(T_(POP_ANDEQ));
} else if (c == '&') {
shiftChar();
return Token(T_(OP_LOGICAND));
} else if (isOctDigit(c)) {
return Token(T_(NUMBER), readOctalNumber()); return Token(T_(NUMBER), readOctalNumber());
} }
return Token(T_(OP_AND)); return oneOrTwo('=', T_(POP_ANDEQ), '&', T_(OP_LOGICAND), T_(OP_AND));
case '%': // Either %=, MOD, or a binary constant case '%': // Either %=, MOD, or a binary constant
c = peek(); c = peek();
if (c == '=') { if (c == '0' || c == '1' || c == options.binDigits[0] || c == options.binDigits[1]) {
shiftChar();
return Token(T_(POP_MODEQ));
} else if (c == '0' || c == '1' || c == options.binDigits[0]
|| c == options.binDigits[1]) {
return Token(T_(NUMBER), readBinaryNumber()); return Token(T_(NUMBER), readBinaryNumber());
} }
return Token(T_(OP_MOD)); return oneOrTwo('=', T_(POP_MODEQ), T_(OP_MOD));
case '$': // Hex constant case '$': // Hex constant
return Token(T_(NUMBER), readHexNumber()); return Token(T_(NUMBER), readHexNumber());