From fcfc931867e4947f3984b04ed91e55d929b56f97 Mon Sep 17 00:00:00 2001 From: Rangi42 Date: Wed, 20 Aug 2025 22:37:03 -0400 Subject: [PATCH] Shorten lexing of simple tokens --- src/asm/lexer.cpp | 159 +++++++++++++--------------------------------- 1 file changed, 45 insertions(+), 114 deletions(-) diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index 322aeeef..ca5a1dd3 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -1592,6 +1592,26 @@ static void reportGarbageCharacters(int c) { } } +static Token oneOrTwo(int c, int longer, int shorter) { + if (peek() == c) { + shiftChar(); + return Token(longer); + } + return Token(shorter); +} + +static Token oneOrTwo(int c1, int longer1, int c2, int longer2, int shorter) { + if (int c = peek(); c == c1) { + shiftChar(); + return Token(longer1); + } else if (c == c2) { + shiftChar(); + return Token(longer2); + } else { + return Token(shorter); + } +} + static Token yylex_NORMAL() { if (int nextToken = lexerState->nextToken; nextToken) { lexerState->nextToken = 0; @@ -1607,6 +1627,7 @@ static Token yylex_NORMAL() { case ';': discardComment(); [[fallthrough]]; + case ' ': case '\t': continue; @@ -1626,19 +1647,17 @@ static Token yylex_NORMAL() { case '(': return Token(T_(LPAREN)); + case ')': return Token(T_(RPAREN)); + case ',': return Token(T_(COMMA)); // Handle ambiguous 1- or 2-char tokens case '[': // Either [ or [[ - if (peek() == '[') { - shiftChar(); - return Token(T_(LBRACKS)); - } - return Token(T_(LBRACK)); + return oneOrTwo('[', T_(LBRACKS), T_(LBRACK)); case ']': // Either ] or ]] if (peek() == ']') { @@ -1651,135 +1670,57 @@ static Token yylex_NORMAL() { return Token(T_(RBRACK)); case '+': // Either +=, ADD, or CAT - switch (peek()) { - case '=': - shiftChar(); - return Token(T_(POP_ADDEQ)); - case '+': - shiftChar(); - return Token(T_(OP_CAT)); - default: - return Token(T_(OP_ADD)); - } + return oneOrTwo('=', T_(POP_ADDEQ), '+', T_(OP_CAT), T_(OP_ADD)); case '-': // Either -= or SUB - if (peek() == '=') { - shiftChar(); - return Token(T_(POP_SUBEQ)); - } - return Token(T_(OP_SUB)); + return oneOrTwo('=', T_(POP_SUBEQ), T_(OP_SUB)); case '*': // Either *=, MUL, or EXP - switch (peek()) { - case '=': - shiftChar(); - return Token(T_(POP_MULEQ)); - case '*': - shiftChar(); - return Token(T_(OP_EXP)); - default: - return Token(T_(OP_MUL)); - } + return oneOrTwo('=', T_(POP_MULEQ), '*', T_(OP_EXP), T_(OP_MUL)); case '/': // Either /=, DIV, or a block comment - switch (peek()) { - case '=': - shiftChar(); - return Token(T_(POP_DIVEQ)); - case '*': + if (peek() == '*') { shiftChar(); discardBlockComment(); continue; - default: - return Token(T_(OP_DIV)); } + return oneOrTwo('=', T_(POP_DIVEQ), T_(OP_DIV)); case '|': // Either |=, binary OR, or logical OR - switch (peek()) { - case '=': - shiftChar(); - return Token(T_(POP_OREQ)); - case '|': - shiftChar(); - return Token(T_(OP_LOGICOR)); - default: - return Token(T_(OP_OR)); - } + return oneOrTwo('=', T_(POP_OREQ), '|', T_(OP_LOGICOR), T_(OP_OR)); case '^': // Either ^= or XOR - if (peek() == '=') { - shiftChar(); - return Token(T_(POP_XOREQ)); - } - return Token(T_(OP_XOR)); + return oneOrTwo('=', T_(POP_XOREQ), T_(OP_XOR)); case '=': // Either assignment or EQ - if (peek() == '=') { - shiftChar(); - return Token(T_(OP_LOGICEQU)); - } - return Token(T_(POP_EQUAL)); + return oneOrTwo('=', T_(OP_LOGICEQU), T_(POP_EQUAL)); case '!': // Either a NEQ or negation - if (peek() == '=') { - shiftChar(); - return Token(T_(OP_LOGICNE)); - } - return Token(T_(OP_LOGICNOT)); + return oneOrTwo('=', T_(OP_LOGICNE), T_(OP_LOGICNOT)); // Handle ambiguous 1-, 2-, or 3-char tokens case '<': // Either <<=, LT, LTE, or left shift - switch (peek()) { - case '=': + if (peek() == '<') { shiftChar(); - return Token(T_(OP_LOGICLE)); - case '<': - shiftChar(); - if (peek() == '=') { - shiftChar(); - return Token(T_(POP_SHLEQ)); - } - return Token(T_(OP_SHL)); - default: - return Token(T_(OP_LOGICLT)); + return oneOrTwo('=', T_(POP_SHLEQ), T_(OP_SHL)); } + return oneOrTwo('=', T_(OP_LOGICLE), T_(OP_LOGICLT)); case '>': // Either >>=, GT, GTE, or either kind of right shift - switch (peek()) { - case '=': + if (peek() == '>') { shiftChar(); - return Token(T_(OP_LOGICGE)); - case '>': - shiftChar(); - switch (peek()) { - case '=': - shiftChar(); - return Token(T_(POP_SHREQ)); - case '>': - shiftChar(); - return Token(T_(OP_USHR)); - default: - return Token(T_(OP_SHR)); - } - default: - return Token(T_(OP_LOGICGT)); + return oneOrTwo('=', T_(POP_SHREQ), '>', T_(OP_USHR), T_(OP_SHR)); } + return oneOrTwo('=', T_(OP_LOGICGE), T_(OP_LOGICGT)); case ':': // Either :, ::, or an anonymous label ref c = peek(); - switch (c) { - case ':': - shiftChar(); - return Token(T_(DOUBLE_COLON)); - case '+': - case '-': { + if (c == '+' || c == '-') { std::string symName = readAnonLabelRef(c); return Token(T_(ANON), symName); } - default: - return Token(T_(COLON)); - } + return oneOrTwo(':', T_(DOUBLE_COLON), T_(COLON)); // Handle numbers @@ -1822,27 +1763,17 @@ static Token yylex_NORMAL() { case '&': // Either &=, binary AND, logical AND, or an octal constant c = peek(); - if (c == '=') { - shiftChar(); - return Token(T_(POP_ANDEQ)); - } else if (c == '&') { - shiftChar(); - return Token(T_(OP_LOGICAND)); - } else if (isOctDigit(c)) { + if (isOctDigit(c)) { return Token(T_(NUMBER), readOctalNumber()); } - return Token(T_(OP_AND)); + return oneOrTwo('=', T_(POP_ANDEQ), '&', T_(OP_LOGICAND), T_(OP_AND)); case '%': // Either %=, MOD, or a binary constant c = peek(); - if (c == '=') { - shiftChar(); - return Token(T_(POP_MODEQ)); - } else if (c == '0' || c == '1' || c == options.binDigits[0] - || c == options.binDigits[1]) { + if (c == '0' || c == '1' || c == options.binDigits[0] || c == options.binDigits[1]) { return Token(T_(NUMBER), readBinaryNumber()); } - return Token(T_(OP_MOD)); + return oneOrTwo('=', T_(POP_MODEQ), T_(OP_MOD)); case '$': // Hex constant return Token(T_(NUMBER), readHexNumber());