From 126b1e5726c3c09fa74ee2200f5bdcba7391e2a5 Mon Sep 17 00:00:00 2001 From: Rangi <35663410+Rangi42@users.noreply.github.com> Date: Mon, 19 May 2025 15:31:26 -0400 Subject: [PATCH] Reuse `startsIdentifier` and `continuesIdentifier` functions (#1695) --- include/util.hpp | 3 +++ src/asm/lexer.cpp | 16 ++-------------- src/asm/symbol.cpp | 19 ++++++++++++++----- src/link/output.cpp | 23 ++++------------------- src/util.cpp | 9 +++++++++ test/asm/builtin-overwrite.err | 10 ---------- 6 files changed, 32 insertions(+), 48 deletions(-) diff --git a/include/util.hpp b/include/util.hpp index 6cf9e078..69abdc5b 100644 --- a/include/util.hpp +++ b/include/util.hpp @@ -3,6 +3,9 @@ #ifndef RGBDS_UTIL_HPP #define RGBDS_UTIL_HPP +bool startsIdentifier(int c); +bool continuesIdentifier(int c); + char const *printChar(int c); #endif // RGBDS_UTIL_HPP diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index bcdbcae6..3b40e9b7 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -136,9 +136,8 @@ struct CaseInsensitive { } }; -// This map lists all RGBASM keywords which `yylex_NORMAL` lexes as identifiers -// (see `startsIdentifier` and `continuesIdentifier` below). All non-identifier -// tokens are lexed separately. +// This map lists all RGBASM keywords which `yylex_NORMAL` lexes as identifiers. +// All non-identifier tokens are lexed separately. static std::unordered_map keywordDict = { {"ADC", T_(SM83_ADC) }, {"ADD", T_(SM83_ADD) }, @@ -612,8 +611,6 @@ static bool isMacroChar(char c) { static int peek(); static void shiftChar(); static uint32_t readNumber(int radix, uint32_t baseValue); -static bool startsIdentifier(int c); -static bool continuesIdentifier(int c); static uint32_t readBracketedMacroArgNum() { bool disableMacroArgs = lexerState->disableMacroArgs; @@ -1215,15 +1212,6 @@ static uint32_t readGfxConstant() { // Functions to read identifiers and keywords -static bool startsIdentifier(int c) { - // Anonymous labels internally start with '!' - return (c <= 'Z' && c >= 'A') || (c <= 'z' && c >= 'a') || c == '.' || c == '_'; -} - -static bool continuesIdentifier(int c) { - return startsIdentifier(c) || (c <= '9' && c >= '0') || c == '#' || c == '$' || c == '@'; -} - static Token readIdentifier(char firstChar, bool raw) { std::string identifier(1, firstChar); int tokenType = firstChar == '.' ? T_(LOCAL) : T_(SYMBOL); diff --git a/src/asm/symbol.cpp b/src/asm/symbol.cpp index 4aba855e..9510a8d8 100644 --- a/src/asm/symbol.cpp +++ b/src/asm/symbol.cpp @@ -2,6 +2,7 @@ #include "asm/symbol.hpp" +#include #include #include #include @@ -9,6 +10,7 @@ #include "error.hpp" #include "helpers.hpp" // assume +#include "util.hpp" #include "version.hpp" #include "asm/fstack.hpp" @@ -130,6 +132,11 @@ static void updateSymbolFilename(Symbol &sym) { } } +static bool isValidIdentifier(std::string const &s) { + return !s.empty() && startsIdentifier(s[0]) + && std::all_of(s.begin() + 1, s.end(), [](char c) { return continuesIdentifier(c); }); +} + static void alreadyDefinedError(Symbol const &sym, char const *asType) { if (sym.isBuiltin && !sym_FindScopedValidSymbol(sym.name)) { // `DEF()` would return false, so we should not claim the symbol is already defined @@ -142,11 +149,13 @@ static void alreadyDefinedError(Symbol const &sym, char const *asType) { fputs(" at ", stderr); dumpFilename(sym); if (sym.type == SYM_EQUS) { - fprintf( - stderr, - " (should it be {interpolated} to define its contents \"%s\"?)\n", - sym.getEqus()->c_str() - ); + if (std::string const &contents = *sym.getEqus(); isValidIdentifier(contents)) { + fprintf( + stderr, + " (should it be {interpolated} to define its contents \"%s\"?)\n", + contents.c_str() + ); + } } } } diff --git a/src/link/output.cpp b/src/link/output.cpp index 4c7f4d48..13a69756 100644 --- a/src/link/output.cpp +++ b/src/link/output.cpp @@ -15,6 +15,7 @@ #include "helpers.hpp" #include "linkdefs.hpp" #include "platform.hpp" +#include "util.hpp" #include "link/main.hpp" #include "link/section.hpp" @@ -260,29 +261,13 @@ static void writeROM() { } } -// Checks whether a symbol is legal for a sym file or map file. -// Eliminates anonymous labels, which start with a '!'. -static bool isLegalSymbol(Symbol const &sym) { - if (sym.name.empty()) { - return false; - } - char c = sym.name[0]; - return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_'; -} - -// Checks whether this character is legal in a symbol's name in a sym file -static bool isLegalForSymName(char c) { - return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_' - || c == '@' || c == '#' || c == '$' || c == '.'; -} - // Prints a symbol's name to a file, assuming that the first character is legal. // Illegal characters are UTF-8-decoded (errors are replaced by U+FFFD) and emitted as '\u'/'\U'. static void printSymName(std::string const &name, FILE *file) { for (char const *ptr = name.c_str(); *ptr != '\0';) { char c = *ptr; - if (isLegalForSymName(c)) { + if (continuesIdentifier(c)) { // Output legal ASCII characters as-is putc(c, file); ++ptr; @@ -369,7 +354,7 @@ static void writeSymBank(SortedSections const &bankSections, SectionType type, u forEachSortedSection(sect, { for (Symbol const *sym : sect->symbols) { // Don't output symbols that begin with an illegal character - if (isLegalSymbol(*sym)) { + if (!sym->name.empty() && startsIdentifier(sym->name[0])) { uint16_t addr = static_cast(sym->label().offset + sect->org); uint16_t parentAddr = addr; if (auto pos = sym->name.find('.'); pos != std::string::npos) { @@ -482,7 +467,7 @@ static void writeMapBank(SortedSections const §List, SectionType type, uint3 for (uint16_t org = sect->org; sect; sect = sect->nextu.get()) { for (Symbol *sym : sect->symbols) { // Don't output symbols that begin with an illegal character - if (isLegalSymbol(*sym)) { + if (!sym->name.empty() && startsIdentifier(sym->name[0])) { // Space matches "\tSECTION: $xxxx ..." fprintf(mapFile, "\t $%04" PRIx32 " = ", sym->label().offset + org); printSymName(sym->name, mapFile); diff --git a/src/util.cpp b/src/util.cpp index d56762f9..6c525b54 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -6,6 +6,15 @@ #include #include +bool startsIdentifier(int c) { + // This returns false for anonymous labels, which internally start with a '!' + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '.' || c == '_'; +} + +bool continuesIdentifier(int c) { + return startsIdentifier(c) || (c >= '0' && c <= '9') || c == '#' || c == '$' || c == '@'; +} + char const *printChar(int c) { // "'A'" + '\0': 4 bytes // "'\\n'" + '\0': 5 bytes diff --git a/test/asm/builtin-overwrite.err b/test/asm/builtin-overwrite.err index 1b0c6e15..95d90465 100644 --- a/test/asm/builtin-overwrite.err +++ b/test/asm/builtin-overwrite.err @@ -32,34 +32,24 @@ error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(6): Built-in symbol '__ISO_8601_UTC__' cannot be purged error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(9): '__ISO_8601_UTC__' already defined at - (should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?) error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(10): '__ISO_8601_UTC__' already defined at - (should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?) error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(13): '__ISO_8601_UTC__' already defined as constant at - (should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?) error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(14): '__ISO_8601_UTC__' already defined as constant at - (should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?) error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(17): '__ISO_8601_UTC__' already defined at - (should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?) error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(18): '__ISO_8601_UTC__' already defined at - (should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?) error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(21): '__ISO_8601_UTC__' already defined as constant at - (should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?) error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(22): '__ISO_8601_UTC__' already defined as constant at - (should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?) error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(25): '__ISO_8601_UTC__' already defined as non-EQU at - (should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?) error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(26): '__ISO_8601_UTC__' already defined as non-EQU at - (should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?) error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(29): Built-in symbol '__ISO_8601_UTC__' cannot be redefined error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(30):