Reuse startsIdentifier and continuesIdentifier functions (#1695)

This commit is contained in:
Rangi
2025-05-19 15:31:26 -04:00
committed by GitHub
parent 4f2400c15b
commit 126b1e5726
6 changed files with 32 additions and 48 deletions

View File

@@ -3,6 +3,9 @@
#ifndef RGBDS_UTIL_HPP #ifndef RGBDS_UTIL_HPP
#define RGBDS_UTIL_HPP #define RGBDS_UTIL_HPP
bool startsIdentifier(int c);
bool continuesIdentifier(int c);
char const *printChar(int c); char const *printChar(int c);
#endif // RGBDS_UTIL_HPP #endif // RGBDS_UTIL_HPP

View File

@@ -136,9 +136,8 @@ struct CaseInsensitive {
} }
}; };
// This map lists all RGBASM keywords which `yylex_NORMAL` lexes as identifiers // This map lists all RGBASM keywords which `yylex_NORMAL` lexes as identifiers.
// (see `startsIdentifier` and `continuesIdentifier` below). All non-identifier // All non-identifier tokens are lexed separately.
// tokens are lexed separately.
static std::unordered_map<std::string, int, CaseInsensitive, CaseInsensitive> keywordDict = { static std::unordered_map<std::string, int, CaseInsensitive, CaseInsensitive> keywordDict = {
{"ADC", T_(SM83_ADC) }, {"ADC", T_(SM83_ADC) },
{"ADD", T_(SM83_ADD) }, {"ADD", T_(SM83_ADD) },
@@ -612,8 +611,6 @@ static bool isMacroChar(char c) {
static int peek(); static int peek();
static void shiftChar(); static void shiftChar();
static uint32_t readNumber(int radix, uint32_t baseValue); static uint32_t readNumber(int radix, uint32_t baseValue);
static bool startsIdentifier(int c);
static bool continuesIdentifier(int c);
static uint32_t readBracketedMacroArgNum() { static uint32_t readBracketedMacroArgNum() {
bool disableMacroArgs = lexerState->disableMacroArgs; bool disableMacroArgs = lexerState->disableMacroArgs;
@@ -1215,15 +1212,6 @@ static uint32_t readGfxConstant() {
// Functions to read identifiers and keywords // Functions to read identifiers and keywords
// Tells whether `c` may be the first character of an identifier:
// a letter 'A'-'Z' or 'a'-'z', a '.', or a '_'.
static bool startsIdentifier(int c) {
	// '!' is not accepted here; anonymous labels internally start with it
	if (c == '.' || c == '_') {
		return true;
	}
	bool isUpper = c >= 'A' && c <= 'Z';
	bool isLower = c >= 'a' && c <= 'z';
	return isUpper || isLower;
}
// Tells whether `c` may appear after the first character of an identifier.
static bool continuesIdentifier(int c) {
	// Every character that can start an identifier can also continue one
	if (startsIdentifier(c)) {
		return true;
	}
	// Digits and these three punctuation characters are only allowed past the start
	bool isDigit = c >= '0' && c <= '9';
	return isDigit || c == '#' || c == '$' || c == '@';
}
static Token readIdentifier(char firstChar, bool raw) { static Token readIdentifier(char firstChar, bool raw) {
std::string identifier(1, firstChar); std::string identifier(1, firstChar);
int tokenType = firstChar == '.' ? T_(LOCAL) : T_(SYMBOL); int tokenType = firstChar == '.' ? T_(LOCAL) : T_(SYMBOL);

View File

@@ -2,6 +2,7 @@
#include "asm/symbol.hpp" #include "asm/symbol.hpp"
#include <algorithm>
#include <inttypes.h> #include <inttypes.h>
#include <stdio.h> #include <stdio.h>
#include <unordered_map> #include <unordered_map>
@@ -9,6 +10,7 @@
#include "error.hpp" #include "error.hpp"
#include "helpers.hpp" // assume #include "helpers.hpp" // assume
#include "util.hpp"
#include "version.hpp" #include "version.hpp"
#include "asm/fstack.hpp" #include "asm/fstack.hpp"
@@ -130,6 +132,11 @@ static void updateSymbolFilename(Symbol &sym) {
} }
} }
// Checks that `s` is non-empty, that its first character satisfies `startsIdentifier`,
// and that every following character satisfies `continuesIdentifier`.
static bool isValidIdentifier(std::string const &s) {
	if (s.empty() || !startsIdentifier(s[0])) {
		return false;
	}
	for (auto it = s.begin() + 1; it != s.end(); ++it) {
		if (!continuesIdentifier(*it)) {
			return false;
		}
	}
	return true;
}
static void alreadyDefinedError(Symbol const &sym, char const *asType) { static void alreadyDefinedError(Symbol const &sym, char const *asType) {
if (sym.isBuiltin && !sym_FindScopedValidSymbol(sym.name)) { if (sym.isBuiltin && !sym_FindScopedValidSymbol(sym.name)) {
// `DEF()` would return false, so we should not claim the symbol is already defined // `DEF()` would return false, so we should not claim the symbol is already defined
@@ -142,11 +149,13 @@ static void alreadyDefinedError(Symbol const &sym, char const *asType) {
fputs(" at ", stderr); fputs(" at ", stderr);
dumpFilename(sym); dumpFilename(sym);
if (sym.type == SYM_EQUS) { if (sym.type == SYM_EQUS) {
fprintf( if (std::string const &contents = *sym.getEqus(); isValidIdentifier(contents)) {
stderr, fprintf(
" (should it be {interpolated} to define its contents \"%s\"?)\n", stderr,
sym.getEqus()->c_str() " (should it be {interpolated} to define its contents \"%s\"?)\n",
); contents.c_str()
);
}
} }
} }
} }

View File

@@ -15,6 +15,7 @@
#include "helpers.hpp" #include "helpers.hpp"
#include "linkdefs.hpp" #include "linkdefs.hpp"
#include "platform.hpp" #include "platform.hpp"
#include "util.hpp"
#include "link/main.hpp" #include "link/main.hpp"
#include "link/section.hpp" #include "link/section.hpp"
@@ -260,29 +261,13 @@ static void writeROM() {
} }
} }
// Tells whether a symbol may be written to a sym file or map file.
// Anonymous labels are rejected, since their names start with a '!'.
static bool isLegalSymbol(Symbol const &sym) {
	if (!sym.name.empty()) {
		// Only the first character of the name decides legality here
		char first = sym.name[0];
		return first == '_' || (first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z');
	}
	return false;
}
// Tells whether this character can appear as-is in a symbol's name in a sym file
static bool isLegalForSymName(char c) {
	switch (c) {
	// Punctuation accepted alongside letters and digits
	case '_':
	case '@':
	case '#':
	case '$':
	case '.':
		return true;
	default:
		return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
	}
}
// Prints a symbol's name to a file, assuming that the first character is legal. // Prints a symbol's name to a file, assuming that the first character is legal.
// Illegal characters are UTF-8-decoded (errors are replaced by U+FFFD) and emitted as '\u'/'\U'. // Illegal characters are UTF-8-decoded (errors are replaced by U+FFFD) and emitted as '\u'/'\U'.
static void printSymName(std::string const &name, FILE *file) { static void printSymName(std::string const &name, FILE *file) {
for (char const *ptr = name.c_str(); *ptr != '\0';) { for (char const *ptr = name.c_str(); *ptr != '\0';) {
char c = *ptr; char c = *ptr;
if (isLegalForSymName(c)) { if (continuesIdentifier(c)) {
// Output legal ASCII characters as-is // Output legal ASCII characters as-is
putc(c, file); putc(c, file);
++ptr; ++ptr;
@@ -369,7 +354,7 @@ static void writeSymBank(SortedSections const &bankSections, SectionType type, u
forEachSortedSection(sect, { forEachSortedSection(sect, {
for (Symbol const *sym : sect->symbols) { for (Symbol const *sym : sect->symbols) {
// Don't output symbols that begin with an illegal character // Don't output symbols that begin with an illegal character
if (isLegalSymbol(*sym)) { if (!sym->name.empty() && startsIdentifier(sym->name[0])) {
uint16_t addr = static_cast<uint16_t>(sym->label().offset + sect->org); uint16_t addr = static_cast<uint16_t>(sym->label().offset + sect->org);
uint16_t parentAddr = addr; uint16_t parentAddr = addr;
if (auto pos = sym->name.find('.'); pos != std::string::npos) { if (auto pos = sym->name.find('.'); pos != std::string::npos) {
@@ -482,7 +467,7 @@ static void writeMapBank(SortedSections const &sectList, SectionType type, uint3
for (uint16_t org = sect->org; sect; sect = sect->nextu.get()) { for (uint16_t org = sect->org; sect; sect = sect->nextu.get()) {
for (Symbol *sym : sect->symbols) { for (Symbol *sym : sect->symbols) {
// Don't output symbols that begin with an illegal character // Don't output symbols that begin with an illegal character
if (isLegalSymbol(*sym)) { if (!sym->name.empty() && startsIdentifier(sym->name[0])) {
// Space matches "\tSECTION: $xxxx ..." // Space matches "\tSECTION: $xxxx ..."
fprintf(mapFile, "\t $%04" PRIx32 " = ", sym->label().offset + org); fprintf(mapFile, "\t $%04" PRIx32 " = ", sym->label().offset + org);
printSymName(sym->name, mapFile); printSymName(sym->name, mapFile);

View File

@@ -6,6 +6,15 @@
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
// Tells whether `c` may be the first character of an identifier.
bool startsIdentifier(int c) {
	switch (c) {
	case '.':
	case '_':
		return true;
	default:
		// Letters only; anonymous labels internally start with a '!', so they yield false
		return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
	}
}
// Tells whether `c` may appear after the first character of an identifier.
bool continuesIdentifier(int c) {
	// Anything that can start an identifier can also continue it
	if (startsIdentifier(c)) {
		return true;
	}
	// Digits, '#', '$', and '@' are accepted only past the first character
	bool isDigit = c >= '0' && c <= '9';
	bool isExtraPunct = c == '#' || c == '$' || c == '@';
	return isDigit || isExtraPunct;
}
char const *printChar(int c) { char const *printChar(int c) {
// "'A'" + '\0': 4 bytes // "'A'" + '\0': 4 bytes
// "'\\n'" + '\0': 5 bytes // "'\\n'" + '\0': 5 bytes

View File

@@ -32,34 +32,24 @@ error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(6):
Built-in symbol '__ISO_8601_UTC__' cannot be purged Built-in symbol '__ISO_8601_UTC__' cannot be purged
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(9): error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(9):
'__ISO_8601_UTC__' already defined at <builtin> '__ISO_8601_UTC__' already defined at <builtin>
(should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?)
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(10): error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(10):
'__ISO_8601_UTC__' already defined at <builtin> '__ISO_8601_UTC__' already defined at <builtin>
(should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?)
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(13): error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(13):
'__ISO_8601_UTC__' already defined as constant at <builtin> '__ISO_8601_UTC__' already defined as constant at <builtin>
(should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?)
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(14): error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(14):
'__ISO_8601_UTC__' already defined as constant at <builtin> '__ISO_8601_UTC__' already defined as constant at <builtin>
(should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?)
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(17): error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(17):
'__ISO_8601_UTC__' already defined at <builtin> '__ISO_8601_UTC__' already defined at <builtin>
(should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?)
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(18): error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(18):
'__ISO_8601_UTC__' already defined at <builtin> '__ISO_8601_UTC__' already defined at <builtin>
(should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?)
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(21): error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(21):
'__ISO_8601_UTC__' already defined as constant at <builtin> '__ISO_8601_UTC__' already defined as constant at <builtin>
(should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?)
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(22): error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(22):
'__ISO_8601_UTC__' already defined as constant at <builtin> '__ISO_8601_UTC__' already defined as constant at <builtin>
(should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?)
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(25): error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(25):
'__ISO_8601_UTC__' already defined as non-EQU at <builtin> '__ISO_8601_UTC__' already defined as non-EQU at <builtin>
(should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?)
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(26): error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(26):
'__ISO_8601_UTC__' already defined as non-EQU at <builtin> '__ISO_8601_UTC__' already defined as non-EQU at <builtin>
(should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?)
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(29): error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(29):
Built-in symbol '__ISO_8601_UTC__' cannot be redefined Built-in symbol '__ISO_8601_UTC__' cannot be redefined
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(30): error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(30):