Reuse startsIdentifier and continuesIdentifier functions (#1695)

This commit is contained in:
Rangi
2025-05-19 15:31:26 -04:00
committed by GitHub
parent 4f2400c15b
commit 126b1e5726
6 changed files with 32 additions and 48 deletions

View File

@@ -3,6 +3,9 @@
#ifndef RGBDS_UTIL_HPP
#define RGBDS_UTIL_HPP
bool startsIdentifier(int c);
bool continuesIdentifier(int c);
char const *printChar(int c);
#endif // RGBDS_UTIL_HPP

View File

@@ -136,9 +136,8 @@ struct CaseInsensitive {
}
};
// This map lists all RGBASM keywords which `yylex_NORMAL` lexes as identifiers
// (see `startsIdentifier` and `continuesIdentifier` below). All non-identifier
// tokens are lexed separately.
// This map lists all RGBASM keywords which `yylex_NORMAL` lexes as identifiers.
// All non-identifier tokens are lexed separately.
static std::unordered_map<std::string, int, CaseInsensitive, CaseInsensitive> keywordDict = {
{"ADC", T_(SM83_ADC) },
{"ADD", T_(SM83_ADD) },
@@ -612,8 +611,6 @@ static bool isMacroChar(char c) {
static int peek();
static void shiftChar();
static uint32_t readNumber(int radix, uint32_t baseValue);
static bool startsIdentifier(int c);
static bool continuesIdentifier(int c);
static uint32_t readBracketedMacroArgNum() {
bool disableMacroArgs = lexerState->disableMacroArgs;
@@ -1215,15 +1212,6 @@ static uint32_t readGfxConstant() {
// Functions to read identifiers and keywords
static bool startsIdentifier(int c) {
// Anonymous labels internally start with '!'
return (c <= 'Z' && c >= 'A') || (c <= 'z' && c >= 'a') || c == '.' || c == '_';
}
static bool continuesIdentifier(int c) {
return startsIdentifier(c) || (c <= '9' && c >= '0') || c == '#' || c == '$' || c == '@';
}
static Token readIdentifier(char firstChar, bool raw) {
std::string identifier(1, firstChar);
int tokenType = firstChar == '.' ? T_(LOCAL) : T_(SYMBOL);

View File

@@ -2,6 +2,7 @@
#include "asm/symbol.hpp"
#include <algorithm>
#include <inttypes.h>
#include <stdio.h>
#include <unordered_map>
@@ -9,6 +10,7 @@
#include "error.hpp"
#include "helpers.hpp" // assume
#include "util.hpp"
#include "version.hpp"
#include "asm/fstack.hpp"
@@ -130,6 +132,11 @@ static void updateSymbolFilename(Symbol &sym) {
}
}
static bool isValidIdentifier(std::string const &s) {
return !s.empty() && startsIdentifier(s[0])
&& std::all_of(s.begin() + 1, s.end(), [](char c) { return continuesIdentifier(c); });
}
static void alreadyDefinedError(Symbol const &sym, char const *asType) {
if (sym.isBuiltin && !sym_FindScopedValidSymbol(sym.name)) {
// `DEF()` would return false, so we should not claim the symbol is already defined
@@ -142,13 +149,15 @@ static void alreadyDefinedError(Symbol const &sym, char const *asType) {
fputs(" at ", stderr);
dumpFilename(sym);
if (sym.type == SYM_EQUS) {
if (std::string const &contents = *sym.getEqus(); isValidIdentifier(contents)) {
fprintf(
stderr,
" (should it be {interpolated} to define its contents \"%s\"?)\n",
sym.getEqus()->c_str()
contents.c_str()
);
}
}
}
}
static void redefinedError(Symbol const &sym) {

View File

@@ -15,6 +15,7 @@
#include "helpers.hpp"
#include "linkdefs.hpp"
#include "platform.hpp"
#include "util.hpp"
#include "link/main.hpp"
#include "link/section.hpp"
@@ -260,29 +261,13 @@ static void writeROM() {
}
}
// Checks whether a symbol is legal for a sym file or map file.
// Eliminates anonymous labels, which start with a '!'.
static bool isLegalSymbol(Symbol const &sym) {
if (sym.name.empty()) {
return false;
}
char c = sym.name[0];
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_';
}
// Checks whether this character is legal in a symbol's name in a sym file
static bool isLegalForSymName(char c) {
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_'
|| c == '@' || c == '#' || c == '$' || c == '.';
}
// Prints a symbol's name to a file, assuming that the first character is legal.
// Illegal characters are UTF-8-decoded (errors are replaced by U+FFFD) and emitted as '\u'/'\U'.
static void printSymName(std::string const &name, FILE *file) {
for (char const *ptr = name.c_str(); *ptr != '\0';) {
char c = *ptr;
if (isLegalForSymName(c)) {
if (continuesIdentifier(c)) {
// Output legal ASCII characters as-is
putc(c, file);
++ptr;
@@ -369,7 +354,7 @@ static void writeSymBank(SortedSections const &bankSections, SectionType type, u
forEachSortedSection(sect, {
for (Symbol const *sym : sect->symbols) {
// Don't output symbols that begin with an illegal character
if (isLegalSymbol(*sym)) {
if (!sym->name.empty() && startsIdentifier(sym->name[0])) {
uint16_t addr = static_cast<uint16_t>(sym->label().offset + sect->org);
uint16_t parentAddr = addr;
if (auto pos = sym->name.find('.'); pos != std::string::npos) {
@@ -482,7 +467,7 @@ static void writeMapBank(SortedSections const &sectList, SectionType type, uint3
for (uint16_t org = sect->org; sect; sect = sect->nextu.get()) {
for (Symbol *sym : sect->symbols) {
// Don't output symbols that begin with an illegal character
if (isLegalSymbol(*sym)) {
if (!sym->name.empty() && startsIdentifier(sym->name[0])) {
// Space matches "\tSECTION: $xxxx ..."
fprintf(mapFile, "\t $%04" PRIx32 " = ", sym->label().offset + org);
printSymName(sym->name, mapFile);

View File

@@ -6,6 +6,15 @@
#include <stdint.h>
#include <stdio.h>
bool startsIdentifier(int c) {
// This returns false for anonymous labels, which internally start with a '!'
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '.' || c == '_';
}
bool continuesIdentifier(int c) {
return startsIdentifier(c) || (c >= '0' && c <= '9') || c == '#' || c == '$' || c == '@';
}
char const *printChar(int c) {
// "'A'" + '\0': 4 bytes
// "'\\n'" + '\0': 5 bytes

View File

@@ -32,34 +32,24 @@ error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(6):
Built-in symbol '__ISO_8601_UTC__' cannot be purged
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(9):
'__ISO_8601_UTC__' already defined at <builtin>
(should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?)
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(10):
'__ISO_8601_UTC__' already defined at <builtin>
(should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?)
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(13):
'__ISO_8601_UTC__' already defined as constant at <builtin>
(should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?)
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(14):
'__ISO_8601_UTC__' already defined as constant at <builtin>
(should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?)
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(17):
'__ISO_8601_UTC__' already defined at <builtin>
(should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?)
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(18):
'__ISO_8601_UTC__' already defined at <builtin>
(should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?)
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(21):
'__ISO_8601_UTC__' already defined as constant at <builtin>
(should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?)
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(22):
'__ISO_8601_UTC__' already defined as constant at <builtin>
(should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?)
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(25):
'__ISO_8601_UTC__' already defined as non-EQU at <builtin>
(should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?)
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(26):
'__ISO_8601_UTC__' already defined as non-EQU at <builtin>
(should it be {interpolated} to define its contents ""1989-04-21T12:34:56Z""?)
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(29):
Built-in symbol '__ISO_8601_UTC__' cannot be redefined
error: builtin-overwrite.asm(37) -> builtin-overwrite.asm::tickle(30):