Implement a '#' prefix for raw identifiers that may alias keywords (#1480)

* Implement a '#' prefix for raw identifiers that may alias keywords

* Review comments

* Disallow hashless raw identifiers in interpolations

* Run clang-format
This commit is contained in:
Sylvie
2024-08-21 13:31:44 -04:00
committed by GitHub
parent 82e81ab1da
commit b438c83bda
11 changed files with 114 additions and 13 deletions

View File

@@ -596,7 +596,16 @@ static uint32_t readBracketedMacroArgNum() {
if (c >= '0' && c <= '9') {
num = readNumber(10, 0);
} else if (startsIdentifier(c)) {
} else if (startsIdentifier(c) || c == '#') {
if (c == '#') {
shiftChar();
c = peek();
if (!startsIdentifier(c)) {
error("Empty raw symbol in bracketed macro argument\n");
return 0;
}
}
std::string symName;
for (; continuesIdentifier(c); c = peek()) {
@@ -1138,8 +1147,7 @@ static bool continuesIdentifier(int c) {
return startsIdentifier(c) || (c <= '9' && c >= '0') || c == '#' || c == '@';
}
static Token readIdentifier(char firstChar) {
// Lex while checking for a keyword
static Token readIdentifier(char firstChar, bool raw) {
std::string identifier(1, firstChar);
int tokenType = firstChar == '.' ? T_(LOCAL_ID) : T_(ID);
@@ -1155,9 +1163,13 @@ static Token readIdentifier(char firstChar) {
tokenType = T_(LOCAL_ID);
}
// Attempt to check for a keyword
auto search = keywordDict.find(identifier.c_str());
return search != keywordDict.end() ? Token(search->second) : Token(tokenType, identifier);
// Attempt to check for a keyword if the identifier is not raw
if (!raw) {
if (auto search = keywordDict.find(identifier.c_str()); search != keywordDict.end())
return Token(search->second);
}
return Token(tokenType, identifier);
}
// Functions to read strings
@@ -1207,6 +1219,19 @@ static std::shared_ptr<std::string> readInterpolation(size_t depth) {
// Don't return before `lexerState->disableInterpolation` is reset!
lexerState->disableInterpolation = disableInterpolation;
if (fmtBuf.starts_with('#')) {
// Skip a '#' raw identifier prefix, but after expanding any nested interpolations.
fmtBuf.erase(0, 1);
} else if (keywordDict.find(fmtBuf.c_str()) != keywordDict.end()) {
// Don't allow symbols that alias keywords without a '#' prefix.
error(
"Interpolated symbol \"%s\" is a reserved keyword; add a '#' prefix to use it as a raw "
"symbol\n",
fmtBuf.c_str()
);
return nullptr;
}
Symbol const *sym = sym_FindScopedValidSymbol(fmtBuf);
if (!sym || !sym->isDefined()) {
@@ -1781,8 +1806,13 @@ static Token yylex_NORMAL() {
// Handle identifiers... or report garbage characters
default:
bool raw = c == '#';
if (raw && startsIdentifier(peek())) {
c = nextChar();
}
if (startsIdentifier(c)) {
Token token = readIdentifier(c);
Token token = readIdentifier(c, raw);
// An ELIF after a taken IF needs to not evaluate its condition
if (token.type == T_(POP_ELIF) && lexerState->lastToken == T_(NEWLINE)
@@ -2017,7 +2047,7 @@ static Token skipIfBlock(bool toEndc) {
if (startsIdentifier(c)) {
shiftChar();
switch (Token token = readIdentifier(c); token.type) {
switch (Token token = readIdentifier(c, false); token.type) {
case T_(POP_IF):
lexer_IncIFDepth();
break;
@@ -2103,7 +2133,7 @@ static Token yylex_SKIP_TO_ENDR() {
if (startsIdentifier(c)) {
shiftChar();
switch (readIdentifier(c).type) {
switch (readIdentifier(c, false).type) {
case T_(POP_FOR):
case T_(POP_REPT):
depth++;
@@ -2250,7 +2280,7 @@ Capture lexer_CaptureRept() {
} while (isWhitespace(c));
// Now, try to match `REPT`, `FOR` or `ENDR` as a **whole** identifier
if (startsIdentifier(c)) {
switch (readIdentifier(c).type) {
switch (readIdentifier(c, false).type) {
case T_(POP_REPT):
case T_(POP_FOR):
depth++;
@@ -2303,7 +2333,7 @@ Capture lexer_CaptureMacro() {
} while (isWhitespace(c));
// Now, try to match `ENDM` as a **whole** identifier
if (startsIdentifier(c)) {
switch (readIdentifier(c).type) {
switch (readIdentifier(c, false).type) {
case T_(POP_ENDM):
endCapture(capture);
// The ENDM has been captured, but we don't want it!