Implement 'character' literals (#1747)

This commit is contained in:
Rangi
2025-07-15 13:08:50 -04:00
committed by GitHub
parent b6d77fbb9e
commit 1fecf80659
8 changed files with 124 additions and 11 deletions

View File

@@ -1428,6 +1428,7 @@ static void appendExpandedString(std::string &str, std::string const &expanded)
break;
case '\\':
case '"':
case '\'':
case '{':
str += '\\';
[[fallthrough]];
@@ -1448,6 +1449,7 @@ static void appendCharInLiteral(std::string &str, int c) {
// Character escape
case '\\':
case '"':
case '\'':
case '{':
case '}':
if (rawMode) {
@@ -1518,7 +1520,7 @@ static void appendCharInLiteral(std::string &str, int c) {
break;
case '{': // Symbol interpolation
// We'll be exiting the string scope, so re-enable expansions
// We'll be exiting the string/character scope, so re-enable expansions
// (Not interpolations, since they're handled by the function itself...)
lexerState->disableMacroArgs = false;
if (auto interpolation = readInterpolation(0); interpolation) {
@@ -1614,6 +1616,42 @@ static void readString(std::string &str, bool rawString) {
}
}
static void readCharacter(std::string &str) {
	// Single-quoted counterpart of `readString`, without the multi-line/raw variants.
	// Expansions stay disabled for the duration of this function; the `Defer` object
	// restores them when it goes out of scope.
	Defer reenableExpansions = scopedDisableExpansions();

	// In raw mode the literal is copied with its delimiters, so the quotes are kept
	bool const keepQuotes = lexerState->mode == LEXER_RAW;

	// The caller has already consumed the opening single quote; echo it if needed
	if (keepQuotes) {
		str += '\'';
	}

	while (true) {
		int const next = peek();

		switch (next) {
		case EOF:
		case '\r':
		case '\n':
			// The literal was cut short; leave the offending char unconsumed
			error("Unterminated character");
			return;

		case '\'':
			// Closing quote: consume it, echo it in raw mode, and finish
			shiftChar();
			if (keepQuotes) {
				str += '\'';
			}
			return;

		default:
			// Still inside the literal, so the char can be consumed before
			// `appendCharInLiteral` appends it or processes special characters
			shiftChar();
			appendCharInLiteral(str, next);
			break;
		}
	}
}
// Lexer core
static Token yylex_SKIP_TO_ENDC(); // forward declaration for yylex_NORMAL
@@ -1896,7 +1934,7 @@ static Token yylex_NORMAL() {
case '`': // Gfx constant
return Token(T_(NUMBER), readGfxConstant());
// Handle strings
// Handle string and character literals
case '"': {
std::string str;
@@ -1904,6 +1942,12 @@ static Token yylex_NORMAL() {
return Token(T_(STRING), str);
}
case '\'': {
std::string chr;
readCharacter(chr);
return Token(T_(CHARACTER), chr);
}
// Handle newlines and EOF
case '\r':
@@ -2036,6 +2080,11 @@ static Token yylex_RAW() {
readString(str, false);
break;
case '\'': // Character literals inside macro args
shiftChar();
readCharacter(str);
break;
case '#': // Raw string literals inside macro args
str += c;
shiftChar();
@@ -2093,6 +2142,7 @@ backslash:
case ')':
case '\\': // Escapes shared with string literals
case '"':
case '\'':
case '{':
case '}':
break;

View File

@@ -339,6 +339,7 @@
// Literals
%token <int32_t> NUMBER "number"
%token <std::string> STRING "string"
%token <std::string> CHARACTER "character"
%token <std::string> SYMBOL "symbol"
%token <std::string> LABEL "label"
%token <std::string> LOCAL "local label"
@@ -1415,6 +1416,15 @@ relocexpr_no_str:
NUMBER {
$$.makeNumber($1);
}
| CHARACTER {
std::vector<int32_t> output = charmap_Convert($1);
if (output.size() == 1) {
$$.makeNumber(static_cast<uint32_t>(output[0]));
} else {
::error("Character literals must be a single charmap unit");
$$.makeNumber(0);
}
}
| OP_LOGICNOT relocexpr %prec NEG {
$$.makeUnaryOp(RPN_LOGNOT, std::move($2));
}

View File

@@ -293,7 +293,7 @@ yy::parser::symbol_type yylex() {
c = '\r';
} else if (c == 't') {
c = '\t';
} else if (c != '\\' && c != '"') {
} else if (c != '\\' && c != '"' && c != '\'') {
scriptError(context, "Cannot escape character %s", printChar(c));
}
context.file.sbumpc();