mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-20 10:12:06 +00:00
Implement 'character' literals (#1747)
This commit is contained in:
12
man/rgbasm.5
12
man/rgbasm.5
@@ -280,7 +280,7 @@ There are a number of numeric formats.
|
||||
.It Binary Ta Li % , 0b , 0B Ta 01
|
||||
.It Fixed-point Ta none Ta 01234.56789
|
||||
.It Precise fixed-point Ta none Ta 12.34q8
|
||||
.It Character constant Ta none Ta \(dqABYZ\(dq
|
||||
.It Character constant Ta none Ta 'ABYZ'
|
||||
.It Game Boy graphics Ta Li \` Ta 0123
|
||||
.El
|
||||
.Pp
|
||||
@@ -293,11 +293,14 @@ or
|
||||
The "character constant" form yields the value the character maps to in the current charmap.
|
||||
For example, by default
|
||||
.Pq refer to Xr ascii 7
|
||||
.Sq \(dqA\(dq
|
||||
.Sq 'A'
|
||||
yields 65.
|
||||
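A character constant must convert to a single value, so it cannot include multiple charmap units, or a unit which maps to multiple values; a sequence of several characters is allowed only if the current charmap maps the whole sequence to one value.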
|
||||
See
|
||||
.Sx Character maps
|
||||
for information on charmaps.
|
||||
for information on charmaps, and
|
||||
.Sx String expressions
|
||||
for information on escape characters allowed in character constants.
|
||||
.Pp
|
||||
The last one, Game Boy graphics, is quite interesting and useful.
|
||||
After the backtick, 8 digits between 0 and 3 are expected, corresponding to pixel values.
|
||||
@@ -538,7 +541,8 @@ There are a number of escape sequences you can use within a string:
|
||||
.Bl -column -offset indent "Sequence"
|
||||
.It Sy Sequence Ta Sy Meaning
|
||||
.It Ql \e\e Ta Backslash Pq escapes the escape character itself
|
||||
.It Ql \e" Ta Double quote Pq does not terminate the string
|
||||
.It Ql \e" Ta Double quote Pq does not terminate a string
|
||||
.It Ql \e' Ta Single quote Pq does not terminate a character literal
|
||||
.It Ql \e{ Ta Open curly brace Pq does not start interpolation
|
||||
.It Ql \e} Ta Close curly brace Pq does not end interpolation
|
||||
.It Ql \en Ta Newline Pq ASCII $0A
|
||||
|
||||
@@ -1428,6 +1428,7 @@ static void appendExpandedString(std::string &str, std::string const &expanded)
|
||||
break;
|
||||
case '\\':
|
||||
case '"':
|
||||
case '\'':
|
||||
case '{':
|
||||
str += '\\';
|
||||
[[fallthrough]];
|
||||
@@ -1448,6 +1449,7 @@ static void appendCharInLiteral(std::string &str, int c) {
|
||||
// Character escape
|
||||
case '\\':
|
||||
case '"':
|
||||
case '\'':
|
||||
case '{':
|
||||
case '}':
|
||||
if (rawMode) {
|
||||
@@ -1518,7 +1520,7 @@ static void appendCharInLiteral(std::string &str, int c) {
|
||||
break;
|
||||
|
||||
case '{': // Symbol interpolation
|
||||
// We'll be exiting the string scope, so re-enable expansions
|
||||
// We'll be exiting the string/character scope, so re-enable expansions
|
||||
// (Not interpolations, since they're handled by the function itself...)
|
||||
lexerState->disableMacroArgs = false;
|
||||
if (auto interpolation = readInterpolation(0); interpolation) {
|
||||
@@ -1614,6 +1616,42 @@ static void readString(std::string &str, bool rawString) {
|
||||
}
|
||||
}
|
||||
|
||||
static void readCharacter(std::string &str) {
|
||||
// This is essentially a simplified `readString`
|
||||
Defer reenableExpansions = scopedDisableExpansions();
|
||||
|
||||
bool rawMode = lexerState->mode == LEXER_RAW;
|
||||
|
||||
// We reach this function after reading a single quote
|
||||
if (rawMode) {
|
||||
str += '\'';
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
int c = peek();
|
||||
|
||||
// '\r', '\n' or EOF ends a character early
|
||||
if (c == EOF || c == '\r' || c == '\n') {
|
||||
error("Unterminated character");
|
||||
return;
|
||||
}
|
||||
|
||||
// We'll be staying in the character, so we can safely consume the char
|
||||
shiftChar();
|
||||
|
||||
// Close the character and return if it's terminated
|
||||
if (c == '\'') {
|
||||
if (rawMode) {
|
||||
str += c;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Append the character or handle special ones
|
||||
appendCharInLiteral(str, c);
|
||||
}
|
||||
}
|
||||
|
||||
// Lexer core
|
||||
|
||||
static Token yylex_SKIP_TO_ENDC(); // forward declaration for yylex_NORMAL
|
||||
@@ -1896,7 +1934,7 @@ static Token yylex_NORMAL() {
|
||||
case '`': // Gfx constant
|
||||
return Token(T_(NUMBER), readGfxConstant());
|
||||
|
||||
// Handle strings
|
||||
// Handle string and character literals
|
||||
|
||||
case '"': {
|
||||
std::string str;
|
||||
@@ -1904,6 +1942,12 @@ static Token yylex_NORMAL() {
|
||||
return Token(T_(STRING), str);
|
||||
}
|
||||
|
||||
case '\'': {
|
||||
std::string chr;
|
||||
readCharacter(chr);
|
||||
return Token(T_(CHARACTER), chr);
|
||||
}
|
||||
|
||||
// Handle newlines and EOF
|
||||
|
||||
case '\r':
|
||||
@@ -2036,6 +2080,11 @@ static Token yylex_RAW() {
|
||||
readString(str, false);
|
||||
break;
|
||||
|
||||
case '\'': // Character literals inside macro args
|
||||
shiftChar();
|
||||
readCharacter(str);
|
||||
break;
|
||||
|
||||
case '#': // Raw string literals inside macro args
|
||||
str += c;
|
||||
shiftChar();
|
||||
@@ -2093,6 +2142,7 @@ backslash:
|
||||
case ')':
|
||||
case '\\': // Escapes shared with string literals
|
||||
case '"':
|
||||
case '\'':
|
||||
case '{':
|
||||
case '}':
|
||||
break;
|
||||
|
||||
@@ -339,6 +339,7 @@
|
||||
// Literals
|
||||
%token <int32_t> NUMBER "number"
|
||||
%token <std::string> STRING "string"
|
||||
%token <std::string> CHARACTER "character"
|
||||
%token <std::string> SYMBOL "symbol"
|
||||
%token <std::string> LABEL "label"
|
||||
%token <std::string> LOCAL "local label"
|
||||
@@ -1415,6 +1416,15 @@ relocexpr_no_str:
|
||||
NUMBER {
|
||||
$$.makeNumber($1);
|
||||
}
|
||||
| CHARACTER {
|
||||
std::vector<int32_t> output = charmap_Convert($1);
|
||||
if (output.size() == 1) {
|
||||
$$.makeNumber(static_cast<uint32_t>(output[0]));
|
||||
} else {
|
||||
::error("Character literals must be a single charmap unit");
|
||||
$$.makeNumber(0);
|
||||
}
|
||||
}
|
||||
| OP_LOGICNOT relocexpr %prec NEG {
|
||||
$$.makeUnaryOp(RPN_LOGNOT, std::move($2));
|
||||
}
|
||||
|
||||
@@ -293,7 +293,7 @@ yy::parser::symbol_type yylex() {
|
||||
c = '\r';
|
||||
} else if (c == 't') {
|
||||
c = '\t';
|
||||
} else if (c != '\\' && c != '"') {
|
||||
} else if (c != '\\' && c != '"' && c != '\'') {
|
||||
scriptError(context, "Cannot escape character %s", printChar(c));
|
||||
}
|
||||
context.file.sbumpc();
|
||||
|
||||
34
test/asm/character-literals.asm
Normal file
34
test/asm/character-literals.asm
Normal file
@@ -0,0 +1,34 @@
|
||||
def s equs "d"
|
||||
|
||||
charmap "A", 1
|
||||
charmap "B", 2
|
||||
charmap "c{s}e", 3
|
||||
charmap "F", 4, 5, 6
|
||||
charmap "'", 42
|
||||
charmap "\"", 1234
|
||||
charmap "\n\r\t\0", 1337
|
||||
charmap "',\",\\", 99
|
||||
|
||||
MACRO char
|
||||
assert (\1) == (\2)
|
||||
ENDM
|
||||
|
||||
char 'A', 1
|
||||
char 'B', 2
|
||||
char 'c{s}e', 3
|
||||
char '\'', 42
|
||||
char '"', 1234
|
||||
char '\n\r\t\0', 1337
|
||||
char '\',",\\', 99
|
||||
|
||||
char charval("c{s}e", 0), 'c{s}e'
|
||||
|
||||
def v equs "\n\r\t\0"
|
||||
def x = '{v}'
|
||||
char x, '\n\r\t\0'
|
||||
|
||||
; errors
|
||||
char '?', $3f ; ASCII
|
||||
char 'F', 0
|
||||
char 'ABF', 0
|
||||
char '\n\r\t', 0
|
||||
15
test/asm/character-literals.err
Normal file
15
test/asm/character-literals.err
Normal file
@@ -0,0 +1,15 @@
|
||||
warning: character-literals.asm(31) -> character-literals.asm::char(13): [-Wunmapped-char]
|
||||
Unmapped character '?'
|
||||
error: character-literals.asm(32) -> character-literals.asm::char(13):
|
||||
Character literals must be a single charmap unit
|
||||
error: character-literals.asm(33) -> character-literals.asm::char(13):
|
||||
Character literals must be a single charmap unit
|
||||
warning: character-literals.asm(34) -> character-literals.asm::char(13): [-Wunmapped-char]
|
||||
Unmapped character '\n'
|
||||
warning: character-literals.asm(34) -> character-literals.asm::char(13): [-Wunmapped-char]
|
||||
Unmapped character '\r'
|
||||
warning: character-literals.asm(34) -> character-literals.asm::char(13): [-Wunmapped-char]
|
||||
Unmapped character '\t'
|
||||
error: character-literals.asm(34) -> character-literals.asm::char(13):
|
||||
Character literals must be a single charmap unit
|
||||
Assembly aborted with 3 errors!
|
||||
@@ -1,3 +1,3 @@
|
||||
assert 1 +# 1 == 2
|
||||
assert 2 '?* 2 == 4
|
||||
assert 3 **?''?##?? 3 == 27
|
||||
assert 2 ?<EFBFBD>* 2 == 4
|
||||
assert 3 **?<EFBFBD>?##?? 3 == 27
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
error: garbage_sequence.asm(1):
|
||||
Unknown character '#'
|
||||
error: garbage_sequence.asm(2):
|
||||
Unknown characters ''', '?'
|
||||
Unknown characters '?', 0xFF
|
||||
error: garbage_sequence.asm(3):
|
||||
Unknown characters '?', ''', ''', '?'
|
||||
Unknown characters '?', 0xFF, '?'
|
||||
error: garbage_sequence.asm(3):
|
||||
Unknown character '#'
|
||||
error: garbage_sequence.asm(3):
|
||||
|
||||
Reference in New Issue
Block a user