mirror of
https://github.com/gbdev/rgbds.git
synced 2025-11-20 10:12:06 +00:00
Implement 'character' literals (#1747)
This commit is contained in:
12
man/rgbasm.5
12
man/rgbasm.5
@@ -280,7 +280,7 @@ There are a number of numeric formats.
|
|||||||
.It Binary Ta Li % , 0b , 0B Ta 01
|
.It Binary Ta Li % , 0b , 0B Ta 01
|
||||||
.It Fixed-point Ta none Ta 01234.56789
|
.It Fixed-point Ta none Ta 01234.56789
|
||||||
.It Precise fixed-point Ta none Ta 12.34q8
|
.It Precise fixed-point Ta none Ta 12.34q8
|
||||||
.It Character constant Ta none Ta \(dqABYZ\(dq
|
.It Character constant Ta none Ta 'ABYZ'
|
||||||
.It Game Boy graphics Ta Li \` Ta 0123
|
.It Game Boy graphics Ta Li \` Ta 0123
|
||||||
.El
|
.El
|
||||||
.Pp
|
.Pp
|
||||||
@@ -293,11 +293,14 @@ or
|
|||||||
The "character constant" form yields the value the character maps to in the current charmap.
|
The "character constant" form yields the value the character maps to in the current charmap.
|
||||||
For example, by default
|
For example, by default
|
||||||
.Pq refer to Xr ascii 7
|
.Pq refer to Xr ascii 7
|
||||||
.Sq \(dqA\(dq
|
.Sq 'A'
|
||||||
yields 65.
|
yields 65.
|
||||||
|
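A character constant must represent a single value, so it cannot consist of several charmap units, or of a unit which maps to multiple values; several characters are accepted only if the current charmap maps the whole sequence to one value.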
|
||||||
See
|
See
|
||||||
.Sx Character maps
|
.Sx Character maps
|
||||||
for information on charmaps.
|
for information on charmaps, and
|
||||||
|
.Sx String expressions
|
||||||
|
for information on escape characters allowed in character constants.
|
||||||
.Pp
|
.Pp
|
||||||
The last one, Game Boy graphics, is quite interesting and useful.
|
The last one, Game Boy graphics, is quite interesting and useful.
|
||||||
After the backtick, 8 digits between 0 and 3 are expected, corresponding to pixel values.
|
After the backtick, 8 digits between 0 and 3 are expected, corresponding to pixel values.
|
||||||
@@ -538,7 +541,8 @@ There are a number of escape sequences you can use within a string:
|
|||||||
.Bl -column -offset indent "Sequence"
|
.Bl -column -offset indent "Sequence"
|
||||||
.It Sy Sequence Ta Sy Meaning
|
.It Sy Sequence Ta Sy Meaning
|
||||||
.It Ql \e\e Ta Backslash Pq escapes the escape character itself
|
.It Ql \e\e Ta Backslash Pq escapes the escape character itself
|
||||||
.It Ql \e" Ta Double quote Pq does not terminate the string
|
.It Ql \e" Ta Double quote Pq does not terminate a string
|
||||||
|
.It Ql \e' Ta Single quote Pq does not terminate a character literal
|
||||||
.It Ql \e{ Ta Open curly brace Pq does not start interpolation
|
.It Ql \e{ Ta Open curly brace Pq does not start interpolation
|
||||||
.It Ql \e} Ta Close curly brace Pq does not end interpolation
|
.It Ql \e} Ta Close curly brace Pq does not end interpolation
|
||||||
.It Ql \en Ta Newline Pq ASCII $0A
|
.It Ql \en Ta Newline Pq ASCII $0A
|
||||||
|
|||||||
@@ -1428,6 +1428,7 @@ static void appendExpandedString(std::string &str, std::string const &expanded)
|
|||||||
break;
|
break;
|
||||||
case '\\':
|
case '\\':
|
||||||
case '"':
|
case '"':
|
||||||
|
case '\'':
|
||||||
case '{':
|
case '{':
|
||||||
str += '\\';
|
str += '\\';
|
||||||
[[fallthrough]];
|
[[fallthrough]];
|
||||||
@@ -1448,6 +1449,7 @@ static void appendCharInLiteral(std::string &str, int c) {
|
|||||||
// Character escape
|
// Character escape
|
||||||
case '\\':
|
case '\\':
|
||||||
case '"':
|
case '"':
|
||||||
|
case '\'':
|
||||||
case '{':
|
case '{':
|
||||||
case '}':
|
case '}':
|
||||||
if (rawMode) {
|
if (rawMode) {
|
||||||
@@ -1518,7 +1520,7 @@ static void appendCharInLiteral(std::string &str, int c) {
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case '{': // Symbol interpolation
|
case '{': // Symbol interpolation
|
||||||
// We'll be exiting the string scope, so re-enable expansions
|
// We'll be exiting the string/character scope, so re-enable expansions
|
||||||
// (Not interpolations, since they're handled by the function itself...)
|
// (Not interpolations, since they're handled by the function itself...)
|
||||||
lexerState->disableMacroArgs = false;
|
lexerState->disableMacroArgs = false;
|
||||||
if (auto interpolation = readInterpolation(0); interpolation) {
|
if (auto interpolation = readInterpolation(0); interpolation) {
|
||||||
@@ -1614,6 +1616,42 @@ static void readString(std::string &str, bool rawString) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void readCharacter(std::string &str) {
|
||||||
|
// This is essentially a simplified `readString`
|
||||||
|
Defer reenableExpansions = scopedDisableExpansions();
|
||||||
|
|
||||||
|
bool rawMode = lexerState->mode == LEXER_RAW;
|
||||||
|
|
||||||
|
// We reach this function after reading a single quote
|
||||||
|
if (rawMode) {
|
||||||
|
str += '\'';
|
||||||
|
}
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
int c = peek();
|
||||||
|
|
||||||
|
// '\r', '\n' or EOF ends a character early
|
||||||
|
if (c == EOF || c == '\r' || c == '\n') {
|
||||||
|
error("Unterminated character");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We'll be staying in the character, so we can safely consume the char
|
||||||
|
shiftChar();
|
||||||
|
|
||||||
|
// Close the character and return if it's terminated
|
||||||
|
if (c == '\'') {
|
||||||
|
if (rawMode) {
|
||||||
|
str += c;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Append the character or handle special ones
|
||||||
|
appendCharInLiteral(str, c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Lexer core
|
// Lexer core
|
||||||
|
|
||||||
static Token yylex_SKIP_TO_ENDC(); // forward declaration for yylex_NORMAL
|
static Token yylex_SKIP_TO_ENDC(); // forward declaration for yylex_NORMAL
|
||||||
@@ -1896,7 +1934,7 @@ static Token yylex_NORMAL() {
|
|||||||
case '`': // Gfx constant
|
case '`': // Gfx constant
|
||||||
return Token(T_(NUMBER), readGfxConstant());
|
return Token(T_(NUMBER), readGfxConstant());
|
||||||
|
|
||||||
// Handle strings
|
// Handle string and character literals
|
||||||
|
|
||||||
case '"': {
|
case '"': {
|
||||||
std::string str;
|
std::string str;
|
||||||
@@ -1904,6 +1942,12 @@ static Token yylex_NORMAL() {
|
|||||||
return Token(T_(STRING), str);
|
return Token(T_(STRING), str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case '\'': {
|
||||||
|
std::string chr;
|
||||||
|
readCharacter(chr);
|
||||||
|
return Token(T_(CHARACTER), chr);
|
||||||
|
}
|
||||||
|
|
||||||
// Handle newlines and EOF
|
// Handle newlines and EOF
|
||||||
|
|
||||||
case '\r':
|
case '\r':
|
||||||
@@ -2036,6 +2080,11 @@ static Token yylex_RAW() {
|
|||||||
readString(str, false);
|
readString(str, false);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case '\'': // Character literals inside macro args
|
||||||
|
shiftChar();
|
||||||
|
readCharacter(str);
|
||||||
|
break;
|
||||||
|
|
||||||
case '#': // Raw string literals inside macro args
|
case '#': // Raw string literals inside macro args
|
||||||
str += c;
|
str += c;
|
||||||
shiftChar();
|
shiftChar();
|
||||||
@@ -2093,6 +2142,7 @@ backslash:
|
|||||||
case ')':
|
case ')':
|
||||||
case '\\': // Escapes shared with string literals
|
case '\\': // Escapes shared with string literals
|
||||||
case '"':
|
case '"':
|
||||||
|
case '\'':
|
||||||
case '{':
|
case '{':
|
||||||
case '}':
|
case '}':
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -339,6 +339,7 @@
|
|||||||
// Literals
|
// Literals
|
||||||
%token <int32_t> NUMBER "number"
|
%token <int32_t> NUMBER "number"
|
||||||
%token <std::string> STRING "string"
|
%token <std::string> STRING "string"
|
||||||
|
%token <std::string> CHARACTER "character"
|
||||||
%token <std::string> SYMBOL "symbol"
|
%token <std::string> SYMBOL "symbol"
|
||||||
%token <std::string> LABEL "label"
|
%token <std::string> LABEL "label"
|
||||||
%token <std::string> LOCAL "local label"
|
%token <std::string> LOCAL "local label"
|
||||||
@@ -1415,6 +1416,15 @@ relocexpr_no_str:
|
|||||||
NUMBER {
|
NUMBER {
|
||||||
$$.makeNumber($1);
|
$$.makeNumber($1);
|
||||||
}
|
}
|
||||||
|
| CHARACTER {
|
||||||
|
std::vector<int32_t> output = charmap_Convert($1);
|
||||||
|
if (output.size() == 1) {
|
||||||
|
$$.makeNumber(static_cast<uint32_t>(output[0]));
|
||||||
|
} else {
|
||||||
|
::error("Character literals must be a single charmap unit");
|
||||||
|
$$.makeNumber(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
| OP_LOGICNOT relocexpr %prec NEG {
|
| OP_LOGICNOT relocexpr %prec NEG {
|
||||||
$$.makeUnaryOp(RPN_LOGNOT, std::move($2));
|
$$.makeUnaryOp(RPN_LOGNOT, std::move($2));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -293,7 +293,7 @@ yy::parser::symbol_type yylex() {
|
|||||||
c = '\r';
|
c = '\r';
|
||||||
} else if (c == 't') {
|
} else if (c == 't') {
|
||||||
c = '\t';
|
c = '\t';
|
||||||
} else if (c != '\\' && c != '"') {
|
} else if (c != '\\' && c != '"' && c != '\'') {
|
||||||
scriptError(context, "Cannot escape character %s", printChar(c));
|
scriptError(context, "Cannot escape character %s", printChar(c));
|
||||||
}
|
}
|
||||||
context.file.sbumpc();
|
context.file.sbumpc();
|
||||||
|
|||||||
34
test/asm/character-literals.asm
Normal file
34
test/asm/character-literals.asm
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
def s equs "d"
|
||||||
|
|
||||||
|
charmap "A", 1
|
||||||
|
charmap "B", 2
|
||||||
|
charmap "c{s}e", 3
|
||||||
|
charmap "F", 4, 5, 6
|
||||||
|
charmap "'", 42
|
||||||
|
charmap "\"", 1234
|
||||||
|
charmap "\n\r\t\0", 1337
|
||||||
|
charmap "',\",\\", 99
|
||||||
|
|
||||||
|
MACRO char
|
||||||
|
assert (\1) == (\2)
|
||||||
|
ENDM
|
||||||
|
|
||||||
|
char 'A', 1
|
||||||
|
char 'B', 2
|
||||||
|
char 'c{s}e', 3
|
||||||
|
char '\'', 42
|
||||||
|
char '"', 1234
|
||||||
|
char '\n\r\t\0', 1337
|
||||||
|
char '\',",\\', 99
|
||||||
|
|
||||||
|
char charval("c{s}e", 0), 'c{s}e'
|
||||||
|
|
||||||
|
def v equs "\n\r\t\0"
|
||||||
|
def x = '{v}'
|
||||||
|
char x, '\n\r\t\0'
|
||||||
|
|
||||||
|
; errors
|
||||||
|
char '?', $3f ; ASCII
|
||||||
|
char 'F', 0
|
||||||
|
char 'ABF', 0
|
||||||
|
char '\n\r\t', 0
|
||||||
15
test/asm/character-literals.err
Normal file
15
test/asm/character-literals.err
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
warning: character-literals.asm(31) -> character-literals.asm::char(13): [-Wunmapped-char]
|
||||||
|
Unmapped character '?'
|
||||||
|
error: character-literals.asm(32) -> character-literals.asm::char(13):
|
||||||
|
Character literals must be a single charmap unit
|
||||||
|
error: character-literals.asm(33) -> character-literals.asm::char(13):
|
||||||
|
Character literals must be a single charmap unit
|
||||||
|
warning: character-literals.asm(34) -> character-literals.asm::char(13): [-Wunmapped-char]
|
||||||
|
Unmapped character '\n'
|
||||||
|
warning: character-literals.asm(34) -> character-literals.asm::char(13): [-Wunmapped-char]
|
||||||
|
Unmapped character '\r'
|
||||||
|
warning: character-literals.asm(34) -> character-literals.asm::char(13): [-Wunmapped-char]
|
||||||
|
Unmapped character '\t'
|
||||||
|
error: character-literals.asm(34) -> character-literals.asm::char(13):
|
||||||
|
Character literals must be a single charmap unit
|
||||||
|
Assembly aborted with 3 errors!
|
||||||
@@ -1,3 +1,3 @@
|
|||||||
assert 1 +# 1 == 2
|
assert 1 +# 1 == 2
|
||||||
assert 2 '?* 2 == 4
|
assert 2 ?<EFBFBD>* 2 == 4
|
||||||
assert 3 **?''?##?? 3 == 27
|
assert 3 **?<EFBFBD>?##?? 3 == 27
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
error: garbage_sequence.asm(1):
|
error: garbage_sequence.asm(1):
|
||||||
Unknown character '#'
|
Unknown character '#'
|
||||||
error: garbage_sequence.asm(2):
|
error: garbage_sequence.asm(2):
|
||||||
Unknown characters ''', '?'
|
Unknown characters '?', 0xFF
|
||||||
error: garbage_sequence.asm(3):
|
error: garbage_sequence.asm(3):
|
||||||
Unknown characters '?', ''', ''', '?'
|
Unknown characters '?', 0xFF, '?'
|
||||||
error: garbage_sequence.asm(3):
|
error: garbage_sequence.asm(3):
|
||||||
Unknown character '#'
|
Unknown character '#'
|
||||||
error: garbage_sequence.asm(3):
|
error: garbage_sequence.asm(3):
|
||||||
|
|||||||
Reference in New Issue
Block a user