diff --git a/man/rgbasm.5 b/man/rgbasm.5 index cff8204b..ab0f01c5 100644 --- a/man/rgbasm.5 +++ b/man/rgbasm.5 @@ -280,7 +280,7 @@ There are a number of numeric formats. .It Binary Ta Li % , 0b , 0B Ta 01 .It Fixed-point Ta none Ta 01234.56789 .It Precise fixed-point Ta none Ta 12.34q8 -.It Character constant Ta none Ta \(dqABYZ\(dq +.It Character constant Ta none Ta 'ABYZ' .It Game Boy graphics Ta Li \` Ta 0123 .El .Pp @@ -293,11 +293,14 @@ or The "character constant" form yields the value the character maps to in the current charmap. For example, by default .Pq refer to Xr ascii 7 -.Sq \(dqA\(dq +.Sq 'A' yields 65. +A character constant must represent a single value, so it cannot include multiple characters, or characters which map to multiple values. See .Sx Character maps -for information on charmaps. +for information on charmaps, and +.Sx String expressions +for information on escape characters allowed in character constants. .Pp The last one, Game Boy graphics, is quite interesting and useful. After the backtick, 8 digits between 0 and 3 are expected, corresponding to pixel values. 
@@ -538,7 +541,8 @@ There are a number of escape sequences you can use within a string: .Bl -column -offset indent "Sequence" .It Sy Sequence Ta Sy Meaning .It Ql \e\e Ta Backslash Pq escapes the escape character itself -.It Ql \e" Ta Double quote Pq does not terminate the string +.It Ql \e" Ta Double quote Pq does not terminate a string +.It Ql \e' Ta Single quote Pq does not terminate a character literal .It Ql \e{ Ta Open curly brace Pq does not start interpolation .It Ql \e} Ta Close curly brace Pq does not end interpolation .It Ql \en Ta Newline Pq ASCII $0A diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index 67d30d5a..dff28c81 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -1428,6 +1428,7 @@ static void appendExpandedString(std::string &str, std::string const &expanded) break; case '\\': case '"': + case '\'': case '{': str += '\\'; [[fallthrough]]; @@ -1448,6 +1449,7 @@ static void appendCharInLiteral(std::string &str, int c) { // Character escape case '\\': case '"': + case '\'': case '{': case '}': if (rawMode) { @@ -1518,7 +1520,7 @@ static void appendCharInLiteral(std::string &str, int c) { break; case '{': // Symbol interpolation - // We'll be exiting the string scope, so re-enable expansions + // We'll be exiting the string/character scope, so re-enable expansions // (Not interpolations, since they're handled by the function itself...) 
lexerState->disableMacroArgs = false; if (auto interpolation = readInterpolation(0); interpolation) { @@ -1614,6 +1616,42 @@ static void readString(std::string &str, bool rawString) { } } +static void readCharacter(std::string &str) { + // This is essentially a simplified `readString` + Defer reenableExpansions = scopedDisableExpansions(); + + bool rawMode = lexerState->mode == LEXER_RAW; + + // We reach this function after reading a single quote + if (rawMode) { + str += '\''; + } + + for (;;) { + int c = peek(); + + // '\r', '\n' or EOF ends a character early + if (c == EOF || c == '\r' || c == '\n') { + error("Unterminated character"); + return; + } + + // We'll be staying in the character, so we can safely consume the char + shiftChar(); + + // Close the character and return if it's terminated + if (c == '\'') { + if (rawMode) { + str += c; + } + return; + } + + // Append the character or handle special ones + appendCharInLiteral(str, c); + } +} + // Lexer core static Token yylex_SKIP_TO_ENDC(); // forward declaration for yylex_NORMAL @@ -1896,7 +1934,7 @@ static Token yylex_NORMAL() { case '`': // Gfx constant return Token(T_(NUMBER), readGfxConstant()); - // Handle strings + // Handle string and character literals case '"': { std::string str; @@ -1904,6 +1942,12 @@ static Token yylex_NORMAL() { return Token(T_(STRING), str); } + case '\'': { + std::string chr; + readCharacter(chr); + return Token(T_(CHARACTER), chr); + } + // Handle newlines and EOF case '\r': @@ -2036,6 +2080,11 @@ static Token yylex_RAW() { readString(str, false); break; + case '\'': // Character literals inside macro args + shiftChar(); + readCharacter(str); + break; + case '#': // Raw string literals inside macro args str += c; shiftChar(); @@ -2093,6 +2142,7 @@ backslash: case ')': case '\\': // Escapes shared with string literals case '"': + case '\'': case '{': case '}': break; diff --git a/src/asm/parser.y b/src/asm/parser.y index 0220acb3..400f8694 100644 --- a/src/asm/parser.y +++ 
b/src/asm/parser.y @@ -339,6 +339,7 @@ // Literals %token NUMBER "number" %token STRING "string" +%token CHARACTER "character" %token SYMBOL "symbol" %token LABEL "label" %token LOCAL "local label" @@ -1415,6 +1416,15 @@ relocexpr_no_str: NUMBER { $$.makeNumber($1); } + | CHARACTER { + std::vector<int32_t> output = charmap_Convert($1); + if (output.size() == 1) { + $$.makeNumber(static_cast<uint32_t>(output[0])); + } else { + ::error("Character literals must be a single charmap unit"); + $$.makeNumber(0); + } + } | OP_LOGICNOT relocexpr %prec NEG { $$.makeUnaryOp(RPN_LOGNOT, std::move($2)); } diff --git a/src/link/script.y b/src/link/script.y index 3f97af3b..82a282f0 100644 --- a/src/link/script.y +++ b/src/link/script.y @@ -293,7 +293,7 @@ yy::parser::symbol_type yylex() { c = '\r'; } else if (c == 't') { c = '\t'; - } else if (c != '\\' && c != '"') { + } else if (c != '\\' && c != '"' && c != '\'') { scriptError(context, "Cannot escape character %s", printChar(c)); } context.file.sbumpc(); diff --git a/test/asm/character-literals.asm b/test/asm/character-literals.asm new file mode 100644 index 00000000..f50f146f --- /dev/null +++ b/test/asm/character-literals.asm @@ -0,0 +1,34 @@ +def s equs "d" + +charmap "A", 1 +charmap "B", 2 +charmap "c{s}e", 3 +charmap "F", 4, 5, 6 +charmap "'", 42 +charmap "\"", 1234 +charmap "\n\r\t\0", 1337 +charmap "',\",\\", 99 + +MACRO char + assert (\1) == (\2) +ENDM + +char 'A', 1 +char 'B', 2 +char 'c{s}e', 3 +char '\'', 42 +char '"', 1234 +char '\n\r\t\0', 1337 +char '\',",\\', 99 + +char charval("c{s}e", 0), 'c{s}e' + +def v equs "\n\r\t\0" +def x = '{v}' +char x, '\n\r\t\0' + +; errors +char '?', $3f ; ASCII +char 'F', 0 +char 'ABF', 0 +char '\n\r\t', 0 diff --git a/test/asm/character-literals.err b/test/asm/character-literals.err new file mode 100644 index 00000000..b2eaa6f9 --- /dev/null +++ b/test/asm/character-literals.err @@ -0,0 +1,15 @@ +warning: character-literals.asm(31) -> character-literals.asm::char(13): [-Wunmapped-char] + Unmapped 
character '?' +error: character-literals.asm(32) -> character-literals.asm::char(13): + Character literals must be a single charmap unit +error: character-literals.asm(33) -> character-literals.asm::char(13): + Character literals must be a single charmap unit +warning: character-literals.asm(34) -> character-literals.asm::char(13): [-Wunmapped-char] + Unmapped character '\n' +warning: character-literals.asm(34) -> character-literals.asm::char(13): [-Wunmapped-char] + Unmapped character '\r' +warning: character-literals.asm(34) -> character-literals.asm::char(13): [-Wunmapped-char] + Unmapped character '\t' +error: character-literals.asm(34) -> character-literals.asm::char(13): + Character literals must be a single charmap unit +Assembly aborted with 3 errors! diff --git a/test/asm/garbage_sequence.asm b/test/asm/garbage_sequence.asm index d9be77da..c88ecd4a 100644 --- a/test/asm/garbage_sequence.asm +++ b/test/asm/garbage_sequence.asm @@ -1,3 +1,3 @@ assert 1 +# 1 == 2 -assert 2 '?* 2 == 4 -assert 3 **?''?##?? 3 == 27 +assert 2 ?ÿ* 2 == 4 +assert 3 **?ÿ?##?? 3 == 27 diff --git a/test/asm/garbage_sequence.err b/test/asm/garbage_sequence.err index 69abd8c1..353c0cde 100644 --- a/test/asm/garbage_sequence.err +++ b/test/asm/garbage_sequence.err @@ -1,9 +1,9 @@ error: garbage_sequence.asm(1): Unknown character '#' error: garbage_sequence.asm(2): - Unknown characters ''', '?' + Unknown characters '?', 0xFF error: garbage_sequence.asm(3): - Unknown characters '?', ''', ''', '?' + Unknown characters '?', 0xFF, '?' error: garbage_sequence.asm(3): Unknown character '#' error: garbage_sequence.asm(3):