Implement [[ fragment literals ]] (#1614)

This feature is referred to as "code/data literals" in ASMotor,
and simply as "literals" in some older assemblers like MIDAS
for the PDP-10. RGBASM already had the "section fragments"
feature for keeping disparate contents together when linked,
so these literals worked naturally as "fragment literals".
This commit is contained in:
Rangi
2025-07-09 12:13:01 -04:00
committed by GitHub
parent 5e43ece578
commit 41ab5dff5a
19 changed files with 343 additions and 13 deletions

View File

@@ -354,6 +354,7 @@ void LexerState::clear(uint32_t lineNo_) {
mode = LEXER_NORMAL;
atLineStart = true;
lastToken = T_(YYEOF);
nextToken = 0;
ifStack.clear();
@@ -1735,6 +1736,11 @@ static bool isGarbageCharacter(int c) {
}
static Token yylex_NORMAL() {
if (int nextToken = lexerState->nextToken; nextToken) {
lexerState->nextToken = 0;
return Token(nextToken);
}
for (;;) {
int c = nextChar();
@@ -1758,10 +1764,6 @@ static Token yylex_NORMAL() {
return Token(T_(SYMBOL), symName);
}
case '[':
return Token(T_(LBRACK));
case ']':
return Token(T_(RBRACK));
case '(':
return Token(T_(LPAREN));
case ')':
@@ -1771,6 +1773,23 @@ static Token yylex_NORMAL() {
// Handle ambiguous 1- or 2-char tokens
case '[': // Either [ or [[
if (peek() == '[') {
shiftChar();
return Token(T_(LBRACKS));
}
return Token(T_(LBRACK));
case ']': // Either ] or ]]
if (peek() == ']') {
shiftChar();
// `[[ Fragment literals ]]` inject an EOL token to end their contents
// even without a newline. Queue the `]]` token in `nextToken` so that the
// next call to `yylex_NORMAL` returns it right after this EOL.
lexerState->nextToken = T_(RBRACKS);
return Token(T_(EOL));
}
return Token(T_(RBRACK));
case '+': // Either +=, ADD, or CAT
switch (peek()) {
case '=':