Implement [[ fragment literals ]] (#1614)

This feature is referred to as "code/data literals" in ASMotor,
and simply as "literals" in some older assemblers like MIDAS
for the PDP-10. RGBASM already had the "section fragments"
feature for keeping disparate contents together when linked,
so these worked naturally as "fragment literals".
This commit is contained in:
Rangi
2025-07-09 12:13:01 -04:00
committed by GitHub
parent 5e43ece578
commit 41ab5dff5a
19 changed files with 343 additions and 13 deletions

View File

@@ -84,6 +84,7 @@ struct LexerState {
bool atLineStart;
uint32_t lineNo;
int lastToken;
int nextToken;
std::deque<IfStackEntry> ifStack;

View File

@@ -104,4 +104,6 @@ void sect_PushSection();
void sect_PopSection();
void sect_CheckStack();
// Begins a `[[ fragment literal ]]` inside the current section: the current section state is
// pushed, a new section fragment becomes current, and the returned string is the internal
// symbol name to use for that fragment's address. Pair with `sect_PopSection()`.
std::string sect_PushSectionFragmentLiteral();
#endif // RGBDS_ASM_SECTION_HPP

View File

@@ -1112,6 +1112,120 @@ first, followed by the one from
and the one from
.Ql bar.o
last.
.Ss Fragment literals
Fragment literals are useful for short blocks of code or data that are only referenced once.
They are section fragments created by surrounding instructions or directives with
.Ql [[
double brackets
.Ql ]] ,
without a separate
.Ic SECTION FRAGMENT
declaration.
.Pp
The content of a fragment literal becomes a
.Ic SECTION FRAGMENT ,
sharing the same name and bank as its parent ROM section, but without any other constraints.
The parent section also becomes a
.Ic FRAGMENT
if it was not one already, so that it can be merged with its fragment literals.
RGBLINK merges the fragments in no particular order.
.Pp
A fragment literal can take the place of any 16-bit integer constant
.Ql n16
from the
.Xr gbz80 7
documentation, as well as a
.Ic DW
item.
The fragment literal then evaluates to its starting address.
For example, you can
.Ic CALL
or
.Ic JP
to a fragment literal.
.Pp
This code using named labels:
.Bd -literal -offset indent
DataTable:
dw First
dw Second
dw Third
First: db 1
Second: db 4
Third: db 9
Routine:
push hl
ld hl, Left
jr z, .got_it
ld hl, Right
\&.got_it
call .print
pop hl
ret
\&.print:
ld de, $1003
ld bc, STARTOF(VRAM)
jp Print
Left: db "left\e0"
Right: db "right\e0"
.Ed
.Pp
is equivalent to this code using fragment literals:
.Bd -literal -offset indent
DataTable:
dw [[ db 1 ]]
dw [[ db 4 ]]
dw [[ db 9 ]]
Routine:
push hl
ld hl, [[ db "left\e0" ]]
jr z, .got_it
ld hl, [[ db "right\e0" ]]
\&.got_it
call [[
ld de, $1003
ld bc, STARTOF(VRAM)
jp Print
]]
pop hl
ret
.Ed
.Pp
The difference is that the example using fragment literals does not declare a particular order for its pieces.
.Pp
Fragment literals can be arbitrarily nested, so extreme use cases are
.Em technically
possible.
This code using named labels:
.Bd -literal -offset indent
dw FortyTwo
FortyTwo:
call Sub1
jr Sub2
Sub1:
ld a, [Twenty]
ret
Twenty: db 20
Sub2:
jp Sub3
Sub3:
call Sub1
inc a
add a
ret
.Ed
.Pp
is equivalent to this code using fragment literals:
.Bd -literal -offset indent
dw [[
call [[
Sub1: ld a, [ [[db 20]] ] :: ret
]]
jr [[
jp [[ call Sub1 :: inc a :: add a :: ret ]]
]]
]]
.Ed
.Sh SYMBOLS
RGBDS supports several types of symbols:
.Bl -hang

View File

@@ -354,6 +354,7 @@ void LexerState::clear(uint32_t lineNo_) {
mode = LEXER_NORMAL;
atLineStart = true;
lastToken = T_(YYEOF);
nextToken = 0;
ifStack.clear();
@@ -1735,6 +1736,11 @@ static bool isGarbageCharacter(int c) {
}
static Token yylex_NORMAL() {
if (int nextToken = lexerState->nextToken; nextToken) {
lexerState->nextToken = 0;
return Token(nextToken);
}
for (;;) {
int c = nextChar();
@@ -1758,10 +1764,6 @@ static Token yylex_NORMAL() {
return Token(T_(SYMBOL), symName);
}
case '[':
return Token(T_(LBRACK));
case ']':
return Token(T_(RBRACK));
case '(':
return Token(T_(LPAREN));
case ')':
@@ -1771,6 +1773,23 @@ static Token yylex_NORMAL() {
// Handle ambiguous 1- or 2-char tokens
case '[': // Either [ or [[
if (peek() == '[') {
shiftChar();
return Token(T_(LBRACKS));
}
return Token(T_(LBRACK));
case ']': // Either ] or ]]
if (peek() == ']') {
shiftChar();
// `[[ Fragment literals ]]` inject an EOL token to end their contents
// even without a newline. Retroactively lex the `]]` after it.
lexerState->nextToken = T_(RBRACKS);
return Token(T_(EOL));
}
return Token(T_(RBRACK));
case '+': // Either +=, ADD, or CAT
switch (peek()) {
case '=':

View File

@@ -68,8 +68,12 @@ static uint32_t getSectIDIfAny(Section *sect) {
return UINT32_MAX;
}
if (auto search = sectionMap.find(sect->name); search != sectionMap.end()) {
return static_cast<uint32_t>(search->second);
// Search in `sectionList` instead of `sectionMap`, since section fragments share the
// same name but have different IDs
if (auto search =
std::find_if(RANGE(sectionList), [&sect](Section const &s) { return &s == sect; });
search != sectionList.end()) {
return static_cast<uint32_t>(std::distance(sectionList.begin(), search));
}
// Every section that exists should be in `sectionList`

View File

@@ -113,11 +113,13 @@
%token YYEOF 0 "end of file"
%token NEWLINE "end of line"
%token EOB "end of buffer"
%token EOL "end of fragment literal"
// General punctuation
%token COMMA ","
%token COLON ":" DOUBLE_COLON "::"
%token LBRACK "[" RBRACK "]"
%token LBRACKS "[[" RBRACKS "]]"
%token LPAREN "(" RPAREN ")"
// Arithmetic operators
@@ -381,6 +383,8 @@
// `scoped_sym_no_anon` exists because anonymous labels usually count as "scoped symbols", but some
// contexts treat anonymous labels and other labels/symbols differently, e.g. `purge` or `export`.
%type <std::string> scoped_sym_no_anon
%type <std::string> fragment_literal
%type <std::string> fragment_literal_name
// SM83 instruction parameters
%type <int32_t> reg_r
@@ -455,7 +459,7 @@ line:
| line_directive // Directives that manage newlines themselves
;
endofline: NEWLINE | EOB;
endofline: NEWLINE | EOB | EOL;
// For "logistical" reasons, these directives must manage newlines themselves.
// This is because we need to switch the lexer's mode *after* the newline has been read,
@@ -1308,6 +1312,12 @@ constlist_16bit_entry:
}
);
}
| fragment_literal {
Expression expr;
expr.makeSymbol($1);
expr.checkNBit(16);
sect_RelWord(expr, 0);
}
;
constlist_32bit:
@@ -1358,6 +1368,23 @@ reloc_16bit:
$$ = std::move($1);
$$.checkNBit(16);
}
| fragment_literal {
$$.makeSymbol($1);
}
;
// A `[[ ... ]]` fragment literal: arbitrary assembly (`asm_file`) enclosed in double brackets.
// `fragment_literal_name` is an empty rule placed before `asm_file`, so its action runs
// before the enclosed contents are parsed. This rule's value is the name it produced.
fragment_literal:
LBRACKS fragment_literal_name asm_file RBRACKS {
// Undo the section push performed when the literal was opened
sect_PopSection();
$$ = std::move($2);
}
;
// Empty rule used as the "opening" action of a fragment literal. It switches to a new
// section fragment and adds a label under the returned internal name (presumably at the
// fragment's start, since nothing has been emitted into it yet).
fragment_literal_name:
%empty {
$$ = sect_PushSectionFragmentLiteral();
sym_AddLabel($$);
}
;
relocexpr:

View File

@@ -22,6 +22,8 @@
#include "asm/symbol.hpp"
#include "asm/warning.hpp"
using namespace std::literals;
uint8_t fillByte;
struct UnionStackEntry {
@@ -311,6 +313,32 @@ static Section *createSection(
return &sect;
}
// Create the section fragment that backs a fragment literal of `parent`.
// The new section is appended to `sectionList`, but `sectionMap` is left untouched
// (the fragment reuses its parent's name).
static Section *createSectionFragmentLiteral(Section const &parent) {
	Section &literal = sectionList.emplace_back();

	// The parent is expected to already be registered under its name
	assume(sectionMap.find(parent.name) != sectionMap.end());

	// Same name and type as the parent, always as a fragment
	literal.name = parent.name;
	literal.type = parent.type;
	literal.modifier = SECTION_FRAGMENT;

	// Remember where the literal was opened
	literal.src = fstk_GetFileStack();
	literal.fileLine = lexer_GetLineNo();

	// Starts out empty; only a nonzero parent bank is carried over, the address and
	// alignment are not inherited from the parent
	literal.size = 0;
	literal.org = UINT32_MAX;
	if (parent.bank == 0) {
		literal.bank = UINT32_MAX;
	} else {
		literal.bank = parent.bank;
	}
	literal.align = 0;
	literal.alignOfs = 0;

	out_RegisterNode(literal.src);

	// Section fragment literals must be ROM sections.
	assume(sect_HasData(literal.type));
	literal.data.resize(sectionTypeInfo[literal.type].size);

	return &literal;
}
// Find a section by name and type. If it doesn't exist, create it.
static Section *getSection(
std::string const &name,
@@ -1067,3 +1095,40 @@ void sect_EndSection() {
currentSection = nullptr;
sym_ResetCurrentLabelScopes();
}
std::string sect_PushSectionFragmentLiteral() {
static uint64_t nextFragmentLiteralID = 0;
// Like `requireCodeSection` but fatal
if (!currentSection) {
fatal("Cannot output fragment literals outside of a SECTION");
}
if (!sect_HasData(currentSection->type)) {
fatal(
"Section '%s' cannot contain fragment literals (not ROM0 or ROMX)",
currentSection->name.c_str()
);
}
if (currentLoadSection) {
fatal("`LOAD` blocks cannot contain fragment literals");
}
if (currentSection->modifier == SECTION_UNION) {
fatal("`SECTION UNION` cannot contain fragment literals");
}
// A section containing a fragment literal has to become a fragment too
currentSection->modifier = SECTION_FRAGMENT;
Section *parent = currentSection;
sect_PushSection(); // Resets `currentSection`
Section *sect = createSectionFragmentLiteral(*parent);
changeSection();
curOffset = sect->size;
currentSection = sect;
// Return a symbol ID to use for the address of this section fragment
return "$"s + std::to_string(nextFragmentLiteralID++);
}

View File

@@ -7,7 +7,8 @@
#include <stdio.h>
bool startsIdentifier(int c) {
	// Anonymous labels (internally prefixed with '!') and section fragment literal labels
	// (internally prefixed with '$') intentionally fail this check.
	if (c == '.' || c == '_') {
		return true;
	}
	bool const isUpper = c >= 'A' && c <= 'Z';
	bool const isLower = c >= 'a' && c <= 'z';
	return isUpper || isLower;
}

View File

@@ -1,15 +1,15 @@
error: code-after-endm-endr-endc.asm(6):
syntax error, unexpected PRINTLN, expecting end of line or end of buffer
syntax error, unexpected PRINTLN, expecting end of line or end of buffer or end of fragment literal
error: code-after-endm-endr-endc.asm(7):
Macro "mac" not defined
error: code-after-endm-endr-endc.asm(12):
syntax error, unexpected PRINTLN, expecting end of line or end of buffer
syntax error, unexpected PRINTLN, expecting end of line or end of buffer or end of fragment literal
error: code-after-endm-endr-endc.asm(17):
syntax error, unexpected PRINTLN, expecting end of line
error: code-after-endm-endr-endc.asm(19):
syntax error, unexpected PRINTLN, expecting end of line or end of buffer
syntax error, unexpected PRINTLN, expecting end of line or end of buffer or end of fragment literal
error: code-after-endm-endr-endc.asm(23):
syntax error, unexpected PRINTLN, expecting end of line
error: code-after-endm-endr-endc.asm(25):
syntax error, unexpected PRINTLN, expecting end of line or end of buffer
syntax error, unexpected PRINTLN, expecting end of line or end of buffer or end of fragment literal
Assembly aborted with 7 errors!

View File

@@ -0,0 +1,14 @@
SECTION "OAMDMACode", ROM0
OAMDMACode:
LOAD "hOAMDMA", HRAM
hOAMDMA::
ldh [$ff46], a
ld a, 40
jp [[
: dec a
jr nz, :-
ret
]]
.end
ENDL
OAMDMACodeEnd:

View File

@@ -0,0 +1,2 @@
FATAL: fragment-literal-in-load.asm(7):
`LOAD` blocks cannot contain fragment literals

View File

@@ -0,0 +1,9 @@
SECTION "RAM", WRAM0
wFoo:: db
wBar:: ds 3
println "ok"
wQux:: dw [[
ds 4
println "inline"
]]

View File

@@ -0,0 +1,2 @@
FATAL: fragment-literal-in-ram.asm(6):
Section 'RAM' cannot contain fragment literals (not ROM0 or ROMX)

View File

@@ -0,0 +1 @@
ok

View File

@@ -0,0 +1,5 @@
SECTION UNION "U", ROM0
db $11
dw [[ db $22 ]]
SECTION UNION "U", ROM0
db $33

View File

@@ -0,0 +1,2 @@
FATAL: fragment-literal-in-union.asm(3):
`SECTION UNION` cannot contain fragment literals

View File

@@ -0,0 +1,62 @@
SECTION "1", ROM0[0]
DEF VERSION EQU $11
GetVersion::
ld a, [ [[db VERSION]] ]
ret
SECTION "2", ROM0, ALIGN[4]
MACRO text
db \1, 0
ENDM
MACRO text_pointer
dw [[
text \1
]]
ENDM
GetText::
ld hl, [[
dw [[ db "Alpha", 0 ]]
dw [[
text "Beta"
]]
text_pointer "Gamma"
dw 0
]]
ld c, a
ld b, 0
add hl, bc
add hl, bc
ld a, [hli]
ld h, [hl]
ld l, a
ret
SECTION "C", ROM0
Foo::
call [[ jp [[ jp [[ ret ]] ]] ]]
call [[
Label::
call GetVersion
DEF MYTEXT EQU 3
ld a, MYTEXT
call GetText
ld b, h
ld c, l
ret
]]
jp [[
Bar:
inc hl
.loop
nop
: dec l
jr nz, :-
dec h
jr nz, .loop
ret
]]

Binary file not shown.

View File

@@ -7,5 +7,5 @@ error: syntax-error-after-syntax-error.asm(6):
error: syntax-error-after-syntax-error.asm(9):
syntax error, unexpected :
error: syntax-error-after-syntax-error.asm(10):
syntax error, unexpected stop, expecting end of line or end of buffer or ::
syntax error, unexpected stop, expecting end of line or end of buffer or end of fragment literal or ::
Assembly aborted with 5 errors!