From 41ab5dff5a2996117c2798af2687ff75b482a429 Mon Sep 17 00:00:00 2001 From: Rangi <35663410+Rangi42@users.noreply.github.com> Date: Wed, 9 Jul 2025 12:13:01 -0400 Subject: [PATCH] Implement `[[ fragment literals ]]` (#1614) This feature is referred to as "code/data literals" in ASMotor, and simply as "literals" in some older assemblers like MIDAS for the PDP-10. RGBASM already had the "section fragments" feature for keeping disparate contents together when linked, so these worked naturally as "fragment literals". --- include/asm/lexer.hpp | 1 + include/asm/section.hpp | 2 + man/rgbasm.5 | 114 +++++++++++++++++++ src/asm/lexer.cpp | 27 ++++- src/asm/output.cpp | 8 +- src/asm/parser.y | 29 ++++- src/asm/section.cpp | 65 +++++++++++ src/util.cpp | 3 +- test/asm/code-after-endm-endr-endc.err | 8 +- test/asm/fragment-literal-in-load.asm | 14 +++ test/asm/fragment-literal-in-load.err | 2 + test/asm/fragment-literal-in-ram.asm | 9 ++ test/asm/fragment-literal-in-ram.err | 2 + test/asm/fragment-literal-in-ram.out | 1 + test/asm/fragment-literal-in-union.asm | 5 + test/asm/fragment-literal-in-union.err | 2 + test/asm/fragment-literals.asm | 62 ++++++++++ test/asm/fragment-literals.out.bin | Bin 0 -> 89 bytes test/asm/syntax-error-after-syntax-error.err | 2 +- 19 files changed, 343 insertions(+), 13 deletions(-) create mode 100644 test/asm/fragment-literal-in-load.asm create mode 100644 test/asm/fragment-literal-in-load.err create mode 100644 test/asm/fragment-literal-in-ram.asm create mode 100644 test/asm/fragment-literal-in-ram.err create mode 100644 test/asm/fragment-literal-in-ram.out create mode 100644 test/asm/fragment-literal-in-union.asm create mode 100644 test/asm/fragment-literal-in-union.err create mode 100644 test/asm/fragment-literals.asm create mode 100644 test/asm/fragment-literals.out.bin diff --git a/include/asm/lexer.hpp b/include/asm/lexer.hpp index 8dfdeba8..a55532d3 100644 --- a/include/asm/lexer.hpp +++ b/include/asm/lexer.hpp @@ -84,6 +84,7 @@ struct 
LexerState { bool atLineStart; uint32_t lineNo; int lastToken; + int nextToken; std::deque ifStack; diff --git a/include/asm/section.hpp b/include/asm/section.hpp index 9df672da..9ef80efc 100644 --- a/include/asm/section.hpp +++ b/include/asm/section.hpp @@ -104,4 +104,6 @@ void sect_PushSection(); void sect_PopSection(); void sect_CheckStack(); +std::string sect_PushSectionFragmentLiteral(); + #endif // RGBDS_ASM_SECTION_HPP diff --git a/man/rgbasm.5 b/man/rgbasm.5 index 9f991701..967bb33e 100644 --- a/man/rgbasm.5 +++ b/man/rgbasm.5 @@ -1112,6 +1112,120 @@ first, followed by the one from and the one from .Ql bar.o last. +.Ss Fragment literals +Fragment literals are useful for short blocks of code or data that are only referenced once. +They are section fragments created by surrounding instructions or directives with +.Ql [[ +double brackets +.Ql ]] , +without a separate +.Ic SECTION FRAGMENT +declaration. +.Pp +The content of a fragment literal becomes a +.Ic SECTION FRAGMENT , +sharing the same name and bank as its parent ROM section, but without any other constraints. +The parent section also becomes a +.Ic FRAGMENT +if it was not one already, so that it can be merged with its fragment literals. +RGBLINK merges the fragments in no particular order. +.Pp +A fragment literal can take the place of any 16-bit integer constant +.Ql n16 +from the +.Xr gbz80 7 +documentation, as well as a +.Ic DW +item. +The fragment literal then evaluates to its starting address. +For example, you can +.Ic CALL +or +.Ic JP +to a fragment literal. 
+.Pp +This code using named labels: +.Bd -literal -offset indent +DataTable: + dw First + dw Second + dw Third +First: db 1 +Second: db 4 +Third: db 9 +Routine: + push hl + ld hl, Left + jr z, .got_it + ld hl, Right +\&.got_it + call .print + pop hl + ret +\&.print: + ld de, $1003 + ld bc, STARTOF(VRAM) + jp Print +Left: db "left\e0" +Right: db "right\e0" +.Ed +.Pp +is equivalent to this code using fragment literals: +.Bd -literal -offset indent +DataTable: + dw [[ db 1 ]] + dw [[ db 4 ]] + dw [[ db 9 ]] +Routine: + push hl + ld hl, [[ db "left\e0" ]] + jr z, .got_it + ld hl, [[ db "right\e0" ]] +\&.got_it + call [[ + ld de, $1003 + ld bc, STARTOF(VRAM) + jp Print + ]] + pop hl + ret +.Ed +.Pp +The difference is that the example using fragment literals does not declare a particular order for its pieces. +.Pp +Fragment literals can be arbitrarily nested, so extreme use cases are +.Em technically +possible. +This code using named labels: +.Bd -literal -offset indent +dw FortyTwo +FortyTwo: + call Sub1 + jr Sub2 +Sub1: + ld a, [Twenty] + ret +Twenty: db 20 +Sub2: + jp Sub3 +Sub3: + call Sub1 + inc a + add a + ret +.Ed +.Pp +is equivalent to this code using fragment literals: +.Bd -literal -offset indent +dw [[ + call [[ + Sub1: ld a, [ [[db 20]] ] :: ret + ]] + jr [[ + jp [[ call Sub1 :: inc a :: add a :: ret ]] + ]] +]] +.Ed .Sh SYMBOLS RGBDS supports several types of symbols: .Bl -hang diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index d66cb544..a4c4b25c 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -354,6 +354,7 @@ void LexerState::clear(uint32_t lineNo_) { mode = LEXER_NORMAL; atLineStart = true; lastToken = T_(YYEOF); + nextToken = 0; ifStack.clear(); @@ -1735,6 +1736,11 @@ static bool isGarbageCharacter(int c) { } static Token yylex_NORMAL() { + if (int nextToken = lexerState->nextToken; nextToken) { + lexerState->nextToken = 0; + return Token(nextToken); + } + for (;;) { int c = nextChar(); @@ -1758,10 +1764,6 @@ static Token yylex_NORMAL() { 
return Token(T_(SYMBOL), symName); } - case '[': - return Token(T_(LBRACK)); - case ']': - return Token(T_(RBRACK)); case '(': return Token(T_(LPAREN)); case ')': @@ -1771,6 +1773,23 @@ static Token yylex_NORMAL() { // Handle ambiguous 1- or 2-char tokens + case '[': // Either [ or [[ + if (peek() == '[') { + shiftChar(); + return Token(T_(LBRACKS)); + } + return Token(T_(LBRACK)); + + case ']': // Either ] or ]] + if (peek() == ']') { + shiftChar(); + // `[[ Fragment literals ]]` inject an EOL token to end their contents + // even without a newline. Retroactively lex the `]]` after it. + lexerState->nextToken = T_(RBRACKS); + return Token(T_(EOL)); + } + return Token(T_(RBRACK)); + case '+': // Either +=, ADD, or CAT switch (peek()) { case '=': diff --git a/src/asm/output.cpp b/src/asm/output.cpp index e5a6b197..72a919c9 100644 --- a/src/asm/output.cpp +++ b/src/asm/output.cpp @@ -68,8 +68,12 @@ static uint32_t getSectIDIfAny(Section *sect) { return UINT32_MAX; } - if (auto search = sectionMap.find(sect->name); search != sectionMap.end()) { - return static_cast<uint32_t>(search->second); + // Search in `sectionList` instead of `sectionMap`, since section fragments share the + // same name but have different IDs + if (auto search = + std::find_if(RANGE(sectionList), [&sect](Section const &s) { return &s == sect; }); + search != sectionList.end()) { + return static_cast<uint32_t>(std::distance(sectionList.begin(), search)); } // Every section that exists should be in `sectionMap` diff --git a/src/asm/parser.y b/src/asm/parser.y index 71da6665..e0e8e2c7 100644 --- a/src/asm/parser.y +++ b/src/asm/parser.y @@ -113,11 +113,13 @@ %token YYEOF 0 "end of file" %token NEWLINE "end of line" %token EOB "end of buffer" +%token EOL "end of fragment literal" // General punctuation %token COMMA "," %token COLON ":" DOUBLE_COLON "::" %token LBRACK "[" RBRACK "]" +%token LBRACKS "[[" RBRACKS "]]" %token LPAREN "(" RPAREN ")" // Arithmetic operators @@ -381,6 +383,8 @@ // `scoped_sym_no_anon` exists 
because anonymous labels usually count as "scoped symbols", but some // contexts treat anonymous labels and other labels/symbols differently, e.g. `purge` or `export`. %type scoped_sym_no_anon +%type <std::string> fragment_literal +%type <std::string> fragment_literal_name // SM83 instruction parameters %type reg_r @@ -455,7 +459,7 @@ line: | line_directive // Directives that manage newlines themselves ; -endofline: NEWLINE | EOB; +endofline: NEWLINE | EOB | EOL; // For "logistical" reasons, these directives must manage newlines themselves. // This is because we need to switch the lexer's mode *after* the newline has been read, @@ -1308,6 +1312,12 @@ constlist_16bit_entry: } ); } + | fragment_literal { + Expression expr; + expr.makeSymbol($1); + expr.checkNBit(16); + sect_RelWord(expr, 0); + } ; constlist_32bit: @@ -1358,6 +1368,23 @@ reloc_16bit: $$ = std::move($1); $$.checkNBit(16); } + | fragment_literal { + $$.makeSymbol($1); + } +; + +fragment_literal: + LBRACKS fragment_literal_name asm_file RBRACKS { + sect_PopSection(); + $$ = std::move($2); + } +; + +fragment_literal_name: + %empty { + $$ = sect_PushSectionFragmentLiteral(); + sym_AddLabel($$); + } ; relocexpr: diff --git a/src/asm/section.cpp b/src/asm/section.cpp index ce301113..4a9b9b5f 100644 --- a/src/asm/section.cpp +++ b/src/asm/section.cpp @@ -22,6 +22,8 @@ #include "asm/symbol.hpp" #include "asm/warning.hpp" +using namespace std::literals; + uint8_t fillByte; struct UnionStackEntry { @@ -311,6 +313,32 @@ static Section *createSection( return &sect; } +// Create a new section fragment literal, added to the list but not to the map. 
+static Section *createSectionFragmentLiteral(Section const &parent) { + // Add the new section to the list, but do not update the map + Section &sect = sectionList.emplace_back(); + assume(sectionMap.find(parent.name) != sectionMap.end()); + + sect.name = parent.name; + sect.type = parent.type; + sect.modifier = SECTION_FRAGMENT; + sect.src = fstk_GetFileStack(); + sect.fileLine = lexer_GetLineNo(); + sect.size = 0; + sect.org = UINT32_MAX; + sect.bank = parent.bank == 0 ? UINT32_MAX : parent.bank; + sect.align = 0; + sect.alignOfs = 0; + + out_RegisterNode(sect.src); + + // Section fragment literals must be ROM sections. + assume(sect_HasData(sect.type)); + sect.data.resize(sectionTypeInfo[sect.type].size); + + return &sect; +} + // Find a section by name and type. If it doesn't exist, create it. static Section *getSection( std::string const &name, @@ -1067,3 +1095,40 @@ void sect_EndSection() { currentSection = nullptr; sym_ResetCurrentLabelScopes(); } + +std::string sect_PushSectionFragmentLiteral() { + static uint64_t nextFragmentLiteralID = 0; + + // Like `requireCodeSection` but fatal + if (!currentSection) { + fatal("Cannot output fragment literals outside of a SECTION"); + } + if (!sect_HasData(currentSection->type)) { + fatal( + "Section '%s' cannot contain fragment literals (not ROM0 or ROMX)", + currentSection->name.c_str() + ); + } + + if (currentLoadSection) { + fatal("`LOAD` blocks cannot contain fragment literals"); + } + if (currentSection->modifier == SECTION_UNION) { + fatal("`SECTION UNION` cannot contain fragment literals"); + } + + // A section containing a fragment literal has to become a fragment too + currentSection->modifier = SECTION_FRAGMENT; + + Section *parent = currentSection; + sect_PushSection(); // Resets `currentSection` + + Section *sect = createSectionFragmentLiteral(*parent); + + changeSection(); + curOffset = sect->size; + currentSection = sect; + + // Return a symbol ID to use for the address of this section fragment + return "$"s + 
std::to_string(nextFragmentLiteralID++); +} diff --git a/src/util.cpp b/src/util.cpp index 6c525b54..c4127592 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -7,7 +7,8 @@ #include bool startsIdentifier(int c) { - // This returns false for anonymous labels, which internally start with a '!' + // This returns false for anonymous labels, which internally start with a '!', + // and for section fragment literal labels, which internally start with a '$'. return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '.' || c == '_'; } diff --git a/test/asm/code-after-endm-endr-endc.err b/test/asm/code-after-endm-endr-endc.err index 41a6fdad..e69c1c2b 100644 --- a/test/asm/code-after-endm-endr-endc.err +++ b/test/asm/code-after-endm-endr-endc.err @@ -1,15 +1,15 @@ error: code-after-endm-endr-endc.asm(6): - syntax error, unexpected PRINTLN, expecting end of line or end of buffer + syntax error, unexpected PRINTLN, expecting end of line or end of buffer or end of fragment literal error: code-after-endm-endr-endc.asm(7): Macro "mac" not defined error: code-after-endm-endr-endc.asm(12): - syntax error, unexpected PRINTLN, expecting end of line or end of buffer + syntax error, unexpected PRINTLN, expecting end of line or end of buffer or end of fragment literal error: code-after-endm-endr-endc.asm(17): syntax error, unexpected PRINTLN, expecting end of line error: code-after-endm-endr-endc.asm(19): - syntax error, unexpected PRINTLN, expecting end of line or end of buffer + syntax error, unexpected PRINTLN, expecting end of line or end of buffer or end of fragment literal error: code-after-endm-endr-endc.asm(23): syntax error, unexpected PRINTLN, expecting end of line error: code-after-endm-endr-endc.asm(25): - syntax error, unexpected PRINTLN, expecting end of line or end of buffer + syntax error, unexpected PRINTLN, expecting end of line or end of buffer or end of fragment literal Assembly aborted with 7 errors! 
diff --git a/test/asm/fragment-literal-in-load.asm b/test/asm/fragment-literal-in-load.asm new file mode 100644 index 00000000..87768688 --- /dev/null +++ b/test/asm/fragment-literal-in-load.asm @@ -0,0 +1,14 @@ +SECTION "OAMDMACode", ROM0 +OAMDMACode: +LOAD "hOAMDMA", HRAM +hOAMDMA:: + ldh [$ff46], a + ld a, 40 + jp [[ +: dec a + jr nz, :- + ret + ]] +.end +ENDL +OAMDMACodeEnd: diff --git a/test/asm/fragment-literal-in-load.err b/test/asm/fragment-literal-in-load.err new file mode 100644 index 00000000..c103d4b2 --- /dev/null +++ b/test/asm/fragment-literal-in-load.err @@ -0,0 +1,2 @@ +FATAL: fragment-literal-in-load.asm(7): + `LOAD` blocks cannot contain fragment literals diff --git a/test/asm/fragment-literal-in-ram.asm b/test/asm/fragment-literal-in-ram.asm new file mode 100644 index 00000000..4b36a16a --- /dev/null +++ b/test/asm/fragment-literal-in-ram.asm @@ -0,0 +1,9 @@ +SECTION "RAM", WRAM0 + +wFoo:: db +wBar:: ds 3 + println "ok" +wQux:: dw [[ + ds 4 + println "inline" +]] diff --git a/test/asm/fragment-literal-in-ram.err b/test/asm/fragment-literal-in-ram.err new file mode 100644 index 00000000..e4ddc2f1 --- /dev/null +++ b/test/asm/fragment-literal-in-ram.err @@ -0,0 +1,2 @@ +FATAL: fragment-literal-in-ram.asm(6): + Section 'RAM' cannot contain fragment literals (not ROM0 or ROMX) diff --git a/test/asm/fragment-literal-in-ram.out b/test/asm/fragment-literal-in-ram.out new file mode 100644 index 00000000..9766475a --- /dev/null +++ b/test/asm/fragment-literal-in-ram.out @@ -0,0 +1 @@ +ok diff --git a/test/asm/fragment-literal-in-union.asm b/test/asm/fragment-literal-in-union.asm new file mode 100644 index 00000000..9f2e75cf --- /dev/null +++ b/test/asm/fragment-literal-in-union.asm @@ -0,0 +1,5 @@ +SECTION UNION "U", ROM0 + db $11 + dw [[ db $22 ]] +SECTION UNION "U", ROM0 + db $33 diff --git a/test/asm/fragment-literal-in-union.err b/test/asm/fragment-literal-in-union.err new file mode 100644 index 00000000..c368c377 --- /dev/null +++ 
b/test/asm/fragment-literal-in-union.err @@ -0,0 +1,2 @@ +FATAL: fragment-literal-in-union.asm(3): + `SECTION UNION` cannot contain fragment literals diff --git a/test/asm/fragment-literals.asm b/test/asm/fragment-literals.asm new file mode 100644 index 00000000..c9ab16b3 --- /dev/null +++ b/test/asm/fragment-literals.asm @@ -0,0 +1,62 @@ +SECTION "1", ROM0[0] + +DEF VERSION EQU $11 +GetVersion:: + ld a, [ [[db VERSION]] ] + ret + +SECTION "2", ROM0, ALIGN[4] + +MACRO text + db \1, 0 +ENDM + +MACRO text_pointer + dw [[ + text \1 + ]] +ENDM + +GetText:: + ld hl, [[ + dw [[ db "Alpha", 0 ]] + dw [[ + text "Beta" + ]] + text_pointer "Gamma" + dw 0 + ]] + ld c, a + ld b, 0 + add hl, bc + add hl, bc + ld a, [hli] + ld h, [hl] + ld l, a + ret + +SECTION "C", ROM0 + +Foo:: + call [[ jp [[ jp [[ ret ]] ]] ]] + call [[ +Label:: + call GetVersion + DEF MYTEXT EQU 3 + ld a, MYTEXT + call GetText + ld b, h + ld c, l + ret + ]] + jp [[ +Bar: + inc hl +.loop + nop +: dec l + jr nz, :- + dec h + jr nz, .loop + ret + ]] diff --git a/test/asm/fragment-literals.out.bin b/test/asm/fragment-literals.out.bin new file mode 100644 index 0000000000000000000000000000000000000000..b39e217402583a7cc2dcf19ca2084467ae32cf32 GIT binary patch literal 89 zcmeyx!f;ZM0Ra?c82s57I61Y_@=vNTXffymr5tk#G7=e_QcDsU+!J$i6B*9hF`RW} kI2^!m*pcC|3&Y8?3=DS6X9XBsd`~Jf=qmhGRrq-l05akh!~g&Q literal 0 HcmV?d00001 diff --git a/test/asm/syntax-error-after-syntax-error.err b/test/asm/syntax-error-after-syntax-error.err index 0961e78e..5b0d18a7 100644 --- a/test/asm/syntax-error-after-syntax-error.err +++ b/test/asm/syntax-error-after-syntax-error.err @@ -7,5 +7,5 @@ error: syntax-error-after-syntax-error.asm(6): error: syntax-error-after-syntax-error.asm(9): syntax error, unexpected : error: syntax-error-after-syntax-error.asm(10): - syntax error, unexpected stop, expecting end of line or end of buffer or :: + syntax error, unexpected stop, expecting end of line or end of buffer or end of fragment literal or :: Assembly 
aborted with 5 errors!