Implement [[ fragment literals ]] (#1614)

This feature is referred to as "code/data literals" in ASMotor,
and simply as "literals" in some older assemblers like MIDAS
for the PDP-10. RGBASM already had the "section fragments"
feature for keeping disparate contents together when linked,
so these worked naturally as "fragment literals".
This commit is contained in:
Rangi
2025-07-09 12:13:01 -04:00
committed by GitHub
parent 5e43ece578
commit 41ab5dff5a
19 changed files with 343 additions and 13 deletions

View File

@@ -354,6 +354,7 @@ void LexerState::clear(uint32_t lineNo_) {
mode = LEXER_NORMAL;
atLineStart = true;
lastToken = T_(YYEOF);
nextToken = 0;
ifStack.clear();
@@ -1735,6 +1736,11 @@ static bool isGarbageCharacter(int c) {
}
static Token yylex_NORMAL() {
if (int nextToken = lexerState->nextToken; nextToken) {
lexerState->nextToken = 0;
return Token(nextToken);
}
for (;;) {
int c = nextChar();
@@ -1758,10 +1764,6 @@ static Token yylex_NORMAL() {
return Token(T_(SYMBOL), symName);
}
case '[':
return Token(T_(LBRACK));
case ']':
return Token(T_(RBRACK));
case '(':
return Token(T_(LPAREN));
case ')':
@@ -1771,6 +1773,23 @@ static Token yylex_NORMAL() {
// Handle ambiguous 1- or 2-char tokens
case '[': // Either [ or [[
if (peek() == '[') {
shiftChar();
return Token(T_(LBRACKS));
}
return Token(T_(LBRACK));
case ']': // Either ] or ]]
if (peek() == ']') {
shiftChar();
// `[[ Fragment literals ]]` inject an EOL token to end their contents
// even without a newline. Retroactively lex the `]]` after it.
lexerState->nextToken = T_(RBRACKS);
return Token(T_(EOL));
}
return Token(T_(RBRACK));
case '+': // Either +=, ADD, or CAT
switch (peek()) {
case '=':

View File

@@ -68,8 +68,12 @@ static uint32_t getSectIDIfAny(Section *sect) {
return UINT32_MAX;
}
if (auto search = sectionMap.find(sect->name); search != sectionMap.end()) {
return static_cast<uint32_t>(search->second);
// Search in `sectionList` instead of `sectionMap`, since section fragments share the
// same name but have different IDs
if (auto search =
std::find_if(RANGE(sectionList), [&sect](Section const &s) { return &s == sect; });
search != sectionList.end()) {
return static_cast<uint32_t>(std::distance(sectionList.begin(), search));
}
// Every section that exists should be in `sectionList`

View File

@@ -113,11 +113,13 @@
%token YYEOF 0 "end of file"
%token NEWLINE "end of line"
%token EOB "end of buffer"
// Not an actual line ending: the lexer emits EOL when it reads `]]`, right before the RBRACKS
// token, so that the contents of a fragment literal are terminated even without a newline
%token EOL "end of fragment literal"
// General punctuation
%token COMMA ","
%token COLON ":" DOUBLE_COLON "::"
%token LBRACK "[" RBRACK "]"
%token LBRACKS "[[" RBRACKS "]]"
%token LPAREN "(" RPAREN ")"
// Arithmetic operators
@@ -381,6 +383,8 @@
// `scoped_sym_no_anon` exists because anonymous labels usually count as "scoped symbols", but some
// contexts treat anonymous labels and other labels/symbols differently, e.g. `purge` or `export`.
%type <std::string> scoped_sym_no_anon
%type <std::string> fragment_literal
%type <std::string> fragment_literal_name
// SM83 instruction parameters
%type <int32_t> reg_r
@@ -455,7 +459,7 @@ line:
| line_directive // Directives that manage newlines themselves
;
endofline: NEWLINE | EOB;
endofline: NEWLINE | EOB | EOL;
// For "logistical" reasons, these directives must manage newlines themselves.
// This is because we need to switch the lexer's mode *after* the newline has been read,
@@ -1308,6 +1312,12 @@ constlist_16bit_entry:
}
);
}
| fragment_literal {
Expression expr;
expr.makeSymbol($1);
expr.checkNBit(16);
sect_RelWord(expr, 0);
}
;
constlist_32bit:
@@ -1358,6 +1368,23 @@ reloc_16bit:
$$ = std::move($1);
$$.checkNBit(16);
}
| fragment_literal {
$$.makeSymbol($1);
}
;
// A `[[ fragment literal ]]`: assembly source (`asm_file`) enclosed in double brackets.
// `fragment_literal_name` sits between the `[[` and the contents, so that its action (which
// switches to a new section fragment) is associated with the start of the literal.
// The rule's value is the symbol name produced by `fragment_literal_name`.
fragment_literal:
LBRACKS fragment_literal_name asm_file RBRACKS {
// Counterpart of the `sect_PushSection` done while starting the literal
// (presumably restores the section that was active before the `[[`)
sect_PopSection();
$$ = std::move($2);
}
;
// Empty rule that exists only for its action: it begins a new section fragment literal and
// yields the generated symbol name that identifies it.
fragment_literal_name:
%empty {
// Switches the current section to a new fragment, and returns a generated name for it
$$ = sect_PushSectionFragmentLiteral();
// Define a label with that name (presumably at the current position, i.e. in the new
// fragment), so that the literal's address can be referenced as a symbol
sym_AddLabel($$);
}
;
relocexpr:

View File

@@ -22,6 +22,8 @@
#include "asm/symbol.hpp"
#include "asm/warning.hpp"
using namespace std::literals;
uint8_t fillByte;
struct UnionStackEntry {
@@ -311,6 +313,32 @@ static Section *createSection(
return &sect;
}
// Create a new section fragment literal that takes its name, type, and bank from `parent`.
// The new section is appended to `sectionList`, but is deliberately NOT registered in
// `sectionMap`. Returns a pointer to the new section.
static Section *createSectionFragmentLiteral(Section const &parent) {
	// Append to the list only; the map is left untouched
	Section &fragment = sectionList.emplace_back();

	// The parent is expected to be registered under its own name
	assume(sectionMap.find(parent.name) != sectionMap.end());

	// What the fragment has in common with its parent
	fragment.name = parent.name;
	fragment.type = parent.type;
	fragment.modifier = SECTION_FRAGMENT;

	// Where the fragment literal appears in the source
	fragment.src = fstk_GetFileStack();
	fragment.fileLine = lexer_GetLineNo();

	// The fragment starts out empty, with no alignment
	// (`UINT32_MAX` presumably marks the address as unspecified)
	fragment.size = 0;
	fragment.org = UINT32_MAX;
	fragment.align = 0;
	fragment.alignOfs = 0;

	// A parent bank of 0 is replaced with `UINT32_MAX`; any other bank is inherited as-is
	if (parent.bank == 0) {
		fragment.bank = UINT32_MAX;
	} else {
		fragment.bank = parent.bank;
	}

	out_RegisterNode(fragment.src);

	// Section fragment literals must be ROM sections.
	assume(sect_HasData(fragment.type));
	fragment.data.resize(sectionTypeInfo[fragment.type].size);

	return &fragment;
}
// Find a section by name and type. If it doesn't exist, create it.
static Section *getSection(
std::string const &name,
@@ -1067,3 +1095,40 @@ void sect_EndSection() {
currentSection = nullptr;
sym_ResetCurrentLabelScopes();
}
std::string sect_PushSectionFragmentLiteral() {
static uint64_t nextFragmentLiteralID = 0;
// Like `requireCodeSection` but fatal
if (!currentSection) {
fatal("Cannot output fragment literals outside of a SECTION");
}
if (!sect_HasData(currentSection->type)) {
fatal(
"Section '%s' cannot contain fragment literals (not ROM0 or ROMX)",
currentSection->name.c_str()
);
}
if (currentLoadSection) {
fatal("`LOAD` blocks cannot contain fragment literals");
}
if (currentSection->modifier == SECTION_UNION) {
fatal("`SECTION UNION` cannot contain fragment literals");
}
// A section containing a fragment literal has to become a fragment too
currentSection->modifier = SECTION_FRAGMENT;
Section *parent = currentSection;
sect_PushSection(); // Resets `currentSection`
Section *sect = createSectionFragmentLiteral(*parent);
changeSection();
curOffset = sect->size;
currentSection = sect;
// Return a symbol ID to use for the address of this section fragment
return "$"s + std::to_string(nextFragmentLiteralID++);
}