diff --git a/man/rgblink.5 b/man/rgblink.5 index da1ed26e..245dfc57 100644 --- a/man/rgblink.5 +++ b/man/rgblink.5 @@ -24,18 +24,20 @@ They are simply ignored. .Pp Keywords are composed of letters and digits (but they can't start with a digit); they are all case-insensitive. .Pp -Numbers can be written in decimal format, or in binary using the -.Ql % -prefix, or in hexadecimal using the -.Ql $ -prefix (hexadecimal digits are case-insensitive). -Note that unlike -.Xr rgbasm 5 , -an octal -.Ql & -prefix is not supported, nor are -.Ql _ -digit separators. +Numbers can be written in a number of formats. +.Bl -column -offset indent "Hexadecimal" "Possible prefixes" +.It Sy Format type Ta Sy Possible prefixes Ta Sy Accepted characters +.It Decimal Ta none Ta 0123456789 +.It Hexadecimal Ta Li $ , 0x , 0X Ta 0123456789ABCDEF +.It Octal Ta Li & , 0o , 0O Ta 01234567 +.It Binary Ta Li % , 0b , 0B Ta 01 +.El +.Pp +Underscores are also accepted in numbers, except at the beginning of one. +This can be useful for grouping digits, like +.Ql 1_234 +or +.Ql $ff_80 . .Pp Strings begin with a double quote, and end at the next (non-escaped) double quote. Strings must not contain literal newline characters. @@ -46,8 +48,9 @@ are supported, specifically .Ql \e" , .Ql \en , .Ql \er , +.Ql \et , and -.Ql \et . +.Ql \e0 . Other backslash escape sequences in .Xr rgbasm 5 are only relevant to assembly code and do not apply in linker scripts. diff --git a/src/link/script.y b/src/link/script.y index 6cd4ecca..6e43332d 100644 --- a/src/link/script.y +++ b/src/link/script.y @@ -17,9 +17,9 @@ #include #include #include + #include #include #include - #include #include #include #include @@ -209,18 +209,100 @@ static bool isNewline(int c) { return c == '\r' || c == '\n'; } +static yy::parser::symbol_type yywrap() { + if (lexerStack.size() != 1) { + if (!atEof) { + // Inject a newline at EOF to simplify parsing. + atEof = true; + return yy::parser::make_newline(); + } + lexerStack.pop_back(); + return yylex(); + } + if (!atEof) { + // Inject a newline at EOF to simplify parsing. + atEof = true; + return yy::parser::make_newline(); + } + return yy::parser::make_YYEOF(); +} + static bool isIdentChar(int c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'); } +static std::string readIdent(int c) { + auto &context = lexerStack.back(); + std::string ident; + ident.push_back(c); + for (c = context.file.sgetc(); isIdentChar(c); c = context.file.snextc()) { + ident.push_back(c); + } + return ident; +} + static bool isDecDigit(int c) { return c >= '0' && c <= '9'; } +static yy::parser::symbol_type parseDecNumber(int c) { + auto &context = lexerStack.back(); + uint32_t number = c - '0'; + for (c = context.file.sgetc(); isDecDigit(c) || c == '_'; c = context.file.sgetc()) { + if (c != '_') { + number = number * 10 + (c - '0'); + } + context.file.sbumpc(); + } + return yy::parser::make_number(number); +} + static bool isBinDigit(int c) { return c >= '0' && c <= '1'; } +static yy::parser::symbol_type parseBinNumber(char const *prefix) { + auto &context = lexerStack.back(); + auto c = context.file.sgetc(); + if (!isBinDigit(c)) { + scriptError(context, "No binary digits found after '%s'", prefix); + return yy::parser::make_number(0); + } + + uint32_t number = c - '0'; + context.file.sbumpc(); + for (c = context.file.sgetc(); isBinDigit(c) || c == '_'; c = context.file.sgetc()) { + if (c != '_') { + number = number * 2 + (c - '0'); + } + context.file.sbumpc(); + } + return yy::parser::make_number(number); +} + +static bool isOctDigit(int c) { + return c >= '0' && c <= '7'; +} + +static yy::parser::symbol_type parseOctNumber(char const *prefix) { + auto &context = lexerStack.back(); + auto c = context.file.sgetc(); + if (!isOctDigit(c)) { + scriptError(context, "No octal digits found after '%s'", prefix); + return yy::parser::make_number(0); + } + + uint32_t number = c - '0'; + context.file.sbumpc(); + for (c = context.file.sgetc(); isOctDigit(c) || c == '_'; c = context.file.sgetc()) { + if (c != '_') { + number = number * 8 + (c - '0'); + } + context.file.sbumpc(); + } + return yy::parser::make_number(number); +} + static bool isHexDigit(int c) { return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); } @@ -237,6 +319,88 @@ static uint8_t parseHexDigit(int c) { } } +static yy::parser::symbol_type parseHexNumber(char const *prefix) { + auto &context = lexerStack.back(); + auto c = context.file.sgetc(); + if (!isHexDigit(c)) { + scriptError(context, "No hexadecimal digits found after '%s'", prefix); + return yy::parser::make_number(0); + } + + uint32_t number = parseHexDigit(c); + context.file.sbumpc(); + for (c = context.file.sgetc(); isHexDigit(c) || c == '_'; c = context.file.sgetc()) { + if (c != '_') { + number = number * 16 + parseHexDigit(c); + } + context.file.sbumpc(); + } + return yy::parser::make_number(number); +} + +static yy::parser::symbol_type parseNumber(int c) { + auto &context = lexerStack.back(); + if (c == '0') { + switch (context.file.sgetc()) { + case 'x': + context.file.sbumpc(); + return parseHexNumber("0x"); + case 'X': + context.file.sbumpc(); + return parseHexNumber("0X"); + case 'o': + context.file.sbumpc(); + return parseOctNumber("0o"); + case 'O': + context.file.sbumpc(); + return parseOctNumber("0O"); + case 'b': + context.file.sbumpc(); + return parseBinNumber("0b"); + case 'B': + context.file.sbumpc(); + return parseBinNumber("0B"); + } + } + return parseDecNumber(c); +} + +static yy::parser::symbol_type parseString() { + auto &context = lexerStack.back(); + auto c = context.file.sgetc(); + std::string str; + for (; c != '"'; c = context.file.sgetc()) { + if (c == EOF || isNewline(c)) { + scriptError(context, "Unterminated string"); + break; + } + context.file.sbumpc(); + if (c == '\\') { + c = context.file.sgetc(); + if (c == EOF || isNewline(c)) { + scriptError(context, "Unterminated string"); + break; + } else if (c == 'n') { + c = '\n'; + } else if (c == 'r') { + c = '\r'; + } else if (c == 't') { + c = '\t'; + } else if (c == '0') { + c = '\0'; + } else if (c != '\\' && c != '"' && c != '\'') { + scriptError(context, "Cannot escape character %s", printChar(c)); + } + context.file.sbumpc(); + } + str.push_back(c); + } + if (c == '"') { + context.file.sbumpc(); + } + return yy::parser::make_string(std::move(str)); +} + yy::parser::symbol_type yylex() { auto &context = lexerStack.back(); auto c = context.file.sbumpc(); @@ -254,23 +418,7 @@ yy::parser::symbol_type yylex() { // Alright, what token should we return? if (c == EOF) { - // Basically yywrap(). - if (lexerStack.size() != 1) { - if (!atEof) { - // Inject a newline at EOF to simplify parsing. - atEof = true; - return yy::parser::make_newline(); - } else { - lexerStack.pop_back(); - return yylex(); - } - } else if (!atEof) { - // Inject a newline at EOF to simplify parsing. - atEof = true; - return yy::parser::make_newline(); - } else { - return yy::parser::make_YYEOF(); - } + return yywrap(); } else if (c == ',') { return yy::parser::make_COMMA(); } else if (isNewline(c)) { @@ -280,85 +428,21 @@ yy::parser::symbol_type yylex() { } return yy::parser::make_newline(); } else if (c == '"') { - std::string str; - - for (c = context.file.sgetc(); c != '"'; c = context.file.sgetc()) { - if (c == EOF || isNewline(c)) { - scriptError(context, "Unterminated string"); - break; - } - context.file.sbumpc(); - if (c == '\\') { - c = context.file.sgetc(); - if (c == EOF || isNewline(c)) { - scriptError(context, "Unterminated string"); - break; - } else if (c == 'n') { - c = '\n'; - } else if (c == 'r') { - c = '\r'; - } else if (c == 't') { - c = '\t'; - } else if (c != '\\' && c != '"' && c != '\'') { - scriptError(context, "Cannot escape character %s", printChar(c)); - } - context.file.sbumpc(); - } - str.push_back(c); - } - if (c == '"') { - context.file.sbumpc(); - } - - return yy::parser::make_string(std::move(str)); + return parseString(); } else if (c == '$') { - c = context.file.sgetc(); - if (!isHexDigit(c)) { - scriptError(context, "No hexadecimal digits found after '$'"); - return yy::parser::make_number(0); - } - - uint32_t number = parseHexDigit(c); - context.file.sbumpc(); - for (c = context.file.sgetc(); isHexDigit(c); c = context.file.sgetc()) { - number = number * 16 + parseHexDigit(c); - context.file.sbumpc(); - } - return yy::parser::make_number(number); + return parseHexNumber("$"); } else if (c == '%') { - c = context.file.sgetc(); - if (!isBinDigit(c)) { - scriptError(context, "No binary digits found after '%%'"); - return yy::parser::make_number(0); - } - - uint32_t number = c - '0'; - context.file.sbumpc(); - for (c = context.file.sgetc(); isBinDigit(c); c = context.file.sgetc()) { - number = number * 2 + (c - '0'); - context.file.sbumpc(); - } - return yy::parser::make_number(number); + return parseBinNumber("%"); + } else if (c == '&') { + return parseOctNumber("&"); } else if (isDecDigit(c)) { - uint32_t number = c - '0'; - for (c = context.file.sgetc(); isDecDigit(c); c = context.file.sgetc()) { - number = number * 10 + (c - '0'); - context.file.sbumpc(); - } - return yy::parser::make_number(number); + return parseNumber(c); } else if (isIdentChar(c)) { // Note that we match these *after* digit characters! - std::string ident; - auto strUpperCmp = [](char cmp, char ref) { - // `locale::classic()` yields the "C" locale. - assume(!std::use_facet>(std::locale::classic()) - .is(std::ctype_base::lower, ref)); - return std::use_facet>(std::locale::classic()).toupper(cmp) == ref; - }; + std::string ident = readIdent(c); - ident.push_back(c); - for (c = context.file.sgetc(); isIdentChar(c); c = context.file.snextc()) { - ident.push_back(c); - } + auto strUpperCmp = [](char cmp, char ref) { + return toupper(cmp) == ref; + }; for (SectionType type : EnumSeq(SECTTYPE_INVALID)) { if (std::equal(RANGE(ident), RANGE(sectionTypeInfo[type].name), strUpperCmp)) { diff --git a/test/link/script-num-fmt.link b/test/link/script-num-fmt.link index 1465027b..09386e9f 100644 --- a/test/link/script-num-fmt.link +++ b/test/link/script-num-fmt.link @@ -1,5 +1,9 @@ ROM0 - org 42 - org %101010 - org $2A + org 4_2 + org %10_10_10 + org &52_ + org $2A_ + org 0b101_010 + org 0o5_2 + org 0x2_A org 41 ; Error! diff --git a/test/link/script-num-fmt.out b/test/link/script-num-fmt.out index 6e3ca3ec..3ea2a44a 100644 --- a/test/link/script-num-fmt.out +++ b/test/link/script-num-fmt.out @@ -1,2 +1,2 @@ -error: script-num-fmt.link(5): Cannot decrease the current address (from $002a to $0029) +error: script-num-fmt.link(9): Cannot decrease the current address (from $002a to $0029) Linking failed with 1 error diff --git a/test/link/script-okay.link b/test/link/script-okay.link new file mode 100644 index 00000000..5a5014fc --- /dev/null +++ b/test/link/script-okay.link @@ -0,0 +1,8 @@ +ROM0 + "ROM0" + "\\\"\'\n\r\t\0" +ROMX 1 + "ROM1" +ROMX 2 + "ROM2 1K" + "ROM2 1" diff --git a/test/link/script-okay.out b/test/link/script-okay.out new file mode 100644 index 00000000..e69de29b diff --git a/test/link/script.asm b/test/link/script.asm index 1b272ba5..44f175c9 100644 --- a/test/link/script.asm +++ b/test/link/script.asm @@ -7,3 +7,4 @@ SECTION "ROM2 1K", ROMX,BANK[2] ds $1000 SECTION "ROM2 1", ROMX,BANK[2] ds 1 +SECTION "\\\"\'\n\r\t\0", ROM0