Reuse isWhitespace and isNewline, also refactoring readAtFile

This commit is contained in:
Rangi42
2025-08-05 13:46:53 -04:00
parent 98c5c7f776
commit 504a45a4ed
5 changed files with 34 additions and 61 deletions

View File

@@ -12,6 +12,9 @@
#include "helpers.hpp" #include "helpers.hpp"
bool isWhitespace(int c);
bool isNewline(int c);
bool startsIdentifier(int c); bool startsIdentifier(int c);
bool continuesIdentifier(int c); bool continuesIdentifier(int c);

View File

@@ -253,14 +253,6 @@ static UpperMap<int> const keywordDict{
static auto ldio = keywordDict.find("LDIO"); static auto ldio = keywordDict.find("LDIO");
// Reports whether `c` is a horizontal blank, i.e. a space or a tab.
// Every other value (line terminators and negative values included) yields false.
static bool isWhitespace(int c) {
	switch (c) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
// Reports whether `c` is a line-terminator character: carriage return or line feed.
// Every other value (blanks and negative values included) yields false.
static bool isNewline(int c) {
	if (c == '\n') {
		return true;
	}
	return c == '\r';
}
static LexerState *lexerState = nullptr; static LexerState *lexerState = nullptr;
static LexerState *lexerStateEOL = nullptr; static LexerState *lexerStateEOL = nullptr;

View File

@@ -20,6 +20,7 @@
#include "platform.hpp" #include "platform.hpp"
#include "style.hpp" #include "style.hpp"
#include "usage.hpp" #include "usage.hpp"
#include "util.hpp"
#include "verbosity.hpp" #include "verbosity.hpp"
#include "version.hpp" #include "version.hpp"
@@ -210,68 +211,44 @@ static std::vector<size_t> readAtFile(std::string const &path, std::vector<char>
fatal("Error reading @%s: %s", file.c_str(path), strerror(errno)); fatal("Error reading @%s: %s", file.c_str(path), strerror(errno));
} }
// We only filter out `EOF`, but calling `isblank()` on anything else is UB! for (std::vector<size_t> argvOfs;;) {
static_assert( int c = file->sbumpc();
std::streambuf::traits_type::eof() == EOF,
"isblank(std::streambuf::traits_type::eof()) is UB!"
);
std::vector<size_t> argvOfs;
for (;;) {
int c;
// First, discard any leading whitespace // First, discard any leading whitespace
do { while (isWhitespace(c)) {
c = file->sbumpc(); c = file->sbumpc();
if (c == EOF) { }
return argvOfs;
}
} while (isblank(c));
switch (c) { // If it's a comment, discard everything until EOL
case '#': // If it's a comment, discard everything until EOL if (c == '#') {
while ((c = file->sbumpc()) != '\n') { c = file->sbumpc();
if (c == EOF) { while (c != EOF && !isNewline(c)) {
return argvOfs; c = file->sbumpc();
}
} }
continue; // Start processing the next line }
// If it's an empty line, ignore it
case '\r': // Assuming CRLF here if (c == EOF) {
file->sbumpc(); // Discard the upcoming '\n' return argvOfs;
[[fallthrough]]; } else if (isNewline(c)) {
case '\n':
continue; // Start processing the next line continue; // Start processing the next line
} }
// Alright, now we can parse the line // Alright, now we can parse the line
do { do {
argvOfs.push_back(argPool.size());
// Read one argument (until the next whitespace char). // Read one argument (until the next whitespace char).
// We know there is one because we already have its first character in `c`. // We know there is one because we already have its first character in `c`.
argvOfs.push_back(argPool.size()); for (; c != EOF && !isNewline(c) && !isWhitespace(c); c = file->sbumpc()) {
// Reading and appending characters one at a time may be inefficient, but I'm counting
// on `vector` and `sbumpc` to do the right thing here.
argPool.push_back(c); // Push the character we've already read
for (;;) {
c = file->sbumpc();
if (c == EOF || c == '\n' || isblank(c)) {
break;
} else if (c == '\r') {
file->sbumpc(); // Discard the '\n'
break;
}
argPool.push_back(c); argPool.push_back(c);
} }
argPool.push_back('\0'); argPool.push_back('\0');
// Discard whitespace until the next argument (candidate) // Discard whitespace until the next argument (candidate)
while (isblank(c)) { while (isWhitespace(c)) {
c = file->sbumpc(); c = file->sbumpc();
} }
if (c == '\r') { } while (c != EOF && !isNewline(c)); // End if we reached EOL
c = file->sbumpc(); // Skip the '\n'
}
} while (c != '\n' && c != EOF); // End if we reached EOL
} }
} }

View File

@@ -60,14 +60,6 @@ void lexer_IncLineNo() {
++lexerStack.back().lineNo; ++lexerStack.back().lineNo;
} }
// Tells whether `c` is a space or a tab character.
// Anything else, such as a newline or a negative value, is not considered whitespace.
static bool isWhiteSpace(int c) {
	bool const isSpace = c == ' ';
	bool const isTab = c == '\t';
	return isSpace || isTab;
}
// Tells whether `c` is one of the two line-terminator characters, '\r' or '\n'.
// Anything else, such as a blank or a negative value, is not a newline.
static bool isNewline(int c) {
	switch (c) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
yy::parser::symbol_type yylex(); // Forward declaration for `yywrap` yy::parser::symbol_type yylex(); // Forward declaration for `yywrap`
static yy::parser::symbol_type yywrap() { static yy::parser::symbol_type yywrap() {
@@ -268,7 +260,7 @@ yy::parser::symbol_type yylex() {
int c = context.file.sbumpc(); int c = context.file.sbumpc();
// First, skip leading whitespace. // First, skip leading whitespace.
while (isWhiteSpace(c)) { while (isWhitespace(c)) {
c = context.file.sbumpc(); c = context.file.sbumpc();
} }
// Then, skip a comment if applicable. // Then, skip a comment if applicable.

View File

@@ -5,6 +5,14 @@
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
// Returns true when `c` is a horizontal blank (a space or a tab), false otherwise.
// Only these two exact values match; newlines and negative values do not.
bool isWhitespace(int c) {
	if (c == '\t') {
		return true;
	}
	return c == ' ';
}
// Returns true when `c` is a carriage return or a line feed, false otherwise.
// Only these two exact values match; blanks and negative values do not.
bool isNewline(int c) {
	bool const isLineFeed = c == '\n';
	bool const isCarriageReturn = c == '\r';
	return isCarriageReturn || isLineFeed;
}
bool startsIdentifier(int c) { bool startsIdentifier(int c) {
// This returns false for anonymous labels, which internally start with a '!', // This returns false for anonymous labels, which internally start with a '!',
// and for section fragment literal labels, which internally start with a '$'. // and for section fragment literal labels, which internally start with a '$'.
@@ -25,7 +33,8 @@ char const *printChar(int c) {
return "EOF"; return "EOF";
} }
if (isprint(c)) { // Handle printable ASCII characters
if (c >= ' ' && c <= '~') {
buf[0] = '\''; buf[0] = '\'';
buf[1] = c; buf[1] = c;
buf[2] = '\''; buf[2] = '\'';