Reuse `isWhitespace` and `isNewline`, and refactor `readAtFile`

2025-11-20 10:12:06 +00:00 · 2025-08-05 13:46:53 -04:00
parent 98c5c7f776
commit 504a45a4ed
5 changed files with 34 additions and 61 deletions
--- a/include/util.hpp
+++ b/include/util.hpp
@@ -12,6 +12,9 @@
 #include "helpers.hpp"
 bool isWhitespace(int c);
 bool isNewline(int c);
 bool startsIdentifier(int c);
 bool continuesIdentifier(int c);
--- a/src/asm/lexer.cpp
+++ b/src/asm/lexer.cpp
@@ -253,14 +253,6 @@ static UpperMap<int> const keywordDict{
 static auto ldio = keywordDict.find("LDIO");
 // Reports whether `c` is a horizontal blank, i.e. a space or a tab.
 // Every other value (including newline characters) yields `false`.
 static bool isWhitespace(int c) {
 	switch (c) {
 	case ' ':
 	case '\t':
 		return true;
 	default:
 		return false;
 	}
 }
 // Reports whether `c` is one of the two line-terminator characters:
 // carriage return ('\r') or line feed ('\n').
 static bool isNewline(int c) {
 	bool const isCarriageReturn = c == '\r';
 	bool const isLineFeed = c == '\n';
 	return isCarriageReturn || isLineFeed;
 }
 static LexerState *lexerState = nullptr;
 static LexerState *lexerStateEOL = nullptr;
--- a/src/gfx/main.cpp
+++ b/src/gfx/main.cpp
@@ -20,6 +20,7 @@
 #include "platform.hpp"
 #include "style.hpp"
 #include "usage.hpp"
 #include "util.hpp"
 #include "verbosity.hpp"
 #include "version.hpp"
@@ -210,68 +211,44 @@ static std::vector<size_t> readAtFile(std::string const &path, std::vector<char>
 		fatal("Error reading @%s: %s", file.c_str(path), strerror(errno));
 	}
-	// We only filter out `EOF`, but calling `isblank()` on anything else is UB!
+	for (std::vector<size_t> argvOfs;;) {
-	static_assert(
+		int c = file->sbumpc();
 	    std::streambuf::traits_type::eof() == EOF,
 	    "isblank(std::streambuf::traits_type::eof()) is UB!"
 	);
 	std::vector<size_t> argvOfs;
 	for (;;) {
 		int c;
 		// First, discard any leading whitespace
-		do {
+		while (isWhitespace(c)) {
 			c = file->sbumpc();
-			if (c == EOF) {
+		}
 				return argvOfs;
 			}
 		} while (isblank(c));
-		switch (c) {
+		// If it's a comment, discard everything until EOL
-		case '#': // If it's a comment, discard everything until EOL
+		if (c == '#') {
-			while ((c = file->sbumpc()) != '\n') {
+			c = file->sbumpc();
-				if (c == EOF) {
+			while (c != EOF && !isNewline(c)) {
-					return argvOfs;
+				c = file->sbumpc();
 				}
 			}
-			continue; // Start processing the next line
+		}
-		// If it's an empty line, ignore it
+
-		case '\r':          // Assuming CRLF here
+		if (c == EOF) {
-			file->sbumpc(); // Discard the upcoming '\n'
+			return argvOfs;
-			[[fallthrough]];
+		} else if (isNewline(c)) {
 		case '\n':
 			continue; // Start processing the next line
 		}
 		// Alright, now we can parse the line
 		do {
 			argvOfs.push_back(argPool.size());
 			// Read one argument (until the next whitespace char).
 			// We know there is one because we already have its first character in `c`.
-			argvOfs.push_back(argPool.size());
+			for (; c != EOF && !isNewline(c) && !isWhitespace(c); c = file->sbumpc()) {
 			// Reading and appending characters one at a time may be inefficient, but I'm counting
 			// on `vector` and `sbumpc` to do the right thing here.
 			argPool.push_back(c); // Push the character we've already read
 			for (;;) {
 				c = file->sbumpc();
 				if (c == EOF || c == '\n' || isblank(c)) {
 					break;
 				} else if (c == '\r') {
 					file->sbumpc(); // Discard the '\n'
 					break;
 				}
 				argPool.push_back(c);
 			}
 			argPool.push_back('\0');
 			// Discard whitespace until the next argument (candidate)
-			while (isblank(c)) {
+			while (isWhitespace(c)) {
 				c = file->sbumpc();
 			}
-			if (c == '\r') {
+		} while (c != EOF && !isNewline(c)); // End if we reached EOL
 				c = file->sbumpc(); // Skip the '\n'
 			}
 		} while (c != '\n' && c != EOF); // End if we reached EOL
 	}
 }
--- a/src/link/lexer.cpp
+++ b/src/link/lexer.cpp
@@ -60,14 +60,6 @@ void lexer_IncLineNo() {
 	++lexerStack.back().lineNo;
 }
 // Returns `true` only for a space or a tab character.
 static bool isWhiteSpace(int c) {
 	if (c == ' ') {
 		return true;
 	}
 	return c == '\t';
 }
 // Returns `true` only for a carriage return or a line feed character.
 static bool isNewline(int c) {
 	switch (c) {
 	case '\r':
 	case '\n':
 		return true;
 	default:
 		return false;
 	}
 }
 yy::parser::symbol_type yylex(); // Forward declaration for `yywrap`
 static yy::parser::symbol_type yywrap() {
@@ -268,7 +260,7 @@ yy::parser::symbol_type yylex() {
 	int c = context.file.sbumpc();
 	// First, skip leading whitespace.
-	while (isWhiteSpace(c)) {
+	while (isWhitespace(c)) {
 		c = context.file.sbumpc();
 	}
 	// Then, skip a comment if applicable.
--- a/src/util.cpp
+++ b/src/util.cpp
@@ -5,6 +5,14 @@
 #include <stdint.h>
 #include <stdio.h>
 // Tells whether `c` is a blank within a line: either ' ' or '\t'.
 // Newline characters are not counted as whitespace by this predicate.
 bool isWhitespace(int c) {
 	bool const isSpace = c == ' ';
 	bool const isTab = c == '\t';
 	return isSpace || isTab;
 }
 // Tells whether `c` is a line-ending character: either '\r' or '\n'.
 bool isNewline(int c) {
 	if (c == '\n') {
 		return true;
 	}
 	return c == '\r';
 }
 bool startsIdentifier(int c) {
 	// This returns false for anonymous labels, which internally start with a '!',
 	// and for section fragment literal labels, which internally start with a '$'.
@@ -25,7 +33,8 @@ char const *printChar(int c) {
 		return "EOF";
 	}
-	if (isprint(c)) {
+	// Handle printable ASCII characters
 	if (c >= ' ' && c <= '~') {
 		buf[0] = '\'';
 		buf[1] = c;
 		buf[2] = '\'';