diff --git a/include/util.hpp b/include/util.hpp index 2ec110e0..25568416 100644 --- a/include/util.hpp +++ b/include/util.hpp @@ -12,6 +12,9 @@ #include "helpers.hpp" +bool isWhitespace(int c); +bool isNewline(int c); + bool startsIdentifier(int c); bool continuesIdentifier(int c); diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index efde10c0..1d458205 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -253,14 +253,6 @@ static UpperMap const keywordDict{ static auto ldio = keywordDict.find("LDIO"); -static bool isWhitespace(int c) { - return c == ' ' || c == '\t'; -} - -static bool isNewline(int c) { - return c == '\r' || c == '\n'; -} - static LexerState *lexerState = nullptr; static LexerState *lexerStateEOL = nullptr; diff --git a/src/gfx/main.cpp b/src/gfx/main.cpp index a967be07..44e7a523 100644 --- a/src/gfx/main.cpp +++ b/src/gfx/main.cpp @@ -20,6 +20,7 @@ #include "platform.hpp" #include "style.hpp" #include "usage.hpp" +#include "util.hpp" #include "verbosity.hpp" #include "version.hpp" @@ -210,68 +211,44 @@ static std::vector readAtFile(std::string const &path, std::vector fatal("Error reading @%s: %s", file.c_str(path), strerror(errno)); } - // We only filter out `EOF`, but calling `isblank()` on anything else is UB! - static_assert( - std::streambuf::traits_type::eof() == EOF, - "isblank(std::streambuf::traits_type::eof()) is UB!" - ); - std::vector argvOfs; - - for (;;) { - int c; + for (std::vector argvOfs;;) { + int c = file->sbumpc(); // First, discard any leading whitespace - do { + while (isWhitespace(c)) { c = file->sbumpc(); - if (c == EOF) { - return argvOfs; - } - } while (isblank(c)); + } - switch (c) { - case '#': // If it's a comment, discard everything until EOL - while ((c = file->sbumpc()) != '\n') { - if (c == EOF) { - return argvOfs; - } + // If it's a comment, discard everything until EOL + if (c == '#') { + c = file->sbumpc(); + while (c != EOF && !isNewline(c)) { + c = file->sbumpc(); } - continue; // Start processing the next line - // If it's an empty line, ignore it - case '\r': // Assuming CRLF here - file->sbumpc(); // Discard the upcoming '\n' - [[fallthrough]]; - case '\n': + } + + if (c == EOF) { + return argvOfs; + } else if (isNewline(c)) { continue; // Start processing the next line } // Alright, now we can parse the line do { + argvOfs.push_back(argPool.size()); + // Read one argument (until the next whitespace char). // We know there is one because we already have its first character in `c`. - argvOfs.push_back(argPool.size()); - // Reading and appending characters one at a time may be inefficient, but I'm counting - // on `vector` and `sbumpc` to do the right thing here. - argPool.push_back(c); // Push the character we've already read - for (;;) { - c = file->sbumpc(); - if (c == EOF || c == '\n' || isblank(c)) { - break; - } else if (c == '\r') { - file->sbumpc(); // Discard the '\n' - break; - } + for (; c != EOF && !isNewline(c) && !isWhitespace(c); c = file->sbumpc()) { argPool.push_back(c); } argPool.push_back('\0'); // Discard whitespace until the next argument (candidate) - while (isblank(c)) { + while (isWhitespace(c)) { c = file->sbumpc(); } - if (c == '\r') { - c = file->sbumpc(); // Skip the '\n' - } - } while (c != '\n' && c != EOF); // End if we reached EOL + } while (c != EOF && !isNewline(c)); // End if we reached EOL } } diff --git a/src/link/lexer.cpp b/src/link/lexer.cpp index 9cfeb851..a913a87b 100644 --- a/src/link/lexer.cpp +++ b/src/link/lexer.cpp @@ -60,14 +60,6 @@ void lexer_IncLineNo() { ++lexerStack.back().lineNo; } -static bool isWhiteSpace(int c) { - return c == ' ' || c == '\t'; -} - -static bool isNewline(int c) { - return c == '\r' || c == '\n'; -} - yy::parser::symbol_type yylex(); // Forward declaration for `yywrap` static yy::parser::symbol_type yywrap() { @@ -268,7 +260,7 @@ yy::parser::symbol_type yylex() { int c = context.file.sbumpc(); // First, skip leading whitespace. - while (isWhiteSpace(c)) { + while (isWhitespace(c)) { c = context.file.sbumpc(); } // Then, skip a comment if applicable. diff --git a/src/util.cpp b/src/util.cpp index d00cc11b..a698e881 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -5,6 +5,14 @@ #include #include +bool isWhitespace(int c) { + return c == ' ' || c == '\t'; +} + +bool isNewline(int c) { + return c == '\r' || c == '\n'; +} + bool startsIdentifier(int c) { // This returns false for anonymous labels, which internally start with a '!', // and for section fragment literal labels, which internally start with a '$'. @@ -25,7 +33,8 @@ char const *printChar(int c) { return "EOF"; } - if (isprint(c)) { + // Handle printable ASCII characters + if (c >= ' ' && c <= '~') { buf[0] = '\''; buf[1] = c; buf[2] = '\'';