From e090e112e612e50a0959d454a0857eed221e2475 Mon Sep 17 00:00:00 2001
From: Jan
Date: Sat, 30 Dec 2023 20:03:35 +0100
Subject: [PATCH] Implement token pasting

Add DefinesStreamProxy::ProcessTokenJoiningOperators. ExpandMacro calls it
after ProcessNestedMacros when the macro is flagged with
m_contains_token_joining_operators. The pass scans the expanded text and,
for every "##", joins the token in front of it with the token behind it:

 * identifiers and symbols are concatenated as they are
 * two string tokens are merged into one quoted string; the content between
   the quotes is copied unchanged, so escape sequences are kept
 * joining a string token with a non-string token, an unclosed string, and
   a "##" without a token in front of or behind it raise a ParsingException

IsStringizeParameterForwardLookup and IsTokenJoiningOperatorForwardLookup
move from private static members of Define into an anonymous namespace in
DefinesStreamProxy.cpp; the new pass uses the latter.

---
Review notes (ignored by git am):

 * Line breaks and indentation were reconstructed from whitespace-collapsed
   text. The number of added/removed/context lines in every hunk matches
   the hunk headers and the diffstat below.
 * Everything that was inside angle brackets is missing from the received
   text (author e-mail, the two #include context lines of the first hunk,
   the static_cast target type, the std::vector element types). Those spots
   are left exactly as received; restore them from the tree before applying.
 * Changed relative to the received patch, on added lines only:
   - "throw new ParsingException(...)" -> "throw ParsingException(...)" in
     four places. Throwing a pointer leaks the exception object and is not
     caught by handlers that catch ParsingException by reference.
   - IsStringizeParameterForwardLookup tested "pos + 1 && ...", which is
     true unless pos + 1 wraps to 0; it now tests "pos + 1 < value.size()"
     like IsTokenJoiningOperatorForwardLookup.
   The post-image blob hash in the first "index" line is stale because of
   these two changes.
 * Open issue, not fixed here: chained joins emit the middle token twice.
   For "a ## b ## c" the first join writes "ab" and sets previousToken to
   "b"; the second join then calls EmitJoinedToken("b", "c"), so the result
   is "abbc". Fixing this needs deferred emission of the joined token and a
   test case for chained joins.
 * callstack and inputPos are unused in ProcessTokenJoiningOperators.

 .../Parsing/Impl/DefinesStreamProxy.cpp       | 195 +++++++++++++++++-
 src/Parser/Parsing/Impl/DefinesStreamProxy.h  |   3 +-
 .../Parsing/Impl/DefinesStreamProxyTests.cpp  |  60 +++++-
 3 files changed, 240 insertions(+), 18 deletions(-)

diff --git a/src/Parser/Parsing/Impl/DefinesStreamProxy.cpp b/src/Parser/Parsing/Impl/DefinesStreamProxy.cpp
index 449280ad..5ad03661 100644
--- a/src/Parser/Parsing/Impl/DefinesStreamProxy.cpp
+++ b/src/Parser/Parsing/Impl/DefinesStreamProxy.cpp
@@ -12,6 +12,19 @@
 #include
 #include

+namespace
+{
+    bool IsStringizeParameterForwardLookup(const std::string& value, unsigned pos)
+    {
+        return pos + 1 < value.size() && (isalpha(value[pos + 1]) || value[pos + 1] == '_');
+    }
+
+    bool IsTokenJoiningOperatorForwardLookup(const std::string& value, unsigned pos)
+    {
+        return pos + 1 < value.size() && value[pos + 1] == '#';
+    }
+} // namespace
+
 DefinesStreamProxy::DefineParameterPosition::DefineParameterPosition()
     : m_parameter_index(0u),
       m_parameter_position(0u),
@@ -41,17 +54,6 @@ DefinesStreamProxy::MacroParameterState::MacroParameterState()
 {
 }

-bool DefinesStreamProxy::Define::IsStringizeParameterForwardLookup(const std::string& value, unsigned pos)
-{
-    // Check if # is prepended to the word
-    return pos + 1 && (isalpha(value[pos + 1]) || value[pos + 1] == '_');
-}
-
-bool DefinesStreamProxy::Define::IsTokenJoiningOperatorForwardLookup(const std::string& value, unsigned pos)
-{
-    return pos + 1 < value.size() && value[pos + 1] == '#';
-}
-
 void DefinesStreamProxy::Define::IdentifyTokenJoinsOnly()
 {
     for (auto i = 0u; i < m_value.size(); i++)
@@ -651,6 +653,174 @@ bool DefinesStreamProxy::FindNextMacro(const std::string& input, unsigned& input
     return false;
 }

+namespace
+{
+    enum class TokenJoinTokenType
+    {
+        NONE,
+        STRING,
+        IDENTIFIER,
+        SYMBOL
+    };
+
+    class TokenJoinToken
+    {
+    public:
+        TokenJoinToken()
+            : m_type(TokenJoinTokenType::NONE),
+              m_start(0u),
+              m_end(0u)
+        {
+        }
+
+        ~TokenJoinToken() = default;
+        TokenJoinToken(const TokenJoinToken& other) = default;
+        TokenJoinToken(TokenJoinToken&& other) = default;
+        TokenJoinToken& operator=(const TokenJoinToken& other) = default;
+        TokenJoinToken& operator=(TokenJoinToken&& other) noexcept = default;
+
+        void SetFromInput(ParserLine& line, unsigned& linePos, const std::string& input, unsigned& offset)
+        {
+            m_start = offset;
+
+            const auto firstChar = input[offset++];
+            const auto inputSize = input.size();
+            if (firstChar == '"')
+            {
+                m_type = TokenJoinTokenType::STRING;
+                for (; offset < inputSize; offset++)
+                {
+                    const auto c = input[offset];
+                    if (c == '\\')
+                        offset++; // Skip next char
+                    else if (c == '"')
+                        break;
+                }
+
+                if (offset >= inputSize)
+                    throw ParsingException(TokenPos(*line.m_filename, line.m_line_number, static_cast(linePos + 1)),
+                                           "Token-pasting operator cannot be used on unclosed string");
+
+                offset++;
+            }
+            else if (isalpha(firstChar) || firstChar == '_')
+            {
+                m_type = TokenJoinTokenType::IDENTIFIER;
+                for (; offset < inputSize; offset++)
+                {
+                    const auto c = input[offset];
+                    if (!isalnum(c) && c != '_')
+                        break;
+                }
+            }
+            else
+            {
+                m_type = TokenJoinTokenType::SYMBOL;
+            }
+
+            m_end = offset;
+        }
+
+        void EmitValue(std::ostream& out, const std::string& input) const
+        {
+            if (m_end <= m_start)
+                return;
+
+            if (m_type == TokenJoinTokenType::STRING)
+            {
+                if (m_end - m_start > 2)
+                    out << std::string(input, m_start + 1, m_end - m_start - 2);
+            }
+            else
+            {
+                assert(m_type == TokenJoinTokenType::IDENTIFIER || m_type == TokenJoinTokenType::SYMBOL);
+                out << std::string(input, m_start, m_end - m_start);
+            }
+        }
+
+        TokenJoinTokenType m_type;
+        unsigned m_start;
+        unsigned m_end;
+    };
+
+    void EmitJoinedToken(
+        ParserLine& line, unsigned& linePos, std::ostream& out, const std::string& input, const TokenJoinToken& token0, const TokenJoinToken& token1)
+    {
+        if ((token0.m_type == TokenJoinTokenType::STRING) != (token1.m_type == TokenJoinTokenType::STRING))
+            throw ParsingException(TokenPos(*line.m_filename, line.m_line_number, static_cast(linePos + 1)),
+                                   "String token can only use token-pasting operator on other string token");
+        if (token0.m_type == TokenJoinTokenType::STRING)
+        {
+            out << '"';
+            token0.EmitValue(out, input);
+            token1.EmitValue(out, input);
+            out << '"';
+        }
+        else
+        {
+            assert(token0.m_type == TokenJoinTokenType::IDENTIFIER || token0.m_type == TokenJoinTokenType::SYMBOL);
+
+            token0.EmitValue(out, input);
+            token1.EmitValue(out, input);
+        }
+    }
+} // namespace
+
+void DefinesStreamProxy::ProcessTokenJoiningOperators(
+    ParserLine& line, unsigned& linePos, std::vector& callstack, std::string& input, unsigned& inputPos)
+{
+    std::ostringstream ss;
+
+    auto joinNext = false;
+    TokenJoinToken previousToken;
+    TokenJoinToken currentToken;
+
+    const auto inputSize = input.size();
+    for (auto i = 0u; i < inputSize;)
+    {
+        const auto c = input[i];
+
+        if (isspace(c))
+        {
+            i++;
+            continue;
+        }
+
+        if (c == '#' && IsTokenJoiningOperatorForwardLookup(input, i))
+        {
+            if (currentToken.m_type == TokenJoinTokenType::NONE)
+                throw ParsingException(CreatePos(line, linePos), "Cannot use token-joining operator without previous token");
+
+            if (previousToken.m_end < currentToken.m_start)
+                ss << std::string(input, previousToken.m_end, currentToken.m_start - previousToken.m_end);
+
+            previousToken = currentToken;
+            joinNext = true;
+
+            // Skip second #
+            i += 2;
+        }
+        else
+        {
+            currentToken.SetFromInput(line, linePos, input, i);
+            if (joinNext)
+            {
+                EmitJoinedToken(line, linePos, ss, input, previousToken, currentToken);
+                previousToken = currentToken;
+                joinNext = false;
+            }
+        }
+    }
+
+    if (inputSize > previousToken.m_end)
+        ss << std::string(input, previousToken.m_end, inputSize - previousToken.m_end);
+
+    if (joinNext)
+        throw ParsingException(CreatePos(line, linePos), "Cannot use token-joining operator without following token");
+
+    input = ss.str();
+}
+
 void DefinesStreamProxy::InsertMacroParameters(std::ostringstream& out, const DefinesStreamProxy::Define* macro, std::vector& parameterValues)
 {
     if (parameterValues.empty() || macro->m_parameter_positions.empty())
@@ -696,6 +866,9 @@ void DefinesStreamProxy::ExpandMacro(ParserLine& line,
     unsigned nestedPos = 0;
     ProcessNestedMacros(line, linePos, callstack, str, nestedPos);

+    if (macro->m_contains_token_joining_operators)
+        ProcessTokenJoiningOperators(line, linePos, callstack, str, nestedPos);
+
     out << str;
 }

diff --git a/src/Parser/Parsing/Impl/DefinesStreamProxy.h b/src/Parser/Parsing/Impl/DefinesStreamProxy.h
index 0a51f01e..73071eb6 100644
--- a/src/Parser/Parsing/Impl/DefinesStreamProxy.h
+++ b/src/Parser/Parsing/Impl/DefinesStreamProxy.h
@@ -49,8 +49,6 @@ public:
         void IdentifyParameters(const std::vector& parameterNames);

     private:
-        static bool IsStringizeParameterForwardLookup(const std::string& value, unsigned pos);
-        static bool IsTokenJoiningOperatorForwardLookup(const std::string& value, unsigned pos);
         void IdentifyTokenJoinsOnly();
     };

@@ -118,6 +116,7 @@ private:

     bool FindNextMacro(const std::string& input, unsigned& inputPos, unsigned& defineStart, const DefinesStreamProxy::Define*& define);

+    void ProcessTokenJoiningOperators(ParserLine& line, unsigned& linePos, std::vector& callstack, std::string& input, unsigned& inputPos);
     void InsertMacroParameters(std::ostringstream& out, const DefinesStreamProxy::Define* macro, std::vector& parameterValues);
     void ExpandMacro(ParserLine& line,
                      unsigned& linePos,
diff --git a/test/ParserTests/Parsing/Impl/DefinesStreamProxyTests.cpp b/test/ParserTests/Parsing/Impl/DefinesStreamProxyTests.cpp
index 7300a3fe..a41e8b84 100644
--- a/test/ParserTests/Parsing/Impl/DefinesStreamProxyTests.cpp
+++ b/test/ParserTests/Parsing/Impl/DefinesStreamProxyTests.cpp
@@ -1001,12 +1001,46 @@ namespace test::parsing::impl::defines_stream_proxy
         REQUIRE(proxy.Eof());
     }

-    TEST_CASE("DefinesStreamProxy: Token-pasting operator ignores whitespace", "[parsing][parsingstream]")
+    TEST_CASE("DefinesStreamProxy: Can token-join symbols", "[parsing][parsingstream]")
     {
         const std::vector lines{
-            "#define glue(a, b) a ## b",
-            "glue(\"Hello\", \"World\")",
-            "testMacro(Hello, 5)",
+            "#define GLUE(a, b) a ## b",
+            "GLUE(+, =)",
+        };
+
+        MockParserLineStream mockStream(lines);
+        DefinesStreamProxy proxy(&mockStream);
+
+        ExpectLine(&proxy, 1, "");
+        ExpectLine(&proxy, 2, "+=");
+
+        REQUIRE(proxy.Eof());
+    }
+
+    TEST_CASE("DefinesStreamProxy: Can token-join symbols and identifiers", "[parsing][parsingstream]")
+    {
+        const std::vector lines{
+            "#define GLUE(a, b) a ## b",
+            "GLUE(+, hello)",
+            "GLUE(world, =)",
+        };
+
+        MockParserLineStream mockStream(lines);
+        DefinesStreamProxy proxy(&mockStream);
+
+        ExpectLine(&proxy, 1, "");
+        ExpectLine(&proxy, 2, "+hello");
+        ExpectLine(&proxy, 3, "world=");
+
+        REQUIRE(proxy.Eof());
+    }
+
+    TEST_CASE("DefinesStreamProxy: Can token-join strings", "[parsing][parsingstream]")
+    {
+        const std::vector lines{
+            "#define GLUE(a, b) a ## b",
+            "GLUE(\"Hello\", \"World\")",
+            "GLUE(\"\", \"Cat\")",
         };

         MockParserLineStream mockStream(lines);
@@ -1014,7 +1048,23 @@ namespace test::parsing::impl::defines_stream_proxy

         ExpectLine(&proxy, 1, "");
         ExpectLine(&proxy, 2, "\"HelloWorld\"");
-        ExpectLine(&proxy, 3, "Hello5");
+        ExpectLine(&proxy, 3, "\"Cat\"");
+
+        REQUIRE(proxy.Eof());
+    }
+
+    TEST_CASE("DefinesStreamProxy: Combined string tokens keep escape sequences", "[parsing][parsingstream]")
+    {
+        const std::vector lines{
+            "#define GLUE(a, b) a ## b",
+            "GLUE(\"He\\\"llo\", \"W\\\\orld\")",
+        };
+
+        MockParserLineStream mockStream(lines);
+        DefinesStreamProxy proxy(&mockStream);
+
+        ExpectLine(&proxy, 1, "");
+        ExpectLine(&proxy, 2, "\"He\\\"lloW\\\\orld\"");

         REQUIRE(proxy.Eof());
     }