diff --git a/src/ZoneCodeGeneratorLib/Parsing/Matcher/AbstractMatcher.h b/src/ZoneCodeGeneratorLib/Parsing/Matcher/AbstractMatcher.h index 5a6a16bc..90891c82 100644 --- a/src/ZoneCodeGeneratorLib/Parsing/Matcher/AbstractMatcher.h +++ b/src/ZoneCodeGeneratorLib/Parsing/Matcher/AbstractMatcher.h @@ -15,11 +15,13 @@ class AbstractMatcher public: static constexpr int NO_ID = -1; + typedef std::vector> token_list_t; /* alias for the token list type handed to a transform function */ + private: int m_tag_id; int m_capture_id; bool m_no_consume; - std::function>&)> m_transform_func; + std::function m_transform_func; protected: AbstractMatcher() @@ -68,26 +70,29 @@ public: if (m_tag_id != NO_ID) result.m_tags.push_back(m_tag_id); + if (m_transform_func) /* the transform is applied independently of whether a capture id is set */ + { + std::vector> tokens; + tokens.reserve(result.m_consumed_token_count); + + for (auto i = 0u; i < result.m_consumed_token_count; i++) + tokens.emplace_back(lexer->GetToken(tokenOffset + i)); + + result.m_fabricated_tokens.push_back(m_transform_func(tokens)); /* the transform receives every consumed token; its result is kept as a fabricated token */ + + result.m_matched_tokens.clear(); /* the fabricated token replaces any matched tokens already recorded in result */ + result.m_matched_tokens.emplace_back(result.m_fabricated_tokens.size() - 1, true); + } + else if(result.m_matched_tokens.empty()) /* nothing recorded yet: record each consumed token by its lexer offset */ + { + for (auto i = 0u; i < result.m_consumed_token_count; i++) + result.m_matched_tokens.emplace_back(tokenOffset + i, false); + } + if (m_capture_id != NO_ID) { - if (m_transform_func) - { - std::vector> tokens; - tokens.reserve(result.m_consumed_token_count); - - for (auto i = 0u; i < result.m_consumed_token_count; i++) - tokens.emplace_back(lexer->GetToken(tokenOffset + i)); - - result.m_fabricated_tokens.push_back(m_transform_func(tokens)); - result.m_captures.emplace_back(m_capture_id, result.m_fabricated_tokens.size() - 1, true); - } - else - { - for (auto i = 0u; i < result.m_consumed_token_count; i++) - { - result.m_captures.emplace_back(m_capture_id, tokenOffset + i); - } - } + for (const auto& match : result.m_matched_tokens) /* every matched token, fabricated or not, is captured under m_capture_id */ + result.m_captures.emplace_back(m_capture_id, match); } return result; diff --git 
a/src/ZoneCodeGeneratorLib/Parsing/Matcher/AbstractMatcherFactory.h b/src/ZoneCodeGeneratorLib/Parsing/Matcher/AbstractMatcherFactory.h index 068392bd..bed9699a 100644 --- a/src/ZoneCodeGeneratorLib/Parsing/Matcher/AbstractMatcherFactory.h +++ b/src/ZoneCodeGeneratorLib/Parsing/Matcher/AbstractMatcherFactory.h @@ -20,6 +20,8 @@ class MatcherFactoryWrapper std::unique_ptr> m_matcher; public: + typedef typename AbstractMatcher::token_list_t token_list_t; /* re-exports the token list alias of AbstractMatcher */ + explicit MatcherFactoryWrapper(std::unique_ptr> matcher) : m_matcher(std::move(matcher)) { @@ -43,7 +45,7 @@ public: return *this; } - MatcherFactoryWrapper& Transform(std::function>&)> transform) + MatcherFactoryWrapper& Transform(std::function transform) { m_matcher->SetTransform(std::move(transform)); return *this; @@ -76,6 +78,8 @@ class AbstractMatcherFactory const IMatcherForLabelSupplier* m_label_supplier; public: + typedef typename AbstractMatcher::token_list_t token_list_t; /* re-exports the token list alias of AbstractMatcher */ + explicit AbstractMatcherFactory(const IMatcherForLabelSupplier* labelSupplier) : m_label_supplier(labelSupplier) { diff --git a/src/ZoneCodeGeneratorLib/Parsing/Matcher/MatcherResult.h b/src/ZoneCodeGeneratorLib/Parsing/Matcher/MatcherResult.h index 8e0aab13..e9b18abe 100644 --- a/src/ZoneCodeGeneratorLib/Parsing/Matcher/MatcherResult.h +++ b/src/ZoneCodeGeneratorLib/Parsing/Matcher/MatcherResult.h @@ -13,29 +13,19 @@ class MatcherResult static_assert(std::is_base_of::value); public: - class Capture + class TokenIndex /* a token index and a fabricated flag packed into one unsigned value */ { static constexpr unsigned FABRICATED_FLAG_MASK = std::numeric_limits::max() ^ std::numeric_limits::max(); static constexpr unsigned TOKEN_INDEX_MASK = ~FABRICATED_FLAG_MASK; - int m_capture_id; unsigned m_token_index; public: - Capture(const int captureId, const unsigned tokenIndex) - : Capture(captureId, tokenIndex, false) + TokenIndex(const unsigned index, const bool isFabricated) { - } - - Capture(const int captureId, const unsigned tokenIndex, const bool isFabricated) - : m_capture_id(captureId), 
m_token_index(!isFabricated ? tokenIndex : tokenIndex | FABRICATED_FLAG_MASK) - { - } - - _NODISCARD int GetCaptureId() const - { - return m_capture_id; + m_token_index = index & TOKEN_INDEX_MASK; /* keep only the index bits so the flag is set solely through isFabricated */ + if (isFabricated) + m_token_index |= FABRICATED_FLAG_MASK; } _NODISCARD bool IsFabricated() const @@ -49,10 +39,40 @@ public: } }; + class Capture /* pairs a capture id with the TokenIndex of the captured token */ + { + public: + int m_capture_id; + TokenIndex m_token_index; + + Capture(const int captureId, const unsigned tokenIndex) + : Capture(captureId, tokenIndex, false) + { + } + + Capture(const int captureId, const unsigned tokenIndex, const bool isFabricated) + : m_capture_id(captureId), + m_token_index(tokenIndex, isFabricated) + { + } + + Capture(const int captureId, const TokenIndex index) + : m_capture_id(captureId), + m_token_index(index) + { + } + + _NODISCARD int GetCaptureId() const + { + return m_capture_id; + } + }; + bool m_matches; unsigned m_consumed_token_count; std::vector m_tags; std::vector m_captures; + std::vector m_matched_tokens; /* matched tokens; each entry refers either to a lexer token or to an entry of m_fabricated_tokens */ std::vector m_fabricated_tokens; private: @@ -82,13 +102,21 @@ public: for (const auto& capture : other.m_captures) { - if (capture.IsFabricated()) - m_captures.emplace_back(capture.GetCaptureId(), m_fabricated_tokens.size() + capture.GetTokenIndex(), true); + if (capture.m_token_index.IsFabricated()) + m_captures.emplace_back(capture.GetCaptureId(), TokenIndex(m_fabricated_tokens.size() + capture.m_token_index.GetTokenIndex(), true)); /* indices into the fabricated tokens of other are shifted by the number already held here */ else - m_captures.emplace_back(capture.GetCaptureId(), capture.GetTokenIndex()); + m_captures.emplace_back(capture.GetCaptureId(), capture.m_token_index); } - for(auto& fabricated : other.m_fabricated_tokens) + for (const auto& token : other.m_matched_tokens) /* matched tokens are merged with the same index shift as the captures above */ + { + if (token.IsFabricated()) + m_matched_tokens.emplace_back(m_fabricated_tokens.size() + token.GetTokenIndex(), true); + else + m_matched_tokens.emplace_back(token.GetTokenIndex(), false); + } + + for (auto& fabricated : other.m_fabricated_tokens) { 
m_fabricated_tokens.emplace_back(std::move(fabricated)); } diff --git a/src/ZoneCodeGeneratorLib/Parsing/Sequence/SequenceResult.h b/src/ZoneCodeGeneratorLib/Parsing/Sequence/SequenceResult.h index 63b1b8a0..c2f4d327 100644 --- a/src/ZoneCodeGeneratorLib/Parsing/Sequence/SequenceResult.h +++ b/src/ZoneCodeGeneratorLib/Parsing/Sequence/SequenceResult.h @@ -37,10 +37,10 @@ public: { for (const typename MatcherResult::Capture& capture : result.m_captures) { - if (capture.IsFabricated()) - m_captures[capture.GetCaptureId()].m_tokens.push_back(result.m_fabricated_tokens[capture.GetTokenIndex()]); + if (capture.m_token_index.IsFabricated()) /* fabricated indices refer to result.m_fabricated_tokens, all others to lexer tokens */ + m_captures[capture.GetCaptureId()].m_tokens.push_back(result.m_fabricated_tokens[capture.m_token_index.GetTokenIndex()]); else - m_captures[capture.GetCaptureId()].m_tokens.push_back(lexer->GetToken(capture.GetTokenIndex())); + m_captures[capture.GetCaptureId()].m_tokens.push_back(lexer->GetToken(capture.m_token_index.GetTokenIndex())); } } diff --git a/test/ZoneCodeGeneratorLibTests/Parsing/Matcher/MatcherTests.cpp b/test/ZoneCodeGeneratorLibTests/Parsing/Matcher/MatcherTests.cpp index 05ed573a..60409bdb 100644 --- a/test/ZoneCodeGeneratorLibTests/Parsing/Matcher/MatcherTests.cpp +++ b/test/ZoneCodeGeneratorLibTests/Parsing/Matcher/MatcherTests.cpp @@ -1,5 +1,7 @@ #include +#include + #include "Utils/ClassUtils.h" #include "Parsing/Header/Impl/HeaderParserValue.h" #include "Parsing/Header/Matcher/HeaderMatcherFactory.h" @@ -812,9 +814,15 @@ namespace test::parsing::matcher create.Char(':'), create.Identifier() })) - }).Transform([](std::vector>& values) + }).Transform([](HeaderMatcherFactory::token_list_t& values) { - return HeaderParserValue::TypeName(values[0].get().GetPos(), new std::string()); + std::ostringstream str; + str << values[0].get().IdentifierValue(); + + for (auto i = 3u; i < values.size(); i += 3) + str << "::" << values[i].get().IdentifierValue(); + + return HeaderParserValue::TypeName(values[0].get().GetPos(), 
new std::string(str.str())); }) }, LABEL_TYPENAME); @@ -845,4 +853,122 @@ namespace test::parsing::matcher REQUIRE(test.PerformTest()); REQUIRE(test.GetConsumedTokenCount() == 6); } + + TEST_CASE("Matcher: Can capture within transform", "[parsing][matcher]") /* the inner capture must yield the original identifier while the outer capture yields the transformed type name */ + { + static constexpr auto LABEL_TYPENAME = 1; + + static constexpr auto CAPTURE_TYPENAME = 1; + static constexpr auto CAPTURE_FIRST_TYPENAME_IDENTIFIER = 2; + + MatchersTestsHelper test; + const TokenPos pos; + const auto create = test.Factory(); + test.Matchers( + { + create.Type(HeaderParserValueType::STRUCT), + create.Label(LABEL_TYPENAME).Capture(CAPTURE_TYPENAME), + create.Char('{') + }); + test.LabeledMatchers( + { + create.And({ + create.Identifier().Capture(CAPTURE_FIRST_TYPENAME_IDENTIFIER), + create.OptionalLoop(create.And({ + create.Char(':'), + create.Char(':'), + create.Identifier() + })) + }).Transform([](HeaderMatcherFactory::token_list_t& values) + { + std::ostringstream str; + str << values[0].get().IdentifierValue(); + + for (auto i = 3u; i < values.size(); i += 3) /* after the first identifier, each further identifier follows two colon tokens, hence the stride of 3 */ + str << "::" << values[i].get().IdentifierValue(); + + return HeaderParserValue::TypeName(values[0].get().GetPos(), new std::string(str.str())); + }) + }, LABEL_TYPENAME); + + test.Tokens({ + HeaderParserValue::Keyword(pos, HeaderParserValueType::STRUCT), + HeaderParserValue::Identifier(pos, new std::string("hello")), + HeaderParserValue::Character(pos, ':'), + HeaderParserValue::Character(pos, ':'), + HeaderParserValue::Identifier(pos, new std::string("world")), + HeaderParserValue::Character(pos, '{'), + HeaderParserValue::Invalid(pos) + }); + + test.MatchCallback([](sequence_result_t& result) + { + REQUIRE(result.NextTag() == matcher_t::NO_ID); + + REQUIRE(result.HasNextCapture(CAPTURE_TYPENAME)); + { + const auto& capture = result.NextCapture(CAPTURE_TYPENAME); + REQUIRE(capture.m_type == HeaderParserValueType::TYPE_NAME); + REQUIRE(capture.TypeNameValue() == "hello::world"); + } + + 
REQUIRE(!result.HasNextCapture(CAPTURE_TYPENAME)); + + REQUIRE(result.HasNextCapture(CAPTURE_FIRST_TYPENAME_IDENTIFIER)); + { + const auto& capture = result.NextCapture(CAPTURE_FIRST_TYPENAME_IDENTIFIER); + REQUIRE(capture.m_type == HeaderParserValueType::IDENTIFIER); + REQUIRE(capture.IdentifierValue() == "hello"); + } + + REQUIRE(!result.HasNextCapture(CAPTURE_FIRST_TYPENAME_IDENTIFIER)); + }); + + REQUIRE(test.PerformTest()); + REQUIRE(test.GetConsumedTokenCount() == 6); /* every supplied token except the trailing Invalid one */ + } + + TEST_CASE("Matcher: Can transform and capture in the same matcher", "[parsing][matcher]") /* a matcher carrying both a capture and a transform must capture the transformed token */ + { + static constexpr auto CAPTURE_NAME = 1; + + MatchersTestsHelper test; + const TokenPos pos; + const auto create = test.Factory(); + test.Matchers( + { + create.Type(HeaderParserValueType::STRUCT), + create.Identifier().Capture(CAPTURE_NAME).Transform([](HeaderMatcherFactory::token_list_t& tokens) + { + auto str = tokens[0].get().IdentifierValue(); + std::transform(str.begin(), str.end(), str.begin(), toupper); /* NOTE(review): toupper is passed by bare name; confirm the intended overload is in scope through the test includes */ + return HeaderParserValue::Identifier(tokens[0].get().GetPos(), new std::string(std::move(str))); + }), + create.Char('{') + }); + + test.Tokens({ + HeaderParserValue::Keyword(pos, HeaderParserValueType::STRUCT), + HeaderParserValue::Identifier(pos, new std::string("hello_world")), + HeaderParserValue::Character(pos, '{'), + HeaderParserValue::Invalid(pos) + }); + + test.MatchCallback([](sequence_result_t& result) + { + REQUIRE(result.NextTag() == matcher_t::NO_ID); + + REQUIRE(result.HasNextCapture(CAPTURE_NAME)); + { + const auto& capture = result.NextCapture(CAPTURE_NAME); + REQUIRE(capture.m_type == HeaderParserValueType::IDENTIFIER); + REQUIRE(capture.IdentifierValue() == "HELLO_WORLD"); + } + + REQUIRE(!result.HasNextCapture(CAPTURE_NAME)); + }); + + REQUIRE(test.PerformTest()); + REQUIRE(test.GetConsumedTokenCount() == 3); + } }