Implement sequence matcher and parser magic

This commit is contained in:
Jan 2021-02-13 00:12:26 +01:00
parent fe1f391bcc
commit 0f70f9586c
48 changed files with 1061 additions and 141 deletions

View File

@ -16,4 +16,22 @@
#else
#define _NODISCARD
#endif
#endif
#endif
/**
 * Thin holder that allows its value to be moved out even when the holder
 * itself is only reachable as a const rvalue (for example as an element of a
 * std::initializer_list, whose elements are const).
 *
 * The value is stored as a mutable member so that the const-rvalue conversion
 * operator may move from it.
 *
 * @tparam T Type of the wrapped value.
 */
template <typename T>
struct Movable
{
    // mutable: has to be moved from inside a const-qualified conversion operator.
    mutable T m_val;

    // Takes ownership of the passed rvalue.
    // ReSharper disable once CppNonExplicitConvertingConstructor
    Movable(T&& in)
        : m_val(std::move(in))
    {
    }

    // Hands the wrapped value to the caller; only callable on rvalues.
    // The wrapper is left holding a moved-from value.
    // ReSharper disable once CppNonExplicitConversionOperator
    operator T() const &&
    {
        return std::move(m_val);
    }
};

View File

@ -5,12 +5,16 @@
#include "Utils/ClassUtils.h"
#include "ILexer.h"
#include "IParserValue.h"
#include "IParserLineStream.h"
#include "ParsingException.h"
template <typename TokenType>
class AbstractLexer : public ILexer
{
// TokenType must inherit IParserValue
static_assert(std::is_base_of<IParserValue, TokenType>::value);
protected:
std::deque<TokenType> m_token_cache;
IParserLineStream* const m_stream;
@ -164,7 +168,7 @@ protected:
const auto* start = &m_current_line.m_line.c_str()[m_current_line_offset - 1];
char* end;
integerValue = std::strtoul(start, &end, 16);
integerValue = static_cast<int>(std::strtoul(start, &end, 16));
const auto numberLength = static_cast<unsigned>(end - start);
if (numberLength == 0 || isalnum(*end) || *end == '_')
throw ParsingException(GetPreviousCharacterPos(), "Invalid hex number");
@ -179,9 +183,7 @@ protected:
auto dot = false;
auto exponent = false;
if (*currentCharacter == '-')
currentCharacter++;
else if (*currentCharacter == '+')
if (*currentCharacter == '-' || *currentCharacter == '+')
currentCharacter++;
while (*currentCharacter)
@ -289,4 +291,14 @@ public:
{
m_token_cache.erase(m_token_cache.begin(), m_token_cache.begin() + amount);
}
// Reports whether the token at offset 0 (the next token to be consumed)
// is the end-of-file token.
_NODISCARD bool IsEof()
{
    const auto& nextToken = GetToken(0);
    return nextToken.IsEof();
}
// Returns the position reported by the token at offset 0
// (the next token to be consumed).
_NODISCARD const TokenPos& GetPos()
{
    const auto& nextToken = GetToken(0);
    return nextToken.GetPos();
}
};

View File

@ -0,0 +1,74 @@
#pragma once
#include <iostream>
#include <vector>
#include "AbstractLexer.h"
#include "Sequence/AbstractSequence.h"
template <typename TokenType, typename ParserState>
class AbstractParser
{
// TokenType must inherit IParserValue
static_assert(std::is_base_of<IParserValue, TokenType>::value);
public:
typedef AbstractSequence<TokenType, ParserState> sequence_t;
protected:
AbstractLexer<TokenType>* m_lexer;
std::unique_ptr<ParserState> m_state;
explicit AbstractParser(AbstractLexer<TokenType>* lexer, std::unique_ptr<ParserState> state)
: m_lexer(lexer),
m_state(std::move(state))
{
}
virtual const std::vector<sequence_t*>& GetTestsForState() = 0;
public:
virtual ~AbstractParser() = default;
AbstractParser(const AbstractParser& other) = default;
AbstractParser(AbstractParser&& other) noexcept = default;
AbstractParser& operator=(const AbstractParser& other) = default;
AbstractParser& operator=(AbstractParser&& other) noexcept = default;
bool Parse()
{
try
{
while (!m_lexer->IsEof())
{
auto testSuccessful = false;
const auto& availableTests = GetTestsForState();
for (const sequence_t* test : availableTests)
{
unsigned consumedTokenCount;
if (test->MatchSequence(m_lexer, m_state.get(), consumedTokenCount))
{
m_lexer->PopTokens(consumedTokenCount);
testSuccessful = true;
break;
}
}
if (!testSuccessful)
{
const TokenPos& pos = m_lexer->GetPos();
std::cout << "Error: " << pos.m_filename << " L" << pos.m_line << ':' << pos.m_column << " Could not parse expression." << std::endl;
return false;
}
}
}
catch (const ParsingException& e)
{
std::cout << "Error: " << e.FullMessage() << std::endl;
return false;
}
return true;
}
};

View File

@ -16,7 +16,8 @@ CommandsParserValue CommandsParserValue::EndOfFile(const TokenPos pos)
CommandsParserValue CommandsParserValue::Character(const TokenPos pos, const char c)
{
CommandsParserValue pv(pos, c);
CommandsParserValue pv(pos, CommandsParserValueType::CHARACTER);
pv.m_value.char_value = c;
return pv;
}
@ -93,6 +94,7 @@ CommandsParserValue CommandsParserValue::Identifier(const TokenPos pos, std::str
{
CommandsParserValue pv(pos, CommandsParserValueType::IDENTIFIER);
pv.m_value.string_value = identifier;
pv.m_hash = std::hash<std::string>()(*identifier);
return pv;
}
@ -103,10 +105,11 @@ CommandsParserValue CommandsParserValue::TypeName(const TokenPos pos, std::strin
return pv;
}
CommandsParserValue::CommandsParserValue(const TokenPos pos, const int type)
CommandsParserValue::CommandsParserValue(const TokenPos pos, const CommandsParserValueType type)
: m_pos(pos),
m_type(type),
m_value()
m_hash(0),
m_value{}
{
}
@ -129,6 +132,7 @@ CommandsParserValue::~CommandsParserValue()
CommandsParserValue::CommandsParserValue(CommandsParserValue&& other) noexcept
: m_type(other.m_type),
m_hash(other.m_hash),
m_value(other.m_value)
{
other.m_value = ValueType();
@ -138,11 +142,28 @@ CommandsParserValue& CommandsParserValue::operator=(CommandsParserValue&& other)
{
m_type = other.m_type;
m_value = other.m_value;
m_hash = other.m_hash;
other.m_value = ValueType();
return *this;
}
// True when this value carries the END_OF_FILE token type.
bool CommandsParserValue::IsEof() const
{
    const auto isEndOfFile = m_type == CommandsParserValueType::END_OF_FILE;
    return isEndOfFile;
}
// Gives read access to the position stored in m_pos.
const TokenPos& CommandsParserValue::GetPos() const
{
    return m_pos;
}
// Returns the stored character.
// Asserts that this value is of type CHARACTER.
char CommandsParserValue::CharacterValue() const
{
    assert(m_type == CommandsParserValueType::CHARACTER);

    return m_value.char_value;
}
int CommandsParserValue::IntegerValue() const
{
assert(m_type == CommandsParserValueType::INTEGER);
@ -167,6 +188,12 @@ std::string& CommandsParserValue::IdentifierValue() const
return *m_value.string_value;
}
// Returns the hash stored in m_hash.
// Asserts that this value is of type IDENTIFIER.
size_t CommandsParserValue::IdentifierHash() const
{
    assert(m_type == CommandsParserValueType::IDENTIFIER);

    return m_hash;
}
std::string& CommandsParserValue::TypeNameValue() const
{
assert(m_type == CommandsParserValueType::TYPE_NAME);

View File

@ -2,53 +2,52 @@
#include <string>
#include "Parsing/IParserValue.h"
#include "Utils/ClassUtils.h"
#include "Parsing/TokenPos.h"
class CommandsParserValueType
enum class CommandsParserValueType
{
CommandsParserValueType() = default;
// Meta tokens
INVALID,
END_OF_FILE,
public:
enum
{
FIRST = 0x100,
// Single character
CHARACTER,
// Meta tokens
INVALID = FIRST,
END_OF_FILE,
// Symbol tokens
SHIFT_LEFT,
SHIFT_RIGHT,
EQUALS,
NOT_EQUAL,
GREATER_EQUAL,
LESS_EQUAL,
LOGICAL_AND,
LOGICAL_OR,
// Symbol tokens
SHIFT_LEFT,
SHIFT_RIGHT,
EQUALS,
NOT_EQUAL,
GREATER_EQUAL,
LESS_EQUAL,
LOGICAL_AND,
LOGICAL_OR,
// Generic token types
INTEGER,
FLOATING_POINT,
STRING,
IDENTIFIER,
// Generic token types
INTEGER,
FLOATING_POINT,
STRING,
IDENTIFIER,
// Parser created
TYPE_NAME,
// Parser created
TYPE_NAME,
// End
MAX
};
// End
MAX
};
class CommandsParserValue
class CommandsParserValue final : public IParserValue
{
public:
TokenPos m_pos;
int m_type;
CommandsParserValueType m_type;
size_t m_hash;
union ValueType
{
char char_value;
int int_value;
double double_value;
std::string* string_value;
@ -72,18 +71,23 @@ public:
static CommandsParserValue TypeName(TokenPos pos, std::string* typeName);
private:
CommandsParserValue(TokenPos pos, int type);
CommandsParserValue(TokenPos pos, CommandsParserValueType type);
public:
~CommandsParserValue();
~CommandsParserValue() override;
CommandsParserValue(const CommandsParserValue& other) = delete;
CommandsParserValue(CommandsParserValue&& other) noexcept;
CommandsParserValue& operator=(const CommandsParserValue& other) = delete;
CommandsParserValue& operator=(CommandsParserValue&& other) noexcept;
_NODISCARD bool IsEof() const override;
_NODISCARD const TokenPos& GetPos() const override;
_NODISCARD char CharacterValue() const;
_NODISCARD int IntegerValue() const;
_NODISCARD double FloatingPointValue() const;
_NODISCARD std::string& StringValue() const;
_NODISCARD std::string& IdentifierValue() const;
_NODISCARD size_t IdentifierHash() const;
_NODISCARD std::string& TypeNameValue() const;
};

View File

@ -3,6 +3,7 @@
#include <iostream>
#include "Impl/HeaderLexer.h"
#include "Impl/HeaderParser.h"
#include "Parsing/ParsingException.h"
#include "Parsing/Impl/CommentRemovingStreamProxy.h"
#include "Parsing/Impl/DefinesStreamProxy.h"
@ -53,100 +54,8 @@ bool HeaderFileReader::ReadHeaderFile(IDataRepository* repository)
SetupStreamProxies();
auto lexer = std::make_unique<HeaderLexer>(m_stream);
const auto lexer = std::make_unique<HeaderLexer>(m_stream);
const auto parser = std::make_unique<HeaderParser>(lexer.get(), repository);
try
{
/*while (true)
{
auto line = m_stream->NextLine();
if (line.IsEof())
break;
std::cout << "Line " << line.m_filename.get() << ":" << line.m_line_number << ": " << line.m_line << "\n";
}*/
auto eof = false;
while (!eof)
{
const auto& token = lexer->GetToken(0);
switch (token.m_type)
{
case HeaderParserValueType::END_OF_FILE:
case HeaderParserValueType::INVALID:
eof = true;
break;
case HeaderParserValueType::CHARACTER:
std::cout << "Token " << token.CharacterValue() << "\n";
break;
case HeaderParserValueType::IDENTIFIER:
std::cout << "Token IDENTIFIER \"" << token.IdentifierValue() << "\"\n";
break;
case HeaderParserValueType::STRING:
std::cout << "Token STRING \"" << token.StringValue() << "\"\n";
break;
case HeaderParserValueType::INTEGER:
std::cout << "Token INTEGER " << token.IntegerValue() << "\n";
break;
case HeaderParserValueType::FLOATING_POINT:
std::cout << "Token FLOATINGPOINT " << token.FloatingPointValue() << "\n";
break;
case HeaderParserValueType::DECLSPEC:
std::cout << "Token DECLSPEC\n";
break;
case HeaderParserValueType::ALIGN:
std::cout << "Token ALIGN\n";
break;
case HeaderParserValueType::ALIGNAS:
std::cout << "Token ALIGNAS\n";
break;
case HeaderParserValueType::CONST:
std::cout << "Token CONST\n";
break;
case HeaderParserValueType::ENUM:
std::cout << "Token ENUM\n";
break;
case HeaderParserValueType::NAMESPACE:
std::cout << "Token NAMESPACE\n";
break;
case HeaderParserValueType::STRUCT:
std::cout << "Token STRUCT\n";
break;
case HeaderParserValueType::TYPEDEF:
std::cout << "Token TYPEDEF\n";
break;
case HeaderParserValueType::UNION:
std::cout << "Token UNION\n";
break;
default:
std::cout << "Token UNKNOWN\n";
break;
}
lexer->PopTokens(1);
}
}
catch (const ParsingException& e)
{
std::cout << "Error: " << e.FullMessage() << std::endl;
}
return true;
return parser->Parse();
}

View File

@ -0,0 +1,17 @@
#include "HeaderParser.h"
#include "Parsing/Header/Sequence/SequenceNamespace.h"
// Creates the parser with a fresh HeaderParserState and registers the
// namespace sequence as the only sequence that is tried.
HeaderParser::HeaderParser(HeaderLexer* lexer, IDataRepository* targetRepository)
    : AbstractParser(lexer, std::make_unique<HeaderParserState>()),
      m_repository(targetRepository)
{
    // m_tests owns the sequence objects; m_normal_tests only stores the raw
    // pointers that are handed out by GetTestsForState().
    m_tests.emplace_back(std::make_unique<SequenceNamespace>());
    m_normal_tests.push_back(m_tests.back().get());
}
// Always returns the same list of sequences (m_normal_tests).
const std::vector<HeaderParser::sequence_t*>& HeaderParser::GetTestsForState()
{
    const auto& tests = m_normal_tests;
    return tests;
}

View File

@ -1,6 +1,20 @@
#pragma once
class HeaderParser
{
#include "HeaderLexer.h"
#include "HeaderParserState.h"
#include "Parsing/AbstractParser.h"
#include "Persistence/IDataRepository.h"
};
// Parser for header files: an AbstractParser working on HeaderParserValue
// tokens with a HeaderParserState.
class HeaderParser final : public AbstractParser<HeaderParserValue, HeaderParserState>
{
    // Repository passed to the constructor.
    IDataRepository* m_repository;

    // Owning storage for all sequences.
    std::vector<std::unique_ptr<sequence_t>> m_tests;

    // Non-owning list that is returned from GetTestsForState().
    std::vector<sequence_t*> m_normal_tests;

protected:
    const std::vector<sequence_t*>& GetTestsForState() override;

public:
    HeaderParser(HeaderLexer* lexer, IDataRepository* targetRepository);
};

View File

@ -0,0 +1,7 @@
#pragma once
// State object handed to header sequences; it currently has no members.
class HeaderParserState
{
};

View File

@ -149,6 +149,16 @@ HeaderParserValue& HeaderParserValue::operator=(HeaderParserValue&& other) noexc
return *this;
}
// True when this value carries the END_OF_FILE token type.
bool HeaderParserValue::IsEof() const
{
    const auto isEndOfFile = m_type == HeaderParserValueType::END_OF_FILE;
    return isEndOfFile;
}
// Gives read access to the position stored in m_pos.
const TokenPos& HeaderParserValue::GetPos() const
{
    return m_pos;
}
char HeaderParserValue::CharacterValue() const
{
assert(m_type == HeaderParserValueType::CHARACTER);

View File

@ -2,6 +2,8 @@
#include <string>
#include "Parsing/IParserValue.h"
#include "Utils/ClassUtils.h"
#include "Parsing/TokenPos.h"
@ -48,7 +50,7 @@ enum class HeaderParserValueType
MAX
};
class HeaderParserValue
class HeaderParserValue final : public IParserValue
{
public:
TokenPos m_pos;
@ -83,12 +85,15 @@ private:
HeaderParserValue(TokenPos pos, HeaderParserValueType type);
public:
~HeaderParserValue();
~HeaderParserValue() override;
HeaderParserValue(const HeaderParserValue& other) = delete;
HeaderParserValue(HeaderParserValue&& other) noexcept;
HeaderParserValue& operator=(const HeaderParserValue& other) = delete;
HeaderParserValue& operator=(HeaderParserValue&& other) noexcept;
_NODISCARD bool IsEof() const override;
_NODISCARD const TokenPos& GetPos() const override;
_NODISCARD char CharacterValue() const;
_NODISCARD int IntegerValue() const;
_NODISCARD double FloatingPointValue() const;

View File

@ -0,0 +1,14 @@
#include "HeaderMatcherCharacter.h"
// Stores the character a token has to carry for this matcher to match.
HeaderMatcherCharacter::HeaderMatcherCharacter(const char c)
    : m_char(c)
{
}
// Matches exactly one token when the token at tokenOffset is of type CHARACTER
// and holds m_char; otherwise reports no match.
MatcherResult<HeaderParserValue> HeaderMatcherCharacter::CanMatch(AbstractLexer<HeaderParserValue>* lexer, const unsigned tokenOffset)
{
    const auto& token = lexer->GetToken(tokenOffset);

    // The character is only read once the type check has passed.
    if (token.m_type != HeaderParserValueType::CHARACTER || token.CharacterValue() != m_char)
        return MatcherResult<HeaderParserValue>::NoMatch();

    return MatcherResult<HeaderParserValue>::Match(1);
}

View File

@ -0,0 +1,15 @@
#pragma once
#include "Parsing/Header/Impl/HeaderParserValue.h"
#include "Parsing/Matcher/AbstractMatcher.h"
// Matcher for a single CHARACTER token holding one specific character.
class HeaderMatcherCharacter final : public AbstractMatcher<HeaderParserValue>
{
    // Character that has to be present for a match.
    char m_char;

protected:
    MatcherResult<HeaderParserValue> CanMatch(AbstractLexer<HeaderParserValue>* lexer, unsigned tokenOffset) override;

public:
    explicit HeaderMatcherCharacter(char c);
};

View File

@ -0,0 +1,19 @@
#include "HeaderMatcherFactory.h"
#include "HeaderMatcherCharacter.h"
#include "HeaderMatcherValueType.h"
// Forwards the label supplier to the generic matcher factory.
HeaderMatcherFactory::HeaderMatcherFactory(const IMatcherForLabelSupplier<HeaderParserValue>* labelSupplier)
    : AbstractMatcherFactory(labelSupplier)
{
}
// Creates a wrapped matcher that matches one token of the given type.
MatcherFactoryWrapper<HeaderParserValue> HeaderMatcherFactory::Type(HeaderParserValueType type) const
{
    auto typeMatcher = std::make_unique<HeaderMatcherValueType>(type);
    return MatcherFactoryWrapper<HeaderParserValue>(std::move(typeMatcher));
}
// Creates a wrapped matcher that matches one CHARACTER token holding c.
MatcherFactoryWrapper<HeaderParserValue> HeaderMatcherFactory::Char(char c) const
{
    auto characterMatcher = std::make_unique<HeaderMatcherCharacter>(c);
    return MatcherFactoryWrapper<HeaderParserValue>(std::move(characterMatcher));
}

View File

@ -0,0 +1,13 @@
#pragma once
#include "Parsing/Header/Impl/HeaderParserValue.h"
#include "Parsing/Matcher/AbstractMatcherFactory.h"
// Matcher factory for header tokens; adds token type and character matchers
// to the generic factory it derives from.
class HeaderMatcherFactory final : public AbstractMatcherFactory<HeaderParserValue>
{
public:
    explicit HeaderMatcherFactory(const IMatcherForLabelSupplier<HeaderParserValue>* labelSupplier);

    // Matcher for one token of the given type.
    _NODISCARD MatcherFactoryWrapper<HeaderParserValue> Type(HeaderParserValueType type) const;

    // Matcher for one CHARACTER token holding c.
    _NODISCARD MatcherFactoryWrapper<HeaderParserValue> Char(char c) const;
};

View File

@ -0,0 +1,13 @@
#include "HeaderMatcherValueType.h"
// Stores the token type a token has to have for this matcher to match.
HeaderMatcherValueType::HeaderMatcherValueType(HeaderParserValueType type)
    : m_type(type)
{
}
// Matches exactly one token when the token at tokenOffset has the type m_type;
// otherwise reports no match.
MatcherResult<HeaderParserValue> HeaderMatcherValueType::CanMatch(AbstractLexer<HeaderParserValue>* lexer, const unsigned tokenOffset)
{
    const auto typeMatches = lexer->GetToken(tokenOffset).m_type == m_type;

    if (typeMatches)
        return MatcherResult<HeaderParserValue>::Match(1);

    return MatcherResult<HeaderParserValue>::NoMatch();
}

View File

@ -0,0 +1,15 @@
#pragma once
#include "Parsing/Header/Impl/HeaderParserValue.h"
#include "Parsing/Matcher/AbstractMatcher.h"
// Matcher for a single token of one specific token type.
class HeaderMatcherValueType final : public AbstractMatcher<HeaderParserValue>
{
    // Token type that has to be present for a match.
    HeaderParserValueType m_type;

protected:
    MatcherResult<HeaderParserValue> CanMatch(AbstractLexer<HeaderParserValue>* lexer, unsigned tokenOffset) override;

public:
    explicit HeaderMatcherValueType(HeaderParserValueType type);
};

View File

@ -0,0 +1,18 @@
#include "SequenceNamespace.h"
#include "Parsing/Header/Matcher/HeaderMatcherFactory.h"
// Sets up the matchers of this sequence:
// a NAMESPACE token, an IDENTIFIER token (captured as CAPTURE_NAME) and a '{' character.
SequenceNamespace::SequenceNamespace()
{
    // The sequence itself is passed to the factory as label supplier.
    const HeaderMatcherFactory create(this);

    AddMatchers({
        create.Type(HeaderParserValueType::NAMESPACE),
        create.Type(HeaderParserValueType::IDENTIFIER).Capture(CAPTURE_NAME),
        create.Char('{')
    });
}
// Called after the sequence matched. Currently does nothing with the match.
void SequenceNamespace::ProcessMatch(HeaderParserState* state, const SequenceResult<HeaderParserValue>& result) const
{
    // Intentionally empty.
}

View File

@ -0,0 +1,16 @@
#pragma once
#include "Parsing/Sequence/AbstractSequence.h"
#include "Parsing/Header/Impl/HeaderParserState.h"
#include "Parsing/Header/Impl/HeaderParserValue.h"
// Sequence for header tokens that is set up with namespace matchers in its constructor.
class SequenceNamespace final : public AbstractSequence<HeaderParserValue, HeaderParserState>
{
    // Capture id under which the identifier token is recorded.
    static constexpr int CAPTURE_NAME = 0;

protected:
    void ProcessMatch(HeaderParserState* state, const SequenceResult<HeaderParserValue>& result) const override;

public:
    SequenceNamespace();
};

View File

@ -0,0 +1,20 @@
#pragma once
#include "Utils/ClassUtils.h"
#include "TokenPos.h"
// Interface every token class has to implement so that the generic lexer,
// parser and matcher templates can work with it.
class IParserValue
{
protected:
    // Only constructible through derived classes.
    IParserValue() = default;

public:
    virtual ~IParserValue() = default;
    IParserValue(const IParserValue& other) = default;
    IParserValue(IParserValue&& other) noexcept = default;
    IParserValue& operator=(const IParserValue& other) = default;
    IParserValue& operator=(IParserValue&& other) noexcept = default;

    // Whether this token marks the end of the input.
    _NODISCARD virtual bool IsEof() const = 0;

    // Position of this token.
    _NODISCARD virtual const TokenPos& GetPos() const = 0;
};

View File

@ -0,0 +1,95 @@
#pragma once
#include <functional>
#include "Parsing/IParserValue.h"
#include "Parsing/AbstractLexer.h"
#include "Parsing/Matcher/MatcherResult.h"
/**
 * Base class of all matchers. A matcher inspects the tokens of a lexer starting
 * at a given offset and reports whether (and how many of) these tokens match.
 *
 * On top of the raw match supplied by CanMatch(), Match() attaches the optional
 * tag, capture and transformation that were configured for the matcher.
 *
 * @tparam TokenType Token class produced by the lexer; must derive from IParserValue.
 */
template <typename TokenType>
class AbstractMatcher
{
    // TokenType must inherit IParserValue
    static_assert(std::is_base_of<IParserValue, TokenType>::value);

public:
    // Value of tag and capture ids that were not set.
    static constexpr int NO_ID = -1;

private:
    int m_tag_id;
    int m_capture_id;
    // When set, a successful match reports zero consumed tokens.
    bool m_no_consume;
    // Optional function that turns the matched tokens into one fabricated token.
    std::function<TokenType(std::vector<std::reference_wrapper<const TokenType>>)> m_transform_func;

protected:
    AbstractMatcher()
        : m_tag_id(NO_ID),
          m_capture_id(NO_ID),
          m_no_consume(false)
    {
    }

    // Performs the actual matching beginning with the token at tokenOffset.
    virtual MatcherResult<TokenType> CanMatch(AbstractLexer<TokenType>* lexer, unsigned tokenOffset) = 0;

public:
    virtual ~AbstractMatcher() = default;
    AbstractMatcher(const AbstractMatcher& other) = default;
    AbstractMatcher(AbstractMatcher&& other) noexcept = default;
    AbstractMatcher& operator=(const AbstractMatcher& other) = default;
    AbstractMatcher& operator=(AbstractMatcher&& other) noexcept = default;

    // Sets the tag that is added to the result of every successful match.
    void SetTag(const int tagId)
    {
        m_tag_id = tagId;
    }

    // Sets the id under which matched tokens are captured.
    void SetCapture(const int captureId)
    {
        m_capture_id = captureId;
    }

    // Passing false makes successful matches report zero consumed tokens.
    void SetConsume(const bool value)
    {
        m_no_consume = !value;
    }

    // Sets the function used to fabricate a single token out of the matched tokens.
    void SetTransform(std::function<TokenType(std::vector<std::reference_wrapper<const TokenType>>)> transform)
    {
        m_transform_func = std::move(transform);
    }

    /**
     * Matches the tokens of the lexer beginning at tokenOffset.
     *
     * @param lexer       Lexer supplying the tokens.
     * @param tokenOffset Offset of the first token to look at.
     * @return The result of CanMatch(); when it matched, extended by the tag and
     *         the captures configured for this matcher.
     */
    MatcherResult<TokenType> Match(AbstractLexer<TokenType>* lexer, const unsigned tokenOffset)
    {
        MatcherResult<TokenType> result = CanMatch(lexer, tokenOffset);

        if (!result.m_matches)
            return result;

        if (m_tag_id != NO_ID)
            result.m_tags.push_back(m_tag_id);

        if (m_capture_id != NO_ID)
        {
            if (m_transform_func)
            {
                // Capture one fabricated token built from all matched tokens.
                std::vector<std::reference_wrapper<const TokenType>> tokens;
                tokens.reserve(result.m_consumed_token_count);

                for (auto i = 0u; i < result.m_consumed_token_count; i++)
                    tokens.emplace_back(lexer->GetToken(tokenOffset + i));

                result.m_fabricated_tokens.push_back(m_transform_func(tokens));
                result.m_captures.emplace_back(m_capture_id, result.m_fabricated_tokens.size() - 1, true);
            }
            else
            {
                // Capture every matched token by its offset in the lexer.
                for (auto i = 0u; i < result.m_consumed_token_count; i++)
                {
                    result.m_captures.emplace_back(m_capture_id, tokenOffset + i);
                }
            }
        }

        // Apply the no-consume setting last: tags and captures above still refer
        // to the matched tokens, but the match itself does not advance the caller.
        if (m_no_consume)
            result.m_consumed_token_count = 0;

        return result;
    }
};

View File

@ -0,0 +1,113 @@
#pragma once
#include <memory>
#include "Utils/ClassUtils.h"
#include "AbstractMatcher.h"
#include "MatcherAnd.h"
#include "MatcherLabel.h"
#include "MatcherLoop.h"
#include "MatcherOptional.h"
#include "MatcherOr.h"
#include "Parsing/IParserValue.h"
/**
 * Builder-style wrapper around a freshly created matcher. The setters configure
 * the wrapped matcher and return the wrapper again so that calls can be chained.
 * Building (explicitly or through one of the conversion operators) moves the
 * matcher out of the wrapper.
 *
 * @tparam TokenType Token class of the wrapped matcher; must derive from IParserValue.
 */
template <typename TokenType>
class MatcherFactoryWrapper
{
    // TokenType must inherit IParserValue
    static_assert(std::is_base_of<IParserValue, TokenType>::value);

    std::unique_ptr<AbstractMatcher<TokenType>> m_matcher;

public:
    explicit MatcherFactoryWrapper(std::unique_ptr<AbstractMatcher<TokenType>> matcher)
        : m_matcher(std::move(matcher))
    {
    }

    // Sets the tag id of the wrapped matcher.
    MatcherFactoryWrapper<TokenType>& Tag(const int tagId)
    {
        m_matcher->SetTag(tagId);
        return *this;
    }

    // Sets the capture id of the wrapped matcher.
    MatcherFactoryWrapper<TokenType>& Capture(const int captureId)
    {
        m_matcher->SetCapture(captureId);
        return *this;
    }

    // Disables consuming on the wrapped matcher.
    MatcherFactoryWrapper<TokenType>& NoConsume()
    {
        m_matcher->SetConsume(false);
        return *this;
    }

    // Sets the transformation function of the wrapped matcher.
    MatcherFactoryWrapper<TokenType>& Transform(std::function<TokenType(std::vector<std::reference_wrapper<const TokenType>>)> transform)
    {
        m_matcher->SetTransform(std::move(transform));
        return *this;
    }

    // Releases the wrapped matcher to the caller; the wrapper is empty afterwards.
    std::unique_ptr<AbstractMatcher<TokenType>> Build()
    {
        return std::move(m_matcher);
    }

    // Same as Build().
    // ReSharper disable once CppNonExplicitConversionOperator
    operator std::unique_ptr<AbstractMatcher<TokenType>>()
    {
        return std::move(m_matcher);
    }

    // Same as Build(), with the matcher wrapped into a Movable.
    // ReSharper disable once CppNonExplicitConversionOperator
    operator Movable<std::unique_ptr<AbstractMatcher<TokenType>>>()
    {
        return Movable<std::unique_ptr<AbstractMatcher<TokenType>>>(std::move(m_matcher));
    }
};
/**
 * Factory for the token independent matchers (and, or, loop, optional, label).
 * Every created matcher is returned inside a MatcherFactoryWrapper so that it
 * can be configured further before it is built.
 *
 * @tparam TokenType Token class of the created matchers; must derive from IParserValue.
 */
template <typename TokenType>
class AbstractMatcherFactory
{
    // TokenType must inherit IParserValue
    static_assert(std::is_base_of<IParserValue, TokenType>::value);

    // Handed to every label matcher so that it can look up the matcher for its label.
    const IMatcherForLabelSupplier<TokenType>* m_label_supplier;

public:
    explicit AbstractMatcherFactory(const IMatcherForLabelSupplier<TokenType>* labelSupplier)
        : m_label_supplier(labelSupplier)
    {
    }

    // Matcher that requires all given matchers to match one after another.
    // The list uses Movable elements because that is what MatcherAnd accepts:
    // it allows the matchers to be moved out of the (const) initializer list.
    _NODISCARD MatcherFactoryWrapper<TokenType> And(std::initializer_list<Movable<std::unique_ptr<AbstractMatcher<TokenType>>>> matchers) const
    {
        return MatcherFactoryWrapper<TokenType>(std::make_unique<MatcherAnd<TokenType>>(matchers));
    }

    // Matcher that uses the first of the given matchers that matches.
    // The list uses Movable elements for the same reason as in And().
    _NODISCARD MatcherFactoryWrapper<TokenType> Or(std::initializer_list<Movable<std::unique_ptr<AbstractMatcher<TokenType>>>> matchers) const
    {
        return MatcherFactoryWrapper<TokenType>(std::make_unique<MatcherOr<TokenType>>(matchers));
    }

    // Matcher that applies the given matcher repeatedly; at least one repetition is required.
    _NODISCARD MatcherFactoryWrapper<TokenType> Loop(std::unique_ptr<AbstractMatcher<TokenType>> matcher) const
    {
        return MatcherFactoryWrapper<TokenType>(std::make_unique<MatcherLoop<TokenType>>(std::move(matcher)));
    }

    // Like Loop(), but also matches when there is no repetition at all.
    _NODISCARD MatcherFactoryWrapper<TokenType> OptionalLoop(std::unique_ptr<AbstractMatcher<TokenType>> matcher) const
    {
        return MatcherFactoryWrapper<TokenType>(std::make_unique<MatcherOptional<TokenType>>(std::make_unique<MatcherLoop<TokenType>>(std::move(matcher))));
    }

    // Matcher that always matches, with or without the given matcher matching.
    _NODISCARD MatcherFactoryWrapper<TokenType> Optional(std::unique_ptr<AbstractMatcher<TokenType>> matcher) const
    {
        return MatcherFactoryWrapper<TokenType>(std::make_unique<MatcherOptional<TokenType>>(std::move(matcher)));
    }

    // Matcher that delegates to the matcher the label supplier returns for the label.
    // MatcherLabel needs the supplier in addition to the label.
    _NODISCARD MatcherFactoryWrapper<TokenType> Label(const int label) const
    {
        return MatcherFactoryWrapper<TokenType>(std::make_unique<MatcherLabel<TokenType>>(m_label_supplier, label));
    }
};

View File

@ -0,0 +1,40 @@
#pragma once
#include <iterator>
#include <memory>
#include "Parsing/IParserValue.h"
#include "AbstractMatcher.h"
/**
 * Matcher that only matches when all of its child matchers match one after
 * another. The combined result absorbs the results of all children.
 *
 * @tparam TokenType Token class that is matched; must derive from IParserValue.
 */
template <typename TokenType>
class MatcherAnd final : public AbstractMatcher<TokenType>
{
    // TokenType must inherit IParserValue
    static_assert(std::is_base_of<IParserValue, TokenType>::value);

    std::vector<std::unique_ptr<AbstractMatcher<TokenType>>> m_matchers;

protected:
    MatcherResult<TokenType> CanMatch(AbstractLexer<TokenType>* lexer, const unsigned tokenOffset) override
    {
        auto combined = MatcherResult<TokenType>::Match(0);

        for (const auto& child : m_matchers)
        {
            // Every child continues behind the tokens the previous children consumed.
            auto childResult = child->Match(lexer, tokenOffset + combined.m_consumed_token_count);

            if (childResult.m_matches)
            {
                combined.Absorb(std::move(childResult));
                continue;
            }

            // A single failing child makes the whole matcher fail.
            return MatcherResult<TokenType>::NoMatch();
        }

        return combined;
    }

public:
    // The matchers are moved out of the initializer list into this matcher.
    MatcherAnd(std::initializer_list<Movable<std::unique_ptr<AbstractMatcher<TokenType>>>> matchers)
        : m_matchers(std::make_move_iterator(matchers.begin()), std::make_move_iterator(matchers.end()))
    {
    }
};

View File

@ -0,0 +1,50 @@
#pragma once
#include "Utils/ClassUtils.h"
#include "Parsing/IParserValue.h"
#include "AbstractMatcher.h"
/**
 * Interface of objects that can look up a matcher by its label.
 *
 * @tparam TokenType Token class of the matchers; must derive from IParserValue.
 */
template <typename TokenType>
class IMatcherForLabelSupplier
{
    // TokenType must inherit IParserValue
    static_assert(std::is_base_of<IParserValue, TokenType>::value);

public:
    IMatcherForLabelSupplier() = default;
    virtual ~IMatcherForLabelSupplier() = default;
    IMatcherForLabelSupplier(const IMatcherForLabelSupplier& other) = default;
    IMatcherForLabelSupplier(IMatcherForLabelSupplier&& other) noexcept = default;
    IMatcherForLabelSupplier& operator=(const IMatcherForLabelSupplier& other) = default;
    IMatcherForLabelSupplier& operator=(IMatcherForLabelSupplier&& other) noexcept = default;

    // Returns the matcher belonging to the label.
    // MatcherLabel treats a null result as "no match".
    _NODISCARD virtual AbstractMatcher<TokenType>* GetMatcherForLabel(int label) const = 0;
};
/**
 * Matcher that delegates to the matcher a supplier returns for a label.
 * The lookup happens on every match attempt.
 *
 * @tparam TokenType Token class that is matched; must derive from IParserValue.
 */
template <typename TokenType>
class MatcherLabel final : public AbstractMatcher<TokenType>
{
    // TokenType must inherit IParserValue
    static_assert(std::is_base_of<IParserValue, TokenType>::value);

    const IMatcherForLabelSupplier<TokenType>* m_supplier;
    int m_label;

protected:
    MatcherResult<TokenType> CanMatch(AbstractLexer<TokenType>* lexer, unsigned tokenOffset) override
    {
        if (AbstractMatcher<TokenType>* target = m_supplier->GetMatcherForLabel(m_label))
            return target->Match(lexer, tokenOffset);

        // No matcher is available for the label.
        return MatcherResult<TokenType>::NoMatch();
    }

public:
    MatcherLabel(const IMatcherForLabelSupplier<TokenType>* supplier, const int label)
        : m_supplier(supplier),
          m_label(label)
    {
    }
};

View File

@ -0,0 +1,44 @@
#pragma once
#include <memory>
#include "Parsing/IParserValue.h"
#include "AbstractMatcher.h"
/**
 * Matcher that applies its child matcher repeatedly until the child does not
 * match anymore. At least one successful repetition is required for a match.
 *
 * @tparam TokenType Token class that is matched; must derive from IParserValue.
 */
template <typename TokenType>
class MatcherLoop final : public AbstractMatcher<TokenType>
{
    // TokenType must inherit IParserValue
    static_assert(std::is_base_of<IParserValue, TokenType>::value);

    std::unique_ptr<AbstractMatcher<TokenType>> m_matcher;

protected:
    MatcherResult<TokenType> CanMatch(AbstractLexer<TokenType>* lexer, const unsigned tokenOffset) override
    {
        auto matchResult = MatcherResult<TokenType>::Match(0);
        auto loopedAtLeastOnce = false;

        while (true)
        {
            // Every repetition continues behind the tokens consumed so far.
            auto result = m_matcher->Match(lexer, tokenOffset + matchResult.m_consumed_token_count);

            if (!result.m_matches)
            {
                if (loopedAtLeastOnce)
                    return matchResult;

                return MatcherResult<TokenType>::NoMatch();
            }

            loopedAtLeastOnce = true;

            const auto madeProgress = result.m_consumed_token_count > 0;
            matchResult.Absorb(std::move(result));

            // A repetition that matched without consuming a token would match
            // again at the very same offset forever, so the loop ends here.
            if (!madeProgress)
                return matchResult;
        }
    }

public:
    explicit MatcherLoop(std::unique_ptr<AbstractMatcher<TokenType>> matcher)
        : m_matcher(std::move(matcher))
    {
    }
};

View File

@ -0,0 +1,33 @@
#pragma once
#include <iterator>
#include <memory>
#include "Parsing/IParserValue.h"
#include "AbstractMatcher.h"
/**
 * Matcher that always matches: it yields the result of its child matcher when
 * the child matches and an empty match (zero tokens) otherwise.
 *
 * @tparam TokenType Token class that is matched; must derive from IParserValue.
 */
template <typename TokenType>
class MatcherOptional final : public AbstractMatcher<TokenType>
{
    // TokenType must inherit IParserValue
    static_assert(std::is_base_of<IParserValue, TokenType>::value);

    std::unique_ptr<AbstractMatcher<TokenType>> m_matcher;

protected:
    MatcherResult<TokenType> CanMatch(AbstractLexer<TokenType>* lexer, unsigned tokenOffset) override
    {
        auto childResult = m_matcher->Match(lexer, tokenOffset);

        // The child is optional, its failure turns into a match without tokens.
        if (!childResult.m_matches)
            return MatcherResult<TokenType>::Match(0);

        return childResult;
    }

public:
    explicit MatcherOptional(std::unique_ptr<AbstractMatcher<TokenType>> matcher)
        : m_matcher(std::move(matcher))
    {
    }
};

View File

@ -0,0 +1,38 @@
#pragma once
#include <iterator>
#include <memory>
#include "Parsing/IParserValue.h"
#include "AbstractMatcher.h"
/**
 * Matcher that tries its child matchers in order and yields the result of the
 * first child that matches. It does not match when no child matches.
 *
 * @tparam TokenType Token class that is matched; must derive from IParserValue.
 */
template <typename TokenType>
class MatcherOr final : public AbstractMatcher<TokenType>
{
    // TokenType must inherit IParserValue
    static_assert(std::is_base_of<IParserValue, TokenType>::value);

    std::vector<std::unique_ptr<AbstractMatcher<TokenType>>> m_matchers;

protected:
    MatcherResult<TokenType> CanMatch(AbstractLexer<TokenType>* lexer, unsigned tokenOffset) override
    {
        for (const auto& matcher : m_matchers)
        {
            // Deliberately not const: a const local cannot be moved from when it
            // is returned. The result would be copied instead, which cannot work
            // for token types that are not copyable.
            MatcherResult<TokenType> result = matcher->Match(lexer, tokenOffset);

            if (!result.m_matches)
                continue;

            return result;
        }

        return MatcherResult<TokenType>::NoMatch();
    }

public:
    // The matchers are moved out of the initializer list into this matcher.
    MatcherOr(std::initializer_list<Movable<std::unique_ptr<AbstractMatcher<TokenType>>>> matchers)
        : m_matchers(std::make_move_iterator(matchers.begin()), std::make_move_iterator(matchers.end()))
    {
    }
};

View File

@ -0,0 +1,96 @@
#pragma once
#include <iterator>
#include <vector>
#include "Utils/ClassUtils.h"
#include "Parsing/IParserValue.h"
/**
 * Outcome of a match attempt: whether it matched, how many tokens it consumed
 * and the tags, captures and fabricated tokens collected on the way.
 *
 * @tparam TokenType Token class that was matched; must derive from IParserValue.
 */
template <typename TokenType>
class MatcherResult
{
    // TokenType must inherit IParserValue
    static_assert(std::is_base_of<IParserValue, TokenType>::value);

public:
    /**
     * A captured token, identified by the capture id and a token index.
     * The index either refers to a token of the lexer or - when the capture is
     * fabricated - to an entry of m_fabricated_tokens. The highest bit of the
     * stored index is used as the "fabricated" flag.
     */
    class Capture
    {
        static constexpr unsigned FABRICATED_FLAG_MASK = std::numeric_limits<unsigned>::max() ^ std::numeric_limits<int>::max();
        static constexpr unsigned TOKEN_INDEX_MASK = ~FABRICATED_FLAG_MASK;

        int m_capture_id;
        unsigned m_token_index;

    public:
        // Capture of a token of the lexer.
        Capture(const int captureId, const unsigned tokenIndex)
            : Capture(captureId, tokenIndex, false)
        {
        }

        Capture(const int captureId, const unsigned tokenIndex, const bool isFabricated)
            : m_capture_id(captureId),
              m_token_index(isFabricated ? tokenIndex | FABRICATED_FLAG_MASK : tokenIndex)
        {
        }

        _NODISCARD int GetCaptureId() const
        {
            return m_capture_id;
        }

        _NODISCARD bool IsFabricated() const
        {
            return (m_token_index & FABRICATED_FLAG_MASK) != 0;
        }

        // The index without the "fabricated" flag.
        _NODISCARD unsigned GetTokenIndex() const
        {
            return m_token_index & TOKEN_INDEX_MASK;
        }
    };

    bool m_matches;
    unsigned m_consumed_token_count;
    std::vector<int> m_tags;
    std::vector<Capture> m_captures;
    std::vector<TokenType> m_fabricated_tokens;

private:
    MatcherResult(const bool matches, const unsigned consumedTokenCount)
        : m_matches(matches),
          m_consumed_token_count(consumedTokenCount)
    {
    }

public:
    // Creates a successful result that consumed the given amount of tokens.
    static MatcherResult Match(unsigned consumedTokenCount)
    {
        return MatcherResult(true, consumedTokenCount);
    }

    // Creates a failed result.
    static MatcherResult NoMatch()
    {
        return MatcherResult(false, 0);
    }

    /**
     * Appends the consumed token count, tags, captures and fabricated tokens of
     * another result to this result. m_matches is left untouched.
     */
    void Absorb(MatcherResult<TokenType>&& other)
    {
        m_consumed_token_count += other.m_consumed_token_count;

        m_tags.insert(m_tags.end(), other.m_tags.begin(), other.m_tags.end());

        // The fabricated tokens of the other result are appended behind the ones
        // already present, so their capture indices have to be shifted.
        const auto fabricatedIndexShift = m_fabricated_tokens.size();
        for (const auto& capture : other.m_captures)
        {
            if (!capture.IsFabricated())
                m_captures.emplace_back(capture.GetCaptureId(), capture.GetTokenIndex());
            else
                m_captures.emplace_back(capture.GetCaptureId(), fabricatedIndexShift + capture.GetTokenIndex(), true);
        }

        for (auto& fabricated : other.m_fabricated_tokens)
            m_fabricated_tokens.emplace_back(std::move(fabricated));
    }
};

View File

@ -0,0 +1,80 @@
#pragma once
#include <unordered_map>
#include "SequenceResult.h"
#include "Utils/ClassUtils.h"
#include "Parsing/Matcher/AbstractMatcher.h"
#include "Parsing/Matcher/MatcherAnd.h"
#include "Parsing/Matcher/MatcherLabel.h"
/**
 * Base class of a sequence: a set of matchers with an entry point that is
 * matched against the tokens of a lexer, and a handler that is called when the
 * entry matchers matched.
 *
 * The sequence also acts as label supplier for its matchers: the entry
 * matchers are reachable through ENTRY_LABEL, further matchers through the
 * labels they were added with.
 *
 * @tparam TokenType   Token class that is matched; must derive from IParserValue.
 * @tparam ParserState State object that is handed to ProcessMatch().
 */
template<typename TokenType, typename ParserState>
class AbstractSequence : protected IMatcherForLabelSupplier<TokenType>
{
    // TokenType must inherit IParserValue
    static_assert(std::is_base_of<IParserValue, TokenType>::value);

public:
    typedef AbstractMatcher<TokenType> matcher_t;

private:
    // Matchers that are used when the sequence is matched.
    std::unique_ptr<matcher_t> m_entry;
    // Matchers that can be referenced by their label.
    std::unordered_map<int, std::unique_ptr<matcher_t>> m_matchers;

protected:
    // Label that resolves to the entry matchers.
    static constexpr int ENTRY_LABEL = 0;

    AbstractSequence() = default;

    // Called with the collected tags and captures after the sequence matched.
    virtual void ProcessMatch(ParserState* state, const SequenceResult<TokenType>& result) const = 0;

    // Sets the entry matchers; they all have to match one after another.
    // May only be called once.
    void AddMatchers(std::initializer_list<Movable<std::unique_ptr<matcher_t>>> matchers)
    {
        assert(!m_entry);
        m_entry = std::make_unique<MatcherAnd<TokenType>>(matchers);
    }

    // Adds matchers that can be referenced by the given label.
    // Every label may only be used once.
    void AddLabeledMatchers(int label, std::initializer_list<Movable<std::unique_ptr<matcher_t>>> matchers)
    {
        // ENTRY_LABEL always resolves to the entry matchers, so matchers stored
        // under it could never be retrieved.
        assert(label != ENTRY_LABEL);
        assert(m_matchers.find(label) == m_matchers.end());
        m_matchers.emplace(label, std::make_unique<MatcherAnd<TokenType>>(matchers));
    }

public:
    ~AbstractSequence() override = default;
    AbstractSequence(const AbstractSequence& other) = default;
    AbstractSequence(AbstractSequence&& other) noexcept = default;
    AbstractSequence& operator=(const AbstractSequence& other) = default;
    AbstractSequence& operator=(AbstractSequence&& other) noexcept = default;

    // Returns the entry matchers for ENTRY_LABEL, the matchers added for the
    // label otherwise, or nullptr when the label is unknown.
    _NODISCARD matcher_t* GetMatcherForLabel(const int label) const override
    {
        if (label == ENTRY_LABEL)
            return m_entry.get();

        const auto foundEntry = m_matchers.find(label);

        if (foundEntry != m_matchers.end())
            return foundEntry->second.get();

        return nullptr;
    }

    /**
     * Matches the entry matchers against the next tokens of the lexer and calls
     * ProcessMatch() when they match.
     *
     * @param lexer              Lexer supplying the tokens.
     * @param state              State that is passed on to ProcessMatch().
     * @param consumedTokenCount Receives the amount of matched tokens; zero when
     *                           the sequence does not match.
     * @return Whether the sequence matched.
     */
    _NODISCARD bool MatchSequence(AbstractLexer<TokenType>* lexer, ParserState* state, unsigned& consumedTokenCount) const
    {
        // Never leave the out parameter unwritten.
        consumedTokenCount = 0;

        if (!m_entry)
            return false;

        auto result = m_entry->Match(lexer, 0);

        if (result.m_matches)
        {
            SequenceResult<TokenType> sequenceResult(lexer, result);
            ProcessMatch(state, sequenceResult);
            consumedTokenCount = result.m_consumed_token_count;
        }

        return result.m_matches;
    }
};

View File

@ -0,0 +1,84 @@
#pragma once
#include <unordered_map>
#include "Utils/ClassUtils.h"
#include "Parsing/Matcher/AbstractMatcher.h"
#include "Parsing/Matcher/MatcherResult.h"
/**
 * View on the tags and captures of a matcher result that can be read
 * sequentially. Captured tokens are stored as references, either to tokens of
 * the lexer or to fabricated tokens of the matcher result that was passed in -
 * the result therefore has to stay alive while the captures are accessed.
 *
 * @tparam TokenType Token class that was matched; must derive from IParserValue.
 */
template <typename TokenType>
class SequenceResult
{
    // All tokens captured under one capture id together with the read position.
    class Capture
    {
    public:
        unsigned m_offset;
        std::vector<std::reference_wrapper<const TokenType>> m_tokens;

        Capture()
            : m_offset(0)
        {
        }
    };

    // TokenType must inherit IParserValue
    static_assert(std::is_base_of<IParserValue, TokenType>::value);

    std::vector<int> m_tags;
    std::unordered_map<int, Capture> m_captures;
    // Read position inside m_tags.
    unsigned m_tag_offset;

public:
    // Copies the tags of the result and groups its captures by capture id.
    SequenceResult(AbstractLexer<TokenType>* lexer, const MatcherResult<TokenType>& result)
        : m_tags(result.m_tags),
          m_tag_offset(0)
    {
        for (const typename MatcherResult<TokenType>::Capture& capture : result.m_captures)
        {
            if (capture.IsFabricated())
                m_captures[capture.GetCaptureId()].m_tokens.push_back(result.m_fabricated_tokens[capture.GetTokenIndex()]);
            else
                m_captures[capture.GetCaptureId()].m_tokens.push_back(lexer->GetToken(capture.GetTokenIndex()));
        }
    }

    // Returns the next tag without advancing, or NO_ID when there is none left.
    _NODISCARD int PeekTag() const
    {
        if (m_tag_offset < m_tags.size())
            return m_tags[m_tag_offset];

        return AbstractMatcher<TokenType>::NO_ID;
    }

    // Returns the next tag and advances, or returns NO_ID when there is none left.
    int NextTag()
    {
        if (m_tag_offset < m_tags.size())
            return m_tags[m_tag_offset++];

        return AbstractMatcher<TokenType>::NO_ID;
    }

    // Whether there is an unread token for the capture id.
    // A pure query, so it is const and usable on a const result.
    _NODISCARD bool HasNextCapture(int captureId) const
    {
        const auto foundEntry = m_captures.find(captureId);

        if (foundEntry == m_captures.end())
            return false;

        return foundEntry->second.m_offset < foundEntry->second.m_tokens.size();
    }

    // Returns the next unread token of the capture id and advances.
    // Throws a ParsingException when there is no such token.
    const TokenType& NextCapture(int captureId)
    {
        auto foundEntry = m_captures.find(captureId);

        if (foundEntry == m_captures.end())
            throw ParsingException(TokenPos(), "Tried to access next capture even though no captures exists!");

        if (foundEntry->second.m_offset >= foundEntry->second.m_tokens.size())
            throw ParsingException(TokenPos(), "Tried to access next capture even though none exists!");

        return foundEntry->second.m_tokens[foundEntry->second.m_offset++];
    }
};

View File

@ -1,7 +1,14 @@
#include <utility>
#include <vector>
#include "ZoneCodeGenerator.h"
// Program entry point: runs the zone code generator with the command line
// arguments and uses its result as exit code.
int main(const int argc, const char** argv)
{
    const ZoneCodeGenerator zoneCodeGenerator;
    return zoneCodeGenerator.Run(argc, argv);
}