mirror of
https://github.com/Laupetin/OpenAssetTools.git
synced 2025-04-20 08:05:45 +00:00
Add multicharacter tokens to simple lexer
This commit is contained in:
parent
ab7b516918
commit
c5d7d71a51
@ -29,7 +29,12 @@ void LocalizeFileReader::SetupStreamProxies()
|
||||
|
||||
std::vector<LocalizeFileEntry> LocalizeFileReader::ReadLocalizeFile()
|
||||
{
|
||||
const auto lexer = std::make_unique<SimpleLexer>(m_stream, SimpleLexer::Config{true, true, false});
|
||||
SimpleLexer::Config lexerConfig;
|
||||
lexerConfig.m_emit_new_line_tokens = true;
|
||||
lexerConfig.m_read_strings = true;
|
||||
lexerConfig.m_read_numbers = false;
|
||||
const auto lexer = std::make_unique<SimpleLexer>(m_stream, std::move(lexerConfig));
|
||||
|
||||
const auto parser = std::make_unique<LocalizeFileParser>(lexer.get(), m_language);
|
||||
|
||||
if (parser->Parse())
|
||||
|
@ -111,7 +111,12 @@ std::unique_ptr<ParsingResult> MenuFileReader::CreateParsingResult(MenuFileParse
|
||||
|
||||
std::unique_ptr<ParsingResult> MenuFileReader::ReadMenuFile()
|
||||
{
|
||||
const auto lexer = std::make_unique<SimpleLexer>(m_stream, SimpleLexer::Config{false, true, true});
|
||||
SimpleLexer::Config lexerConfig;
|
||||
lexerConfig.m_emit_new_line_tokens = false;
|
||||
lexerConfig.m_read_strings = true;
|
||||
lexerConfig.m_read_numbers = true;
|
||||
const auto lexer = std::make_unique<SimpleLexer>(m_stream, std::move(lexerConfig));
|
||||
|
||||
const auto parser = std::make_unique<MenuFileParser>(lexer.get(), m_feature_level);
|
||||
|
||||
if (!parser->Parse())
|
||||
|
@ -66,7 +66,12 @@ std::vector<std::string> AbstractMenuDumper::CreateScriptTokenList(const char* s
|
||||
const std::string scriptString(script);
|
||||
std::istringstream stringStream(scriptString);
|
||||
ParserSingleInputStream inputStream(stringStream, "MenuScript");
|
||||
SimpleLexer lexer(&inputStream, SimpleLexer::Config{false, true, false});
|
||||
|
||||
SimpleLexer::Config lexerConfig;
|
||||
lexerConfig.m_emit_new_line_tokens = false;
|
||||
lexerConfig.m_read_strings = true;
|
||||
lexerConfig.m_read_numbers = false;
|
||||
SimpleLexer lexer(&inputStream, std::move(lexerConfig));
|
||||
|
||||
std::vector<std::string> result;
|
||||
auto hasLexerTokens = true;
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "SimpleMatcherKeyword.h"
|
||||
#include "SimpleMatcherKeywordIgnoreCase.h"
|
||||
#include "SimpleMatcherKeywordPrefix.h"
|
||||
#include "SimpleMatcherMultiCharacter.h"
|
||||
#include "SimpleMatcherValueType.h"
|
||||
|
||||
SimpleMatcherFactory::SimpleMatcherFactory(const IMatcherForLabelSupplier<SimpleParserValue>* labelSupplier)
|
||||
@ -57,6 +58,11 @@ MatcherFactoryWrapper<SimpleParserValue> SimpleMatcherFactory::Char(char c) cons
|
||||
return MatcherFactoryWrapper<SimpleParserValue>(std::make_unique<SimpleMatcherCharacter>(c));
|
||||
}
|
||||
|
||||
/**
 * Creates a matcher for a multi character token.
 *
 * @param multiCharacterSequenceId Id that is handed to the SimpleMatcherMultiCharacter being created.
 * @return A factory wrapper owning the newly created SimpleMatcherMultiCharacter.
 */
MatcherFactoryWrapper<SimpleParserValue> SimpleMatcherFactory::MultiChar(const int multiCharacterSequenceId) const
{
    auto multiCharacterMatcher = std::make_unique<SimpleMatcherMultiCharacter>(multiCharacterSequenceId);
    return MatcherFactoryWrapper<SimpleParserValue>(std::move(multiCharacterMatcher));
}
|
||||
|
||||
MatcherFactoryWrapper<SimpleParserValue> SimpleMatcherFactory::AnyCharBesides(std::vector<char> chars) const
|
||||
{
|
||||
return MatcherFactoryWrapper<SimpleParserValue>(std::make_unique<SimpleMatcherAnyCharacterBesides>(std::move(chars)));
|
||||
|
@ -19,5 +19,6 @@ public:
|
||||
_NODISCARD MatcherFactoryWrapper<SimpleParserValue> Integer() const;
|
||||
_NODISCARD MatcherFactoryWrapper<SimpleParserValue> FloatingPoint() const;
|
||||
_NODISCARD MatcherFactoryWrapper<SimpleParserValue> Char(char c) const;
|
||||
_NODISCARD MatcherFactoryWrapper<SimpleParserValue> MultiChar(int multiCharacterSequenceId) const;
|
||||
_NODISCARD MatcherFactoryWrapper<SimpleParserValue> AnyCharBesides(std::vector<char> chars) const;
|
||||
};
|
||||
|
@ -0,0 +1,14 @@
|
||||
#include "SimpleMatcherMultiCharacter.h"
|
||||
|
||||
SimpleMatcherMultiCharacter::SimpleMatcherMultiCharacter(const int multiCharacterSequenceId)
|
||||
: m_multi_character_sequence_id(multiCharacterSequenceId)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Checks whether the token at the given offset is the configured multi character sequence.
 *
 * @param lexer Lexer the token is fetched from.
 * @param tokenOffset Offset passed to ILexer::GetToken to select the token to inspect.
 * @return Match(1) when the token has type MULTI_CHARACTER and carries the configured id, NoMatch() otherwise.
 */
MatcherResult<SimpleParserValue> SimpleMatcherMultiCharacter::CanMatch(ILexer<SimpleParserValue>* lexer, const unsigned tokenOffset)
{
    const auto& candidate = lexer->GetToken(tokenOffset);

    // The type has to be checked first: MultiCharacterValue() asserts that the token is a MULTI_CHARACTER token.
    if (candidate.m_type != SimpleParserValueType::MULTI_CHARACTER)
        return MatcherResult<SimpleParserValue>::NoMatch();

    if (candidate.MultiCharacterValue() != m_multi_character_sequence_id)
        return MatcherResult<SimpleParserValue>::NoMatch();

    return MatcherResult<SimpleParserValue>::Match(1);
}
|
@ -0,0 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
#include "Parsing/Simple/SimpleParserValue.h"
|
||||
#include "Parsing/Matcher/AbstractMatcher.h"
|
||||
|
||||
/**
 * Matcher for SimpleParserValue tokens that is configured with the id of a single
 * multi character sequence.
 */
class SimpleMatcherMultiCharacter final : public AbstractMatcher<SimpleParserValue>
{
public:
    /**
     * @param multiCharacterSequenceId Id of the multi character sequence this matcher is bound to.
     */
    explicit SimpleMatcherMultiCharacter(int multiCharacterSequenceId);

protected:
    MatcherResult<SimpleParserValue> CanMatch(ILexer<SimpleParserValue>* lexer, unsigned tokenOffset) override;

private:
    // Id of the multi character sequence, set once by the constructor.
    int m_multi_character_sequence_id;
};
|
@ -1,17 +1,77 @@
|
||||
#include "SimpleLexer.h"
|
||||
|
||||
SimpleLexer::Config::MultiCharacterToken::MultiCharacterToken(const int id, std::string value)
|
||||
: m_id(id),
|
||||
m_value(std::move(value))
|
||||
{
|
||||
}
|
||||
|
||||
SimpleLexer::MultiCharacterTokenLookupEntry::MultiCharacterTokenLookupEntry(const int id, std::string value)
|
||||
: m_id(id),
|
||||
m_value(std::move(value))
|
||||
{
|
||||
}
|
||||
|
||||
SimpleLexer::SimpleLexer(IParserLineStream* stream)
|
||||
: AbstractLexer(stream),
|
||||
m_config{false, true, true},
|
||||
m_last_line(1)
|
||||
m_config{false, true, true, {}},
|
||||
m_check_for_multi_character_tokens(false),
|
||||
m_last_line(1)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Creates a lexer with a caller supplied configuration.
 *
 * Every multi character token of the configuration is registered in the lookup table.
 * Afterwards the token list inside the stored configuration is cleared, so the lookup
 * table is the only place that holds the token strings.
 *
 * @param stream Line stream handed to the AbstractLexer base.
 * @param config Lexer configuration; taken by value and moved into the lexer.
 */
SimpleLexer::SimpleLexer(IParserLineStream* stream, Config config)
    : AbstractLexer(stream),
      m_config(std::move(config)),
      m_check_for_multi_character_tokens(false),
      m_last_line(1)
{
    // Iterate by reference and move each element out: the list is cleared right below,
    // so there is no need to copy every token (and its string) first.
    for (auto& tokenConfig : m_config.m_multi_character_tokens)
        AddMultiCharacterTokenConfigToLookup(std::move(tokenConfig));

    m_config.m_multi_character_tokens.clear();
}
|
||||
|
||||
/**
 * Registers a multi character token in the lookup table.
 *
 * The table is indexed by the first character of the token (as uint8_t). Tokens that share
 * a first character form a singly linked chain; a new token is appended to the end of its
 * chain, so entries keep the order in which they were registered.
 * Tokens with an empty value are ignored.
 *
 * @param tokenConfig Token to register; its value string is moved into the new lookup entry.
 */
void SimpleLexer::AddMultiCharacterTokenConfigToLookup(Config::MultiCharacterToken tokenConfig)
{
    if (tokenConfig.m_value.empty())
        return;

    m_check_for_multi_character_tokens = true;

    const auto bucketIndex = static_cast<uint8_t>(tokenConfig.m_value[0]);

    // Walk to the first empty link of the chain; for an unused bucket that is the bucket itself.
    auto* freeSlot = &m_multi_character_token_lookup[bucketIndex];
    while (*freeSlot)
        freeSlot = &(*freeSlot)->m_next;

    *freeSlot = std::make_unique<MultiCharacterTokenLookupEntry>(tokenConfig.m_id, std::move(tokenConfig.m_value));
}
|
||||
|
||||
bool SimpleLexer::ReadMultiCharacterToken(const MultiCharacterTokenLookupEntry* multiTokenLookup)
|
||||
{
|
||||
const auto& currentLine = CurrentLine();
|
||||
assert(m_current_line_offset >= 1);
|
||||
assert(multiTokenLookup);
|
||||
assert(!multiTokenLookup->m_value.empty());
|
||||
assert(currentLine.m_line[m_current_line_offset - 1] == multiTokenLookup->m_value[0]);
|
||||
|
||||
const char* linePos = ¤tLine.m_line[m_current_line_offset - 1];
|
||||
|
||||
for (const auto c : multiTokenLookup->m_value)
|
||||
{
|
||||
if (!*linePos || *linePos != c)
|
||||
return false;
|
||||
linePos++;
|
||||
}
|
||||
|
||||
m_current_line_offset = m_current_line_offset - 1 + multiTokenLookup->m_value.size();
|
||||
return true;
|
||||
}
|
||||
|
||||
SimpleParserValue SimpleLexer::GetNextToken()
|
||||
@ -45,6 +105,18 @@ SimpleParserValue SimpleLexer::GetNextToken()
|
||||
if (c == EOF)
|
||||
return SimpleParserValue::EndOfFile(TokenPos());
|
||||
|
||||
if(m_check_for_multi_character_tokens)
|
||||
{
|
||||
const auto* multiTokenLookup = m_multi_character_token_lookup[static_cast<uint8_t>(c)].get();
|
||||
while(multiTokenLookup)
|
||||
{
|
||||
if(ReadMultiCharacterToken(multiTokenLookup))
|
||||
return SimpleParserValue::MultiCharacter(pos, multiTokenLookup->m_id);
|
||||
|
||||
multiTokenLookup = multiTokenLookup->m_next.get();
|
||||
}
|
||||
}
|
||||
|
||||
if (m_config.m_read_strings && c == '\"')
|
||||
return SimpleParserValue::String(GetPreviousCharacterPos(), new std::string(ReadString()));
|
||||
|
||||
|
@ -1,5 +1,9 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
|
||||
#include "SimpleParserValue.h"
|
||||
#include "Parsing/Impl/AbstractLexer.h"
|
||||
|
||||
@ -9,15 +13,41 @@ public:
|
||||
/**
 * Configuration of a SimpleLexer.
 *
 * The class stays an aggregate, so it can still be brace-initialized in member order.
 * The member defaults mirror the configuration used by the single-argument SimpleLexer
 * constructor, so a default-constructed Config never carries indeterminate values.
 */
class Config
{
public:
    /**
     * A character sequence that the lexer should emit as a single token,
     * together with the id that identifies it.
     */
    class MultiCharacterToken
    {
    public:
        int m_id;
        std::string m_value;

        MultiCharacterToken(int id, std::string value);
    };

    // Presumably controls whether NEW_LINE tokens are produced - confirm in GetNextToken.
    bool m_emit_new_line_tokens = false;
    // When set, a double quote character starts a string token.
    bool m_read_strings = true;
    // Presumably controls whether numeric tokens are produced - confirm in GetNextToken.
    bool m_read_numbers = true;
    // Multi character tokens; the lexer constructor moves them into its lookup table and clears this list.
    std::vector<MultiCharacterToken> m_multi_character_tokens;
};
|
||||
|
||||
protected:
|
||||
/**
 * Node of the multi character token lookup.
 * Entries are chained through m_next; each entry owns its successor.
 */
class MultiCharacterTokenLookupEntry
{
public:
    MultiCharacterTokenLookupEntry(int id, std::string value);

    // Id of the multi character token.
    int m_id;
    // Character sequence of the multi character token.
    std::string m_value;
    // Next entry of the chain, empty for the last entry.
    std::unique_ptr<MultiCharacterTokenLookupEntry> m_next;
};
|
||||
|
||||
Config m_config;
|
||||
bool m_check_for_multi_character_tokens;
|
||||
int m_last_line;
|
||||
|
||||
protected:
|
||||
std::unique_ptr<MultiCharacterTokenLookupEntry> m_multi_character_token_lookup[std::numeric_limits<uint8_t>::max() + 1];
|
||||
|
||||
void AddMultiCharacterTokenConfigToLookup(Config::MultiCharacterToken tokenConfig);
|
||||
bool ReadMultiCharacterToken(const MultiCharacterTokenLookupEntry* multiTokenLookup);
|
||||
|
||||
SimpleParserValue GetNextToken() override;
|
||||
|
||||
public:
|
||||
|
@ -27,6 +27,13 @@ SimpleParserValue SimpleParserValue::Character(const TokenPos pos, const char c)
|
||||
return pv;
|
||||
}
|
||||
|
||||
/**
 * Creates a token of type MULTI_CHARACTER.
 *
 * @param pos Position passed to the SimpleParserValue constructor.
 * @param multiCharacterSequenceId Id stored in the token; can be read back with MultiCharacterValue().
 * @return The newly created token.
 */
SimpleParserValue SimpleParserValue::MultiCharacter(const TokenPos pos, const int multiCharacterSequenceId)
{
    SimpleParserValue multiCharacterToken(pos, SimpleParserValueType::MULTI_CHARACTER);
    multiCharacterToken.m_value.multi_character_sequence_id = multiCharacterSequenceId;

    return multiCharacterToken;
}
|
||||
|
||||
SimpleParserValue SimpleParserValue::Integer(const TokenPos pos, const int value)
|
||||
{
|
||||
SimpleParserValue pv(pos, SimpleParserValueType::INTEGER);
|
||||
@ -116,6 +123,12 @@ char SimpleParserValue::CharacterValue() const
|
||||
return m_value.char_value;
|
||||
}
|
||||
|
||||
// Returns the multi character sequence id stored in this token.
// Must only be called on tokens of type MULTI_CHARACTER; this is checked with an assert.
int SimpleParserValue::MultiCharacterValue() const
|
||||
{
|
||||
assert(m_type == SimpleParserValueType::MULTI_CHARACTER);
|
||||
return m_value.multi_character_sequence_id;
|
||||
}
|
||||
|
||||
int SimpleParserValue::IntegerValue() const
|
||||
{
|
||||
assert(m_type == SimpleParserValueType::INTEGER);
|
||||
|
@ -13,8 +13,9 @@ enum class SimpleParserValueType
|
||||
END_OF_FILE,
|
||||
NEW_LINE,
|
||||
|
||||
// Single character
|
||||
// Character sequences
|
||||
CHARACTER,
|
||||
MULTI_CHARACTER,
|
||||
|
||||
// Generic token types
|
||||
INTEGER,
|
||||
@ -36,6 +37,7 @@ public:
|
||||
{
|
||||
char char_value;
|
||||
int int_value;
|
||||
int multi_character_sequence_id;
|
||||
double double_value;
|
||||
std::string* string_value;
|
||||
} m_value;
|
||||
@ -44,6 +46,7 @@ public:
|
||||
static SimpleParserValue EndOfFile(TokenPos pos);
|
||||
static SimpleParserValue NewLine(TokenPos pos);
|
||||
static SimpleParserValue Character(TokenPos pos, char c);
|
||||
static SimpleParserValue MultiCharacter(TokenPos pos, int multiCharacterSequenceId);
|
||||
static SimpleParserValue Integer(TokenPos pos, int value);
|
||||
static SimpleParserValue FloatingPoint(TokenPos pos, double value);
|
||||
static SimpleParserValue String(TokenPos pos, std::string* stringValue);
|
||||
@ -63,6 +66,7 @@ public:
|
||||
_NODISCARD const TokenPos& GetPos() const override;
|
||||
|
||||
_NODISCARD char CharacterValue() const;
|
||||
_NODISCARD int MultiCharacterValue() const;
|
||||
_NODISCARD int IntegerValue() const;
|
||||
_NODISCARD double FloatingPointValue() const;
|
||||
_NODISCARD std::string& StringValue() const;
|
||||
|
Loading…
x
Reference in New Issue
Block a user