mirror of
https://github.com/Laupetin/OpenAssetTools.git
synced 2025-04-20 00:02:55 +00:00
Add multicharacter tokens to simple lexer
This commit is contained in:
parent
ab7b516918
commit
c5d7d71a51
@ -29,7 +29,12 @@ void LocalizeFileReader::SetupStreamProxies()
|
|||||||
|
|
||||||
std::vector<LocalizeFileEntry> LocalizeFileReader::ReadLocalizeFile()
|
std::vector<LocalizeFileEntry> LocalizeFileReader::ReadLocalizeFile()
|
||||||
{
|
{
|
||||||
const auto lexer = std::make_unique<SimpleLexer>(m_stream, SimpleLexer::Config{true, true, false});
|
SimpleLexer::Config lexerConfig;
|
||||||
|
lexerConfig.m_emit_new_line_tokens = true;
|
||||||
|
lexerConfig.m_read_strings = true;
|
||||||
|
lexerConfig.m_read_numbers = false;
|
||||||
|
const auto lexer = std::make_unique<SimpleLexer>(m_stream, std::move(lexerConfig));
|
||||||
|
|
||||||
const auto parser = std::make_unique<LocalizeFileParser>(lexer.get(), m_language);
|
const auto parser = std::make_unique<LocalizeFileParser>(lexer.get(), m_language);
|
||||||
|
|
||||||
if (parser->Parse())
|
if (parser->Parse())
|
||||||
|
@ -111,7 +111,12 @@ std::unique_ptr<ParsingResult> MenuFileReader::CreateParsingResult(MenuFileParse
|
|||||||
|
|
||||||
std::unique_ptr<ParsingResult> MenuFileReader::ReadMenuFile()
|
std::unique_ptr<ParsingResult> MenuFileReader::ReadMenuFile()
|
||||||
{
|
{
|
||||||
const auto lexer = std::make_unique<SimpleLexer>(m_stream, SimpleLexer::Config{false, true, true});
|
SimpleLexer::Config lexerConfig;
|
||||||
|
lexerConfig.m_emit_new_line_tokens = false;
|
||||||
|
lexerConfig.m_read_strings = true;
|
||||||
|
lexerConfig.m_read_numbers = true;
|
||||||
|
const auto lexer = std::make_unique<SimpleLexer>(m_stream, std::move(lexerConfig));
|
||||||
|
|
||||||
const auto parser = std::make_unique<MenuFileParser>(lexer.get(), m_feature_level);
|
const auto parser = std::make_unique<MenuFileParser>(lexer.get(), m_feature_level);
|
||||||
|
|
||||||
if (!parser->Parse())
|
if (!parser->Parse())
|
||||||
|
@ -66,7 +66,12 @@ std::vector<std::string> AbstractMenuDumper::CreateScriptTokenList(const char* s
|
|||||||
const std::string scriptString(script);
|
const std::string scriptString(script);
|
||||||
std::istringstream stringStream(scriptString);
|
std::istringstream stringStream(scriptString);
|
||||||
ParserSingleInputStream inputStream(stringStream, "MenuScript");
|
ParserSingleInputStream inputStream(stringStream, "MenuScript");
|
||||||
SimpleLexer lexer(&inputStream, SimpleLexer::Config{false, true, false});
|
|
||||||
|
SimpleLexer::Config lexerConfig;
|
||||||
|
lexerConfig.m_emit_new_line_tokens = false;
|
||||||
|
lexerConfig.m_read_strings = true;
|
||||||
|
lexerConfig.m_read_numbers = false;
|
||||||
|
SimpleLexer lexer(&inputStream, std::move(lexerConfig));
|
||||||
|
|
||||||
std::vector<std::string> result;
|
std::vector<std::string> result;
|
||||||
auto hasLexerTokens = true;
|
auto hasLexerTokens = true;
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
#include "SimpleMatcherKeyword.h"
|
#include "SimpleMatcherKeyword.h"
|
||||||
#include "SimpleMatcherKeywordIgnoreCase.h"
|
#include "SimpleMatcherKeywordIgnoreCase.h"
|
||||||
#include "SimpleMatcherKeywordPrefix.h"
|
#include "SimpleMatcherKeywordPrefix.h"
|
||||||
|
#include "SimpleMatcherMultiCharacter.h"
|
||||||
#include "SimpleMatcherValueType.h"
|
#include "SimpleMatcherValueType.h"
|
||||||
|
|
||||||
SimpleMatcherFactory::SimpleMatcherFactory(const IMatcherForLabelSupplier<SimpleParserValue>* labelSupplier)
|
SimpleMatcherFactory::SimpleMatcherFactory(const IMatcherForLabelSupplier<SimpleParserValue>* labelSupplier)
|
||||||
@ -57,6 +58,11 @@ MatcherFactoryWrapper<SimpleParserValue> SimpleMatcherFactory::Char(char c) cons
|
|||||||
return MatcherFactoryWrapper<SimpleParserValue>(std::make_unique<SimpleMatcherCharacter>(c));
|
return MatcherFactoryWrapper<SimpleParserValue>(std::make_unique<SimpleMatcherCharacter>(c));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MatcherFactoryWrapper<SimpleParserValue> SimpleMatcherFactory::MultiChar(int multiCharacterSequenceId) const
|
||||||
|
{
|
||||||
|
return MatcherFactoryWrapper<SimpleParserValue>(std::make_unique<SimpleMatcherMultiCharacter>(multiCharacterSequenceId));
|
||||||
|
}
|
||||||
|
|
||||||
MatcherFactoryWrapper<SimpleParserValue> SimpleMatcherFactory::AnyCharBesides(std::vector<char> chars) const
|
MatcherFactoryWrapper<SimpleParserValue> SimpleMatcherFactory::AnyCharBesides(std::vector<char> chars) const
|
||||||
{
|
{
|
||||||
return MatcherFactoryWrapper<SimpleParserValue>(std::make_unique<SimpleMatcherAnyCharacterBesides>(std::move(chars)));
|
return MatcherFactoryWrapper<SimpleParserValue>(std::make_unique<SimpleMatcherAnyCharacterBesides>(std::move(chars)));
|
||||||
|
@ -19,5 +19,6 @@ public:
|
|||||||
_NODISCARD MatcherFactoryWrapper<SimpleParserValue> Integer() const;
|
_NODISCARD MatcherFactoryWrapper<SimpleParserValue> Integer() const;
|
||||||
_NODISCARD MatcherFactoryWrapper<SimpleParserValue> FloatingPoint() const;
|
_NODISCARD MatcherFactoryWrapper<SimpleParserValue> FloatingPoint() const;
|
||||||
_NODISCARD MatcherFactoryWrapper<SimpleParserValue> Char(char c) const;
|
_NODISCARD MatcherFactoryWrapper<SimpleParserValue> Char(char c) const;
|
||||||
|
_NODISCARD MatcherFactoryWrapper<SimpleParserValue> MultiChar(int multiCharacterSequenceId) const;
|
||||||
_NODISCARD MatcherFactoryWrapper<SimpleParserValue> AnyCharBesides(std::vector<char> chars) const;
|
_NODISCARD MatcherFactoryWrapper<SimpleParserValue> AnyCharBesides(std::vector<char> chars) const;
|
||||||
};
|
};
|
||||||
|
@ -0,0 +1,14 @@
|
|||||||
|
#include "SimpleMatcherMultiCharacter.h"
|
||||||
|
|
||||||
|
SimpleMatcherMultiCharacter::SimpleMatcherMultiCharacter(const int multiCharacterSequenceId)
|
||||||
|
: m_multi_character_sequence_id(multiCharacterSequenceId)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
MatcherResult<SimpleParserValue> SimpleMatcherMultiCharacter::CanMatch(ILexer<SimpleParserValue>* lexer, const unsigned tokenOffset)
|
||||||
|
{
|
||||||
|
const auto& token = lexer->GetToken(tokenOffset);
|
||||||
|
return token.m_type == SimpleParserValueType::MULTI_CHARACTER && token.MultiCharacterValue() == m_multi_character_sequence_id
|
||||||
|
? MatcherResult<SimpleParserValue>::Match(1)
|
||||||
|
: MatcherResult<SimpleParserValue>::NoMatch();
|
||||||
|
}
|
@ -0,0 +1,15 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Parsing/Simple/SimpleParserValue.h"
|
||||||
|
#include "Parsing/Matcher/AbstractMatcher.h"
|
||||||
|
|
||||||
|
class SimpleMatcherMultiCharacter final : public AbstractMatcher<SimpleParserValue>
|
||||||
|
{
|
||||||
|
int m_multi_character_sequence_id;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
MatcherResult<SimpleParserValue> CanMatch(ILexer<SimpleParserValue>* lexer, unsigned tokenOffset) override;
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit SimpleMatcherMultiCharacter(int multiCharacterSequenceId);
|
||||||
|
};
|
@ -1,17 +1,77 @@
|
|||||||
#include "SimpleLexer.h"
|
#include "SimpleLexer.h"
|
||||||
|
|
||||||
|
SimpleLexer::Config::MultiCharacterToken::MultiCharacterToken(const int id, std::string value)
|
||||||
|
: m_id(id),
|
||||||
|
m_value(std::move(value))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
SimpleLexer::MultiCharacterTokenLookupEntry::MultiCharacterTokenLookupEntry(const int id, std::string value)
|
||||||
|
: m_id(id),
|
||||||
|
m_value(std::move(value))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
SimpleLexer::SimpleLexer(IParserLineStream* stream)
|
SimpleLexer::SimpleLexer(IParserLineStream* stream)
|
||||||
: AbstractLexer(stream),
|
: AbstractLexer(stream),
|
||||||
m_config{false, true, true},
|
m_config{false, true, true, {}},
|
||||||
m_last_line(1)
|
m_check_for_multi_character_tokens(false),
|
||||||
|
m_last_line(1)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
SimpleLexer::SimpleLexer(IParserLineStream* stream, Config config)
|
SimpleLexer::SimpleLexer(IParserLineStream* stream, Config config)
|
||||||
: AbstractLexer(stream),
|
: AbstractLexer(stream),
|
||||||
m_config(config),
|
m_config(std::move(config)),
|
||||||
m_last_line(1)
|
m_check_for_multi_character_tokens(false),
|
||||||
|
m_last_line(1)
|
||||||
{
|
{
|
||||||
|
for (auto tokenConfig : m_config.m_multi_character_tokens)
|
||||||
|
AddMultiCharacterTokenConfigToLookup(std::move(tokenConfig));
|
||||||
|
m_config.m_multi_character_tokens.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
void SimpleLexer::AddMultiCharacterTokenConfigToLookup(Config::MultiCharacterToken tokenConfig)
|
||||||
|
{
|
||||||
|
if (tokenConfig.m_value.empty())
|
||||||
|
return;
|
||||||
|
|
||||||
|
m_check_for_multi_character_tokens = true;
|
||||||
|
const auto firstCharacterValue = static_cast<uint8_t>(tokenConfig.m_value[0]);
|
||||||
|
|
||||||
|
if (m_multi_character_token_lookup[firstCharacterValue])
|
||||||
|
{
|
||||||
|
auto* currentEntry = m_multi_character_token_lookup[firstCharacterValue].get();
|
||||||
|
while (currentEntry->m_next)
|
||||||
|
currentEntry = currentEntry->m_next.get();
|
||||||
|
|
||||||
|
currentEntry->m_next = std::make_unique<MultiCharacterTokenLookupEntry>(tokenConfig.m_id, std::move(tokenConfig.m_value));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
m_multi_character_token_lookup[firstCharacterValue] = std::make_unique<MultiCharacterTokenLookupEntry>(tokenConfig.m_id, std::move(tokenConfig.m_value));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SimpleLexer::ReadMultiCharacterToken(const MultiCharacterTokenLookupEntry* multiTokenLookup)
|
||||||
|
{
|
||||||
|
const auto& currentLine = CurrentLine();
|
||||||
|
assert(m_current_line_offset >= 1);
|
||||||
|
assert(multiTokenLookup);
|
||||||
|
assert(!multiTokenLookup->m_value.empty());
|
||||||
|
assert(currentLine.m_line[m_current_line_offset - 1] == multiTokenLookup->m_value[0]);
|
||||||
|
|
||||||
|
const char* linePos = &currentLine.m_line[m_current_line_offset - 1];
|
||||||
|
|
||||||
|
for (const auto c : multiTokenLookup->m_value)
|
||||||
|
{
|
||||||
|
if (!*linePos || *linePos != c)
|
||||||
|
return false;
|
||||||
|
linePos++;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_current_line_offset = m_current_line_offset - 1 + multiTokenLookup->m_value.size();
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
SimpleParserValue SimpleLexer::GetNextToken()
|
SimpleParserValue SimpleLexer::GetNextToken()
|
||||||
@ -32,7 +92,7 @@ SimpleParserValue SimpleLexer::GetNextToken()
|
|||||||
NextChar();
|
NextChar();
|
||||||
c = PeekChar();
|
c = PeekChar();
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto pos = GetNextCharacterPos();
|
const auto pos = GetNextCharacterPos();
|
||||||
if (m_config.m_emit_new_line_tokens && pos.m_line > m_last_line)
|
if (m_config.m_emit_new_line_tokens && pos.m_line > m_last_line)
|
||||||
{
|
{
|
||||||
@ -45,9 +105,21 @@ SimpleParserValue SimpleLexer::GetNextToken()
|
|||||||
if (c == EOF)
|
if (c == EOF)
|
||||||
return SimpleParserValue::EndOfFile(TokenPos());
|
return SimpleParserValue::EndOfFile(TokenPos());
|
||||||
|
|
||||||
|
if(m_check_for_multi_character_tokens)
|
||||||
|
{
|
||||||
|
const auto* multiTokenLookup = m_multi_character_token_lookup[static_cast<uint8_t>(c)].get();
|
||||||
|
while(multiTokenLookup)
|
||||||
|
{
|
||||||
|
if(ReadMultiCharacterToken(multiTokenLookup))
|
||||||
|
return SimpleParserValue::MultiCharacter(pos, multiTokenLookup->m_id);
|
||||||
|
|
||||||
|
multiTokenLookup = multiTokenLookup->m_next.get();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (m_config.m_read_strings && c == '\"')
|
if (m_config.m_read_strings && c == '\"')
|
||||||
return SimpleParserValue::String(GetPreviousCharacterPos(), new std::string(ReadString()));
|
return SimpleParserValue::String(GetPreviousCharacterPos(), new std::string(ReadString()));
|
||||||
|
|
||||||
if (m_config.m_read_numbers && (isdigit(c) || c == '.' && isdigit(PeekChar())))
|
if (m_config.m_read_numbers && (isdigit(c) || c == '.' && isdigit(PeekChar())))
|
||||||
{
|
{
|
||||||
bool isFloatingPointValue;
|
bool isFloatingPointValue;
|
||||||
|
@ -1,5 +1,9 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <limits>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
#include "SimpleParserValue.h"
|
#include "SimpleParserValue.h"
|
||||||
#include "Parsing/Impl/AbstractLexer.h"
|
#include "Parsing/Impl/AbstractLexer.h"
|
||||||
|
|
||||||
@ -9,15 +13,41 @@ public:
|
|||||||
class Config
|
class Config
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
class MultiCharacterToken
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
int m_id;
|
||||||
|
std::string m_value;
|
||||||
|
|
||||||
|
MultiCharacterToken(int id, std::string value);
|
||||||
|
};
|
||||||
|
|
||||||
bool m_emit_new_line_tokens;
|
bool m_emit_new_line_tokens;
|
||||||
bool m_read_strings;
|
bool m_read_strings;
|
||||||
bool m_read_numbers;
|
bool m_read_numbers;
|
||||||
|
std::vector<MultiCharacterToken> m_multi_character_tokens;
|
||||||
|
};
|
||||||
|
|
||||||
|
protected:
|
||||||
|
class MultiCharacterTokenLookupEntry
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
int m_id;
|
||||||
|
std::string m_value;
|
||||||
|
std::unique_ptr<MultiCharacterTokenLookupEntry> m_next;
|
||||||
|
|
||||||
|
MultiCharacterTokenLookupEntry(int id, std::string value);
|
||||||
};
|
};
|
||||||
|
|
||||||
Config m_config;
|
Config m_config;
|
||||||
|
bool m_check_for_multi_character_tokens;
|
||||||
int m_last_line;
|
int m_last_line;
|
||||||
|
|
||||||
protected:
|
std::unique_ptr<MultiCharacterTokenLookupEntry> m_multi_character_token_lookup[std::numeric_limits<uint8_t>::max() + 1];
|
||||||
|
|
||||||
|
void AddMultiCharacterTokenConfigToLookup(Config::MultiCharacterToken tokenConfig);
|
||||||
|
bool ReadMultiCharacterToken(const MultiCharacterTokenLookupEntry* multiTokenLookup);
|
||||||
|
|
||||||
SimpleParserValue GetNextToken() override;
|
SimpleParserValue GetNextToken() override;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
@ -27,6 +27,13 @@ SimpleParserValue SimpleParserValue::Character(const TokenPos pos, const char c)
|
|||||||
return pv;
|
return pv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SimpleParserValue SimpleParserValue::MultiCharacter(const TokenPos pos, const int multiCharacterSequenceId)
|
||||||
|
{
|
||||||
|
SimpleParserValue pv(pos, SimpleParserValueType::MULTI_CHARACTER);
|
||||||
|
pv.m_value.multi_character_sequence_id = multiCharacterSequenceId;
|
||||||
|
return pv;
|
||||||
|
}
|
||||||
|
|
||||||
SimpleParserValue SimpleParserValue::Integer(const TokenPos pos, const int value)
|
SimpleParserValue SimpleParserValue::Integer(const TokenPos pos, const int value)
|
||||||
{
|
{
|
||||||
SimpleParserValue pv(pos, SimpleParserValueType::INTEGER);
|
SimpleParserValue pv(pos, SimpleParserValueType::INTEGER);
|
||||||
@ -116,6 +123,12 @@ char SimpleParserValue::CharacterValue() const
|
|||||||
return m_value.char_value;
|
return m_value.char_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int SimpleParserValue::MultiCharacterValue() const
|
||||||
|
{
|
||||||
|
assert(m_type == SimpleParserValueType::MULTI_CHARACTER);
|
||||||
|
return m_value.multi_character_sequence_id;
|
||||||
|
}
|
||||||
|
|
||||||
int SimpleParserValue::IntegerValue() const
|
int SimpleParserValue::IntegerValue() const
|
||||||
{
|
{
|
||||||
assert(m_type == SimpleParserValueType::INTEGER);
|
assert(m_type == SimpleParserValueType::INTEGER);
|
||||||
|
@ -13,8 +13,9 @@ enum class SimpleParserValueType
|
|||||||
END_OF_FILE,
|
END_OF_FILE,
|
||||||
NEW_LINE,
|
NEW_LINE,
|
||||||
|
|
||||||
// Single character
|
// Character sequences
|
||||||
CHARACTER,
|
CHARACTER,
|
||||||
|
MULTI_CHARACTER,
|
||||||
|
|
||||||
// Generic token types
|
// Generic token types
|
||||||
INTEGER,
|
INTEGER,
|
||||||
@ -36,6 +37,7 @@ public:
|
|||||||
{
|
{
|
||||||
char char_value;
|
char char_value;
|
||||||
int int_value;
|
int int_value;
|
||||||
|
int multi_character_sequence_id;
|
||||||
double double_value;
|
double double_value;
|
||||||
std::string* string_value;
|
std::string* string_value;
|
||||||
} m_value;
|
} m_value;
|
||||||
@ -44,6 +46,7 @@ public:
|
|||||||
static SimpleParserValue EndOfFile(TokenPos pos);
|
static SimpleParserValue EndOfFile(TokenPos pos);
|
||||||
static SimpleParserValue NewLine(TokenPos pos);
|
static SimpleParserValue NewLine(TokenPos pos);
|
||||||
static SimpleParserValue Character(TokenPos pos, char c);
|
static SimpleParserValue Character(TokenPos pos, char c);
|
||||||
|
static SimpleParserValue MultiCharacter(TokenPos pos, int multiCharacterSequenceId);
|
||||||
static SimpleParserValue Integer(TokenPos pos, int value);
|
static SimpleParserValue Integer(TokenPos pos, int value);
|
||||||
static SimpleParserValue FloatingPoint(TokenPos pos, double value);
|
static SimpleParserValue FloatingPoint(TokenPos pos, double value);
|
||||||
static SimpleParserValue String(TokenPos pos, std::string* stringValue);
|
static SimpleParserValue String(TokenPos pos, std::string* stringValue);
|
||||||
@ -63,6 +66,7 @@ public:
|
|||||||
_NODISCARD const TokenPos& GetPos() const override;
|
_NODISCARD const TokenPos& GetPos() const override;
|
||||||
|
|
||||||
_NODISCARD char CharacterValue() const;
|
_NODISCARD char CharacterValue() const;
|
||||||
|
_NODISCARD int MultiCharacterValue() const;
|
||||||
_NODISCARD int IntegerValue() const;
|
_NODISCARD int IntegerValue() const;
|
||||||
_NODISCARD double FloatingPointValue() const;
|
_NODISCARD double FloatingPointValue() const;
|
||||||
_NODISCARD std::string& StringValue() const;
|
_NODISCARD std::string& StringValue() const;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user