Fix SimpleLexer not being able to read escaped strings

This commit is contained in:
Jan 2023-09-23 14:28:59 +02:00
parent 7d0abaf256
commit 88bc1c1056
6 changed files with 154 additions and 3 deletions

View File

@ -129,6 +129,7 @@ std::unique_ptr<ParsingResult> MenuFileReader::ReadMenuFile()
SimpleLexer::Config lexerConfig; SimpleLexer::Config lexerConfig;
lexerConfig.m_emit_new_line_tokens = false; lexerConfig.m_emit_new_line_tokens = false;
lexerConfig.m_read_strings = true; lexerConfig.m_read_strings = true;
lexerConfig.m_string_escape_sequences = true;
lexerConfig.m_read_integer_numbers = true; lexerConfig.m_read_integer_numbers = true;
lexerConfig.m_read_floating_point_numbers = true; lexerConfig.m_read_floating_point_numbers = true;
MenuExpressionMatchers().ApplyTokensToLexerConfig(lexerConfig); MenuExpressionMatchers().ApplyTokensToLexerConfig(lexerConfig);

View File

@ -2,11 +2,13 @@
#include <cassert> #include <cassert>
#include <deque> #include <deque>
#include <sstream>
#include "Utils/ClassUtils.h" #include "Utils/ClassUtils.h"
#include "Parsing/ILexer.h" #include "Parsing/ILexer.h"
#include "Parsing/IParserLineStream.h" #include "Parsing/IParserLineStream.h"
#include "Parsing/ParsingException.h" #include "Parsing/ParsingException.h"
#include "Utils/StringUtils.h"
template <typename TokenType> template <typename TokenType>
class AbstractLexer : public ILexer<TokenType> class AbstractLexer : public ILexer<TokenType>
@ -140,6 +142,52 @@ protected:
return std::string(currentLine.m_line, startPos, m_current_line_offset - startPos); return std::string(currentLine.m_line, startPos, m_current_line_offset - startPos);
} }
/**
 * \brief Reads the remainder of a double-quoted string, honoring backslash escape sequences
 *
 * Must be called with the current line offset positioned directly behind the opening
 * quotation mark. Scanning stops at the first quotation mark that is not escaped by a
 * backslash; the line offset is then moved past that closing quotation mark.
 *
 * \return The string contents without the surrounding quotation marks. If the contents
 *         contain any backslash they are passed through utils::UnescapeStringFromQuotationMarks.
 * \throws ParsingException If the current line ends before a closing quotation mark is found
 */
std::string ReadStringWithEscapeSequences()
{
    const auto& currentLine = CurrentLine();
    assert(m_current_line_offset >= 1);
    assert(currentLine.m_line[m_current_line_offset - 1] == '"');

    const auto startPos = m_current_line_offset;
    const auto lineSize = currentLine.m_line.size();

    auto containsEscapes = false; // Whether unescaping is necessary at all
    auto inEscape = false;        // Whether the previous character was an escaping backslash
    while (true)
    {
        if (m_current_line_offset >= lineSize)
            throw ParsingException(TokenPos(*currentLine.m_filename, currentLine.m_line_number, m_current_line_offset), "Unclosed string");

        const auto c = currentLine.m_line[m_current_line_offset];
        if (c == '\"' && !inEscape)
            break;

        // A backslash only opens an escape sequence if it is not escaped itself ("\\\\" is one literal backslash)
        inEscape = c == '\\' && !inEscape;
        containsEscapes = containsEscapes || inEscape;

        m_current_line_offset++;
    }

    std::string str(currentLine.m_line, startPos, m_current_line_offset - startPos);

    // Skip the closing quotation mark so lexing continues behind the string
    m_current_line_offset++;

    // Fast path: nothing to unescape, return the raw contents
    if (!containsEscapes)
        return str;

    return utils::UnescapeStringFromQuotationMarks(str);
}
/** /**
* \brief Reads an identifier from the current position * \brief Reads an identifier from the current position
* \return The value of the read identifier * \return The value of the read identifier

View File

@ -14,7 +14,7 @@ SimpleLexer::MultiCharacterTokenLookupEntry::MultiCharacterTokenLookupEntry(cons
SimpleLexer::SimpleLexer(IParserLineStream* stream) SimpleLexer::SimpleLexer(IParserLineStream* stream)
: AbstractLexer(stream), : AbstractLexer(stream),
m_config{false, true, true, true, {}}, m_config{false, true, false, true, true, {}},
m_check_for_multi_character_tokens(false), m_check_for_multi_character_tokens(false),
m_last_line(1) m_last_line(1)
{ {
@ -31,7 +31,7 @@ SimpleLexer::SimpleLexer(IParserLineStream* stream, Config config)
m_config.m_multi_character_tokens.clear(); m_config.m_multi_character_tokens.clear();
// If reading floating point numbers then must be reading integers // If reading floating point numbers then must be reading integers
assert(config.m_read_floating_point_numbers == false || config.m_read_floating_point_numbers == config.m_read_integer_numbers); assert(m_config.m_read_floating_point_numbers == false || m_config.m_read_floating_point_numbers == m_config.m_read_integer_numbers);
} }
void SimpleLexer::AddMultiCharacterTokenConfigToLookup(Config::MultiCharacterToken tokenConfig) void SimpleLexer::AddMultiCharacterTokenConfigToLookup(Config::MultiCharacterToken tokenConfig)
@ -121,7 +121,7 @@ SimpleParserValue SimpleLexer::GetNextToken()
} }
if (m_config.m_read_strings && c == '\"') if (m_config.m_read_strings && c == '\"')
return SimpleParserValue::String(pos, new std::string(ReadString())); return SimpleParserValue::String(pos, new std::string(m_config.m_string_escape_sequences ? ReadStringWithEscapeSequences() : ReadString()));
if (m_config.m_read_integer_numbers && (isdigit(c) || (c == '+' || c == '-' || (m_config.m_read_floating_point_numbers && c == '.')) && isdigit(PeekChar()))) if (m_config.m_read_integer_numbers && (isdigit(c) || (c == '+' || c == '-' || (m_config.m_read_floating_point_numbers && c == '.')) && isdigit(PeekChar())))
{ {

View File

@ -24,6 +24,7 @@ public:
bool m_emit_new_line_tokens; bool m_emit_new_line_tokens;
bool m_read_strings; bool m_read_strings;
bool m_string_escape_sequences;
bool m_read_integer_numbers; bool m_read_integer_numbers;
bool m_read_floating_point_numbers; bool m_read_floating_point_numbers;
std::vector<MultiCharacterToken> m_multi_character_tokens; std::vector<MultiCharacterToken> m_multi_character_tokens;

View File

@ -0,0 +1,91 @@
#include "StringUtils.h"
#include <sstream>
namespace utils
{
/**
 * \brief Escapes a string so it can be placed between quotation marks
 * \param str The raw string to escape
 * \return A new string holding everything the stream overload writes for \p str
 */
std::string EscapeStringForQuotationMarks(const std::string_view& str)
{
    std::ostringstream escaped;

    // Delegate to the stream overload so both variants share one implementation
    EscapeStringForQuotationMarks(escaped, str);

    return escaped.str();
}
/**
 * \brief Writes a string to a stream in a form that can be placed between quotation marks
 *
 * Carriage return, line feed, tab, form feed, the quotation mark and the backslash are
 * written as two-character backslash sequences; every other character is written unchanged.
 *
 * \param stream The stream that receives the escaped output
 * \param str The raw string to escape
 */
void EscapeStringForQuotationMarks(std::ostream& stream, const std::string_view& str)
{
    for (const char ch : str)
    {
        // Look up the replacement sequence; nullptr means the character needs no escaping
        const char* sequence = nullptr;
        switch (ch)
        {
        case '\r':
            sequence = "\\r";
            break;
        case '\n':
            sequence = "\\n";
            break;
        case '\t':
            sequence = "\\t";
            break;
        case '\f':
            sequence = "\\f";
            break;
        case '"':
            sequence = "\\\"";
            break;
        case '\\':
            sequence = "\\\\";
            break;
        default:
            break;
        }

        if (sequence != nullptr)
            stream << sequence;
        else
            stream << ch;
    }
}
/**
 * \brief Resolves the backslash escape sequences of a string taken from between quotation marks
 * \param str The escaped string, without the surrounding quotation marks
 * \return A new string holding everything the stream overload writes for \p str
 */
std::string UnescapeStringFromQuotationMarks(const std::string_view& str)
{
    std::ostringstream unescaped;

    // Delegate to the stream overload so both variants share one implementation
    UnescapeStringFromQuotationMarks(unescaped, str);

    return unescaped.str();
}
/**
 * \brief Writes a string taken from between quotation marks to a stream with its escape sequences resolved
 *
 * A backslash followed by r, n, t or f produces the matching control character. A backslash
 * followed by any other character (including the quotation mark and the backslash itself)
 * produces that character. A single backslash at the very end of the input produces no output.
 *
 * \param stream The stream that receives the unescaped output
 * \param str The escaped string, without the surrounding quotation marks
 */
void UnescapeStringFromQuotationMarks(std::ostream& stream, const std::string_view& str)
{
    auto pendingBackslash = false;

    for (const char ch : str)
    {
        if (!pendingBackslash)
        {
            // Outside of an escape sequence: a backslash opens one, anything else is copied
            if (ch == '\\')
                pendingBackslash = true;
            else
                stream << ch;
            continue;
        }

        // The previous character was a backslash: translate the escape code
        pendingBackslash = false;

        char resolved = ch; // Unknown codes, '"' and '\\' stand for themselves
        switch (ch)
        {
        case 'r':
            resolved = '\r';
            break;
        case 'n':
            resolved = '\n';
            break;
        case 't':
            resolved = '\t';
            break;
        case 'f':
            resolved = '\f';
            break;
        default:
            break;
        }

        stream << resolved;
    }
}
}

View File

@ -0,0 +1,10 @@
#pragma once
#include <iosfwd>
#include <string>
#include <string_view>
// Helpers for converting between raw strings and their backslash-escaped form
// as it appears between quotation marks.
namespace utils
{
// Returns a copy of str in which carriage return, line feed, tab, form feed, the quotation mark
// and the backslash are replaced by two-character backslash sequences (\r, \n, \t, \f, \", \\).
std::string EscapeStringForQuotationMarks(const std::string_view& str);
// Same as above, but writes the escaped text to stream instead of returning it.
void EscapeStringForQuotationMarks(std::ostream& stream, const std::string_view& str);
// Returns a copy of str with backslash sequences resolved: \r, \n, \t and \f become the matching
// control character, a backslash before any other character yields that character.
// A lone backslash at the very end of str is dropped.
std::string UnescapeStringFromQuotationMarks(const std::string_view& str);
// Same as above, but writes the unescaped text to stream instead of returning it.
void UnescapeStringFromQuotationMarks(std::ostream& stream, const std::string_view& str);
}