From 88bc1c1056b29688a2460128c6ac98a7645cfbfd Mon Sep 17 00:00:00 2001 From: Jan Date: Sat, 23 Sep 2023 14:28:59 +0200 Subject: [PATCH] Fix SimpleLexer not being able to read escaped strings --- .../Parsing/Menu/MenuFileReader.cpp | 1 + src/Parser/Parsing/Impl/AbstractLexer.h | 48 ++++++++++ src/Parser/Parsing/Simple/SimpleLexer.cpp | 6 +- src/Parser/Parsing/Simple/SimpleLexer.h | 1 + src/Utils/Utils/StringUtils.cpp | 91 +++++++++++++++++++ src/Utils/Utils/StringUtils.h | 10 ++ 6 files changed, 154 insertions(+), 3 deletions(-) create mode 100644 src/Utils/Utils/StringUtils.cpp create mode 100644 src/Utils/Utils/StringUtils.h diff --git a/src/ObjLoading/Parsing/Menu/MenuFileReader.cpp b/src/ObjLoading/Parsing/Menu/MenuFileReader.cpp index edbc58d3..d64eff63 100644 --- a/src/ObjLoading/Parsing/Menu/MenuFileReader.cpp +++ b/src/ObjLoading/Parsing/Menu/MenuFileReader.cpp @@ -129,6 +129,7 @@ std::unique_ptr MenuFileReader::ReadMenuFile() SimpleLexer::Config lexerConfig; lexerConfig.m_emit_new_line_tokens = false; lexerConfig.m_read_strings = true; + lexerConfig.m_string_escape_sequences = true; lexerConfig.m_read_integer_numbers = true; lexerConfig.m_read_floating_point_numbers = true; MenuExpressionMatchers().ApplyTokensToLexerConfig(lexerConfig); diff --git a/src/Parser/Parsing/Impl/AbstractLexer.h b/src/Parser/Parsing/Impl/AbstractLexer.h index dbf1b12b..dfbb91c8 100644 --- a/src/Parser/Parsing/Impl/AbstractLexer.h +++ b/src/Parser/Parsing/Impl/AbstractLexer.h @@ -2,11 +2,13 @@ #include #include +#include #include "Utils/ClassUtils.h" #include "Parsing/ILexer.h" #include "Parsing/IParserLineStream.h" #include "Parsing/ParsingException.h" +#include "Utils/StringUtils.h" template class AbstractLexer : public ILexer @@ -140,6 +142,52 @@ protected: return std::string(currentLine.m_line, startPos, m_current_line_offset - startPos); } + /** + * \brief Reads a quoted string from the current position, resolving backslash escape sequences + * \return The content of the read string with escape sequences resolved + */ + std::string 
ReadStringWithEscapeSequences() + { + const auto& currentLine = CurrentLine(); + assert(m_current_line_offset >= 1); + assert(currentLine.m_line[m_current_line_offset - 1] == '"'); + + const auto startPos = m_current_line_offset; + const auto lineSize = currentLine.m_line.size(); + auto isEscaped = false; + auto inEscape = false; + while (true) + { + if (m_current_line_offset >= lineSize) + throw ParsingException(TokenPos(*currentLine.m_filename, currentLine.m_line_number, m_current_line_offset), "Unclosed string"); + + const auto c = currentLine.m_line[m_current_line_offset]; + + if (c == '\"' && !inEscape) + break; + + if (c == '\\' && !inEscape) + { + isEscaped = true; + inEscape = true; + } + else + { + inEscape = false; + } + + m_current_line_offset++; + } + + std::string str(currentLine.m_line, startPos, m_current_line_offset++ - startPos); + if (!isEscaped) + return str; + + std::ostringstream ss; + utils::UnescapeStringFromQuotationMarks(ss, std::move(str)); + return ss.str(); + } + /** * \brief Reads an identifier from the current position * \return The value of the read identifier diff --git a/src/Parser/Parsing/Simple/SimpleLexer.cpp b/src/Parser/Parsing/Simple/SimpleLexer.cpp index b5e0775b..719bad83 100644 --- a/src/Parser/Parsing/Simple/SimpleLexer.cpp +++ b/src/Parser/Parsing/Simple/SimpleLexer.cpp @@ -14,7 +14,7 @@ SimpleLexer::MultiCharacterTokenLookupEntry::MultiCharacterTokenLookupEntry(cons SimpleLexer::SimpleLexer(IParserLineStream* stream) : AbstractLexer(stream), - m_config{false, true, true, true, {}}, + m_config{false, true, false, true, true, {}}, m_check_for_multi_character_tokens(false), m_last_line(1) { @@ -31,7 +31,7 @@ SimpleLexer::SimpleLexer(IParserLineStream* stream, Config config) m_config.m_multi_character_tokens.clear(); // If reading floating point numbers then must be reading integers - assert(config.m_read_floating_point_numbers == false || config.m_read_floating_point_numbers == config.m_read_integer_numbers); + 
assert(m_config.m_read_floating_point_numbers == false || m_config.m_read_floating_point_numbers == m_config.m_read_integer_numbers); } void SimpleLexer::AddMultiCharacterTokenConfigToLookup(Config::MultiCharacterToken tokenConfig) @@ -121,7 +121,7 @@ SimpleParserValue SimpleLexer::GetNextToken() } if (m_config.m_read_strings && c == '\"') - return SimpleParserValue::String(pos, new std::string(ReadString())); + return SimpleParserValue::String(pos, new std::string(m_config.m_string_escape_sequences ? ReadStringWithEscapeSequences() : ReadString())); if (m_config.m_read_integer_numbers && (isdigit(c) || (c == '+' || c == '-' || (m_config.m_read_floating_point_numbers && c == '.')) && isdigit(PeekChar()))) { diff --git a/src/Parser/Parsing/Simple/SimpleLexer.h b/src/Parser/Parsing/Simple/SimpleLexer.h index 7563e2db..8a4315d5 100644 --- a/src/Parser/Parsing/Simple/SimpleLexer.h +++ b/src/Parser/Parsing/Simple/SimpleLexer.h @@ -24,6 +24,7 @@ public: bool m_emit_new_line_tokens; bool m_read_strings; + bool m_string_escape_sequences; bool m_read_integer_numbers; bool m_read_floating_point_numbers; std::vector m_multi_character_tokens; diff --git a/src/Utils/Utils/StringUtils.cpp b/src/Utils/Utils/StringUtils.cpp new file mode 100644 index 00000000..e7f84d9a --- /dev/null +++ b/src/Utils/Utils/StringUtils.cpp @@ -0,0 +1,91 @@ +#include "StringUtils.h" + +#include + +namespace utils +{ + std::string EscapeStringForQuotationMarks(const std::string_view& str) + { + std::ostringstream ss; + EscapeStringForQuotationMarks(ss, str); + return ss.str(); + } + + void EscapeStringForQuotationMarks(std::ostream& stream, const std::string_view& str) + { + for (const auto& c : str) + { + switch (c) + { + case '\r': + stream << "\\r"; + break; + case '\n': + stream << "\\n"; + break; + case '\t': + stream << "\\t"; + break; + case '\f': + stream << "\\f"; + break; + case '"': + stream << "\\\""; + break; + case '\\': + stream << "\\\\"; + break; + default: + stream << c; + break; + } 
+ } + } + + std::string UnescapeStringFromQuotationMarks(const std::string_view& str) + { + std::ostringstream ss; + UnescapeStringFromQuotationMarks(ss, str); + return ss.str(); + } + + void UnescapeStringFromQuotationMarks(std::ostream& stream, const std::string_view& str) + { + auto inEscape = false; + for (const auto& c : str) + { + if (inEscape) + { + switch (c) + { + case 'r': + stream << "\r"; + break; + case 'n': + stream << "\n"; + break; + case 't': + stream << "\t"; + break; + case 'f': + stream << "\f"; + break; + case '"': + stream << "\""; + break; + case '\\': + stream << "\\"; + break; + default: + stream << c; + break; + } + inEscape = false; + } + else if (c != '\\') + stream << c; + else + inEscape = true; + } + } +} diff --git a/src/Utils/Utils/StringUtils.h b/src/Utils/Utils/StringUtils.h new file mode 100644 index 00000000..c74f141d --- /dev/null +++ b/src/Utils/Utils/StringUtils.h @@ -0,0 +1,10 @@ +#pragma once +#include + +namespace utils +{ + std::string EscapeStringForQuotationMarks(const std::string_view& str); + void EscapeStringForQuotationMarks(std::ostream& stream, const std::string_view& str); + std::string UnescapeStringFromQuotationMarks(const std::string_view& str); + void UnescapeStringFromQuotationMarks(std::ostream& stream, const std::string_view& str); +}