From 7a7126f3b87d444039237d001fe8faa36d5f77e2 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sat, 21 May 2022 12:57:46 +0200 Subject: [PATCH] Implement bit flipping with a lookup table Should improve performance. This version is cooler, and also does not suffer from iteration limits --- include/gfx/main.hpp | 20 ++++++++++++++------ src/gfx/process.cpp | 6 +++--- src/gfx/reverse.cpp | 4 ++-- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/include/gfx/main.hpp b/include/gfx/main.hpp index dfed320d..a85b929f 100644 --- a/include/gfx/main.hpp +++ b/include/gfx/main.hpp @@ -13,6 +13,7 @@ #include #include #include +#include <utility> #include #include "helpers.h" @@ -87,12 +88,19 @@ struct Palette { uint8_t size() const; }; -static constexpr uint8_t flip(uint8_t byte) { - // To flip all the bits, we'll flip both nibbles, then each nibble half, etc. - byte = (byte & 0b0000'1111) << 4 | (byte & 0b1111'0000) >> 4; - byte = (byte & 0b0011'0011) << 2 | (byte & 0b1100'1100) >> 2; - byte = (byte & 0b0101'0101) << 1 | (byte & 0b1010'1010) >> 1; - return byte; +namespace detail { +template<typename T, T... i> +static constexpr auto flipTable(std::integer_sequence<T, i...>) { + return std::array{[](uint8_t byte) { + // To flip all the bits, we'll flip both nibbles, then each nibble half, etc. 
+ byte = (byte & 0b0000'1111) << 4 | (byte & 0b1111'0000) >> 4; + byte = (byte & 0b0011'0011) << 2 | (byte & 0b1100'1100) >> 2; + byte = (byte & 0b0101'0101) << 1 | (byte & 0b1010'1010) >> 1; + return byte; + }(i)...}; } +} +// Flipping tends to happen fairly often, so take a bite out of dcache to speed it up +static constexpr auto flipTable = detail::flipTable(std::make_integer_sequence<uint16_t, 256>()); #endif /* RGBDS_GFX_MAIN_HPP */ diff --git a/src/gfx/process.cpp b/src/gfx/process.cpp index 8b15f676..a857c688 100644 --- a/src/gfx/process.cpp +++ b/src/gfx/process.cpp @@ -652,7 +652,7 @@ public: // Count the line itself as mirrorred; vertical mirroring is // already taken care of because the symmetric line will be XOR'd // the same way. (...which is a problem, but probably benign.) - _hash ^= flip(bitplanes >> 8) << 8 | flip(bitplanes & 0xFF); + _hash ^= flipTable[bitplanes >> 8] << 8 | flipTable[bitplanes & 0xFF]; } } } @@ -680,7 +680,7 @@ public: // Check if we have horizontal mirroring, which scans the array forward again if (std::equal(_data.begin(), _data.end(), other._data.begin(), - [](uint8_t lhs, uint8_t rhs) { return lhs == flip(rhs); })) { + [](uint8_t lhs, uint8_t rhs) { return lhs == flipTable[rhs]; })) { return MatchType::HFLIP; } @@ -694,7 +694,7 @@ public: if (lhs != rhs) { hasVFlip = false; } - if (lhs != flip(rhs)) { + if (lhs != flipTable[rhs]) { hasVHFlip = false; } if (!hasVFlip && !hasVHFlip) { diff --git a/src/gfx/reverse.cpp b/src/gfx/reverse.cpp index 81f693c6..4ffe1b1c 100644 --- a/src/gfx/reverse.cpp +++ b/src/gfx/reverse.cpp @@ -271,8 +271,8 @@ void reverse() { uint8_t realY = attribute & 0x40 ? 
7 - y : y; uint8_t bitplane0 = tileData[realY * 2], bitplane1 = tileData[realY * 2 + 1]; if (attribute & 0x20) { // Handle horizontal flip - bitplane0 = flip(bitplane0); - bitplane1 = flip(bitplane1); + bitplane0 = flipTable[bitplane0]; + bitplane1 = flipTable[bitplane1]; } uint8_t *ptr = &rowPtrs[y][tx * 8 * SIZEOF_PIXEL]; for (uint8_t x = 0; x < 8; ++x) {