From 7a7126f3b87d444039237d001fe8faa36d5f77e2 Mon Sep 17 00:00:00 2001
From: ISSOtm <eldredhabert0@gmail.com>
Date: Sat, 21 May 2022 12:57:46 +0200
Subject: [PATCH] Implement bit flipping with a lookup table

Should improve performance.
This version is cooler, and also does not suffer from iteration limits.
---
 include/gfx/main.hpp | 20 ++++++++++++++------
 src/gfx/process.cpp  |  6 +++---
 src/gfx/reverse.cpp  |  4 ++--
 3 files changed, 19 insertions(+), 11 deletions(-)
diff --git a/include/gfx/main.hpp b/include/gfx/main.hpp
index dfed320d..a85b929f 100644
--- a/include/gfx/main.hpp
+++ b/include/gfx/main.hpp
@@ -13,6 +13,7 @@
 #include <limits.h>
 #include <stdint.h>
 #include <string>
+#include <utility>
 #include <vector>
 
 #include "helpers.h"
@@ -87,12 +88,19 @@ struct Palette {
 	uint8_t size() const;
 };
 
-static constexpr uint8_t flip(uint8_t byte) {
-	// To flip all the bits, we'll flip both nibbles, then each nibble half, etc.
-	byte = (byte & 0b0000'1111) << 4 | (byte & 0b1111'0000) >> 4;
-	byte = (byte & 0b0011'0011) << 2 | (byte & 0b1100'1100) >> 2;
-	byte = (byte & 0b0101'0101) << 1 | (byte & 0b1010'1010) >> 1;
-	return byte;
+namespace detail {
+template<typename T, T... i>
+constexpr auto flipTable(std::integer_sequence<T, i...>) {
+	return std::array{[](uint8_t byte) {
+		// One entry per `i`: reverse its bit order by swapping nibbles, then bit pairs, then single bits
+		byte = (byte & 0b0000'1111) << 4 | (byte & 0b1111'0000) >> 4;
+		byte = (byte & 0b0011'0011) << 2 | (byte & 0b1100'1100) >> 2;
+		byte = (byte & 0b0101'0101) << 1 | (byte & 0b1010'1010) >> 1;
+		return byte;
+	}(i)...};
+}
+} // namespace detail
+// flipTable[b] is b with its bit order reversed (256 entries, index with 0..255); `inline` so all TUs share one copy
+inline constexpr auto flipTable = detail::flipTable(std::make_integer_sequence<uint16_t, 256>());
 
 #endif /* RGBDS_GFX_MAIN_HPP */
diff --git a/src/gfx/process.cpp b/src/gfx/process.cpp
index 8b15f676..a857c688 100644
--- a/src/gfx/process.cpp
+++ b/src/gfx/process.cpp
@@ -652,7 +652,7 @@ public:
 				// Count the line itself as mirrorred; vertical mirroring is
 				// already taken care of because the symmetric line will be XOR'd
 				// the same way. (...which is a problem, but probably benign.)
-				_hash ^= flip(bitplanes >> 8) << 8 | flip(bitplanes & 0xFF);
+				_hash ^= flipTable[bitplanes >> 8] << 8 | flipTable[bitplanes & 0xFF];
 			}
 		}
 	}
@@ -680,7 +680,7 @@ public:
 
 		// Check if we have horizontal mirroring, which scans the array forward again
 		if (std::equal(_data.begin(), _data.end(), other._data.begin(),
-		               [](uint8_t lhs, uint8_t rhs) { return lhs == flip(rhs); })) {
+		               [](uint8_t lhs, uint8_t rhs) { return lhs == flipTable[rhs]; })) {
 			return MatchType::HFLIP;
 		}
 
@@ -694,7 +694,7 @@ public:
 			if (lhs != rhs) {
 				hasVFlip = false;
 			}
-			if (lhs != flip(rhs)) {
+			if (lhs != flipTable[rhs]) {
 				hasVHFlip = false;
 			}
 			if (!hasVFlip && !hasVHFlip) {
diff --git a/src/gfx/reverse.cpp b/src/gfx/reverse.cpp
index 81f693c6..4ffe1b1c 100644
--- a/src/gfx/reverse.cpp
+++ b/src/gfx/reverse.cpp
@@ -271,8 +271,8 @@ void reverse() {
 				uint8_t realY = attribute & 0x40 ? 7 - y : y;
 				uint8_t bitplane0 = tileData[realY * 2], bitplane1 = tileData[realY * 2 + 1];
 				if (attribute & 0x20) { // Handle horizontal flip
-					bitplane0 = flip(bitplane0);
-					bitplane1 = flip(bitplane1);
+					bitplane0 = flipTable[bitplane0];
+					bitplane1 = flipTable[bitplane1];
 				}
 				uint8_t *ptr = &rowPtrs[y][tx * 8 * SIZEOF_PIXEL];
 				for (uint8_t x = 0; x < 8; ++x) {