Reimplement basic RGBGFX features in C++

Currently missing from the old version: - `-f` ("fixing" the input image to be indexed) - `-m` (the code for detecting mirrored tiles is missing, but all of the "plumbing" is otherwise there) - `-C` - `-d` - `-x` (though I need to check the exact functionality the old one has) - Also the man page is still a draft and needs to be fleshed out More planned features are not implemented yet either: - Explicit palette spec - Better error messages, also error "images" - Better 8x16 support, as well as other "dedup unit" sizes - Support for arbitrary number of palettes & colors per palette - Other output formats (for example, a "full" palette map for "streaming" use cases like gb-open-world) - Quantization? Some things may also be bugged: - Transparency support - Tile offsets (not exposed yet) - Tile counts per bank (not exposed yet) ...and performance remains to be checked. We need to set up some tests, honestly.
2025-11-21 18:52:07 +00:00 · 2022-02-06 23:30:37 +01:00
parent 34bc650341
commit 8c62e80c18
23 changed files with 2022 additions and 1696 deletions
--- a/src/gfx/convert.cpp
+++ b/src/gfx/convert.cpp
@@ -0,0 +1,869 @@
+/*
+ * This file is part of RGBDS.
+ *
+ * Copyright (c) 2022, Eldred Habert and RGBDS contributors.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "gfx/convert.hpp"
+
+#include <algorithm>
+#include <assert.h>
+#include <errno.h>
+#include <fstream>
+#include <inttypes.h>
+#include <memory>
+#include <optional>
+#include <png.h>
+#include <setjmp.h>
+#include <string.h>
+#include <tuple>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "defaultinitalloc.hpp"
+#include "helpers.h"
+
+#include "gfx/main.hpp"
+#include "gfx/pal_packing.hpp"
+#include "gfx/pal_sorting.hpp"
+#include "gfx/proto_palette.hpp"
+
+struct Rgba {
+	uint8_t red;
+	uint8_t green;
+	uint8_t blue;
+	uint8_t alpha;
+
+	Rgba(uint8_t r, uint8_t g, uint8_t b, uint8_t a) : red(r), green(g), blue(b), alpha(a) {}
+	Rgba(uint32_t rgba) : red(rgba), green(rgba >> 8), blue(rgba >> 16), alpha(rgba >> 24) {}
+
+	operator uint32_t() const {
+		auto shl = [](uint8_t val, unsigned shift) { return static_cast<uint32_t>(val) << shift; };
+		return shl(red, 0) | shl(green, 8) | shl(blue, 16) | shl(alpha, 24);
+	}
+	bool operator!=(Rgba const &other) const {
+		return static_cast<uint32_t>(*this) != static_cast<uint32_t>(other);
+	}
+
+	bool isGray() const { return red == green && green == blue; }
+
+	/**
+	 * CGB colors are RGB555, so we use bit 15 to signify that the color is transparent instead
+	 * Since the rest of the bits don't matter then, we return 0x8000 exactly.
+	 */
+	static constexpr uint16_t transparent = 0b1'00000'00000'00000;
+
+	static constexpr uint8_t transparency_threshold = 5; // TODO: adjust this
+	/**
+	 * Computes the equivalent CGB color, respects the color curve depending on options
+	 */
+	uint16_t cgbColor() const {
+		if (alpha < transparency_threshold)
+			return transparent;
+		if (options.useColorCurve) {
+			assert(!"TODO");
+		} else {
+			return (red >> 3) | (green >> 3) << 5 | (blue >> 3) << 10;
+		}
+	}
+};
+
+class ImagePalette {
+	// Use as many slots as there are CGB colors (plus transparency)
+	std::array<std::optional<Rgba>, 0x8001> _colors;
+
+public:
+	ImagePalette() = default;
+
+	void registerColor(Rgba const &rgba) {
+		decltype(_colors)::value_type &slot = _colors[rgba.cgbColor()];
+
+		if (!slot.has_value()) {
+			slot.emplace(rgba);
+		} else if (*slot != rgba) {
+			warning("Different colors melded together"); // TODO: indicate position
+		}
+	}
+
+	size_t size() const { return _colors.size(); }
+
+	auto begin() const { return _colors.begin(); }
+
+	auto end() const { return _colors.end(); }
+};
+
+class Png {
+	std::filesystem::path const &path;
+	std::filebuf file{};
+	png_structp png = nullptr;
+	png_infop info = nullptr;
+
+	// These are cached for speed
+	uint32_t width, height;
+	DefaultInitVec<uint16_t> pixels;
+	ImagePalette colors;
+	int colorType;
+	int nbColors;
+	png_colorp embeddedPal = nullptr;
+	png_bytep transparencyPal = nullptr;
+	bool isGrayOnly = true;
+
+	[[noreturn]] static void handleError(png_structp png, char const *msg) {
+		struct Png *self = reinterpret_cast<Png *>(png_get_error_ptr(png));
+
+		fatal("Error reading input image (\"%s\"): %s", self->path.c_str(), msg);
+	}
+
+	static void handleWarning(png_structp png, char const *msg) {
+		struct Png *self = reinterpret_cast<Png *>(png_get_error_ptr(png));
+
+		warning("In input image (\"%s\"): %s", self->path.c_str(), msg);
+	}
+
+	static void readData(png_structp png, png_bytep data, size_t length) {
+		struct Png *self = reinterpret_cast<Png *>(png_get_io_ptr(png));
+		std::streamsize expectedLen = length;
+		std::streamsize nbBytesRead = self->file.sgetn(reinterpret_cast<char *>(data), expectedLen);
+
+		if (nbBytesRead != expectedLen) {
+			fatal("Error reading input image (\"%s\"): file too short (expected at least %zd more "
+			      "bytes after reading %lld)",
+			      self->path.c_str(), length - nbBytesRead,
+			      self->file.pubseekoff(0, std::ios_base::cur));
+		}
+	}
+
+public:
+	ImagePalette const &getColors() const { return colors; }
+
+	int getColorType() const { return colorType; }
+
+	std::tuple<int, png_const_colorp, png_bytep> getEmbeddedPal() const {
+		return {nbColors, embeddedPal, transparencyPal};
+	}
+
+	uint32_t getWidth() const { return width; }
+
+	uint32_t getHeight() const { return height; }
+
+	uint16_t &pixel(uint32_t x, uint32_t y) { return pixels[y * width + x]; }
+
+	uint16_t const &pixel(uint32_t x, uint32_t y) const { return pixels[y * width + x]; }
+
+	bool hasNonGray() const { return !isGrayOnly; }
+
+	/**
+	 * Reads a PNG and notes all of its colors
+	 *
+	 * This code is more complicated than strictly necessary, but that's because of the API
+	 * being used: the "high-level" interface doesn't provide all the transformations we need,
+	 * so we use the "lower-level" one instead.
+	 * We also use that occasion to only read the PNG one line at a time, since we store all of
+	 * the pixel data in `pixels`, which saves on memory allocations.
+	 */
+	explicit Png(std::filesystem::path const &filePath) : path(filePath), colors() {
+		if (file.open(path, std::ios_base::in | std::ios_base::binary) == nullptr) {
+			fatal("Failed to open input image (\"%s\"): %s", path.c_str(), strerror(errno));
+		}
+
+		options.verbosePrint("Opened input file\n");
+
+		std::array<unsigned char, 8> pngHeader;
+
+		if (file.sgetn(reinterpret_cast<char *>(pngHeader.data()), pngHeader.size())
+		        != static_cast<std::streamsize>(pngHeader.size()) // Not enough bytes?
+		    || png_sig_cmp(pngHeader.data(), 0, pngHeader.size()) != 0) {
+			fatal("Input file (\"%s\") is not a PNG image!", path.c_str());
+		}
+
+		options.verbosePrint("PNG header signature is OK\n");
+
+		png = png_create_read_struct(PNG_LIBPNG_VER_STRING, (png_voidp)this, handleError,
+		                             handleWarning);
+		if (!png) {
+			fatal("Failed to allocate PNG structure: %s", strerror(errno));
+		}
+
+		info = png_create_info_struct(png);
+		if (!info) {
+			png_destroy_read_struct(&png, nullptr, nullptr);
+			fatal("Failed to allocate PNG info structure: %s", strerror(errno));
+		}
+
+		png_set_read_fn(png, this, readData);
+		png_set_sig_bytes(png, pngHeader.size());
+
+		// TODO: png_set_crc_action(png, PNG_CRC_ERROR_QUIT, PNG_CRC_WARN_DISCARD);
+
+		// Skipping chunks we don't use should improve performance
+		// TODO: png_set_keep_unknown_chunks(png, ...);
+
+		// Process all chunks up to but not including the image data
+		png_read_info(png, info);
+
+		int bitDepth, interlaceType; //, compressionType, filterMethod;
+
+		png_get_IHDR(png, info, &width, &height, &bitDepth, &colorType, &interlaceType, nullptr,
+		             nullptr);
+
+		if (width % 8 != 0)
+			fatal("Image width (%" PRIu32 " pixels) is not a multiple of 8!", width);
+		if (height % 8 != 0)
+			fatal("Image height (%" PRIu32 " pixels) is not a multiple of 8!", height);
+
+		// TODO: use an allocator that doesn't zero on init to save potentially a lot of perf
+		// https://stackoverflow.com/questions/21028299/is-this-behavior-of-vectorresizesize-type-n-under-c11-and-boost-container/21028912#21028912
+		pixels.resize(static_cast<size_t>(width) * static_cast<size_t>(height));
+
+		auto colorTypeName = [this]() {
+			switch (colorType) {
+			case PNG_COLOR_TYPE_GRAY:
+				return "grayscale";
+			case PNG_COLOR_TYPE_GRAY_ALPHA:
+				return "grayscale + alpha";
+			case PNG_COLOR_TYPE_PALETTE:
+				return "palette";
+			case PNG_COLOR_TYPE_RGB:
+				return "RGB";
+			case PNG_COLOR_TYPE_RGB_ALPHA:
+				return "RGB + alpha";
+			default:
+				fatal("Unknown color type %d", colorType);
+			}
+		};
+		auto interlaceTypeName = [&interlaceType]() {
+			switch (interlaceType) {
+			case PNG_INTERLACE_NONE:
+				return "not interlaced";
+			case PNG_INTERLACE_ADAM7:
+				return "interlaced (Adam7)";
+			default:
+				fatal("Unknown interlace type %d", interlaceType);
+			}
+		};
+		options.verbosePrint("Input image: %" PRIu32 "x%" PRIu32 " pixels, %dbpp %s, %s\n", height,
+		                     width, bitDepth, colorTypeName(), interlaceTypeName());
+
+		if (png_get_PLTE(png, info, &embeddedPal, &nbColors) != 0) {
+			int nbTransparentEntries;
+			if (png_get_tRNS(png, info, &transparencyPal, &nbTransparentEntries, nullptr)) {
+				assert(nbTransparentEntries == nbColors);
+			}
+
+			options.verbosePrint("Embedded palette has %d colors: [", nbColors);
+			for (int i = 0; i < nbColors; ++i) {
+				auto const &color = embeddedPal[i];
+				options.verbosePrint("#%02x%02x%02x%s", color.red, color.green, color.blue,
+				                     i != nbColors - 1 ? ", " : "]\n");
+			}
+		} else {
+			options.verbosePrint("No embedded palette\n");
+		}
+
+		// Set up transformations; to turn everything into RGBA888
+		// TODO: it's not necessary to uniformize the pixel data (in theory), and not doing
+		// so *might* improve performance, and should reduce memory usage.
+
+		// Convert grayscale to RGB
+		switch (colorType & ~PNG_COLOR_MASK_ALPHA) {
+		case PNG_COLOR_TYPE_GRAY:
+			png_set_gray_to_rgb(png); // This also converts tRNS to alpha
+			break;
+		case PNG_COLOR_TYPE_PALETTE:
+			png_set_palette_to_rgb(png);
+			break;
+		}
+
+		// If we read a tRNS chunk, convert it to alpha
+		if (png_get_valid(png, info, PNG_INFO_tRNS))
+			png_set_tRNS_to_alpha(png);
+		// Otherwise, if we lack an alpha channel, default to full opacity
+		else if (!(colorType & PNG_COLOR_MASK_ALPHA))
+			png_set_add_alpha(png, 0xFFFF, PNG_FILLER_AFTER);
+
+		// Scale 16bpp back to 8 (we don't need all of that precision anyway)
+		if (bitDepth == 16)
+			png_set_scale_16(png);
+		else if (bitDepth < 8)
+			png_set_packing(png);
+
+		// Set interlace handling (MUST be done before `png_read_update_info`)
+		int nbPasses = png_set_interlace_handling(png);
+
+		// Update `info` with the transformations
+		png_read_update_info(png, info);
+		// These shouldn't have changed
+		assert(png_get_image_width(png, info) == width);
+		assert(png_get_image_height(png, info) == height);
+		// These should have changed, however
+		assert(png_get_color_type(png, info) == PNG_COLOR_TYPE_RGBA);
+		assert(png_get_bit_depth(png, info) == 8);
+
+		// Now that metadata has been read, we can process the image data
+
+		std::vector<png_byte> row(png_get_rowbytes(png, info));
+
+		if (interlaceType == PNG_INTERLACE_NONE) {
+			for (png_uint_32 y = 0; y < height; ++y) {
+				png_read_row(png, row.data(), nullptr);
+
+				for (png_uint_32 x = 0; x < width; ++x) {
+					Rgba rgba(row[x * 4], row[x * 4 + 1], row[x * 4 + 2], row[x * 4 + 3]);
+
+					colors.registerColor(rgba);
+					pixel(x, y) = rgba.cgbColor();
+					isGrayOnly &= rgba.isGray();
+				}
+			}
+		} else {
+			// For interlace to work properly, we must read the image `nbPasses` times
+			for (int pass = 0; pass < nbPasses; ++pass) {
+				// The interlacing pass must be skipped if its width or height is reported as zero
+				if (PNG_PASS_COLS(width, pass) == 0 || PNG_PASS_ROWS(height, pass) == 0) {
+					continue;
+				}
+
+				png_uint_32 xStep = 1u << PNG_PASS_COL_SHIFT(pass);
+				png_uint_32 yStep = 1u << PNG_PASS_ROW_SHIFT(pass);
+
+				for (png_uint_32 y = PNG_PASS_START_ROW(pass); y < height; y += yStep) {
+					png_bytep ptr = row.data();
+
+					png_read_row(png, ptr, nullptr);
+					for (png_uint_32 x = PNG_PASS_START_COL(pass); x < width; x += xStep) {
+						Rgba rgba(ptr[0], ptr[1], ptr[2], ptr[3]);
+
+						colors.registerColor(rgba);
+						pixel(x, y) = rgba.cgbColor();
+						isGrayOnly &= rgba.isGray();
+						ptr += 4;
+					}
+				}
+			}
+		}
+
+		png_read_end(png,
+		             nullptr); // We don't care about chunks after the image data (comments, etc.)
+	}
+
+	~Png() { png_destroy_read_struct(&png, &info, nullptr); }
+
+	class TilesVisitor {
+		Png const &_png;
+		bool const _columnMajor;
+		uint32_t const _width, _height;
+		uint32_t const _limit = _columnMajor ? _height : _width;
+
+	public:
+		TilesVisitor(Png const &png, bool columnMajor, uint32_t width, uint32_t height)
+			: _png(png), _columnMajor(columnMajor), _width(width), _height(height) {}
+
+		class Tile {
+			Png const &_png;
+			uint32_t const _x, _y;
+
+		public:
+			Tile(Png const &png, uint32_t x, uint32_t y) : _png(png), _x(x), _y(y) {}
+
+			uint16_t pixel(uint32_t xOfs, uint32_t yOfs) const {
+				return _png.pixel(_x + xOfs, _y + yOfs);
+			}
+		};
+
+	private:
+		struct iterator {
+			TilesVisitor const &parent;
+			uint32_t const limit;
+			uint32_t x, y;
+
+		public:
+			std::pair<uint32_t, uint32_t> coords() const { return {x, y}; }
+			Tile operator*() const { return {parent._png, x, y}; }
+
+			iterator &operator++() {
+				auto [major, minor] = parent._columnMajor ? std::tie(y, x) : std::tie(x, y);
+				major += 8;
+				if (major == limit) {
+					minor += 8;
+					major = 0;
+				}
+
+				return *this;
+			}
+
+			bool operator!=(iterator const &rhs) const {
+				return coords() != rhs.coords(); // Compare the returned coord pairs
+			}
+		};
+
+	public:
+		iterator begin() const { return {*this, _width, 0, 0}; }
+		iterator end() const {
+			iterator it{*this, _width, _width - 8, _height - 8}; // Last valid one
+			return ++it; // Now one-past-last
+		}
+	};
+public:
+	TilesVisitor visitAsTiles(bool columnMajor) const {
+		return {*this, columnMajor, width, height};
+	}
+};
+
+class RawTiles {
+public:
+	/**
+	 * A tile which only contains indices into the image's global palette
+	 */
+	class RawTile {
+		std::array<std::array<size_t, 8>, 8> _pixelIndices{};
+
+	public:
+		// Not super clean, but it's closer to matrix notation
+		size_t &operator()(size_t x, size_t y) { return _pixelIndices[y][x]; }
+	};
+
+private:
+	std::vector<RawTile> _tiles;
+
+public:
+	/**
+	 * Creates a new raw tile, and returns a reference to it so it can be filled in
+	 */
+	RawTile &newTile() {
+		_tiles.emplace_back();
+		return _tiles.back();
+	}
+};
+
+struct AttrmapEntry {
+	size_t protoPaletteID;
+	uint16_t tileID;
+	bool yFlip;
+	bool xFlip;
+};
+
+static void outputPalettes(std::vector<Palette> const &palettes) {
+	std::filebuf output;
+	output.open(options.palettes, std::ios_base::out | std::ios_base::binary);
+
+	for (Palette const &palette : palettes) {
+		for (uint16_t color : palette) {
+			output.sputc(color & 0xFF);
+			output.sputc(color >> 8);
+		}
+	}
+}
+
+namespace unoptimized {
+
+// TODO: this is very redundant with `TileData`; try merging both?
+static void outputTileData(Png const &png, DefaultInitVec<AttrmapEntry> const &attrmap,
+                           std::vector<Palette> const &palettes,
+                           DefaultInitVec<size_t> const &mappings) {
+	std::filebuf output;
+	output.open(options.tilemap, std::ios_base::out | std::ios_base::binary);
+
+	auto iter = attrmap.begin();
+	for (auto tile : png.visitAsTiles(options.columnMajor)) {
+		Palette const &palette = palettes[mappings[iter->protoPaletteID]];
+		for (uint32_t y = 0; y < 8; ++y) {
+			uint16_t row = 0;
+			for (uint32_t x = 0; x < 8; ++x) {
+				row <<= 1;
+				uint8_t index = palette.indexOf(tile.pixel(x, y));
+				if (index & 1) {
+					row |= 0x001;
+				}
+				if (index & 2) {
+					row |= 0x100;
+				}
+			}
+			output.sputc(row & 0xFF);
+			output.sputc(row >> 8);
+		}
+		++iter;
+	}
+	assert(iter == attrmap.end());
+}
+
+static void outputMaps(Png const &png, DefaultInitVec<AttrmapEntry> const &attrmap,
+                       DefaultInitVec<size_t> const &mappings) {
+	std::optional<std::filebuf> tilemapOutput, attrmapOutput;
+	if (!options.tilemap.empty()) {
+		tilemapOutput.emplace();
+		tilemapOutput->open(options.tilemap, std::ios_base::out | std::ios_base::binary);
+	}
+	if (!options.attrmap.empty()) {
+		attrmapOutput.emplace();
+		attrmapOutput->open(options.attrmap, std::ios_base::out | std::ios_base::binary);
+	}
+
+	uint8_t tileID = 0;
+	uint8_t bank = 0;
+	auto iter = attrmap.begin();
+	for ([[maybe_unused]] auto tile : png.visitAsTiles(options.columnMajor)) {
+		if (tileID == options.maxNbTiles[bank]) {
+			assert(bank == 0);
+			bank = 1;
+			tileID = 0;
+		}
+
+		if (tilemapOutput.has_value()) {
+			tilemapOutput->sputc(tileID + options.baseTileIDs[bank]);
+		}
+		if (attrmapOutput.has_value()) {
+			uint8_t palID = mappings[iter->protoPaletteID] & 7;
+			attrmapOutput->sputc(palID | bank << 3); // The other flags are all 0
+			++iter;
+		}
+		++tileID;
+	}
+}
+
+} // namespace unoptimized
+
+static uint8_t flip(uint8_t byte) {
+	// To flip all the bits, we'll flip both nibbles, then each nibble half, etc.
+	byte = (byte & 0x0F) << 4 | (byte & 0xF0) >> 4;
+	byte = (byte & 0x33) << 2 | (byte & 0xCC) >> 2;
+	byte = (byte & 0x55) << 1 | (byte & 0xAA) >> 1;
+	return byte;
+}
+
+class TileData {
+	// TODO: might want to switch to `std::byte` instead?
+	std::array<uint8_t, 16> _data;
+	// The hash is a bit lax: it's the XOR of all lines, and every other nibble is identical
+	// if horizontal mirroring is in effect. It should still be a reasonable tie-breaker in
+	// non-pathological cases.
+	uint16_t _hash;
+public:
+	mutable size_t tileID;
+
+	TileData(Png::TilesVisitor::Tile const &tile, Palette const &palette) : _hash(0) {
+		for (uint32_t y = 0; y < 8; ++y) {
+			uint16_t bitplanes = 0;
+			for (uint32_t x = 0; x < 8; ++x) {
+				bitplanes <<= 1;
+				uint8_t index = palette.indexOf(tile.pixel(x, y));
+				if (index & 1) {
+					bitplanes |= 1;
+				}
+				if (index & 2) {
+					bitplanes |= 0x100;
+				}
+			}
+			_data[y * 2] = bitplanes & 0xFF;
+			_data[y * 2 + 1] = bitplanes >> 8;
+
+			// Update the hash
+			_hash ^= bitplanes;
+			if (options.allowMirroring) {
+				// Count the line itself as mirrorred; vertical mirroring is
+				// already taken care of because the symmetric line will be XOR'd
+				// the same way. (...which is a problem, but probably benign.)
+				_hash ^= flip(bitplanes >> 8) << 8 | flip(bitplanes & 0xFF);
+			}
+		}
+	}
+
+	auto const &data() const { return _data; }
+	uint16_t hash() const { return _hash; }
+
+	enum MatchType {
+		NOPE,
+		EXACT,
+		HFLIP,
+		VFLIP,
+		VHFLIP,
+	};
+	MatchType tryMatching(TileData const &other) const {
+		if (std::equal(_data.begin(), _data.end(), other._data.begin()))
+			return MatchType::EXACT;
+
+		if (options.allowMirroring) {
+			// TODO
+		}
+
+		return MatchType::NOPE;
+	}
+	friend bool operator==(TileData const &lhs, TileData const &rhs) {
+		return lhs.tryMatching(rhs) != MatchType::NOPE;
+	}
+};
+
+template<>
+struct std::hash<TileData> {
+	std::size_t operator()(TileData const &tile) const { return tile.hash(); }
+};
+
+namespace optimized {
+
+struct UniqueTiles {
+	std::unordered_set<TileData> tileset;
+	std::vector<TileData const *> tiles;
+
+	UniqueTiles() = default;
+	// Copies are likely to break pointers, so we really don't want those.
+	// Copy elision should be relied on to be more sure that refs won't be invalidated, too!
+	UniqueTiles(UniqueTiles const &) = delete;
+	UniqueTiles(UniqueTiles &&) = default;
+
+	/**
+	 * Adds a tile to the collection, and returns its ID
+	 */
+	std::tuple<uint16_t, TileData::MatchType> addTile(Png::TilesVisitor::Tile const &tile,
+	                                                  Palette const &palette) {
+		TileData newTile(tile, palette);
+		auto [tileData, inserted] = tileset.insert(newTile);
+
+		TileData::MatchType matchType = TileData::EXACT;
+		if (inserted) {
+			// Give the new tile the next available unique ID
+			tileData->tileID = tiles.size();
+			// Pointers are never invalidated!
+			tiles.emplace_back(&*tileData);
+		} else {
+			matchType = tileData->tryMatching(newTile);
+		}
+		return {tileData->tileID, matchType};
+	}
+
+	auto size() const { return tiles.size(); }
+
+	auto begin() const { return tiles.begin(); }
+	auto end() const { return tiles.end(); }
+};
+
+static UniqueTiles dedupTiles(Png const &png, DefaultInitVec<AttrmapEntry> &attrmap,
+                              std::vector<Palette> const &palettes,
+                              DefaultInitVec<size_t> const &mappings) {
+	// Iterate throughout the image, generating tile data as we go
+	// (We don't need the full tile data to be able to dedup tiles, but we don't lose anything
+	// by caching the full tile data anyway, so we might as well.)
+	UniqueTiles tiles;
+
+	auto iter = attrmap.begin();
+	for (auto tile : png.visitAsTiles(options.columnMajor)) {
+		auto [tileID, matchType] = tiles.addTile(tile, palettes[mappings[iter->protoPaletteID]]);
+
+		iter->xFlip = matchType == TileData::HFLIP || matchType == TileData::VHFLIP;
+		iter->yFlip = matchType == TileData::VFLIP || matchType == TileData::VHFLIP;
+		assert(tileID < 1 << 10);
+		iter->tileID = tileID;
+
+		++iter;
+	}
+	assert(iter == attrmap.end());
+
+	return tiles; // Copy elision should prevent the contained `unordered_set` from being
+	              // re-constructed
+}
+
+static void outputTileData(UniqueTiles const &tiles) {
+	std::filebuf output;
+	output.open(options.output, std::ios_base::out | std::ios_base::binary);
+
+	uint16_t tileID = 0;
+	for (TileData const *tile : tiles) {
+		assert(tile->tileID == tileID);
+		++tileID;
+		output.sputn(reinterpret_cast<char const *>(tile->data().data()), tile->data().size());
+	}
+}
+
+static void outputTilemap(DefaultInitVec<AttrmapEntry> const &attrmap) {
+	std::filebuf output;
+	output.open(options.tilemap, std::ios_base::out | std::ios_base::binary);
+
+	assert(options.baseTileIDs[0] == 0 && options.baseTileIDs[1] == 0); // TODO: deal with offset
+	for (AttrmapEntry const &entry : attrmap) {
+		output.sputc(entry.tileID & 0xFF);
+	}
+}
+
+static void outputAttrmap(DefaultInitVec<AttrmapEntry> const &attrmap,
+                          DefaultInitVec<size_t> const &mappings) {
+	std::filebuf output;
+	output.open(options.attrmap, std::ios_base::out | std::ios_base::binary);
+
+	assert(options.baseTileIDs[0] == 0 && options.baseTileIDs[1] == 0); // TODO: deal with offset
+	for (AttrmapEntry const &entry : attrmap) {
+		uint8_t attr = entry.xFlip << 5 | entry.yFlip << 6;
+		attr |= (entry.tileID >= options.maxNbTiles[0]) << 3;
+		attr |= mappings[entry.protoPaletteID] & 7;
+		output.sputc(attr);
+	}
+}
+
+} // namespace optimized
+
+void process() {
+	options.verbosePrint("Using libpng %s\n", png_get_libpng_ver(nullptr));
+
+	options.verbosePrint("Reading tiles...\n");
+	Png png(options.input);
+	ImagePalette const &colors = png.getColors();
+
+	// Now, we have all the image's colors in `colors`
+	// The next step is to order the palette
+
+	if (options.beVerbose) {
+		options.verbosePrint("Image colors: [ ");
+		size_t i = 0;
+		for (auto const &slot : colors) {
+			if (!slot.has_value()) {
+				continue;
+			}
+			options.verbosePrint("#%02x%02x%02x%02x%s", slot->red, slot->green, slot->blue,
+			                     slot->alpha, i != colors.size() - 1 ? ", " : "");
+			++i;
+		}
+		options.verbosePrint("]\n");
+	}
+
+	// Now, iterate through the tiles, generating proto-palettes as we go
+	// We do this unconditionally because this performs the image validation (which we want to
+	// perform even if no output is requested), and because it's necessary to generate any
+	// output (with the exception of an un-duplicated tilemap, but that's an acceptable loss.)
+	std::vector<ProtoPalette> protoPalettes;
+	DefaultInitVec<AttrmapEntry> attrmap{};
+
+	for (auto tile : png.visitAsTiles(options.columnMajor)) {
+		ProtoPalette tileColors;
+		AttrmapEntry &attrs = attrmap.emplace_back();
+
+		for (uint32_t y = 0; y < 8; ++y) {
+			for (uint32_t x = 0; x < 8; ++x) {
+				tileColors.add(tile.pixel(x, y));
+			}
+		}
+
+		// Insert the palette, making sure to avoid overlaps
+		// TODO: if inserting (0, 1), (0, 2), and then (0, 1, 2), we might have a problem!
+		for (size_t i = 0; i < protoPalettes.size(); ++i) {
+			switch (tileColors.compare(protoPalettes[i])) {
+			case ProtoPalette::WE_BIGGER:
+				protoPalettes[i] = tileColors; // Override them
+				[[fallthrough]];
+			case ProtoPalette::THEY_BIGGER:
+				// Do nothing, they already contain us
+				attrs.protoPaletteID = i;
+				goto contained;
+			case ProtoPalette::NEITHER:
+				break; // Keep going
+			}
+		}
+		attrs.protoPaletteID = protoPalettes.size();
+		protoPalettes.push_back(tileColors);
+contained:;
+	}
+
+	options.verbosePrint("Image contains %zu proto-palette%s\n", protoPalettes.size(),
+	                     protoPalettes.size() != 1 ? "s" : "");
+
+	// Sort the proto-palettes by size, which improves the packing algorithm's efficiency
+	// TODO: try keeping the palettes stored while inserting them instead, might perform better
+	std::sort(
+		protoPalettes.begin(), protoPalettes.end(),
+		[](ProtoPalette const &lhs, ProtoPalette const &rhs) { return lhs.size() < rhs.size(); });
+
+	// Run a "pagination" problem solver
+	// TODO: allow picking one of several solvers?
+	auto [mappings, nbPalettes] = packing::overloadAndRemove(protoPalettes);
+	assert(mappings.size() == protoPalettes.size());
+	if (options.beVerbose) {
+		options.verbosePrint("Proto-palette mappings: (%zu palettes)\n", nbPalettes);
+		for (size_t i = 0; i < mappings.size(); ++i) {
+			options.verbosePrint("%zu -> %zu\n", i, mappings[i]);
+		}
+	}
+
+	// TODO: optionally, "decant" the result
+
+	// Generate the actual palettes from the mappings
+	std::vector<Palette> palettes(nbPalettes);
+	for (size_t protoPalID = 0; protoPalID < mappings.size(); ++protoPalID) {
+		auto &pal = palettes[mappings[protoPalID]];
+		for (uint16_t color : protoPalettes[protoPalID]) {
+			pal.addColor(color);
+		}
+	}
+
+	// "Sort" colors in the generated palettes, see the man page for the flowchart
+	auto [palSize, palRGB, palAlpha] = png.getEmbeddedPal();
+	if (palRGB) {
+		sorting::indexed(palettes, palSize, palRGB, palAlpha);
+	} else if (png.hasNonGray()) {
+		sorting::rgb(palettes);
+	} else {
+		sorting::grayscale(palettes);
+	}
+
+	if (options.beVerbose) {
+		for (auto &&palette : palettes) {
+			options.verbosePrint("{ ");
+			for (uint16_t colorIndex : palette) {
+				options.verbosePrint("%04" PRIx16 ", ", colorIndex);
+			}
+			options.verbosePrint("}\n");
+		}
+	}
+
+	if (!options.palettes.empty()) {
+		outputPalettes(palettes);
+	}
+
+	// If deduplication is not happening, we just need to output the tile data and/or maps as-is
+	if (!options.allowDedup) {
+		uint32_t const nbTilesH = png.getHeight() / 8, nbTilesW = png.getWidth() / 8;
+
+		// Check the tile count
+		if (nbTilesW * nbTilesH > options.maxNbTiles[0] + options.maxNbTiles[1]) {
+			fatal("Image contains %" PRIu32 " tiles, exceeding the limit of %" PRIu16 " + %" PRIu16,
+			      nbTilesW * nbTilesH, options.maxNbTiles[0], options.maxNbTiles[1]);
+		}
+
+		if (!options.output.empty()) {
+			options.verbosePrint("Generating unoptimized tile data...\n");
+
+			unoptimized::outputTileData(png, attrmap, palettes, mappings);
+		}
+
+		if (!options.tilemap.empty() || !options.attrmap.empty()) {
+			options.verbosePrint("Generating unoptimized tilemap and/or attrmap...\n");
+
+			unoptimized::outputMaps(png, attrmap, mappings);
+		}
+	} else {
+		// All of these require the deduplication process to be performed to be output
+		options.verbosePrint("Deduplicating tiles...\n");
+		optimized::UniqueTiles tiles = optimized::dedupTiles(png, attrmap, palettes, mappings);
+
+		if (tiles.size() > options.maxNbTiles[0] + options.maxNbTiles[1]) {
+			fatal("Image contains %zu tiles, exceeding the limit of %" PRIu16 " + %" PRIu16,
+			      tiles.size(), options.maxNbTiles[0], options.maxNbTiles[1]);
+		}
+
+		if (!options.output.empty()) {
+			options.verbosePrint("Generating optimized tile data...\n");
+
+			optimized::outputTileData(tiles);
+		}
+
+		if (!options.tilemap.empty()) {
+			options.verbosePrint("Generating optimized tilemap...\n");
+
+			optimized::outputTilemap(attrmap);
+		}
+
+		if (!options.attrmap.empty()) {
+			options.verbosePrint("Generating optimized attrmap...\n");
+
+			optimized::outputAttrmap(attrmap, mappings);
+		}
+	}
+}