From d678567dc73b89fcb7704b93ab74f2a962e91e06 Mon Sep 17 00:00:00 2001
From: drewcassidy
Date: Tue, 9 Feb 2021 03:46:39 -0800
Subject: [PATCH] Interpolator match table caching

idk I dont like this might move it to the compressor class
---
Review notes (ignored by git-am, not part of the commit message):

* Line structure was rebuilt from a copy in which the whole patch had been
  collapsed onto a few lines and every angle-bracket span had been dropped.
  Include targets and the MatchList/MatchListPtr/make_shared arguments are
  restored from how the names are used below. The ExpandArray template
  parameter list and its scale5To8/scale6To8 arguments are assumed to be the
  util.h helpers -- TODO confirm against the tree. Context-line indentation
  is likewise assumed to be the 4-space clang-format style.
* Fix: the constructor now fills _single_match6/_single_match6_half for the
  6-bit pass instead of overwriting the 5-bit tables.
* Fix: the half table is now scored with v_half and lowest_half_error
  (v_half was computed but never used).
* Fix: the Interpolator pure virtuals are now const so the const
  declarations in the derived classes actually override them; the derived
  declarations are marked override.
* Open issue: the base constructor still makes virtual calls; see the
  NOTE(review) comment in interpolator.cpp.
* The index lines below carry the blob ids of the original patch; the
  post-image ids will differ after these changes.

 src/interpolator.cpp | 115 +++++++++++++++++++++++++++++++++-------
 src/interpolator.h   | 124 +++++++++++++++++++++++++++++++++++--------
 2 files changed, 196 insertions(+), 43 deletions(-)

diff --git a/src/interpolator.cpp b/src/interpolator.cpp
index 5e6c729..57c3592 100644
--- a/src/interpolator.cpp
+++ b/src/interpolator.cpp
@@ -19,53 +19,129 @@
 
 #include "interpolator.h"
 
+#include <array>
 #include <cassert>
+#include <cstdint>
+
+#include "util.h"
+
+namespace rgbcx {
+
+// NOTE(review): while this constructor runs the object's dynamic type is still Interpolator, so the virtual
+// calls made by PrepSingleColorTables() cannot reach any derived override: isIdeal()/useExpandedInMatch() use
+// the base defaults and Interpolate5/6 + InterpolateHalf5/6 are pure virtual here (undefined behaviour).
+// Table construction needs to move out of the base constructor before these tables can be relied on.
+Interpolator::Interpolator() {
+    PrepSingleColorTables(_single_match5, _single_match5_half, 5);
+    PrepSingleColorTables(_single_match6, _single_match6_half, 6);
+}
+
+// Fills the single-color match tables for len-bit endpoints (len must be 5 or 6). For every target value i in
+// [0, match_count) all (low, high) endpoint pairs are scored; matchTable uses Interpolate5/6 and matchTableHalf
+// uses InterpolateHalf5/6, each tracking its own lowest error.
+void Interpolator::PrepSingleColorTables(const MatchListPtr &matchTable, const MatchListPtr &matchTableHalf, int len) {
+    int size = 1 << len;
+
+    assert((len == 5 && size == size5) || (len == 6 && size == size6));
+
+    const uint8_t *expand = (len == 5) ? &Expand5[0] : &Expand6[0];
+
+    bool ideal = isIdeal();
+    bool use_e = useExpandedInMatch();
+
+    for (int i = 0; i < match_count; i++) {
+        int lowest_error = 256;
+        int lowest_half_error = 256;
+
+        for (int low = 0; low < size; low++) {
+            const int low_e = expand[low];
+            const int low_val = use_e ? low_e : low;
+
+            for (int high = 0; high < size; high++) {
+                const int high_e = expand[high];
+                const int high_val = use_e ? high_e : high;
+
+                int v = (len == 5) ? Interpolate5(high_val, low_val) : Interpolate6(high_val, low_val);
+                int v_half = (len == 5) ? InterpolateHalf5(low_val, high_val) : InterpolateHalf6(low_val, high_val);
+
+                int error = PrepSingleColorTableEntry(matchTable, v, i, low, high, low_e, high_e, lowest_error, false, ideal);
+                int half_error = PrepSingleColorTableEntry(matchTableHalf, v_half, i, low, high, low_e, high_e, lowest_half_error, true, ideal);
+
+                if (error < lowest_error) lowest_error = error;
+                if (half_error < lowest_half_error) lowest_half_error = half_error;
+            }
+        }
+    }
+}
+
+// Scores one (low, high) candidate whose interpolated value is v against target i, and stores it in
+// (*matchTable)[i] when it beats lowest_error (or ties it with low == high). Returns the candidate's error so
+// the caller can update its running minimum. The half flag is accepted but not consulted.
+int Interpolator::PrepSingleColorTableEntry(const MatchListPtr &matchTable, int v, int i, int low, int high, int low_e, int high_e, int lowest_error,
+                                            bool half, bool ideal) {
+    int e = iabs(v - i);
+
+    // We only need to factor in 3% error in BC1 ideal mode.
+    if (ideal) e += (iabs(high_e - low_e) * 3) / 100;
+
+    // Favor equal endpoints, for lower error on actual GPU's which approximate the interpolation.
+    if ((e < lowest_error) || (e == lowest_error && low == high)) {
+        assert(e <= UINT8_MAX);
+
+        auto &entry = (*matchTable)[i];
+        entry.low = low;
+        entry.high = high;
+        entry.error = e;
+    }
+
+    return e;
+}
 
 // region InterpolatorIdeal implementation
-int rgbcx::InterpolatorIdeal::Interpolate5(int v0, int v1) { return Interpolate5or6(v0, v1); }
-int rgbcx::InterpolatorIdeal::Interpolate6(int v0, int v1) { return Interpolate5or6(v0, v1); }
-int rgbcx::InterpolatorIdeal::InterpolateHalf5(int v0, int v1) { return InterpolateHalf5or6(v0, v1); }
-int rgbcx::InterpolatorIdeal::InterpolateHalf6(int v0, int v1) { return InterpolateHalf5or6(v0, v1); }
+int InterpolatorIdeal::Interpolate5(int v0, int v1) const { return Interpolate5or6(v0, v1); }
+int InterpolatorIdeal::Interpolate6(int v0, int v1) const { return Interpolate5or6(v0, v1); }
+int InterpolatorIdeal::InterpolateHalf5(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); }
+int InterpolatorIdeal::InterpolateHalf6(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); }
 
-int rgbcx::InterpolatorIdeal::Interpolate5or6(int v0, int v1) {
+int InterpolatorIdeal::Interpolate5or6(int v0, int v1) const {
     assert(v0 < 256 && v1 < 256);
     return (v0 * 2 + v1) / 3;
 }
 
-int rgbcx::InterpolatorIdeal::InterpolateHalf5or6(int v0, int v1) {
+int InterpolatorIdeal::InterpolateHalf5or6(int v0, int v1) const {
     assert(v0 < 256 && v1 < 256);
     return (v0 + v1) / 2;
 }
 // endregion
 
 // region InterpolatorIdealRound implementation
-int rgbcx::InterpolatorIdealRound::Interpolate5(int v0, int v1) { return Interpolate5or6Round(v0, v1); }
-int rgbcx::InterpolatorIdealRound::Interpolate6(int v0, int v1) { return Interpolate5or6Round(v0, v1); }
+int InterpolatorIdealRound::Interpolate5(int v0, int v1) const { return Interpolate5or6Round(v0, v1); }
+int InterpolatorIdealRound::Interpolate6(int v0, int v1) const { return Interpolate5or6Round(v0, v1); }
 
-int rgbcx::InterpolatorIdealRound::Interpolate5or6Round(int v0, int v1) {
+int InterpolatorIdealRound::Interpolate5or6Round(int v0, int v1) const {
     assert(v0 < 256 && v1 < 256);
     return (v0 * 2 + v1 + 1) / 3;
 }
 // endregion
 
 // region InterpolatorNvidia implementation
-int rgbcx::InterpolatorNvidia::Interpolate5(int v0, int v1) {
+int InterpolatorNvidia::Interpolate5(int v0, int v1) const {
     assert(v0 < 32 && v1 < 32);
     return ((2 * v0 + v1) * 22) / 8;
 }
 
-int rgbcx::InterpolatorNvidia::Interpolate6(int v0, int v1) {
+int InterpolatorNvidia::Interpolate6(int v0, int v1) const {
     assert(v0 < 256 && v1 < 256);
     const int gdiff = v1 - v0;
     return (256 * v0 + (gdiff / 4) + 128 + gdiff * 80) / 256;
 }
 
-int rgbcx::InterpolatorNvidia::InterpolateHalf5(int v0, int v1) {
+int InterpolatorNvidia::InterpolateHalf5(int v0, int v1) const {
     assert(v0 < 32 && v1 < 32);
     return ((v0 + v1) * 33) / 8;
 }
 
-int rgbcx::InterpolatorNvidia::InterpolateHalf6(int v0, int v1) {
+int InterpolatorNvidia::InterpolateHalf6(int v0, int v1) const {
     assert(v0 < 256 && v1 < 256);
     const int gdiff = v1 - v0;
     return (256 * v0 + gdiff / 4 + 128 + gdiff * 128) / 256;
@@ -73,18 +149,19 @@ int rgbcx::InterpolatorNvidia::InterpolateHalf6(int v0, int v1) {
 // endregion
 
 // region InterpolatorAMD implementation
-int rgbcx::InterpolatorAMD::Interpolate5(int v0, int v1) { return Interpolate5or6(v0, v1); }
-int rgbcx::InterpolatorAMD::Interpolate6(int v0, int v1) { return Interpolate5or6(v0, v1); }
-int rgbcx::InterpolatorAMD::InterpolateHalf5(int v0, int v1) { return InterpolateHalf5or6(v0, v1); }
-int rgbcx::InterpolatorAMD::InterpolateHalf6(int v0, int v1) { return InterpolateHalf5or6(v0, v1); }
+int InterpolatorAMD::Interpolate5(int v0, int v1) const { return Interpolate5or6(v0, v1); }
+int InterpolatorAMD::Interpolate6(int v0, int v1) const { return Interpolate5or6(v0, v1); }
+int InterpolatorAMD::InterpolateHalf5(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); }
+int InterpolatorAMD::InterpolateHalf6(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); }
 
-int rgbcx::InterpolatorAMD::Interpolate5or6(int v0, int v1) {
+int InterpolatorAMD::Interpolate5or6(int v0, int v1) const {
     assert(v0 < 256 && v1 < 256);
     return (v0 * 43 + v1 * 21 + 32) >> 6;
 }
 
-int rgbcx::InterpolatorAMD::InterpolateHalf5or6(int v0, int v1) {
+int InterpolatorAMD::InterpolateHalf5or6(int v0, int v1) const {
     assert(v0 < 256 && v1 < 256);
     return (v0 + v1 + 1) >> 1;
 }
 // endregion
+}  // namespace rgbcx
diff --git a/src/interpolator.h b/src/interpolator.h
index aa6fb61..c1a6025 100644
--- a/src/interpolator.h
+++ b/src/interpolator.h
@@ -18,56 +18,132 @@
  */
 
 #pragma once
 
+#include <array>
+#include <cassert>
+#include <cstdint>
+#include <memory>
+
+#include "util.h"
+
+#ifdef NDEBUG  // asserts disabled
+static constexpr bool ndebug = true;
+#else  // asserts enabled
+static constexpr bool ndebug = false;
+#endif
+
 namespace rgbcx {
 
+// Builds a size-entry table whose i-th element is op(i).
+template <size_t size, int op(int)> static constexpr std::array<uint8_t, size> ExpandArray() {
+    std::array<uint8_t, size> res;
+    for (int i = 0; i < size; i++) { res[i] = op(i); }
+    return res;
+}
+
 class Interpolator {
    public:
-    virtual int Interpolate5(int v0, int v1) = 0;
-    virtual int Interpolate6(int v0, int v1) = 0;
-    virtual int InterpolateHalf5(int v0, int v1) = 0;
-    virtual int InterpolateHalf6(int v0, int v1) = 0;
-    virtual ~Interpolator() noexcept = default;
+    // Endpoint pair cached for one target value, together with the error of that match.
+    struct MatchEntry {
+        uint8_t high;
+        uint8_t low;
+        uint8_t error;
+    };
+
+    Interpolator();
+    virtual ~Interpolator() noexcept = default;
+
+    virtual int Interpolate5(int v0, int v1) const = 0;
+    virtual int Interpolate6(int v0, int v1) const = 0;
+    virtual int InterpolateHalf5(int v0, int v1) const = 0;
+    virtual int InterpolateHalf6(int v0, int v1) const = 0;
+
+    // Cached single-color matches for target value i; i must be below match_count.
+    constexpr MatchEntry GetMatch5(int i) noexcept(ndebug) {
+        assert(i < match_count);
+        return (*_single_match5)[i];
+    }
+    constexpr MatchEntry GetMatch6(int i) noexcept(ndebug) {
+        assert(i < match_count);
+        return (*_single_match6)[i];
+    }
+    constexpr MatchEntry GetMatchHalf5(int i) noexcept(ndebug) {
+        assert(i < match_count);
+        return (*_single_match5_half)[i];
+    }
+    constexpr MatchEntry GetMatchHalf6(int i) noexcept(ndebug) {
+        assert(i < match_count);
+        return (*_single_match6_half)[i];
+    }
+
+   private:
+    constexpr static inline size_t size5 = 32;
+    constexpr static inline size_t size6 = 64;
+    constexpr static inline size_t match_count = 256;
+
+    constexpr static auto Expand5 = ExpandArray<size5, scale5To8>();
+    constexpr static auto Expand6 = ExpandArray<size6, scale6To8>();
+
+    // match tables used for single-color blocks
+    using MatchList = std::array<MatchEntry, match_count>;
+    using MatchListPtr = std::shared_ptr<MatchList>;
+
+    const MatchListPtr _single_match5 = {std::make_shared<MatchList>()};
+    const MatchListPtr _single_match6 = {std::make_shared<MatchList>()};
+    const MatchListPtr _single_match5_half = {std::make_shared<MatchList>()};
+    const MatchListPtr _single_match6_half = {std::make_shared<MatchList>()};
+
+    virtual constexpr bool isIdeal() noexcept { return false; }
+    virtual constexpr bool useExpandedInMatch() noexcept { return true; }
+
+    void PrepSingleColorTables(const MatchListPtr &matchTable, const MatchListPtr &matchTableHalf, int len);
+
+    int PrepSingleColorTableEntry(const MatchListPtr &matchTable, int v, int i, int low, int high, int low_e, int high_e, int lowest_error, bool half,
+                                  bool ideal);
 };
 
 class InterpolatorIdeal : public Interpolator {
    public:
-    virtual int Interpolate5(int v0, int v1);
-    virtual int Interpolate6(int v0, int v1);
-    virtual int InterpolateHalf5(int v0, int v1);
-    virtual int InterpolateHalf6(int v0, int v1);
+    virtual int Interpolate5(int v0, int v1) const override;
+    virtual int Interpolate6(int v0, int v1) const override;
+    virtual int InterpolateHalf5(int v0, int v1) const override;
+    virtual int InterpolateHalf6(int v0, int v1) const override;
 
    private:
-    int Interpolate5or6(int v0, int v1);
-    int InterpolateHalf5or6(int v0, int v1);
+    int Interpolate5or6(int v0, int v1) const;
+    int InterpolateHalf5or6(int v0, int v1) const;
+    virtual constexpr bool isIdeal() noexcept override { return true; }
 };
 
 class InterpolatorIdealRound : public InterpolatorIdeal {
    public:
-    virtual int Interpolate5(int v0, int v1);
-    virtual int Interpolate6(int v0, int v1);
+    virtual int Interpolate5(int v0, int v1) const override;
+    virtual int Interpolate6(int v0, int v1) const override;
 
    private:
-    int Interpolate5or6Round(int v0, int v1);
+    int Interpolate5or6Round(int v0, int v1) const;
 };
 
 class InterpolatorNvidia : public Interpolator {
    public:
-    virtual int Interpolate5(int v0, int v1);
-    virtual int Interpolate6(int v0, int v1);
-    virtual int InterpolateHalf5(int v0, int v1);
-    virtual int InterpolateHalf6(int v0, int v1);
+    virtual int Interpolate5(int v0, int v1) const override;
+    virtual int Interpolate6(int v0, int v1) const override;
+    virtual int InterpolateHalf5(int v0, int v1) const override;
+    virtual int InterpolateHalf6(int v0, int v1) const override;
+
+   private:
+    virtual constexpr bool useExpandedInMatch() noexcept override { return false; }
 };
 
 class InterpolatorAMD : public Interpolator {
    public:
-    virtual int Interpolate5(int v0, int v1);
-    virtual int Interpolate6(int v0, int v1);
-    virtual int InterpolateHalf5(int v0, int v1);
-    virtual int InterpolateHalf6(int v0, int v1);
+    virtual int Interpolate5(int v0, int v1) const override;
+    virtual int Interpolate6(int v0, int v1) const override;
+    virtual int InterpolateHalf5(int v0, int v1) const override;
+    virtual int InterpolateHalf6(int v0, int v1) const override;
 
    private:
-    int Interpolate5or6(int v0, int v1);
-    int InterpolateHalf5or6(int v0, int v1);
+    int Interpolate5or6(int v0, int v1) const;
+    int InterpolateHalf5or6(int v0, int v1) const;
 };
 }  // namespace rgbcx
\ No newline at end of file