Interpolator match table caching

I'm not sure I like this approach; I might move it to the compressor class.
2024-09-13 06:37:34 +00:00 · 2021-02-09 03:46:39 -08:00 · 2021-02-09 03:46:39 -08:00 · d678567dc7
commit d678567dc7
parent 8cd1e60394
2 changed files with 177 additions and 39 deletions
--- a/src/interpolator.cpp
+++ b/src/interpolator.cpp
@ -19,53 +19,118 @@

 #include "interpolator.h"

+#include <array>
 #include <cassert>
+#include <cstdint>
+
+#include "util.h"
+
+namespace rgbcx {
+
+// Builds the cached single-color match tables for both endpoint bit depths.
+//
+// NOTE(review): PrepSingleColorTables calls isIdeal(), useExpandedInMatch() and
+// the Interpolate* virtuals. Inside a base-class constructor virtual dispatch
+// does not reach the derived overrides, and the Interpolate* functions are pure
+// virtual in Interpolator, so table construction probably has to move out of
+// this constructor -- confirm.
+Interpolator::Interpolator() {
+    PrepSingleColorTables(_single_match5, _single_match5_half, 5);
+    // The 6-bit pass must fill the 6-bit tables; passing the 5-bit tables here
+    // would overwrite them and leave _single_match6/_single_match6_half empty.
+    PrepSingleColorTables(_single_match6, _single_match6_half, 6);
+}
+
+// Fills the single-color match tables for one endpoint bit depth.
+//
+// For every target value i in [0, match_count) this tries every (low, high)
+// endpoint pair and lets PrepSingleColorTableEntry record the best pair for the
+// regular interpolation in matchTable and for the half interpolation in
+// matchTableHalf.
+//
+// len is the endpoint bit depth and must be 5 or 6 (asserted below).
+void Interpolator::PrepSingleColorTables(const MatchListPtr &matchTable, const MatchListPtr &matchTableHalf, int len) {
+    int size = 1 << len;
+
+    assert((len == 5 && size == size5) || (len == 6 && size == size6));
+
+    const uint8_t *expand = (len == 5) ? &Expand5[0] : &Expand6[0];
+
+    bool ideal = isIdeal();
+    bool use_e = useExpandedInMatch();
+
+    for (int i = 0; i < match_count; i++) {
+        // Best error seen so far for target i, tracked separately for each table.
+        int lowest_error = 256;
+        int lowest_half_error = 256;
+
+        for (int low = 0; low < size; low++) {
+            const int low_e = expand[low];
+            const int low_val = use_e ? low_e : low;
+
+            for (int high = 0; high < size; high++) {
+                const int high_e = expand[high];
+                const int high_val = use_e ? high_e : high;
+
+                // NOTE(review): the regular interpolation is called as (high, low) but the
+                // half interpolation as (low, high) -- confirm the differing order is intended.
+                int v = (len == 5) ? Interpolate5(high_val, low_val) : Interpolate6(high_val, low_val);
+                int v_half = (len == 5) ? InterpolateHalf5(low_val, high_val) : InterpolateHalf6(low_val, high_val);
+
+                int error = PrepSingleColorTableEntry(matchTable, v, i, low, high, low_e, high_e, lowest_error, false, ideal);
+                // The half table is scored against the half-interpolated value and its own
+                // running minimum, not the values belonging to the regular table.
+                int half_error = PrepSingleColorTableEntry(matchTableHalf, v_half, i, low, high, low_e, high_e, lowest_half_error, true, ideal);
+
+                if (error < lowest_error) lowest_error = error;
+                if (half_error < lowest_half_error) lowest_half_error = half_error;
+            }
+        }
+    }
+}
+// Scores one (low, high) candidate for target value i and stores it in
+// matchTable when it beats, or ties with equal endpoints, the best error so far.
+//
+// v            - interpolated value produced by the candidate pair.
+// low/high     - candidate endpoint values written into the table entry.
+// low_e/high_e - expanded endpoint values, used only for the ideal-mode penalty.
+// lowest_error - best error recorded so far for target i.
+// half         - not read by this function.
+// ideal        - when true, adds 3% of the expanded endpoint distance to the error.
+//
+// Returns the candidate's error whether or not the table entry was updated.
+int Interpolator::PrepSingleColorTableEntry(const MatchListPtr &matchTable, int v, int i, int low, int high, int low_e, int high_e, int lowest_error,
+                                                 bool half, bool ideal) {
+    int candidate_error = iabs(v - i);
+
+    // The 3% endpoint-distance penalty only applies in BC1 ideal mode.
+    if (ideal) {
+        const int endpoint_distance = iabs(high_e - low_e);
+        candidate_error += (endpoint_distance * 3) / 100;
+    }
+
+    const bool strictly_better = candidate_error < lowest_error;
+    // On a tie, equal endpoints win: lower error on real GPUs, which approximate the interpolation.
+    const bool tie_with_equal_endpoints = (candidate_error == lowest_error) && (low == high);
+
+    if (!strictly_better && !tie_with_equal_endpoints) return candidate_error;
+
+    assert(candidate_error <= UINT8_MAX);
+
+    auto &slot = (*matchTable)[i];
+    slot.low = low;
+    slot.high = high;
+    slot.error = candidate_error;
+
+    return candidate_error;
+}

 // region InterpolatorIdeal implementation
-int rgbcx::InterpolatorIdeal::Interpolate5(int v0, int v1) { return Interpolate5or6(v0, v1); }
-int rgbcx::InterpolatorIdeal::Interpolate6(int v0, int v1) { return Interpolate5or6(v0, v1); }
-int rgbcx::InterpolatorIdeal::InterpolateHalf5(int v0, int v1) { return InterpolateHalf5or6(v0, v1); }
-int rgbcx::InterpolatorIdeal::InterpolateHalf6(int v0, int v1) { return InterpolateHalf5or6(v0, v1); }
+int InterpolatorIdeal::Interpolate5(int v0, int v1) const { return Interpolate5or6(v0, v1); }  // 5- and 6-bit variants share one formula
+int InterpolatorIdeal::Interpolate6(int v0, int v1) const { return Interpolate5or6(v0, v1); }  // forwards to the shared formula
+int InterpolatorIdeal::InterpolateHalf5(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); }  // half variants share one formula too
+int InterpolatorIdeal::InterpolateHalf6(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); }  // forwards to the shared half formula

-int rgbcx::InterpolatorIdeal::Interpolate5or6(int v0, int v1) {
+int InterpolatorIdeal::Interpolate5or6(int v0, int v1) const {  // weighted blend: v0 counts twice, v1 once
    assert(v0 < 256 && v1 < 256);  // both inputs are expected to be below 256
    return (v0 * 2 + v1) / 3;  // integer division, no rounding term
 }

-int rgbcx::InterpolatorIdeal::InterpolateHalf5or6(int v0, int v1) {
+int InterpolatorIdeal::InterpolateHalf5or6(int v0, int v1) const {  // midpoint of v0 and v1
    assert(v0 < 256 && v1 < 256);  // both inputs are expected to be below 256
    return (v0 + v1) / 2;  // integer division, no rounding term
 }
 // endregion

 // region InterpolatorIdealRound implementation
-int rgbcx::InterpolatorIdealRound::Interpolate5(int v0, int v1) { return Interpolate5or6Round(v0, v1); }
-int rgbcx::InterpolatorIdealRound::Interpolate6(int v0, int v1) { return Interpolate5or6Round(v0, v1); }
+int InterpolatorIdealRound::Interpolate5(int v0, int v1) const { return Interpolate5or6Round(v0, v1); }  // 5- and 6-bit variants share one formula
+int InterpolatorIdealRound::Interpolate6(int v0, int v1) const { return Interpolate5or6Round(v0, v1); }  // forwards to the shared formula

-int rgbcx::InterpolatorIdealRound::Interpolate5or6Round(int v0, int v1) {
+int InterpolatorIdealRound::Interpolate5or6Round(int v0, int v1) const {  // same 2:1 blend as InterpolatorIdeal, with +1 added before dividing
    assert(v0 < 256 && v1 < 256);  // both inputs are expected to be below 256
    return (v0 * 2 + v1 + 1) / 3;  // the +1 biases the integer division upward
 }
 // endregion

 // region InterpolatorNvidia implementation
-int rgbcx::InterpolatorNvidia::Interpolate5(int v0, int v1) {
+int InterpolatorNvidia::Interpolate5(int v0, int v1) const {  // 2:1 blend computed from values below 32
    assert(v0 < 32 && v1 < 32);  // unlike the 6-bit variant, inputs here must be below 32
    return ((2 * v0 + v1) * 22) / 8;  // scales the weighted sum by 22/8 using integer math
 }

-int rgbcx::InterpolatorNvidia::Interpolate6(int v0, int v1) {
+int InterpolatorNvidia::Interpolate6(int v0, int v1) const {  // fixed-point step from v0 toward v1
    assert(v0 < 256 && v1 < 256);  // both inputs are expected to be below 256
    const int gdiff = v1 - v0;  // signed distance from v0 to v1
    return (256 * v0 + (gdiff / 4) + 128 + gdiff * 80) / 256;  // 8.8 fixed point: +128 rounds, gdiff * 80 is the main step
 }

-int rgbcx::InterpolatorNvidia::InterpolateHalf5(int v0, int v1) {
+int InterpolatorNvidia::InterpolateHalf5(int v0, int v1) const {  // half blend computed from values below 32
    assert(v0 < 32 && v1 < 32);  // inputs here must be below 32
    return ((v0 + v1) * 33) / 8;  // scales the sum by 33/8 using integer math
 }

-int rgbcx::InterpolatorNvidia::InterpolateHalf6(int v0, int v1) {
+int InterpolatorNvidia::InterpolateHalf6(int v0, int v1) const {
    assert(v0 < 256 && v1 < 256);
    const int gdiff = v1 - v0;
    return (256 * v0 + gdiff / 4 + 128 + gdiff * 128) / 256;
@ -73,18 +138,19 @@ int rgbcx::InterpolatorNvidia::InterpolateHalf6(int v0, int v1) {
 // endregion

 // region InterpolatorAMD implementation
-int rgbcx::InterpolatorAMD::Interpolate5(int v0, int v1) { return Interpolate5or6(v0, v1); }
-int rgbcx::InterpolatorAMD::Interpolate6(int v0, int v1) { return Interpolate5or6(v0, v1); }
-int rgbcx::InterpolatorAMD::InterpolateHalf5(int v0, int v1) { return InterpolateHalf5or6(v0, v1); }
-int rgbcx::InterpolatorAMD::InterpolateHalf6(int v0, int v1) { return InterpolateHalf5or6(v0, v1); }
+int InterpolatorAMD::Interpolate5(int v0, int v1) const { return Interpolate5or6(v0, v1); }  // 5- and 6-bit variants share one formula
+int InterpolatorAMD::Interpolate6(int v0, int v1) const { return Interpolate5or6(v0, v1); }  // forwards to the shared formula
+int InterpolatorAMD::InterpolateHalf5(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); }  // half variants share one formula too
+int InterpolatorAMD::InterpolateHalf6(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); }  // forwards to the shared half formula

-int rgbcx::InterpolatorAMD::Interpolate5or6(int v0, int v1) {
+int InterpolatorAMD::Interpolate5or6(int v0, int v1) const {  // blend with weights 43/64 and 21/64
    assert(v0 < 256 && v1 < 256);  // both inputs are expected to be below 256
    return (v0 * 43 + v1 * 21 + 32) >> 6;  // +32 rounds before the divide-by-64 shift
 }

-int rgbcx::InterpolatorAMD::InterpolateHalf5or6(int v0, int v1) {
+int InterpolatorAMD::InterpolateHalf5or6(int v0, int v1) const {  // midpoint of v0 and v1, rounded up on ties
    assert(v0 < 256 && v1 < 256);  // both inputs are expected to be below 256
    return (v0 + v1 + 1) >> 1;  // +1 rounds before the divide-by-2 shift
 }
 // endregion
+}  // namespace rgbcx
--- a/src/interpolator.h
+++ b/src/interpolator.h
@ -18,56 +18,128 @@
 */

 #pragma once
+#include <array>
+#include <cassert>
+#include <cstdint>
+#include <memory>
+
+#include "util.h"
+
+#ifdef NDEBUG  // asserts disabled
+static constexpr bool ndebug = true;  // used as noexcept(ndebug) on the GetMatch* accessors, which contain an assert
+#else  // asserts enabled
+static constexpr bool ndebug = false;  // with asserts compiled in, those accessors are not declared noexcept
+#endif

 namespace rgbcx {

+// Builds a lookup table in which element i holds op(i) narrowed to uint8_t.
+//
+// size - number of table entries.
+// op   - function mapping an index to its table value.
+template <size_t size, int op(int)> static constexpr std::array<uint8_t, size> ExpandArray() {
+    // Value-initialize: before C++20 a constexpr function may not define an uninitialized variable.
+    std::array<uint8_t, size> res{};
+    // Index with size_t to match `size`; the casts make both conversions explicit.
+    for (size_t i = 0; i < size; i++) { res[i] = static_cast<uint8_t>(op(static_cast<int>(i))); }
+    return res;
+}
+
 class Interpolator {
   public:
+    struct MatchEntry {
+        uint8_t high;
+        uint8_t low;
+        uint8_t error;
+    };
+
+    Interpolator();
+    virtual ~Interpolator() noexcept = default;
+
    virtual int Interpolate5(int v0, int v1) = 0;
    virtual int Interpolate6(int v0, int v1) = 0;
    virtual int InterpolateHalf5(int v0, int v1) = 0;
    virtual int InterpolateHalf6(int v0, int v1) = 0;
-    virtual ~Interpolator() noexcept = default;
+
+    constexpr MatchEntry GetMatch5(int i) noexcept(ndebug) {
+        assert(i < match_count);
+        return (*_single_match5)[i];
+    }
+    constexpr MatchEntry GetMatch6(int i) noexcept(ndebug) {
+        assert(i < match_count);
+        return (*_single_match6)[i];
+    }
+    constexpr MatchEntry GetMatchHalf5(int i) noexcept(ndebug) {
+        assert(i < match_count);
+        return (*_single_match5_half)[i];
+    }
+    constexpr MatchEntry GetMatchHalf6(int i) noexcept(ndebug) {
+        assert(i < match_count);
+        return (*_single_match6_half)[i];
+    }
+
+   private:
+    constexpr static inline size_t size5 = 32;
+    constexpr static inline size_t size6 = 64;
+    constexpr static inline size_t match_count = 256;
+
+    constexpr static auto Expand5 = ExpandArray<size5, scale5To8>();
+    constexpr static auto Expand6 = ExpandArray<size6, scale6To8>();
+
+    // match tables used for single-color blocks
+    using MatchList = std::array<MatchEntry, match_count>;
+    using MatchListPtr = std::shared_ptr<MatchList>;
+
+    const MatchListPtr _single_match5 = {std::make_shared<MatchList>()};
+    const MatchListPtr _single_match6 = {std::make_shared<MatchList>()};
+    const MatchListPtr _single_match5_half = {std::make_shared<MatchList>()};
+    const MatchListPtr _single_match6_half = {std::make_shared<MatchList>()};
+
+    virtual constexpr bool isIdeal() noexcept { return false; }
+    virtual constexpr bool useExpandedInMatch() noexcept { return true; }
+
+    void PrepSingleColorTables(const MatchListPtr &matchTable, const MatchListPtr &matchTableHalf, int len);
+
+    int PrepSingleColorTableEntry(const MatchListPtr &matchTable, int v, int i, int low, int high, int low_e, int high_e, int lowest_error, bool half,
+                                  bool ideal);
 };

 class InterpolatorIdeal : public Interpolator {  // interpolator whose isIdeal() reports true
   public:
-    virtual int Interpolate5(int v0, int v1);
-    virtual int Interpolate6(int v0, int v1);
-    virtual int InterpolateHalf5(int v0, int v1);
-    virtual int InterpolateHalf6(int v0, int v1);
+    virtual int Interpolate5(int v0, int v1) const;  // forwards to Interpolate5or6
+    virtual int Interpolate6(int v0, int v1) const;  // forwards to Interpolate5or6
+    virtual int InterpolateHalf5(int v0, int v1) const;  // forwards to InterpolateHalf5or6
+    virtual int InterpolateHalf6(int v0, int v1) const;  // forwards to InterpolateHalf5or6

   private:
-    int Interpolate5or6(int v0, int v1);
-    int InterpolateHalf5or6(int v0, int v1);
+    int Interpolate5or6(int v0, int v1) const;  // shared formula for both bit depths
+    int InterpolateHalf5or6(int v0, int v1) const;  // shared half formula for both bit depths
+    virtual constexpr bool isIdeal() noexcept override { return true; }  // turns on the ideal-mode error penalty when match tables are built
 };

 class InterpolatorIdealRound : public InterpolatorIdeal {  // replaces only Interpolate5/6; half variants come from InterpolatorIdeal
   public:
-    virtual int Interpolate5(int v0, int v1);
-    virtual int Interpolate6(int v0, int v1);
+    virtual int Interpolate5(int v0, int v1) const override;  // forwards to Interpolate5or6Round
+    virtual int Interpolate6(int v0, int v1) const override;  // forwards to Interpolate5or6Round

   private:
-    int Interpolate5or6Round(int v0, int v1);
+    int Interpolate5or6Round(int v0, int v1) const;  // shared formula for both bit depths
 };

 class InterpolatorNvidia : public Interpolator {  // interpolator with separate formulas per bit depth
   public:
-    virtual int Interpolate5(int v0, int v1);
-    virtual int Interpolate6(int v0, int v1);
-    virtual int InterpolateHalf5(int v0, int v1);
-    virtual int InterpolateHalf6(int v0, int v1);
+    virtual int Interpolate5(int v0, int v1) const;  // asserts inputs below 32
+    virtual int Interpolate6(int v0, int v1) const;  // asserts inputs below 256
+    virtual int InterpolateHalf5(int v0, int v1) const;  // asserts inputs below 32
+    virtual int InterpolateHalf6(int v0, int v1) const;  // asserts inputs below 256
+
+   private:
+    virtual constexpr bool useExpandedInMatch() noexcept override { return false; }  // match tables feed raw, unexpanded endpoint values to the interpolation
 };

 class InterpolatorAMD : public Interpolator {  // interpolator sharing one formula across both bit depths
   public:
-    virtual int Interpolate5(int v0, int v1);
-    virtual int Interpolate6(int v0, int v1);
-    virtual int InterpolateHalf5(int v0, int v1);
-    virtual int InterpolateHalf6(int v0, int v1);
+    virtual int Interpolate5(int v0, int v1) const;  // forwards to Interpolate5or6
+    virtual int Interpolate6(int v0, int v1) const;  // forwards to Interpolate5or6
+    virtual int InterpolateHalf5(int v0, int v1) const;  // forwards to InterpolateHalf5or6
+    virtual int InterpolateHalf6(int v0, int v1) const;  // forwards to InterpolateHalf5or6

   private:
-    int Interpolate5or6(int v0, int v1);
-    int InterpolateHalf5or6(int v0, int v1);
+    int Interpolate5or6(int v0, int v1) const;  // shared formula for both bit depths
+    int InterpolateHalf5or6(int v0, int v1) const;  // shared half formula for both bit depths
 };
 }  // namespace rgbcx