Improved first-pass refinement

2024-09-13 06:37:34 +00:00 · 2021-02-23 19:44:36 -08:00 · 2021-02-23 19:44:36 -08:00 · 2e5cf991b0
commit 2e5cf991b0
parent 8acaf1ed96
5 changed files with 130 additions and 52 deletions
--- a/src/BC1/BC1Encoder.cpp
+++ b/src/BC1/BC1Encoder.cpp
@ -102,7 +102,7 @@ template <size_t S> void PrepSingleColorTable(MatchList &match_table, MatchList
 BC1Encoder::BC1Encoder(InterpolatorPtr interpolator) : _interpolator(interpolator) {
    PrepSingleColorTable<5>(*_single_match5, *_single_match5_half, *_interpolator);
    PrepSingleColorTable<6>(*_single_match6, *_single_match6_half, *_interpolator);
-    _flags = Flags::BoundingBoxInt;
+    _flags = Flags::UseFullMSEEval | Flags::TwoLeastSquaresPasses;
 }

 void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const {
@ -124,37 +124,44 @@ void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const {
    needs_block_error |= (_search_rounds > 0);
    needs_block_error |= metrics.has_black && ((_flags & Flags::Use3ColorBlocksForBlackPixels) != Flags::None);

-    unsigned cur_err = UINT_MAX;
+    unsigned total_ls_passes = (_flags & Flags::TwoLeastSquaresPasses) != Flags::None ? 2 : 1;
+    unsigned total_rounds = needs_block_error && ((_flags & Flags::TryAllInitialEndpoints) != Flags::None) ? 2 : 1;

-    if (!needs_block_error || true) {
-        //        assert((_flags & Flags::TryAllInitialEndponts) == Flags::None);
+    EncodeResults result;
+    for (unsigned round = 0; round < total_rounds; round++) {
+        Flags modified_flags = _flags;
+        if (round == 1) {
+            modified_flags &= ~(Flags::Use2DLS | Flags::BoundingBoxInt);
+            modified_flags |= Flags::BoundingBox;
+        }

-        EncodeResults orig;
-        FindEndpoints(pixels, _flags, metrics, orig.low, orig.high);
-        FindSelectors4(pixels, orig);
-        EncodeResults best = orig;
+        EncodeResults round_result;
+        FindEndpoints(pixels, modified_flags, metrics, round_result.low, round_result.high);
+        FindSelectors4(pixels, round_result, needs_block_error);

-        const uint32_t total_ls_passes = (_flags & Flags::TwoLeastSquaresPasses) != Flags::None ? 2 : 1;
        for (unsigned pass = 0; pass < total_ls_passes; pass++) {
-            EncodeResults trial = best;
+            EncodeResults trial_result = round_result;
            Vector4 low, high;

-            bool multicolor = ComputeEndpointsLS(pixels, trial, low, high, metrics);
-            if (multicolor) {
-                trial.low = Color::PreciseRound565(low);
-                trial.high = Color::PreciseRound565(high);
+            bool multicolor = ComputeEndpointsLS(pixels, trial_result, metrics, false, false);
+
+            if (trial_result.low == round_result.low && trial_result.high == round_result.high) break;
+
+            FindSelectors4(pixels, trial_result, needs_block_error);
+
+            if (!needs_block_error || trial_result.error < round_result.error) {
+                round_result = trial_result;
+            } else {
+                break;
            }
-
-            if (trial.low == best.low && trial.high == best.high) break;
-            FindSelectors4(pixels, trial);
-            best = trial;
        }
+        if (!needs_block_error || round_result.error < result.error) { result = round_result; }
+    }

-        if (best.low == best.high) {
-            EncodeBlockSingleColor(metrics.avg, dest);
-        } else {
-            EncodeBlock4Color(best, dest);
-        }
+    if (result.low == result.high) {
+        EncodeBlockSingleColor(metrics.avg, dest);
+    } else {
+        EncodeBlock4Color(result, dest);
    }
 }

@ -447,38 +454,96 @@ void BC1Encoder::FindEndpoints(Color4x4 pixels, BC1Encoder::Flags flags, const B
    }
 }

-unsigned BC1Encoder::FindSelectors4(Color4x4 pixels, BC1Encoder::EncodeResults &block, unsigned int cur_err, bool use_err) const {
+unsigned BC1Encoder::FindSelectors4(Color4x4 pixels, BC1Encoder::EncodeResults &block, bool use_err) const {
    // colors in selector order, 0, 1, 2, 3
    // 0 = low color, 1 = high color, 2/3 = interpolated
    std::array<Color, 4> colors = _interpolator->InterpolateBC1(block.low, block.high, false);
-    std::array<Vector4Int, 4> colorVectors = {(Vector4Int)colors[0], (Vector4Int)colors[2], (Vector4Int)colors[3], (Vector4Int)colors[1]};
+    std::array<Vector4Int, 4> color_vectors = {(Vector4Int)colors[0], (Vector4Int)colors[2], (Vector4Int)colors[3], (Vector4Int)colors[1]};
+    unsigned total_error = 0;

-    if (!use_err) {
-        Vector4Int a = colorVectors[3] - colorVectors[0];
-        Color high = block.high.ScaleFrom565();
-        Color low = block.low.ScaleFrom565();
+    if (!use_err || (_flags & Flags::UseFasterMSEEval) != Flags::None) {
+        Vector4Int axis = color_vectors[3] - color_vectors[0];
        std::array<int, 4> dots;
-        for (unsigned i = 0; i < 4; i++) { dots[i] = a.Dot(colorVectors[i]); }
+        for (unsigned i = 0; i < 4; i++) { dots[i] = axis.Dot(color_vectors[i]); }
        int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3];
-        a *= 2;
+        axis *= 2;

-        for (unsigned x = 0; x < 4; x++) {
-            for (unsigned y = 0; y < 4; y++) {
-                int dot = a.Dot((Vector4Int)pixels.Get(x, y));
-                unsigned level = (dot <= t0) + (dot < t1) + (dot < t2);
-                unsigned selector = 3 - level;
-                assert(level < 4);
-                assert(selector < 4);
-                block.selectors[x + (4 * y)] = selector;
+        for (unsigned i = 0; i < 16; i++) {
+            Vector4Int pixel_vector = (Vector4Int)pixels.Get(i);
+            int dot = axis.Dot(pixel_vector);
+            uint8_t level = (dot <= t0) + (dot < t1) + (dot < t2);
+            uint8_t selector = 3 - level;
+            assert(level < 4);
+            assert(selector < 4);
+
+            if ((_flags & Flags::UseFasterMSEEval) != Flags::None) {
+                // llvm is just going to unswitch this anyways so its not an issue
+                auto diff = pixel_vector - color_vectors[selector];
+                total_error += diff.SqrMag();
+                if (i % 4 != 0 && total_error >= block.error) break;  // check only once per row if we're generating too much error
            }
+
+            block.selectors[i] = selector;
+        }
+    } else if ((_flags & Flags::UseFullMSEEval) != Flags::None) {
+        for (unsigned i = 0; i < 16; i++) {
+            unsigned best_error = UINT_MAX;
+            uint8_t best_sel = 0;
+            Vector4Int pixel_vector = (Vector4Int)pixels.Get(i);
+
+            // exhasustively check every pixel's distance from each color, and calculate the error
+            for (uint8_t j = 0; j < 4; j++) {
+                auto diff = color_vectors[j] - pixel_vector;
+                unsigned err = diff.SqrMag();
+                if (err < best_error || ((err == best_error) && (j == 3))) {
+                    best_error = err;
+                    best_sel = j;
+                }
+            }
+
+            total_error += best_error;
+            if (total_error >= block.error) break;
+
+            block.selectors[i] = best_sel;
+        }
+    } else {
+        Vector4Int axis = color_vectors[3] - color_vectors[0];
+        const float f = 4.0f / ((float)axis.SqrMag() + .00000125f);
+
+        for (unsigned i = 0; i < 16; i++) {
+            Vector4Int pixel_vector = (Vector4Int)pixels.Get(i);
+            auto diff = pixel_vector - color_vectors[0];
+            float sel_f = (float)diff.Dot(axis) * f + 0.5f;
+            uint8_t sel = (uint8_t)clampi((int)sel_f, 1, 3);
+
+            unsigned err0 = (color_vectors[sel - 1] - pixel_vector).SqrMag();
+            unsigned err1 = (color_vectors[sel] - pixel_vector).SqrMag();
+
+            uint8_t best_sel = sel;
+            unsigned best_err = err1;
+            if (err0 == err1) {
+                // prefer non-interpolation
+                if ((best_sel) == 1) best_sel = 0;
+            } else if (err0 < best_err) {
+                best_sel = sel - 1;
+                best_err = err0;
+            }
+
+            total_error += best_err;
+
+            if (total_error >= block.error) break;
+
+            block.selectors[i] = best_sel;
        }
-        return 0;
    }
-    return 0;
+    block.is_3_color = false;
+    block.is_1_color = false;
+    block.error = total_error;
+    return total_error;
 }

-bool BC1Encoder::ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, Vector4 &low, Vector4 &high, BlockMetrics metrics, bool is_3color,
-                                    bool use_black) const {
+bool BC1Encoder::ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, BlockMetrics metrics, bool is_3color, bool use_black) const {
+    Vector4 low, high;
    Vector4 q00 = {0, 0, 0};
    unsigned weight_accum = 0;
    for (unsigned i = 0; i < 16; i++) {
@ -486,7 +551,7 @@ bool BC1Encoder::ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, Vecto
        const int sel = (int)block.selectors[i];

        if (use_black && color.IsBlack()) continue;
-        if (is_3color && sel == 3) continue; // NOTE: selectors for 3-color are in linear order here, but not in original
+        if (is_3color && sel == 3) continue;  // NOTE: selectors for 3-color are in linear order here, but not in original
        assert(sel <= 3);

        const Vector4Int color_vector = Vector4Int::FromColorRGB(color);
@ -504,7 +569,10 @@ bool BC1Encoder::ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, Vecto

    // invert matrix
    float det = z00 * z11 - z01 * z10;
-    if (fabs(det) < 1e-8f) return false;
+    if (fabs(det) < 1e-8f) {
+        block.is_1_color = true;
+        return false;
+    }

    det = ((float)denominator / 255.0f) / det;

@ -517,6 +585,9 @@ bool BC1Encoder::ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, Vecto
    low = (q00 * iz00) + (q10 * iz01);
    high = (q00 * iz10) + (q10 * iz11);

+    block.is_1_color = false;
+    block.low = Color::PreciseRound565(low);
+    block.high = Color::PreciseRound565(high);
    return true;
 }
 }  // namespace rgbcx
--- a/src/BC1/BC1Encoder.h
+++ b/src/BC1/BC1Encoder.h
@ -94,7 +94,7 @@ class BC1Encoder : public BlockEncoder<BC1Block, 4, 4> {
        Exhaustive = 8192,

        // Try 2 different ways of choosing the initial endpoints.
-        TryAllInitialEndponts = 16384,
+        TryAllInitialEndpoints = 16384,

        // Same as BoundingBox, but implemented using integer math (faster, slightly less quality)
        BoundingBoxInt = 32768,
@ -123,16 +123,16 @@ class BC1Encoder : public BlockEncoder<BC1Block, 4, 4> {
        std::array<uint8_t, 16> selectors;
        bool is_3_color;
        bool is_1_color;
+        unsigned error = UINT_MAX;
    };

    void EncodeBlockSingleColor(Color color, BC1Block *dest) const;
    void EncodeBlock4Color(EncodeResults &block, BC1Block *dest) const;

    void FindEndpoints(Color4x4 pixels, Flags flags, BlockMetrics const metrics, Color &low, Color &high) const;
-    unsigned FindSelectors4(Color4x4 pixels, EncodeResults &block, unsigned cur_err = 0, bool use_err = false) const;
+    unsigned FindSelectors4(Color4x4 pixels, BC1Encoder::EncodeResults &block, bool use_err) const;

-    bool ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, Vector4 &low, Vector4 &high, BlockMetrics metrics, bool is_3color = false,
-                             bool use_black = false) const;
+    bool ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, BlockMetrics metrics, bool is_3color, bool use_black) const;

    // match tables used for single-color blocks
    // Each entry includes a high and low pair that best reproduces the 8-bit index as well as possible,
--- a/src/Color.cpp
+++ b/src/Color.cpp
@ -108,6 +108,13 @@ Color Color::Max(const Color &a, const Color &b) { return Color(std::max(a[0], b

 Color::operator Vector4() const { return Vector4(r, g, b, a); }
 Color::operator Vector4Int() const { return Vector4Int(r, g, b, a);}
+Vector4Int operator-(const Color &lhs, const Color &rhs) {
+    Vector4Int result;
+    for (unsigned i = 0; i < 4; i++) {
+        result[i] = (int)lhs[i] - rhs[i];
+    }
+    return result;
+}

 uint16_t Color::Pack565() const { return Pack565(r, g, b); }
 uint16_t Color::Pack565Unscaled() const { return Pack565Unscaled(r, g, b); }
--- a/src/Color.h
+++ b/src/Color.h
@ -63,6 +63,7 @@ class Color {

    operator Vector4() const;
    operator Vector4Int() const;
+    friend Vector4Int operator-(const Color &lhs, const Color &rhs);

    void SetRGBA(uint8_t vr, uint8_t vg, uint8_t vb, uint8_t va);
    void SetRGBA(const Color &other) { SetRGBA(other.r, other.g, other.b, other.a); }
--- a/src/Vector4Int.h
+++ b/src/Vector4Int.h
@ -64,9 +64,7 @@ class Vector4Int {
        return _c[index];
    }

-    operator Vector4() const {
-        return Vector4(_c[0], _c[1], _c[2], _c[3]);
-    }
+    operator Vector4() const { return Vector4(_c[0], _c[1], _c[2], _c[3]); }

    friend Vector4Int operator+(const Vector4Int &lhs, const Vector4Int &rhs) { return DoOp(lhs, rhs, std::plus()); }
    friend Vector4Int operator-(const Vector4Int &lhs, const Vector4Int &rhs) { return DoOp(lhs, rhs, std::minus()); }
@ -99,6 +97,7 @@ class Vector4Int {
        }
        return max;
    }
+    unsigned int SqrMag() { return (unsigned)Dot(*this, *this); }

   private:
    template <typename Op> friend Vector4Int DoOp(const Vector4Int &lhs, const Vector4Int &rhs, Op f) {