Make BC4 encoding a bit more readable

2024-09-13 06:37:34 +00:00 · 2021-02-16 10:18:17 -08:00 · 2021-02-16 10:18:17 -08:00 · 4217d526cf
commit 4217d526cf
parent 460785ee7d
2 changed files with 30 additions and 66 deletions
--- a/src/BC4/BC4Block.h
+++ b/src/BC4/BC4Block.h
@ -68,6 +68,11 @@ class BC4Block {
        SetSelectorBits(packed);
    }

+    void PackSelectors(const std::array<uint8_t, 16>& unpacked) {
+        auto packed = Pack<uint8_t, uint64_t, 3, 16>(unpacked);
+        SetSelectorBits(packed);
+    }
+
    inline uint32_t GetSelector(uint32_t x, uint32_t y, uint64_t selector_bits) const {
        assert((x < 4U) && (y < 4U));
        return (selector_bits >> (((y * 4) + x) * SelectorBits)) & (SelectorMask);
--- a/src/BC4/BC4Encoder.cpp
+++ b/src/BC4/BC4Encoder.cpp
@ -23,85 +23,44 @@

 namespace rgbcx {
 void BC4Encoder::EncodeBlock(Byte4x4 pixels, BC4Block *const dest) const noexcept(ndebug) {
-    auto bytes = pixels.Flatten();
-    auto minmax = std::minmax_element(bytes.begin(), bytes.end());
+    auto flattened = pixels.Flatten();
+    auto minmax = std::minmax_element(flattened.begin(), flattened.end());

-    uint8_t min_v = *minmax.first;
-    uint8_t max_v = *minmax.second;
+    uint8_t min = *minmax.first;
+    uint8_t max = *minmax.second;

-    dest->high_alpha = min_v;
-    dest->low_alpha = max_v;
+    dest->high_alpha = min;
+    dest->low_alpha = max;

-    if (max_v == min_v) {
+    if (max == min) {
        dest->SetSelectorBits(0);
        return;
    }

-    const uint32_t delta = max_v - min_v;
-
-    // min_v is now 0. Compute thresholds between values by scaling max_v. It's x14 because we're adding two x7 scale factors.
-    const int t0 = delta * 13;
-    const int t1 = delta * 11;
-    const int t2 = delta * 9;
-    const int t3 = delta * 7;
-    const int t4 = delta * 5;
-    const int t5 = delta * 3;
-    const int t6 = delta * 1;
+    std::array<uint8_t, 16> selectors = {};
+    const static std::array<uint8_t, 8> Levels = {1U, 7U, 6U, 5U, 4U, 3U, 2U, 0U}; // selector value options in linear order

    // BC4 floors in its divisions, which we compensate for with the 4 bias.
    // This function is optimal for all possible inputs (i.e. it outputs the same results as checking all 8 values and choosing the closest one).
-    const int bias = 4 - min_v * 14;
+    const int bias = 4 - min * 14;
+    const int delta = max - min;

-    static const uint32_t s_tran0[8] = {1U, 7U, 6U, 5U, 4U, 3U, 2U, 0U};
-    static const uint32_t s_tran1[8] = {1U << 3U, 7U << 3U, 6U << 3U, 5U << 3U, 4U << 3U, 3U << 3U, 2U << 3U, 0U << 3U};
-    static const uint32_t s_tran2[8] = {1U << 6U, 7U << 6U, 6U << 6U, 5U << 6U, 4U << 6U, 3U << 6U, 2U << 6U, 0U << 6U};
-    static const uint32_t s_tran3[8] = {1U << 9U, 7U << 9U, 6U << 9U, 5U << 9U, 4U << 9U, 3U << 9U, 2U << 9U, 0U << 9U};
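+    // Compute the thresholds between adjacent levels by scaling delta (max - min). It's x14 because we're adding two x7 scale factors.
+    // The bias (which also carries the -min * 14 offset) is folded into the thresholds here, so it is not added to each value below.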
+    std::array<int, 7> thresholds = {};
+    for (unsigned i = 0; i < 7; i++) thresholds[i] = delta * (1 + (2 * (int)i)) - bias;

-    uint64_t a0, a1, a2, a3;
-    {
-        const int v0 = bytes[0] * 14 + bias;
-        const int v1 = bytes[1] * 14 + bias;
-        const int v2 = bytes[2] * 14 + bias;
-        const int v3 = bytes[3] * 14 + bias;
-        a0 = s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)];
-        a1 = s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)];
-        a2 = s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)];
-        a3 = s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)];
+    // iterate over all values and calculate selectors
+    for (unsigned i = 0; i < 16; i++) {
+        int value = flattened[i] * 14; // multiply by denominator
+
+        // level = number of thresholds this value is greater than or equal to
+        unsigned level = 0;
+        for (unsigned c = 0; c < 7; c++) level += value >= thresholds[c];
+
+        selectors[i] = Levels[level];
    }

-    {
-        const int v0 = bytes[4] * 14 + bias;
-        const int v1 = bytes[5] * 14 + bias;
-        const int v2 = bytes[6] * 14 + bias;
-        const int v3 = bytes[7] * 14 + bias;
-        a0 |= (uint64_t)(s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)] << 12U);
-        a1 |= (uint64_t)(s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)] << 12U);
-        a2 |= (uint64_t)(s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)] << 12U);
-        a3 |= (uint64_t)(s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)] << 12U);
-    }
-
-    {
-        const int v0 = bytes[8] * 14 + bias;
-        const int v1 = bytes[9] * 14 + bias;
-        const int v2 = bytes[10] * 14 + bias;
-        const int v3 = bytes[11] * 14 + bias;
-        a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 24U);
-        a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 24U);
-        a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 24U);
-        a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 24U);
-    }
-
-    {
-        const int v0 = bytes[12] * 14 + bias;
-        const int v1 = bytes[13] * 14 + bias;
-        const int v2 = bytes[14] * 14 + bias;
-        const int v3 = bytes[15] * 14 + bias;
-        a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 36U);
-        a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 36U);
-        a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 36U);
-        a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 36U);
-    }
-
-    dest->SetSelectorBits(a0 | a1 | a2 | a3);
+    dest->PackSelectors(selectors);
 }
 }  // namespace rgbcx