From 4217d526cfd8e8f4196aaa296415d71ef73570be Mon Sep 17 00:00:00 2001 From: drewcassidy Date: Tue, 16 Feb 2021 10:18:17 -0800 Subject: [PATCH] Make BC4 encoding a bit more readable --- src/BC4/BC4Block.h | 5 +++ src/BC4/BC4Encoder.cpp | 91 ++++++++++++------------------------------ 2 files changed, 30 insertions(+), 66 deletions(-) diff --git a/src/BC4/BC4Block.h b/src/BC4/BC4Block.h index c32bba7..3ac60cc 100644 --- a/src/BC4/BC4Block.h +++ b/src/BC4/BC4Block.h @@ -68,6 +68,11 @@ class BC4Block { SetSelectorBits(packed); } + void PackSelectors(const std::array& unpacked) { + auto packed = Pack(unpacked); + SetSelectorBits(packed); + } + inline uint32_t GetSelector(uint32_t x, uint32_t y, uint64_t selector_bits) const { assert((x < 4U) && (y < 4U)); return (selector_bits >> (((y * 4) + x) * SelectorBits)) & (SelectorMask); diff --git a/src/BC4/BC4Encoder.cpp b/src/BC4/BC4Encoder.cpp index da273c4..abbeeac 100644 --- a/src/BC4/BC4Encoder.cpp +++ b/src/BC4/BC4Encoder.cpp @@ -23,85 +23,44 @@ namespace rgbcx { void BC4Encoder::EncodeBlock(Byte4x4 pixels, BC4Block *const dest) const noexcept(ndebug) { - auto bytes = pixels.Flatten(); - auto minmax = std::minmax_element(bytes.begin(), bytes.end()); + auto flattened = pixels.Flatten(); + auto minmax = std::minmax_element(flattened.begin(), flattened.end()); - uint8_t min_v = *minmax.first; - uint8_t max_v = *minmax.second; + uint8_t min = *minmax.first; + uint8_t max = *minmax.second; - dest->high_alpha = min_v; - dest->low_alpha = max_v; + dest->high_alpha = min; + dest->low_alpha = max; - if (max_v == min_v) { + if (max == min) { dest->SetSelectorBits(0); return; } - const uint32_t delta = max_v - min_v; - - // min_v is now 0. Compute thresholds between values by scaling max_v. It's x14 because we're adding two x7 scale factors. 
- const int t0 = delta * 13; - const int t1 = delta * 11; - const int t2 = delta * 9; - const int t3 = delta * 7; - const int t4 = delta * 5; - const int t5 = delta * 3; - const int t6 = delta * 1; + std::array selectors = {}; + const static std::array Levels = {1U, 7U, 6U, 5U, 4U, 3U, 2U, 0U}; // selector value options in linear order // BC4 floors in its divisions, which we compensate for with the 4 bias. // This function is optimal for all possible inputs (i.e. it outputs the same results as checking all 8 values and choosing the closest one). - const int bias = 4 - min_v * 14; + const int bias = 4 - min * 14; + const int delta = max - min; - static const uint32_t s_tran0[8] = {1U, 7U, 6U, 5U, 4U, 3U, 2U, 0U}; - static const uint32_t s_tran1[8] = {1U << 3U, 7U << 3U, 6U << 3U, 5U << 3U, 4U << 3U, 3U << 3U, 2U << 3U, 0U << 3U}; - static const uint32_t s_tran2[8] = {1U << 6U, 7U << 6U, 6U << 6U, 5U << 6U, 4U << 6U, 3U << 6U, 2U << 6U, 0U << 6U}; - static const uint32_t s_tran3[8] = {1U << 9U, 7U << 9U, 6U << 9U, 5U << 9U, 4U << 9U, 3U << 9U, 2U << 9U, 0U << 9U}; + // min is now 0. Compute thresholds between values by scaling max. It's x14 because we're adding two x7 scale factors. 
+ // bias is applied here + std::array thresholds = {}; + for (unsigned i = 0; i < 7; i++) thresholds[i] = delta * (1 + (2 * (int)i)) - bias; - uint64_t a0, a1, a2, a3; - { - const int v0 = bytes[0] * 14 + bias; - const int v1 = bytes[1] * 14 + bias; - const int v2 = bytes[2] * 14 + bias; - const int v3 = bytes[3] * 14 + bias; - a0 = s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]; - a1 = s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]; - a2 = s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]; - a3 = s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]; + // iterate over all values and calculate selectors + for (unsigned i = 0; i < 16; i++) { + int value = flattened[i] * 14; // multiply by denominator + + // level = number of thresholds this value is greater than or equal to + unsigned level = 0; + for (unsigned c = 0; c < 7; c++) level += value >= thresholds[c]; + + selectors[i] = Levels[level]; } - { - const int v0 = bytes[4] * 14 + bias; - const int v1 = bytes[5] * 14 + bias; - const int v2 = bytes[6] * 14 + bias; - const int v3 = bytes[7] * 14 + bias; - a0 |= (uint64_t)(s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)] << 12U); - a1 |= (uint64_t)(s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)] << 12U); - a2 |= (uint64_t)(s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)] << 12U); - a3 |= (uint64_t)(s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)] << 12U); - } - - { - const int v0 = bytes[8] * 14 + bias; - const int v1 = bytes[9] * 14 + bias; - const int v2 = bytes[10] * 14 + bias; - const int v3 = bytes[11] * 14 + bias; - a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= 
t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 24U); - a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 24U); - a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 24U); - a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 24U); - } - - { - const int v0 = bytes[12] * 14 + bias; - const int v1 = bytes[13] * 14 + bias; - const int v2 = bytes[14] * 14 + bias; - const int v3 = bytes[15] * 14 + bias; - a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 36U); - a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 36U); - a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 36U); - a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 36U); - } - - dest->SetSelectorBits(a0 | a1 | a2 | a3); + dest->PackSelectors(selectors); } } // namespace rgbcx \ No newline at end of file