|
|
|
@ -23,85 +23,44 @@
|
|
|
|
|
|
|
|
|
|
namespace rgbcx {
|
|
|
|
|
// Encodes one block of single-channel values into a BC4 block.
//
// The smallest and largest values found in `pixels` are stored as the two
// endpoints of `dest`, and every value is then assigned the selector whose
// interpolated level lies closest to it.
//
// pixels: source block. Flatten() is indexed 0..15 below, so it is assumed to
//         yield the block's 16 values -- TODO confirm against Byte4x4.
// dest:   block to fill in; its endpoints and selectors are overwritten.
//         It is dereferenced without a null check.
void BC4Encoder::EncodeBlock(Byte4x4 pixels, BC4Block *const dest) const noexcept(ndebug) {
    const auto flattened = pixels.Flatten();
    const auto minmax = std::minmax_element(flattened.begin(), flattened.end());

    const uint8_t min = *minmax.first;
    const uint8_t max = *minmax.second;

    // The maximum is written to low_alpha and the minimum to high_alpha.
    // NOTE(review): presumably "low"/"high" name the endpoint's position in the
    // block rather than its magnitude -- confirm against BC4Block.
    dest->high_alpha = min;
    dest->low_alpha = max;

    // A flat block needs no interpolation: every selector is 0.
    if (max == min) {
        dest->SetSelectorBits(0);
        return;
    }

    // Selector value options in linear order: index 0 is the level closest to
    // min (selector 1), index 7 the level closest to max (selector 0), and the
    // six levels in between map to selectors 7 down to 2.
    static constexpr std::array<uint8_t, 8> Levels = {1U, 7U, 6U, 5U, 4U, 3U, 2U, 0U};

    // BC4 floors in its divisions, which we compensate for with the 4 bias.
    // This function is optimal for all possible inputs (i.e. it outputs the same results as checking all 8 values and
    // choosing the closest one).
    const int bias = 4 - min * 14;
    const int delta = max - min;

    // Thresholds between adjacent levels, measured relative to min and scaled by 14
    // (x14 because we're adding two x7 scale factors). The bias is folded into the
    // thresholds here, so `value * 14 >= thresholds[k]` is the same test as
    // `(value - min) * 14 + 4 >= delta * (2k + 1)`.
    std::array<int, 7> thresholds = {};
    for (unsigned i = 0; i < 7; i++) {
        thresholds[i] = delta * (1 + 2 * static_cast<int>(i)) - bias;
    }

    // Iterate over all values and calculate their selectors.
    std::array<uint8_t, 16> selectors = {};
    for (unsigned i = 0; i < 16; i++) {
        const int value = flattened[i] * 14;  // multiply by denominator

        // level = number of thresholds this value is greater than or equal to
        unsigned level = 0;
        for (unsigned c = 0; c < 7; c++) {
            level += value >= thresholds[c];
        }

        selectors[i] = Levels[level];
    }

    dest->PackSelectors(selectors);
}
|
|
|
|
|
} // namespace rgbcx
|