Compare commits

..

3 Commits

Author SHA1 Message Date
db2d5dbe61 Fix sign warnings in BlockView 2021-02-16 16:48:41 -08:00
a61e8c0ca0 Run multiple tests for better values
going to regret this for BC1 tbh
2021-02-16 16:48:22 -08:00
4217d526cf Make BC4 encoding a bit more readable 2021-02-16 10:18:17 -08:00
4 changed files with 41 additions and 70 deletions

View File

@ -68,6 +68,11 @@ class BC4Block {
SetSelectorBits(packed);
}
void PackSelectors(const std::array<uint8_t, 16>& unpacked) {
auto packed = Pack<uint8_t, uint64_t, 3, 16>(unpacked);
SetSelectorBits(packed);
}
inline uint32_t GetSelector(uint32_t x, uint32_t y, uint64_t selector_bits) const {
assert((x < 4U) && (y < 4U));
return (selector_bits >> (((y * 4) + x) * SelectorBits)) & (SelectorMask);

View File

@ -23,85 +23,44 @@
namespace rgbcx {
void BC4Encoder::EncodeBlock(Byte4x4 pixels, BC4Block *const dest) const noexcept(ndebug) {
auto bytes = pixels.Flatten();
auto minmax = std::minmax_element(bytes.begin(), bytes.end());
auto flattened = pixels.Flatten();
auto minmax = std::minmax_element(flattened.begin(), flattened.end());
uint8_t min_v = *minmax.first;
uint8_t max_v = *minmax.second;
uint8_t min = *minmax.first;
uint8_t max = *minmax.second;
dest->high_alpha = min_v;
dest->low_alpha = max_v;
dest->high_alpha = min;
dest->low_alpha = max;
if (max_v == min_v) {
if (max == min) {
dest->SetSelectorBits(0);
return;
}
const uint32_t delta = max_v - min_v;
// min_v is now 0. Compute thresholds between values by scaling max_v. It's x14 because we're adding two x7 scale factors.
const int t0 = delta * 13;
const int t1 = delta * 11;
const int t2 = delta * 9;
const int t3 = delta * 7;
const int t4 = delta * 5;
const int t5 = delta * 3;
const int t6 = delta * 1;
std::array<uint8_t, 16> selectors = {};
const static std::array<uint8_t, 8> Levels = {1U, 7U, 6U, 5U, 4U, 3U, 2U, 0U}; // selector value options in linear order
// BC4 floors in its divisions, which we compensate for with the 4 bias.
// This function is optimal for all possible inputs (i.e. it outputs the same results as checking all 8 values and choosing the closest one).
const int bias = 4 - min_v * 14;
const int bias = 4 - min * 14;
const int delta = max - min;
static const uint32_t s_tran0[8] = {1U, 7U, 6U, 5U, 4U, 3U, 2U, 0U};
static const uint32_t s_tran1[8] = {1U << 3U, 7U << 3U, 6U << 3U, 5U << 3U, 4U << 3U, 3U << 3U, 2U << 3U, 0U << 3U};
static const uint32_t s_tran2[8] = {1U << 6U, 7U << 6U, 6U << 6U, 5U << 6U, 4U << 6U, 3U << 6U, 2U << 6U, 0U << 6U};
static const uint32_t s_tran3[8] = {1U << 9U, 7U << 9U, 6U << 9U, 5U << 9U, 4U << 9U, 3U << 9U, 2U << 9U, 0U << 9U};
// min is now 0. Compute thresholds between values by scaling max. It's x14 because we're adding two x7 scale factors.
// bias is applied here
std::array<int, 7> thresholds = {};
for (unsigned i = 0; i < 7; i++) thresholds[i] = delta * (1 + (2 * (int)i)) - bias;
uint64_t a0, a1, a2, a3;
{
const int v0 = bytes[0] * 14 + bias;
const int v1 = bytes[1] * 14 + bias;
const int v2 = bytes[2] * 14 + bias;
const int v3 = bytes[3] * 14 + bias;
a0 = s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)];
a1 = s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)];
a2 = s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)];
a3 = s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)];
// iterate over all values and calculate selectors
for (unsigned i = 0; i < 16; i++) {
int value = flattened[i] * 14; // multiply by demonimator
// level = number of thresholds this value is greater than
unsigned level = 0;
for (unsigned c = 0; c < 7; c++) level += value >= thresholds[c];
selectors[i] = Levels[level];
}
{
const int v0 = bytes[4] * 14 + bias;
const int v1 = bytes[5] * 14 + bias;
const int v2 = bytes[6] * 14 + bias;
const int v3 = bytes[7] * 14 + bias;
a0 |= (uint64_t)(s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)] << 12U);
a1 |= (uint64_t)(s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)] << 12U);
a2 |= (uint64_t)(s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)] << 12U);
a3 |= (uint64_t)(s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)] << 12U);
}
{
const int v0 = bytes[8] * 14 + bias;
const int v1 = bytes[9] * 14 + bias;
const int v2 = bytes[10] * 14 + bias;
const int v3 = bytes[11] * 14 + bias;
a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 24U);
a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 24U);
a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 24U);
a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 24U);
}
{
const int v0 = bytes[12] * 14 + bias;
const int v1 = bytes[13] * 14 + bias;
const int v2 = bytes[14] * 14 + bias;
const int v3 = bytes[15] * 14 + bias;
a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 36U);
a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 36U);
a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 36U);
a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 36U);
}
dest->SetSelectorBits(a0 | a1 | a2 | a3);
dest->PackSelectors(selectors);
}
} // namespace rgbcx

View File

@ -83,8 +83,8 @@ template <typename S, size_t M, size_t N> class BlockView {
constexpr std::array<S, M * N> Flatten() noexcept {
std::array<S, M * N> result;
for (int x = 0; x < N; x++) {
for (int y = 0; y < M; y++) { result[x + (N * y)] = start[(row_stride * y) + (pixel_stride * x)]; }
for (unsigned x = 0; x < N; x++) {
for (unsigned y = 0; y < M; y++) { result[x + (N * y)] = Get(x, y); }
}
return result;
}

View File

@ -659,11 +659,18 @@ int main(int argc, char *argv[]) {
uint32_t bc7_mode_hist[8];
memset(bc7_mode_hist, 0, sizeof(bc7_mode_hist));
#ifdef NDEBUG
const int test_count = 1000;
#else
const int test_count = 1;
#endif
if (dxgi_format == DXGI_FORMAT_BC4_UNORM) {
auto bc4_encoder = BC4Encoder(bc45_channel0);
Color *src = &source_image.get_pixels()[0];
bc4_encoder.EncodeImage(reinterpret_cast<uint8_t *>(&packed_image8[0]), src, source_image.width(), source_image.height());
for (int i = 0; i < test_count; i++)
bc4_encoder.EncodeImage(reinterpret_cast<uint8_t *>(&packed_image8[0]), src, source_image.width(), source_image.height());
} else {
for (uint32_t by = 0; by < blocks_y; by++) {
@ -732,7 +739,7 @@ int main(int argc, char *argv[]) {
clock_t end_t = clock();
printf("\nTotal time: %f secs\n", (double)(end_t - start_t) / CLOCKS_PER_SEC);
printf("\nTotal time: %f secs\n", (double)(end_t - start_t) / CLOCKS_PER_SEC / test_count);
if (dxgi_format == DXGI_FORMAT_BC7_UNORM) {
printf("BC7 mode histogram:\n");