mirror of
https://github.com/drewcassidy/quicktex.git
synced 2024-09-13 06:37:34 +00:00
Compare commits
3 Commits
460785ee7d
...
db2d5dbe61
Author | SHA1 | Date | |
---|---|---|---|
db2d5dbe61 | |||
a61e8c0ca0 | |||
4217d526cf |
@ -68,6 +68,11 @@ class BC4Block {
|
|||||||
SetSelectorBits(packed);
|
SetSelectorBits(packed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void PackSelectors(const std::array<uint8_t, 16>& unpacked) {
|
||||||
|
auto packed = Pack<uint8_t, uint64_t, 3, 16>(unpacked);
|
||||||
|
SetSelectorBits(packed);
|
||||||
|
}
|
||||||
|
|
||||||
inline uint32_t GetSelector(uint32_t x, uint32_t y, uint64_t selector_bits) const {
|
inline uint32_t GetSelector(uint32_t x, uint32_t y, uint64_t selector_bits) const {
|
||||||
assert((x < 4U) && (y < 4U));
|
assert((x < 4U) && (y < 4U));
|
||||||
return (selector_bits >> (((y * 4) + x) * SelectorBits)) & (SelectorMask);
|
return (selector_bits >> (((y * 4) + x) * SelectorBits)) & (SelectorMask);
|
||||||
|
@ -23,85 +23,44 @@
|
|||||||
|
|
||||||
namespace rgbcx {
|
namespace rgbcx {
|
||||||
void BC4Encoder::EncodeBlock(Byte4x4 pixels, BC4Block *const dest) const noexcept(ndebug) {
|
void BC4Encoder::EncodeBlock(Byte4x4 pixels, BC4Block *const dest) const noexcept(ndebug) {
|
||||||
auto bytes = pixels.Flatten();
|
auto flattened = pixels.Flatten();
|
||||||
auto minmax = std::minmax_element(bytes.begin(), bytes.end());
|
auto minmax = std::minmax_element(flattened.begin(), flattened.end());
|
||||||
|
|
||||||
uint8_t min_v = *minmax.first;
|
uint8_t min = *minmax.first;
|
||||||
uint8_t max_v = *minmax.second;
|
uint8_t max = *minmax.second;
|
||||||
|
|
||||||
dest->high_alpha = min_v;
|
dest->high_alpha = min;
|
||||||
dest->low_alpha = max_v;
|
dest->low_alpha = max;
|
||||||
|
|
||||||
if (max_v == min_v) {
|
if (max == min) {
|
||||||
dest->SetSelectorBits(0);
|
dest->SetSelectorBits(0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const uint32_t delta = max_v - min_v;
|
std::array<uint8_t, 16> selectors = {};
|
||||||
|
const static std::array<uint8_t, 8> Levels = {1U, 7U, 6U, 5U, 4U, 3U, 2U, 0U}; // selector value options in linear order
|
||||||
// min_v is now 0. Compute thresholds between values by scaling max_v. It's x14 because we're adding two x7 scale factors.
|
|
||||||
const int t0 = delta * 13;
|
|
||||||
const int t1 = delta * 11;
|
|
||||||
const int t2 = delta * 9;
|
|
||||||
const int t3 = delta * 7;
|
|
||||||
const int t4 = delta * 5;
|
|
||||||
const int t5 = delta * 3;
|
|
||||||
const int t6 = delta * 1;
|
|
||||||
|
|
||||||
// BC4 floors in its divisions, which we compensate for with the 4 bias.
|
// BC4 floors in its divisions, which we compensate for with the 4 bias.
|
||||||
// This function is optimal for all possible inputs (i.e. it outputs the same results as checking all 8 values and choosing the closest one).
|
// This function is optimal for all possible inputs (i.e. it outputs the same results as checking all 8 values and choosing the closest one).
|
||||||
const int bias = 4 - min_v * 14;
|
const int bias = 4 - min * 14;
|
||||||
|
const int delta = max - min;
|
||||||
|
|
||||||
static const uint32_t s_tran0[8] = {1U, 7U, 6U, 5U, 4U, 3U, 2U, 0U};
|
// min is now 0. Compute thresholds between values by scaling max. It's x14 because we're adding two x7 scale factors.
|
||||||
static const uint32_t s_tran1[8] = {1U << 3U, 7U << 3U, 6U << 3U, 5U << 3U, 4U << 3U, 3U << 3U, 2U << 3U, 0U << 3U};
|
// bias is applied here
|
||||||
static const uint32_t s_tran2[8] = {1U << 6U, 7U << 6U, 6U << 6U, 5U << 6U, 4U << 6U, 3U << 6U, 2U << 6U, 0U << 6U};
|
std::array<int, 7> thresholds = {};
|
||||||
static const uint32_t s_tran3[8] = {1U << 9U, 7U << 9U, 6U << 9U, 5U << 9U, 4U << 9U, 3U << 9U, 2U << 9U, 0U << 9U};
|
for (unsigned i = 0; i < 7; i++) thresholds[i] = delta * (1 + (2 * (int)i)) - bias;
|
||||||
|
|
||||||
uint64_t a0, a1, a2, a3;
|
// iterate over all values and calculate selectors
|
||||||
{
|
for (unsigned i = 0; i < 16; i++) {
|
||||||
const int v0 = bytes[0] * 14 + bias;
|
int value = flattened[i] * 14; // multiply by denominator
|
||||||
const int v1 = bytes[1] * 14 + bias;
|
|
||||||
const int v2 = bytes[2] * 14 + bias;
|
// level = number of thresholds this value is greater than
|
||||||
const int v3 = bytes[3] * 14 + bias;
|
unsigned level = 0;
|
||||||
a0 = s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)];
|
for (unsigned c = 0; c < 7; c++) level += value >= thresholds[c];
|
||||||
a1 = s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)];
|
|
||||||
a2 = s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)];
|
selectors[i] = Levels[level];
|
||||||
a3 = s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
dest->PackSelectors(selectors);
|
||||||
const int v0 = bytes[4] * 14 + bias;
|
|
||||||
const int v1 = bytes[5] * 14 + bias;
|
|
||||||
const int v2 = bytes[6] * 14 + bias;
|
|
||||||
const int v3 = bytes[7] * 14 + bias;
|
|
||||||
a0 |= (uint64_t)(s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)] << 12U);
|
|
||||||
a1 |= (uint64_t)(s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)] << 12U);
|
|
||||||
a2 |= (uint64_t)(s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)] << 12U);
|
|
||||||
a3 |= (uint64_t)(s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)] << 12U);
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
const int v0 = bytes[8] * 14 + bias;
|
|
||||||
const int v1 = bytes[9] * 14 + bias;
|
|
||||||
const int v2 = bytes[10] * 14 + bias;
|
|
||||||
const int v3 = bytes[11] * 14 + bias;
|
|
||||||
a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 24U);
|
|
||||||
a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 24U);
|
|
||||||
a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 24U);
|
|
||||||
a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 24U);
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
const int v0 = bytes[12] * 14 + bias;
|
|
||||||
const int v1 = bytes[13] * 14 + bias;
|
|
||||||
const int v2 = bytes[14] * 14 + bias;
|
|
||||||
const int v3 = bytes[15] * 14 + bias;
|
|
||||||
a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 36U);
|
|
||||||
a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 36U);
|
|
||||||
a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 36U);
|
|
||||||
a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 36U);
|
|
||||||
}
|
|
||||||
|
|
||||||
dest->SetSelectorBits(a0 | a1 | a2 | a3);
|
|
||||||
}
|
}
|
||||||
} // namespace rgbcx
|
} // namespace rgbcx
|
@ -83,8 +83,8 @@ template <typename S, size_t M, size_t N> class BlockView {
|
|||||||
|
|
||||||
constexpr std::array<S, M * N> Flatten() noexcept {
|
constexpr std::array<S, M * N> Flatten() noexcept {
|
||||||
std::array<S, M * N> result;
|
std::array<S, M * N> result;
|
||||||
for (int x = 0; x < N; x++) {
|
for (unsigned x = 0; x < N; x++) {
|
||||||
for (int y = 0; y < M; y++) { result[x + (N * y)] = start[(row_stride * y) + (pixel_stride * x)]; }
|
for (unsigned y = 0; y < M; y++) { result[x + (N * y)] = Get(x, y); }
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -659,11 +659,18 @@ int main(int argc, char *argv[]) {
|
|||||||
uint32_t bc7_mode_hist[8];
|
uint32_t bc7_mode_hist[8];
|
||||||
memset(bc7_mode_hist, 0, sizeof(bc7_mode_hist));
|
memset(bc7_mode_hist, 0, sizeof(bc7_mode_hist));
|
||||||
|
|
||||||
|
#ifdef NDEBUG
|
||||||
|
const int test_count = 1000;
|
||||||
|
#else
|
||||||
|
const int test_count = 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
if (dxgi_format == DXGI_FORMAT_BC4_UNORM) {
|
if (dxgi_format == DXGI_FORMAT_BC4_UNORM) {
|
||||||
auto bc4_encoder = BC4Encoder(bc45_channel0);
|
auto bc4_encoder = BC4Encoder(bc45_channel0);
|
||||||
Color *src = &source_image.get_pixels()[0];
|
Color *src = &source_image.get_pixels()[0];
|
||||||
|
|
||||||
bc4_encoder.EncodeImage(reinterpret_cast<uint8_t *>(&packed_image8[0]), src, source_image.width(), source_image.height());
|
for (int i = 0; i < test_count; i++)
|
||||||
|
bc4_encoder.EncodeImage(reinterpret_cast<uint8_t *>(&packed_image8[0]), src, source_image.width(), source_image.height());
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
for (uint32_t by = 0; by < blocks_y; by++) {
|
for (uint32_t by = 0; by < blocks_y; by++) {
|
||||||
@ -732,7 +739,7 @@ int main(int argc, char *argv[]) {
|
|||||||
|
|
||||||
clock_t end_t = clock();
|
clock_t end_t = clock();
|
||||||
|
|
||||||
printf("\nTotal time: %f secs\n", (double)(end_t - start_t) / CLOCKS_PER_SEC);
|
printf("\nTotal time: %f secs\n", (double)(end_t - start_t) / CLOCKS_PER_SEC / test_count);
|
||||||
|
|
||||||
if (dxgi_format == DXGI_FORMAT_BC7_UNORM) {
|
if (dxgi_format == DXGI_FORMAT_BC7_UNORM) {
|
||||||
printf("BC7 mode histogram:\n");
|
printf("BC7 mode histogram:\n");
|
||||||
|
Loading…
Reference in New Issue
Block a user