temporarily re-add original files for testing

faster-single-tables
Andrew Cassidy 3 years ago
parent 64fa687b20
commit b118663b2c

@ -8,6 +8,8 @@ add_subdirectory(extern/pybind11)
file(GLOB SOURCE_FILES "src/*.cpp")
file(GLOB HEADER_FILES "src/*.h")
file(GLOB PYTHON_FILES "python/*.cpp" "python/*.h")
file(GLOB TEST_FILES "src/test/*.c" "src/test/*.cpp" "src/test/*.h")
# Organize source files together for some IDEs
source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${SOURCE_FILES} ${HEADER_FILES} ${PYTHON_FILES})
@ -19,5 +21,11 @@ pybind11_add_module(python_rgbcx
${PYTHON_FILES}
)
add_executable(test_rgbcx
${SOURCE_FILES}
${HEADER_FILES}
${TEST_FILES})
# Set module features, like C/C++ standards
target_compile_features(python_rgbcx PUBLIC cxx_std_17 c_std_11)
target_compile_features(python_rgbcx PUBLIC cxx_std_17 c_std_11)
target_compile_features(test_rgbcx PUBLIC cxx_std_17 c_std_11)

@ -1,10 +0,0 @@
#include <iostream>
#define RGBCX_IMPLEMENTATION
#include "rgbcx.h"
int main() {
std::cout << "Hello, World!" << std::endl;
return 0;
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 MiB

@ -0,0 +1,429 @@
// File: bc7decomp.c - Richard Geldreich, Jr. 3/31/2020 - MIT license or public domain (see end of file)
#include "bc7decomp.h"
namespace bc7decomp
{
// Interpolation weights on a 0..64 scale, indexed by a 2-, 3- or 4-bit selector.
// bc7_interp2/3/4 blend two endpoints as (l * (64 - w) + h * w + 32) >> 6;
// unpack_bc7_mode6 reads g_bc7_weights4 directly with the same formula.
const uint32_t g_bc7_weights2[4] = { 0, 21, 43, 64 };
const uint32_t g_bc7_weights3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 };
const uint32_t g_bc7_weights4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
// Subset assignment for the two-subset decoder: 64 partition patterns x 16 pixels.
// Entry [part * 16 + i] is the subset (0 or 1) whose palette pixel i uses; it is
// indexed exactly that way by unpack_bc7_mode1_3_7.
const uint8_t g_bc7_partition2[64 * 16] =
{
0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1, 0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1, 0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1, 0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1, 0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1, 0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1, 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,
0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,1, 0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0, 0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0, 0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0, 0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,
0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0, 0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0, 0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0, 0,0,0,1,0,1,1,1,1,1,1,0,1,0,0,0, 0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0, 0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0, 0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0,
0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1, 0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1, 0,1,0,1,1,0,1,0,0,1,0,1,1,0,1,0, 0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0, 0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0, 0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0, 0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1, 0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,
0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0, 0,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0, 0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,0, 0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0, 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, 0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,1, 0,1,1,0,0,1,1,0,1,0,0,1,1,0,0,1, 0,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,
0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0, 0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0, 0,0,0,0,0,0,1,0,0,1,1,1,0,0,1,0, 0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0, 0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,1,0,0,1,1,1,0,0, 0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,
0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1, 0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1, 0,0,0,1,1,0,0,0,1,1,1,0,0,1,1,1, 0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1, 0,0,1,1,0,0,1,1,1,1,1,1,0,0,0,0, 0,0,1,0,0,0,1,0,1,1,1,0,1,1,1,0, 0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,1
};
// Subset assignment for the three-subset decoder: 64 partition patterns x 16 pixels.
// Entry [part * 16 + i] is the subset (0, 1 or 2) whose palette pixel i uses; it is
// indexed exactly that way by unpack_bc7_mode0_2.
const uint8_t g_bc7_partition3[64 * 16] =
{
0,0,1,1,0,0,1,1,0,2,2,1,2,2,2,2, 0,0,0,1,0,0,1,1,2,2,1,1,2,2,2,1, 0,0,0,0,2,0,0,1,2,2,1,1,2,2,1,1, 0,2,2,2,0,0,2,2,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2, 0,0,1,1,0,0,1,1,0,0,2,2,0,0,2,2, 0,0,2,2,0,0,2,2,1,1,1,1,1,1,1,1, 0,0,1,1,0,0,1,1,2,2,1,1,2,2,1,1,
0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2, 0,0,1,2,0,0,1,2,0,0,1,2,0,0,1,2, 0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2, 0,1,2,2,0,1,2,2,0,1,2,2,0,1,2,2, 0,0,1,1,0,1,1,2,1,1,2,2,1,2,2,2, 0,0,1,1,2,0,0,1,2,2,0,0,2,2,2,0,
0,0,0,1,0,0,1,1,0,1,1,2,1,1,2,2, 0,1,1,1,0,0,1,1,2,0,0,1,2,2,0,0, 0,0,0,0,1,1,2,2,1,1,2,2,1,1,2,2, 0,0,2,2,0,0,2,2,0,0,2,2,1,1,1,1, 0,1,1,1,0,1,1,1,0,2,2,2,0,2,2,2, 0,0,0,1,0,0,0,1,2,2,2,1,2,2,2,1, 0,0,0,0,0,0,1,1,0,1,2,2,0,1,2,2, 0,0,0,0,1,1,0,0,2,2,1,0,2,2,1,0,
0,1,2,2,0,1,2,2,0,0,1,1,0,0,0,0, 0,0,1,2,0,0,1,2,1,1,2,2,2,2,2,2, 0,1,1,0,1,2,2,1,1,2,2,1,0,1,1,0, 0,0,0,0,0,1,1,0,1,2,2,1,1,2,2,1, 0,0,2,2,1,1,0,2,1,1,0,2,0,0,2,2, 0,1,1,0,0,1,1,0,2,0,0,2,2,2,2,2, 0,0,1,1,0,1,2,2,0,1,2,2,0,0,1,1, 0,0,0,0,2,0,0,0,2,2,1,1,2,2,2,1,
0,0,0,0,0,0,0,2,1,1,2,2,1,2,2,2, 0,2,2,2,0,0,2,2,0,0,1,2,0,0,1,1, 0,0,1,1,0,0,1,2,0,0,2,2,0,2,2,2, 0,1,2,0,0,1,2,0,0,1,2,0,0,1,2,0, 0,0,0,0,1,1,1,1,2,2,2,2,0,0,0,0, 0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0, 0,1,2,0,2,0,1,2,1,2,0,1,0,1,2,0, 0,0,1,1,2,2,0,0,1,1,2,2,0,0,1,1,
0,0,1,1,1,1,2,2,2,2,0,0,0,0,1,1, 0,1,0,1,0,1,0,1,2,2,2,2,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,2,1,2,1,2,1, 0,0,2,2,1,1,2,2,0,0,2,2,1,1,2,2, 0,0,2,2,0,0,1,1,0,0,2,2,0,0,1,1, 0,2,2,0,1,2,2,1,0,2,2,0,1,2,2,1, 0,1,0,1,2,2,2,2,2,2,2,2,0,1,0,1, 0,0,0,0,2,1,2,1,2,1,2,1,2,1,2,1,
0,1,0,1,0,1,0,1,0,1,0,1,2,2,2,2, 0,2,2,2,0,1,1,1,0,2,2,2,0,1,1,1, 0,0,0,2,1,1,1,2,0,0,0,2,1,1,1,2, 0,0,0,0,2,1,1,2,2,1,1,2,2,1,1,2, 0,2,2,2,0,1,1,1,0,1,1,1,0,2,2,2, 0,0,0,2,1,1,1,2,1,1,1,2,0,0,0,2, 0,1,1,0,0,1,1,0,0,1,1,0,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,1,2,2,1,1,2,
0,1,1,0,0,1,1,0,2,2,2,2,2,2,2,2, 0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2, 0,0,2,2,1,1,2,2,1,1,2,2,0,0,2,2, 0,0,0,0,0,0,0,0,0,0,0,0,2,1,1,2, 0,0,0,2,0,0,0,1,0,0,0,2,0,0,0,1, 0,2,2,2,1,2,2,2,0,2,2,2,1,2,2,2, 0,1,0,1,2,2,2,2,2,2,2,2,2,2,2,2, 0,1,1,1,2,0,1,1,2,2,0,1,2,2,2,0,
};
// Anchor pixel index per partition pattern. When reading per-pixel weights, pixel 0 and
// the pixel(s) named here are stored with one bit fewer than the others (see the weight
// loops in unpack_bc7_mode1_3_7 and unpack_bc7_mode0_2).
// Two-subset patterns: anchor of the second subset.
const uint8_t g_bc7_table_anchor_index_second_subset[64] = { 15,15,15,15,15,15,15,15, 15,15,15,15,15,15,15,15, 15, 2, 8, 2, 2, 8, 8,15, 2, 8, 2, 2, 8, 8, 2, 2, 15,15, 6, 8, 2, 8,15,15, 2, 8, 2, 2, 2,15,15, 6, 6, 2, 6, 8,15,15, 2, 2, 15,15,15,15,15, 2, 2,15 };
// Three-subset patterns: first of the two extra anchors.
const uint8_t g_bc7_table_anchor_index_third_subset_1[64] =
{
3, 3,15,15, 8, 3,15,15, 8, 8, 6, 6, 6, 5, 3, 3, 3, 3, 8,15, 3, 3, 6,10, 5, 8, 8, 6, 8, 5,15,15, 8,15, 3, 5, 6,10, 8,15, 15, 3,15, 5,15,15,15,15, 3,15, 5, 5, 5, 8, 5,10, 5,10, 8,13,15,12, 3, 3
};
// Three-subset patterns: second of the two extra anchors.
const uint8_t g_bc7_table_anchor_index_third_subset_2[64] =
{
15, 8, 8, 3,15,15, 3, 8, 15,15,15,15,15,15,15, 8, 15, 8,15, 3,15, 8,15, 8, 3,15, 6,10,15,15,10, 8, 15, 3,15,10,10, 8, 9,10, 6,15, 8,15, 3, 6, 6, 8, 15, 3,15,15,15,15,15,15, 15,15,15,15, 3,15,15, 8
};
// Reads `codesize` bits (at most 32) from the byte buffer pBuf, starting at bit position
// `bit_offset`, and advances `bit_offset` past them. Bits are taken LSB-first within each
// byte, and the first bit read ends up in bit 0 of the result.
// No bounds checking is performed on pBuf.
inline uint32_t read_bits32(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
{
    assert(codesize <= 32);

    uint32_t result = 0;
    for (uint32_t num_read = 0; num_read < codesize; )
    {
        const uint32_t shift_in_byte = bit_offset & 7;
        // Take what is still requested, capped at what remains in the current byte.
        const uint32_t chunk = std::min<int>(codesize - num_read, 8 - shift_in_byte);
        const uint32_t chunk_mask = (1 << chunk) - 1;
        const uint32_t piece = (pBuf[bit_offset >> 3] >> shift_in_byte) & chunk_mask;

        result |= (piece << num_read);
        num_read += chunk;
        bit_offset += chunk;
    }
    return result;
}
// BC7 mode 0-7 decompression.
// Instead of one monster routine to unpack all the BC7 modes, we're lumping the 3 subset, 2 subset, 1 subset, and dual plane modes together into simple shared routines.
// Expands a quantized endpoint component with an extra low p-bit to 8 bits:
// the (val_bits + 1)-bit value is left-aligned in a byte and its top bits are
// replicated into the vacated low bits.
static inline uint32_t bc7_dequant(uint32_t val, uint32_t pbit, uint32_t val_bits)
{
    assert(val < (1U << val_bits));
    assert(pbit < 2);
    assert(val_bits >= 4 && val_bits <= 8);

    const uint32_t width = val_bits + 1;
    const uint32_t aligned = ((val << 1) | pbit) << (8 - width);
    const uint32_t expanded = aligned | (aligned >> width);

    assert(expanded <= 255);
    return expanded;
}
// Same expansion for a component stored without a p-bit.
static inline uint32_t bc7_dequant(uint32_t val, uint32_t val_bits)
{
    assert(val < (1U << val_bits));
    assert(val_bits >= 4 && val_bits <= 8);

    const uint32_t aligned = val << (8 - val_bits);
    const uint32_t expanded = aligned | (aligned >> val_bits);

    assert(expanded <= 255);
    return expanded;
}
// Blend two 8-bit endpoint components using a 2-bit selector (weights from g_bc7_weights2).
static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w)
{
    assert(w < 4);
    const uint32_t hi_weight = g_bc7_weights2[w];
    return (l * (64 - hi_weight) + h * hi_weight + 32) >> 6;
}
// Blend two 8-bit endpoint components using a 3-bit selector (weights from g_bc7_weights3).
static inline uint32_t bc7_interp3(uint32_t l, uint32_t h, uint32_t w)
{
    assert(w < 8);
    const uint32_t hi_weight = g_bc7_weights3[w];
    return (l * (64 - hi_weight) + h * hi_weight + 32) >> 6;
}
// Blend two 8-bit endpoint components using a 4-bit selector (weights from g_bc7_weights4).
static inline uint32_t bc7_interp4(uint32_t l, uint32_t h, uint32_t w)
{
    assert(w < 16);
    const uint32_t hi_weight = g_bc7_weights4[w];
    return (l * (64 - hi_weight) + h * hi_weight + 32) >> 6;
}
// Dispatches to the 2-, 3- or 4-bit interpolator according to `bits`.
// Any other `bits` value yields 0.
static inline uint32_t bc7_interp(uint32_t l, uint32_t h, uint32_t w, uint32_t bits)
{
    assert(l <= 255 && h <= 255);

    if (bits == 2)
        return bc7_interp2(l, h, w);
    if (bits == 3)
        return bc7_interp3(l, h, w);
    if (bits == 4)
        return bc7_interp4(l, h, w);

    return 0;
}
bool unpack_bc7_mode0_2(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels)
{
//const uint32_t SUBSETS = 3;
const uint32_t ENDPOINTS = 6;
const uint32_t COMPS = 3;
const uint32_t WEIGHT_BITS = (mode == 0) ? 3 : 2;
const uint32_t ENDPOINT_BITS = (mode == 0) ? 4 : 5;
const uint32_t PBITS = (mode == 0) ? 6 : 0;
const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS;
uint32_t bit_offset = 0;
const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits);
if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false;
const uint32_t part = read_bits32(pBuf, bit_offset, (mode == 0) ? 4 : 6);
color_rgba endpoints[ENDPOINTS];
for (uint32_t c = 0; c < COMPS; c++)
for (uint32_t e = 0; e < ENDPOINTS; e++)
endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS);
uint32_t pbits[6];
for (uint32_t p = 0; p < PBITS; p++)
pbits[p] = read_bits32(pBuf, bit_offset, 1);
uint32_t weights[16];
for (uint32_t i = 0; i < 16; i++)
weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == g_bc7_table_anchor_index_third_subset_1[part]) || (i == g_bc7_table_anchor_index_third_subset_2[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS);
assert(bit_offset == 128);
for (uint32_t e = 0; e < ENDPOINTS; e++)
for (uint32_t c = 0; c < 4; c++)
endpoints[e][c] = (uint8_t)((c == 3) ? 255 : (PBITS ? bc7_dequant(endpoints[e][c], pbits[e], ENDPOINT_BITS) : bc7_dequant(endpoints[e][c], ENDPOINT_BITS)));
color_rgba block_colors[3][8];
for (uint32_t s = 0; s < 3; s++)
for (uint32_t i = 0; i < WEIGHT_VALS; i++)
{
for (uint32_t c = 0; c < 3; c++)
block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS);
block_colors[s][i][3] = 255;
}
for (uint32_t i = 0; i < 16; i++)
pPixels[i] = block_colors[g_bc7_partition3[part * 16 + i]][weights[i]];
return true;
}
bool unpack_bc7_mode1_3_7(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels)
{
//const uint32_t SUBSETS = 2;
const uint32_t ENDPOINTS = 4;
const uint32_t COMPS = (mode == 7) ? 4 : 3;
const uint32_t WEIGHT_BITS = (mode == 1) ? 3 : 2;
const uint32_t ENDPOINT_BITS = (mode == 7) ? 5 : ((mode == 1) ? 6 : 7);
const uint32_t PBITS = (mode == 1) ? 2 : 4;
const uint32_t SHARED_PBITS = (mode == 1) ? true : false;
const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS;
uint32_t bit_offset = 0;
const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits);
if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false;
const uint32_t part = read_bits32(pBuf, bit_offset, 6);
color_rgba endpoints[ENDPOINTS];
for (uint32_t c = 0; c < COMPS; c++)
for (uint32_t e = 0; e < ENDPOINTS; e++)
endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS);
uint32_t pbits[4];
for (uint32_t p = 0; p < PBITS; p++)
pbits[p] = read_bits32(pBuf, bit_offset, 1);
uint32_t weights[16];
for (uint32_t i = 0; i < 16; i++)
weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == g_bc7_table_anchor_index_second_subset[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS);
assert(bit_offset == 128);
for (uint32_t e = 0; e < ENDPOINTS; e++)
for (uint32_t c = 0; c < 4; c++)
endpoints[e][c] = (uint8_t)((c == ((mode == 7U) ? 4U : 3U)) ? 255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? (e >> 1) : e], ENDPOINT_BITS));
color_rgba block_colors[2][8];
for (uint32_t s = 0; s < 2; s++)
for (uint32_t i = 0; i < WEIGHT_VALS; i++)
{
for (uint32_t c = 0; c < COMPS; c++)
block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS);
block_colors[s][i][3] = (COMPS == 3) ? 255 : block_colors[s][i][3];
}
for (uint32_t i = 0; i < 16; i++)
pPixels[i] = block_colors[g_bc7_partition2[part * 16 + i]][weights[i]];
return true;
}
bool unpack_bc7_mode4_5(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels)
{
const uint32_t ENDPOINTS = 2;
const uint32_t COMPS = 4;
const uint32_t WEIGHT_BITS = 2;
const uint32_t A_WEIGHT_BITS = (mode == 4) ? 3 : 2;
const uint32_t ENDPOINT_BITS = (mode == 4) ? 5 : 7;
const uint32_t A_ENDPOINT_BITS = (mode == 4) ? 6 : 8;
//const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS;
//const uint32_t A_WEIGHT_VALS = 1 << A_WEIGHT_BITS;
uint32_t bit_offset = 0;
const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits);
if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false;
const uint32_t comp_rot = read_bits32(pBuf, bit_offset, 2);
const uint32_t index_mode = (mode == 4) ? read_bits32(pBuf, bit_offset, 1) : 0;
color_rgba endpoints[ENDPOINTS];
for (uint32_t c = 0; c < COMPS; c++)
for (uint32_t e = 0; e < ENDPOINTS; e++)
endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS);
const uint32_t weight_bits[2] = { index_mode ? A_WEIGHT_BITS : WEIGHT_BITS, index_mode ? WEIGHT_BITS : A_WEIGHT_BITS };
uint32_t weights[16], a_weights[16];
for (uint32_t i = 0; i < 16; i++)
(index_mode ? a_weights : weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[index_mode] - ((!i) ? 1 : 0));
for (uint32_t i = 0; i < 16; i++)
(index_mode ? weights : a_weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[1 - index_mode] - ((!i) ? 1 : 0));
assert(bit_offset == 128);
for (uint32_t e = 0; e < ENDPOINTS; e++)
for (uint32_t c = 0; c < 4; c++)
endpoints[e][c] = (uint8_t)bc7_dequant(endpoints[e][c], (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS);
color_rgba block_colors[8];
for (uint32_t i = 0; i < (1U << weight_bits[0]); i++)
for (uint32_t c = 0; c < 3; c++)
block_colors[i][c] = (uint8_t)bc7_interp(endpoints[0][c], endpoints[1][c], i, weight_bits[0]);
for (uint32_t i = 0; i < (1U << weight_bits[1]); i++)
block_colors[i][3] = (uint8_t)bc7_interp(endpoints[0][3], endpoints[1][3], i, weight_bits[1]);
for (uint32_t i = 0; i < 16; i++)
{
pPixels[i] = block_colors[weights[i]];
pPixels[i].a = block_colors[a_weights[i]].a;
if (comp_rot >= 1)
std::swap(pPixels[i].a, pPixels[i].m_comps[comp_rot - 1]);
}
return true;
}
// Bit-field view of a 128-bit mode 6 block, split into two 64-bit halves.
// unpack_bc7_mode6 static_asserts that the whole struct is 16 bytes.
// NOTE(review): bit-field allocation order is implementation-defined; this layout
// presumably assumes fields are packed from the least significant bit on a
// little-endian target - confirm for any new compiler/platform.
struct bc7_mode_6
{
struct
{
// Mode field; unpack_bc7_mode6 requires it to equal (1 << 6).
uint64_t m_mode : 7;
// 7-bit endpoint components, interleaved as endpoint 0 / endpoint 1 per channel.
uint64_t m_r0 : 7;
uint64_t m_r1 : 7;
uint64_t m_g0 : 7;
uint64_t m_g1 : 7;
uint64_t m_b0 : 7;
uint64_t m_b1 : 7;
uint64_t m_a0 : 7;
uint64_t m_a1 : 7;
// P-bit appended as the low bit of every endpoint 0 component.
uint64_t m_p0 : 1;
} m_lo;
union
{
struct
{
// P-bit appended as the low bit of every endpoint 1 component.
uint64_t m_p1 : 1;
// Per-pixel selectors, named m_s<x><y>; unpack_bc7_mode6 writes m_sXY to
// pPixels[X + 4 * Y]. The first selector is 3 bits wide, the rest are 4.
uint64_t m_s00 : 3;
uint64_t m_s10 : 4;
uint64_t m_s20 : 4;
uint64_t m_s30 : 4;
uint64_t m_s01 : 4;
uint64_t m_s11 : 4;
uint64_t m_s21 : 4;
uint64_t m_s31 : 4;
uint64_t m_s02 : 4;
uint64_t m_s12 : 4;
uint64_t m_s22 : 4;
uint64_t m_s32 : 4;
uint64_t m_s03 : 4;
uint64_t m_s13 : 4;
uint64_t m_s23 : 4;
uint64_t m_s33 : 4;
} m_hi;
// Raw view of the same upper 64 bits.
uint64_t m_hi_bits;
};
};
bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels)
{
static_assert(sizeof(bc7_mode_6) == 16, "sizeof(bc7_mode_6) == 16");
const bc7_mode_6 &block = *static_cast<const bc7_mode_6 *>(pBlock_bits);
if (block.m_lo.m_mode != (1 << 6))
return false;
const uint32_t r0 = (uint32_t)((block.m_lo.m_r0 << 1) | block.m_lo.m_p0);
const uint32_t g0 = (uint32_t)((block.m_lo.m_g0 << 1) | block.m_lo.m_p0);
const uint32_t b0 = (uint32_t)((block.m_lo.m_b0 << 1) | block.m_lo.m_p0);
const uint32_t a0 = (uint32_t)((block.m_lo.m_a0 << 1) | block.m_lo.m_p0);
const uint32_t r1 = (uint32_t)((block.m_lo.m_r1 << 1) | block.m_hi.m_p1);
const uint32_t g1 = (uint32_t)((block.m_lo.m_g1 << 1) | block.m_hi.m_p1);
const uint32_t b1 = (uint32_t)((block.m_lo.m_b1 << 1) | block.m_hi.m_p1);
const uint32_t a1 = (uint32_t)((block.m_lo.m_a1 << 1) | block.m_hi.m_p1);
color_rgba vals[16];
for (uint32_t i = 0; i < 16; i++)
{
const uint32_t w = g_bc7_weights4[i];
const uint32_t iw = 64 - w;
vals[i].set_noclamp_rgba(
(r0 * iw + r1 * w + 32) >> 6,
(g0 * iw + g1 * w + 32) >> 6,
(b0 * iw + b1 * w + 32) >> 6,
(a0 * iw + a1 * w + 32) >> 6);
}
pPixels[0] = vals[block.m_hi.m_s00];
pPixels[1] = vals[block.m_hi.m_s10];
pPixels[2] = vals[block.m_hi.m_s20];
pPixels[3] = vals[block.m_hi.m_s30];
pPixels[4] = vals[block.m_hi.m_s01];
pPixels[5] = vals[block.m_hi.m_s11];
pPixels[6] = vals[block.m_hi.m_s21];
pPixels[7] = vals[block.m_hi.m_s31];
pPixels[8] = vals[block.m_hi.m_s02];
pPixels[9] = vals[block.m_hi.m_s12];
pPixels[10] = vals[block.m_hi.m_s22];
pPixels[11] = vals[block.m_hi.m_s32];
pPixels[12] = vals[block.m_hi.m_s03];
pPixels[13] = vals[block.m_hi.m_s13];
pPixels[14] = vals[block.m_hi.m_s23];
pPixels[15] = vals[block.m_hi.m_s33];
return true;
}
// Decodes one BC7 block into 16 pixels. The mode is the position of the lowest set bit
// in the first byte; the block is handed to the decoder for that mode.
// Returns false if the first byte has no bit set, or if the chosen decoder rejects the block.
bool unpack_bc7(const void *pBlock, color_rgba *pPixels)
{
    const uint32_t first_byte = static_cast<const uint8_t*>(pBlock)[0];
    if (first_byte == 0)
        return false;

    // Locate the lowest set bit (guaranteed to exist, within bits 0..7).
    uint32_t mode = 0;
    while ((first_byte & (1U << mode)) == 0)
        mode++;

    if (mode == 0 || mode == 2)
        return unpack_bc7_mode0_2(mode, pBlock, pPixels);
    if (mode == 1 || mode == 3 || mode == 7)
        return unpack_bc7_mode1_3_7(mode, pBlock, pPixels);
    if (mode == 4 || mode == 5)
        return unpack_bc7_mode4_5(mode, pBlock, pPixels);
    if (mode == 6)
        return unpack_bc7_mode6(pBlock, pPixels);

    return false;
}
} // namespace bc7decomp
/*
------------------------------------------------------------------------------
This software is available under 2 licenses -- choose whichever you prefer.
------------------------------------------------------------------------------
ALTERNATIVE A - MIT License
Copyright(c) 2020 Richard Geldreich, Jr.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files(the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and / or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions :
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
------------------------------------------------------------------------------
ALTERNATIVE B - Public Domain(www.unlicense.org)
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
software, either in source code form or as a compiled binary, for any purpose,
commercial or non - commercial, and by any means.
In jurisdictions that recognize copyright laws, the author or authors of this
software dedicate any and all copyright interest in the software to the public
domain.We make this dedication for the benefit of the public at large and to
the detriment of our heirs and successors.We intend this dedication to be an
overt act of relinquishment in perpetuity of all present and future rights to
this software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
------------------------------------------------------------------------------
*/

@ -0,0 +1,165 @@
#pragma once
#include <stdlib.h>
#include <stdint.h>
#include <algorithm>
#include <math.h>
#include <assert.h>
namespace bc7decomp
{
// Tag type used to select the non-clamping color_rgba constructor.
enum eNoClamp { cNoClamp };
// Returns `value` limited to [low, high]. The lower bound is tested first, so if
// low > high any value below `low` yields `low`.
template <typename S> inline S clamp(S value, S low, S high)
{
    if (value < low)
        return low;
    if (value > high)
        return high;
    return value;
}
// 8-bit-per-channel RGBA colour. The four channels are reachable by index
// (m_comps / operator[]) or by name (r, g, b, a); both views share the same four bytes.
class color_rgba
{
public:
    union
    {
        uint8_t m_comps[4];
        struct
        {
            uint8_t r;
            uint8_t g;
            uint8_t b;
            uint8_t a;
        };
    };

    // Leaves all channels uninitialized.
    color_rgba()
    {
        static_assert(sizeof(*this) == 4, "sizeof(*this) != 4");
    }

    // Clamping constructors; each forwards to the set() overload with the same arguments.
    color_rgba(int y) { set(y); }
    color_rgba(int y, int na) { set(y, na); }
    color_rgba(int sr, int sg, int sb, int sa) { set(sr, sg, sb, sa); }

    // Non-clamping constructor: each argument is truncated to 8 bits.
    color_rgba(eNoClamp, int sr, int sg, int sb, int sa)
    {
        set_noclamp_rgba(sr, sg, sb, sa);
    }

    // Grey (y, y, y) with opaque alpha; y is truncated to 8 bits.
    color_rgba& set_noclamp_y(int y)
    {
        const uint8_t luma = (uint8_t)y;
        m_comps[0] = luma;
        m_comps[1] = luma;
        m_comps[2] = luma;
        m_comps[3] = (uint8_t)255;
        return *this;
    }

    // Stores all four channels, truncating each to 8 bits.
    color_rgba &set_noclamp_rgba(int sr, int sg, int sb, int sa)
    {
        const int src[4] = { sr, sg, sb, sa };
        for (int i = 0; i < 4; i++)
            m_comps[i] = (uint8_t)src[i];
        return *this;
    }

    // Grey (y, y, y) with opaque alpha; y is clamped to [0, 255].
    color_rgba &set(int y)
    {
        return set(y, 255);
    }

    // Grey (y, y, y) with alpha na; both clamped to [0, 255].
    color_rgba &set(int y, int na)
    {
        const uint8_t luma = static_cast<uint8_t>(clamp<int>(y, 0, 255));
        m_comps[0] = luma;
        m_comps[1] = luma;
        m_comps[2] = luma;
        m_comps[3] = static_cast<uint8_t>(clamp<int>(na, 0, 255));
        return *this;
    }

    // Stores all four channels, clamping each to [0, 255].
    color_rgba &set(int sr, int sg, int sb, int sa)
    {
        set_rgb(sr, sg, sb);
        m_comps[3] = static_cast<uint8_t>(clamp<int>(sa, 0, 255));
        return *this;
    }

    // Stores R, G and B (clamped); alpha is left untouched.
    color_rgba &set_rgb(int sr, int sg, int sb)
    {
        m_comps[0] = static_cast<uint8_t>(clamp<int>(sr, 0, 255));
        m_comps[1] = static_cast<uint8_t>(clamp<int>(sg, 0, 255));
        m_comps[2] = static_cast<uint8_t>(clamp<int>(sb, 0, 255));
        return *this;
    }

    // Copies R, G and B from another colour; alpha is left untouched.
    color_rgba &set_rgb(const color_rgba &other)
    {
        for (int i = 0; i < 3; i++)
            m_comps[i] = other.m_comps[i];
        return *this;
    }

    // Indexed channel access (0 = R, 1 = G, 2 = B, 3 = A); index is asserted to be < 4.
    const uint8_t &operator[] (uint32_t index) const
    {
        assert(index < 4);
        return m_comps[index];
    }
    uint8_t &operator[] (uint32_t index)
    {
        assert(index < 4);
        return m_comps[index];
    }

    // Sets every channel, including alpha, to zero.
    void clear()
    {
        for (int i = 0; i < 4; i++)
            m_comps[i] = 0;
    }

    bool operator== (const color_rgba &rhs) const
    {
        return (m_comps[0] == rhs.m_comps[0]) &&
               (m_comps[1] == rhs.m_comps[1]) &&
               (m_comps[2] == rhs.m_comps[2]) &&
               (m_comps[3] == rhs.m_comps[3]);
    }

    bool operator!= (const color_rgba &rhs) const
    {
        return !(*this == rhs);
    }

    // Lexicographic ordering over (R, G, B, A).
    bool operator<(const color_rgba &rhs) const
    {
        for (int i = 0; i < 4; i++)
        {
            if (m_comps[i] != rhs.m_comps[i])
                return m_comps[i] < rhs.m_comps[i];
        }
        return false;
    }

    // Fixed-point (16.16) weighted sums of R, G, B, rounded to nearest.
    int get_601_luma() const
    {
        const unsigned int weighted = 19595U * m_comps[0] + 38470U * m_comps[1] + 7471U * m_comps[2] + 32768U;
        return weighted >> 16U;
    }
    int get_709_luma() const
    {
        const unsigned int weighted = 13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U;
        return weighted >> 16U;
    }
    int get_luma(bool luma_601) const
    {
        if (luma_601)
            return get_601_luma();
        return get_709_luma();
    }

    // Per-channel minimum / maximum of two colours.
    static color_rgba comp_min(const color_rgba& a, const color_rgba& b)
    {
        return color_rgba(std::min(a[0], b[0]), std::min(a[1], b[1]), std::min(a[2], b[2]), std::min(a[3], b[3]));
    }
    static color_rgba comp_max(const color_rgba& a, const color_rgba& b)
    {
        return color_rgba(std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2]), std::max(a[3], b[3]));
    }
};
// Decodes the BC7 block at pBlock and writes 16 pixels to pPixels[0..15].
// The mode is taken from the lowest set bit of the block's first byte.
// Returns false if that byte is zero or the mode-specific decoder rejects the block.
bool unpack_bc7(const void *pBlock, color_rgba *pPixels);
} // namespace bc7decomp

File diff suppressed because it is too large Load Diff

@ -0,0 +1,82 @@
// File: bc7enc.h - Richard Geldreich, Jr. - MIT license or public domain (see end of bc7enc.c)
// Include-once guard: this header defines a typedef'd struct and inline functions,
// so including it twice in one translation unit would otherwise fail to compile.
#pragma once
#include <stdlib.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#define BC7ENC_BLOCK_SIZE (16)
#define BC7ENC_MAX_PARTITIONS1 (64)
#define BC7ENC_MAX_UBER_LEVEL (4)
typedef uint8_t bc7enc_bool;
#define BC7ENC_TRUE (1)
#define BC7ENC_FALSE (0)
// Encoder settings passed to bc7enc_compress_block().
// Fill in with bc7enc_compress_block_params_init(), then adjust individual fields.
typedef struct
{
    // m_max_partitions_mode may range from 0 (disables mode 1) to BC7ENC_MAX_PARTITIONS1. The higher this value, the slower the compressor, but the higher the quality.
    uint32_t m_max_partitions_mode;
    // Relative RGBA or YCbCrA weights.
    uint32_t m_weights[4];
    // m_uber_level may range from 0 to BC7ENC_MAX_UBER_LEVEL. The higher this value, the slower the compressor, but the higher the quality.
    uint32_t m_uber_level;
    // If m_perceptual is true, colorspace error is computed in YCbCr space, otherwise RGB.
    bc7enc_bool m_perceptual;
    // Set m_try_least_squares to false for slightly faster/lower quality compression.
    bc7enc_bool m_try_least_squares;
    // When m_mode_partition_estimation_filterbank is true, the mode 1 partition estimator skips lesser used partition patterns unless they are strongly predicted to be potentially useful.
    // There's a slight loss in quality with this enabled (around .08 dB RGB PSNR or .05 dB Y PSNR), but up to an 11% gain in speed depending on the other settings.
    bc7enc_bool m_mode_partition_estimation_filterbank;
    // NOTE(review): presumably allow the encoder to use mode 5 / mode 7 for blocks with alpha - confirm against bc7enc.c.
    bc7enc_bool m_use_mode5_for_alpha;
    bc7enc_bool m_use_mode7_for_alpha;
} bc7enc_compress_block_params;
// The helpers below are `static inline` rather than plain `inline`: in C99/C11 a plain
// inline definition in a header provides no external definition, so any call the
// compiler chooses not to inline (e.g. at -O0) fails to link.

// Selects RGB-space error with equal weight on all four channels.
static inline void bc7enc_compress_block_params_init_linear_weights(bc7enc_compress_block_params *p)
{
    p->m_perceptual = BC7ENC_FALSE;
    p->m_weights[0] = 1;
    p->m_weights[1] = 1;
    p->m_weights[2] = 1;
    p->m_weights[3] = 1;
}
// Selects perceptual (YCbCr-space) error with weights 128/64/16/32.
static inline void bc7enc_compress_block_params_init_perceptual_weights(bc7enc_compress_block_params *p)
{
    p->m_perceptual = BC7ENC_TRUE;
    p->m_weights[0] = 128;
    p->m_weights[1] = 64;
    p->m_weights[2] = 16;
    p->m_weights[3] = 32;
}
// Sets every field of *p to its default: all partitions searched, least squares and the
// partition filterbank enabled, uber level 0, modes 5 and 7 allowed for alpha,
// perceptual weights.
static inline void bc7enc_compress_block_params_init(bc7enc_compress_block_params *p)
{
    p->m_max_partitions_mode = BC7ENC_MAX_PARTITIONS1;
    p->m_try_least_squares = BC7ENC_TRUE;
    p->m_mode_partition_estimation_filterbank = BC7ENC_TRUE;
    p->m_uber_level = 0;
    p->m_use_mode5_for_alpha = BC7ENC_TRUE;
    p->m_use_mode7_for_alpha = BC7ENC_TRUE;
    bc7enc_compress_block_params_init_perceptual_weights(p);
}
// bc7enc_compress_block_init() MUST be called once before the first call to bc7enc_compress_block() (or you'll get artifacts).
void bc7enc_compress_block_init();
// Packs a single block of 16 RGBA pixels (a 4x4 block, R first in memory) into the 128-bit BC7 block pBlock.
// As originally documented: alpha blocks always use mode 6, and by default opaque blocks use mode 1 or mode 6.
// NOTE(review): the params struct also exposes m_use_mode5_for_alpha / m_use_mode7_for_alpha, so the mode list above may be out of date - confirm against bc7enc.c.
// Returns BC7ENC_TRUE if the block had any pixels with alpha < 255, otherwise it returns BC7ENC_FALSE. (This is not an error code - a block is always encoded.)
bc7enc_bool bc7enc_compress_block(void *pBlock, const void *pPixelsRGBA, const bc7enc_compress_block_params *pComp_params);
#ifdef __cplusplus
}
#endif

@ -0,0 +1,286 @@
// File: dds_defs.h
// DX9/10 .DDS file header definitions.
#pragma once
#include <stdint.h>
// Packs four character codes into a 32-bit FOURCC: `a` in the low byte, `d` in the high byte.
// Each operand is reduced to 8 bits and widened to uint32_t before shifting, so the result
// is always an unsigned 32-bit value. With plain int operands, a fourth byte >= 0x80 would
// shift into the sign bit and produce a negative value that sign-extends when widened.
#define PIXEL_FMT_FOURCC(a, b, c, d) ((uint32_t)(uint8_t)(a) | ((uint32_t)(uint8_t)(b) << 8U) | ((uint32_t)(uint8_t)(c) << 16U) | ((uint32_t)(uint8_t)(d) << 24U))
// Pixel format identifiers. Every value except PIXEL_FMT_INVALID is the FOURCC code
// built from the four characters shown, so values can be compared directly against a
// FOURCC read from a file.
enum pixel_format
{
PIXEL_FMT_INVALID = 0,
PIXEL_FMT_DXT1 = PIXEL_FMT_FOURCC('D', 'X', 'T', '1'),
PIXEL_FMT_DXT2 = PIXEL_FMT_FOURCC('D', 'X', 'T', '2'),
PIXEL_FMT_DXT3 = PIXEL_FMT_FOURCC('D', 'X', 'T', '3'),
PIXEL_FMT_DXT4 = PIXEL_FMT_FOURCC('D', 'X', 'T', '4'),
PIXEL_FMT_DXT5 = PIXEL_FMT_FOURCC('D', 'X', 'T', '5'),
PIXEL_FMT_3DC = PIXEL_FMT_FOURCC('A', 'T', 'I', '2'), // DXN_YX
PIXEL_FMT_DXN = PIXEL_FMT_FOURCC('A', '2', 'X', 'Y'), // DXN_XY
PIXEL_FMT_DXT5A = PIXEL_FMT_FOURCC('A', 'T', 'I', '1'), // ATI1N, http://developer.amd.com/media/gpu_assets/Radeon_X1x00_Programming_Guide.pdf
// Non-standard formats (some of these are supported by ATI's Compressonator)
PIXEL_FMT_DXT5_CCxY = PIXEL_FMT_FOURCC('C', 'C', 'x', 'Y'),
PIXEL_FMT_DXT5_xGxR = PIXEL_FMT_FOURCC('x', 'G', 'x', 'R'),
PIXEL_FMT_DXT5_xGBR = PIXEL_FMT_FOURCC('x', 'G', 'B', 'R'),
PIXEL_FMT_DXT5_AGBR = PIXEL_FMT_FOURCC('A', 'G', 'B', 'R'),
PIXEL_FMT_DXT1A = PIXEL_FMT_FOURCC('D', 'X', '1', 'A'),
PIXEL_FMT_ETC1 = PIXEL_FMT_FOURCC('E', 'T', 'C', '1'),
PIXEL_FMT_R8G8B8 = PIXEL_FMT_FOURCC('R', 'G', 'B', 'x'),
PIXEL_FMT_L8 = PIXEL_FMT_FOURCC('L', 'x', 'x', 'x'),
PIXEL_FMT_A8 = PIXEL_FMT_FOURCC('x', 'x', 'x', 'A'),
PIXEL_FMT_A8L8 = PIXEL_FMT_FOURCC('L', 'x', 'x', 'A'),
PIXEL_FMT_A8R8G8B8 = PIXEL_FMT_FOURCC('R', 'G', 'B', 'A')
};
// NOTE(review): presumably the largest width/height accepted for a .DDS image - confirm at the use sites.
const uint32_t cDDSMaxImageDimensions = 8192U;
// Size in bytes of DDSURFACEDESC2 (31 32-bit fields). The total file header size is
// sizeof(uint32_t) for the signature plus cDDSSizeofDDSurfaceDesc2.
const uint32_t cDDSSizeofDDSurfaceDesc2 = 124;
// The characters 'D', 'D', 'S', ' ' packed with the first character in the low byte.
const uint32_t cDDSFileSignature = 0x20534444;
// Eight-byte placeholder embedded four times in DDSURFACEDESC2; both fields are unused here
// and exist only to keep the header layout.
struct DDCOLORKEY
{
uint32_t dwUnused0;
uint32_t dwUnused1;
};
// Pixel format description embedded in DDSURFACEDESC2 (eight 32-bit fields).
// NOTE(review): dwFlags presumably takes the DDPF_* constants and dwFourCC a
// PIXEL_FMT_FOURCC value - confirm against the reader/writer code.
struct DDPIXELFORMAT
{
uint32_t dwSize;
uint32_t dwFlags;
uint32_t dwFourCC;
uint32_t dwRGBBitCount; // ATI compressonator will place a FOURCC code here for swizzled/cooked DXTn formats
uint32_t dwRBitMask;
uint32_t dwGBitMask;
uint32_t dwBBitMask;
uint32_t dwRGBAlphaBitMask;
};
// Capability words embedded in DDSURFACEDESC2 (four 32-bit fields).
// NOTE(review): dwCaps / dwCaps2 presumably take the DDSCAPS_* / DDSCAPS2_* constants - confirm.
struct DDSCAPS2
{
uint32_t dwCaps;
uint32_t dwCaps2;
uint32_t dwCaps3;
uint32_t dwCaps4;
};
// DDS surface description, i.e. the header that follows the 4-byte file signature.
// It consists of 31 32-bit fields (124 bytes, matching cDDSSizeofDDSurfaceDesc2).
// NOTE(review): dwFlags presumably takes the DDSD_* constants - confirm.
struct DDSURFACEDESC2
{
uint32_t dwSize;
uint32_t dwFlags;
uint32_t dwHeight;
uint32_t dwWidth;
// Two views of the same 32-bit field.
// NOTE(review): presumably selected by DDSD_PITCH vs DDSD_LINEARSIZE in dwFlags - confirm.
union
{
int32_t lPitch;
uint32_t dwLinearSize;
};
uint32_t dwBackBufferCount;
uint32_t dwMipMapCount;
uint32_t dwAlphaBitDepth;
uint32_t dwUnused0;
// Declared as a fixed 32-bit slot rather than a pointer, so the struct size does not
// depend on the target's pointer width.
uint32_t lpSurface;
DDCOLORKEY unused0;
DDCOLORKEY unused1;
DDCOLORKEY unused2;
DDCOLORKEY unused3;
DDPIXELFORMAT ddpfPixelFormat;
DDSCAPS2 ddsCaps;
uint32_t dwUnused1;
};
// DDSD_*: single-bit flags.
// NOTE(review): presumably for DDSURFACEDESC2::dwFlags, marking which header fields are valid - confirm.
const uint32_t DDSD_CAPS = 0x00000001;
const uint32_t DDSD_HEIGHT = 0x00000002;
const uint32_t DDSD_WIDTH = 0x00000004;
const uint32_t DDSD_PITCH = 0x00000008;
const uint32_t DDSD_BACKBUFFERCOUNT = 0x00000020;
const uint32_t DDSD_ZBUFFERBITDEPTH = 0x00000040;
const uint32_t DDSD_ALPHABITDEPTH = 0x00000080;
const uint32_t DDSD_LPSURFACE = 0x00000800;
const uint32_t DDSD_PIXELFORMAT = 0x00001000;
const uint32_t DDSD_CKDESTOVERLAY = 0x00002000;
const uint32_t DDSD_CKDESTBLT = 0x00004000;
const uint32_t DDSD_CKSRCOVERLAY = 0x00008000;
const uint32_t DDSD_CKSRCBLT = 0x00010000;
const uint32_t DDSD_MIPMAPCOUNT = 0x00020000;
const uint32_t DDSD_REFRESHRATE = 0x00040000;
const uint32_t DDSD_LINEARSIZE = 0x00080000;
const uint32_t DDSD_TEXTURESTAGE = 0x00100000;
const uint32_t DDSD_FVF = 0x00200000;
const uint32_t DDSD_SRCVBHANDLE = 0x00400000;
const uint32_t DDSD_DEPTH = 0x00800000;
// Note: this mask is not the bitwise OR of the flags above - it has bit 0x100 set
// and the DDSD_CAPS bit (0x1) clear.
const uint32_t DDSD_ALL = 0x00fff9ee;
// DDPF_*: single-bit flags.
// NOTE(review): presumably for DDPIXELFORMAT::dwFlags - confirm.
const uint32_t DDPF_ALPHAPIXELS = 0x00000001;
const uint32_t DDPF_ALPHA = 0x00000002;
const uint32_t DDPF_FOURCC = 0x00000004;
const uint32_t DDPF_PALETTEINDEXED8 = 0x00000020;
const uint32_t DDPF_RGB = 0x00000040;
const uint32_t DDPF_LUMINANCE = 0x00020000;
// DDSCAPS_* / DDSCAPS2_*: single-bit flags.
// NOTE(review): presumably for DDSCAPS2::dwCaps and DDSCAPS2::dwCaps2 respectively - confirm.
const uint32_t DDSCAPS_COMPLEX = 0x00000008;
const uint32_t DDSCAPS_TEXTURE = 0x00001000;
const uint32_t DDSCAPS_MIPMAP = 0x00400000;
const uint32_t DDSCAPS2_CUBEMAP = 0x00000200;
const uint32_t DDSCAPS2_CUBEMAP_POSITIVEX = 0x00000400;
const uint32_t DDSCAPS2_CUBEMAP_NEGATIVEX = 0x00000800;
const uint32_t DDSCAPS2_CUBEMAP_POSITIVEY = 0x00001000;
const uint32_t DDSCAPS2_CUBEMAP_NEGATIVEY = 0x00002000;
const uint32_t DDSCAPS2_CUBEMAP_POSITIVEZ = 0x00004000;
const uint32_t DDSCAPS2_CUBEMAP_NEGATIVEZ = 0x00008000;
const uint32_t DDSCAPS2_VOLUME = 0x00200000;
// Numeric format identifiers as stored in DDS_HEADER_DXT10::dxgiFormat.
// NOTE(review): names and values appear to mirror Direct3D's DXGI_FORMAT enumeration - confirm against the
// official header before adding or renumbering entries, since these values are written to files as-is.
typedef enum DXGI_FORMAT
{
DXGI_FORMAT_UNKNOWN = 0,
DXGI_FORMAT_R32G32B32A32_TYPELESS = 1,
DXGI_FORMAT_R32G32B32A32_FLOAT = 2,
DXGI_FORMAT_R32G32B32A32_UINT = 3,
DXGI_FORMAT_R32G32B32A32_SINT = 4,
DXGI_FORMAT_R32G32B32_TYPELESS = 5,
DXGI_FORMAT_R32G32B32_FLOAT = 6,
DXGI_FORMAT_R32G32B32_UINT = 7,
DXGI_FORMAT_R32G32B32_SINT = 8,
DXGI_FORMAT_R16G16B16A16_TYPELESS = 9,
DXGI_FORMAT_R16G16B16A16_FLOAT = 10,
DXGI_FORMAT_R16G16B16A16_UNORM = 11,
DXGI_FORMAT_R16G16B16A16_UINT = 12,
DXGI_FORMAT_R16G16B16A16_SNORM = 13,
DXGI_FORMAT_R16G16B16A16_SINT = 14,
DXGI_FORMAT_R32G32_TYPELESS = 15,
DXGI_FORMAT_R32G32_FLOAT = 16,
DXGI_FORMAT_R32G32_UINT = 17,
DXGI_FORMAT_R32G32_SINT = 18,
DXGI_FORMAT_R32G8X24_TYPELESS = 19,
DXGI_FORMAT_D32_FLOAT_S8X24_UINT = 20,
DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS = 21,
DXGI_FORMAT_X32_TYPELESS_G8X24_UINT = 22,
DXGI_FORMAT_R10G10B10A2_TYPELESS = 23,
DXGI_FORMAT_R10G10B10A2_UNORM = 24,
DXGI_FORMAT_R10G10B10A2_UINT = 25,
DXGI_FORMAT_R11G11B10_FLOAT = 26,
DXGI_FORMAT_R8G8B8A8_TYPELESS = 27,
DXGI_FORMAT_R8G8B8A8_UNORM = 28,
DXGI_FORMAT_R8G8B8A8_UNORM_SRGB = 29,
DXGI_FORMAT_R8G8B8A8_UINT = 30,
DXGI_FORMAT_R8G8B8A8_SNORM = 31,
DXGI_FORMAT_R8G8B8A8_SINT = 32,
DXGI_FORMAT_R16G16_TYPELESS = 33,
DXGI_FORMAT_R16G16_FLOAT = 34,
DXGI_FORMAT_R16G16_UNORM = 35,
DXGI_FORMAT_R16G16_UINT = 36,
DXGI_FORMAT_R16G16_SNORM = 37,
DXGI_FORMAT_R16G16_SINT = 38,
DXGI_FORMAT_R32_TYPELESS = 39,
DXGI_FORMAT_D32_FLOAT = 40,
DXGI_FORMAT_R32_FLOAT = 41,
DXGI_FORMAT_R32_UINT = 42,
DXGI_FORMAT_R32_SINT = 43,
DXGI_FORMAT_R24G8_TYPELESS = 44,
DXGI_FORMAT_D24_UNORM_S8_UINT = 45,
DXGI_FORMAT_R24_UNORM_X8_TYPELESS = 46,
DXGI_FORMAT_X24_TYPELESS_G8_UINT = 47,
DXGI_FORMAT_R8G8_TYPELESS = 48,
DXGI_FORMAT_R8G8_UNORM = 49,
DXGI_FORMAT_R8G8_UINT = 50,
DXGI_FORMAT_R8G8_SNORM = 51,
DXGI_FORMAT_R8G8_SINT = 52,
DXGI_FORMAT_R16_TYPELESS = 53,
DXGI_FORMAT_R16_FLOAT = 54,
DXGI_FORMAT_D16_UNORM = 55,
DXGI_FORMAT_R16_UNORM = 56,
DXGI_FORMAT_R16_UINT = 57,
DXGI_FORMAT_R16_SNORM = 58,
DXGI_FORMAT_R16_SINT = 59,
DXGI_FORMAT_R8_TYPELESS = 60,
DXGI_FORMAT_R8_UNORM = 61,
DXGI_FORMAT_R8_UINT = 62,
DXGI_FORMAT_R8_SNORM = 63,
DXGI_FORMAT_R8_SINT = 64,
DXGI_FORMAT_A8_UNORM = 65,
DXGI_FORMAT_R1_UNORM = 66,
DXGI_FORMAT_R9G9B9E5_SHAREDEXP = 67,
DXGI_FORMAT_R8G8_B8G8_UNORM = 68,
DXGI_FORMAT_G8R8_G8B8_UNORM = 69,
// Block-compressed formats BC1-BC5.
DXGI_FORMAT_BC1_TYPELESS = 70,
DXGI_FORMAT_BC1_UNORM = 71,
DXGI_FORMAT_BC1_UNORM_SRGB = 72,
DXGI_FORMAT_BC2_TYPELESS = 73,
DXGI_FORMAT_BC2_UNORM = 74,
DXGI_FORMAT_BC2_UNORM_SRGB = 75,
DXGI_FORMAT_BC3_TYPELESS = 76,
DXGI_FORMAT_BC3_UNORM = 77,
DXGI_FORMAT_BC3_UNORM_SRGB = 78,
DXGI_FORMAT_BC4_TYPELESS = 79,
DXGI_FORMAT_BC4_UNORM = 80,
DXGI_FORMAT_BC4_SNORM = 81,
DXGI_FORMAT_BC5_TYPELESS = 82,
DXGI_FORMAT_BC5_UNORM = 83,
DXGI_FORMAT_BC5_SNORM = 84,
DXGI_FORMAT_B5G6R5_UNORM = 85,
DXGI_FORMAT_B5G5R5A1_UNORM = 86,
DXGI_FORMAT_B8G8R8A8_UNORM = 87,
DXGI_FORMAT_B8G8R8X8_UNORM = 88,
DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM = 89,
DXGI_FORMAT_B8G8R8A8_TYPELESS = 90,
DXGI_FORMAT_B8G8R8A8_UNORM_SRGB = 91,
DXGI_FORMAT_B8G8R8X8_TYPELESS = 92,
DXGI_FORMAT_B8G8R8X8_UNORM_SRGB = 93,
// Block-compressed formats BC6H and BC7.
DXGI_FORMAT_BC6H_TYPELESS = 94,
DXGI_FORMAT_BC6H_UF16 = 95,
DXGI_FORMAT_BC6H_SF16 = 96,
DXGI_FORMAT_BC7_TYPELESS = 97,
DXGI_FORMAT_BC7_UNORM = 98,
DXGI_FORMAT_BC7_UNORM_SRGB = 99,
DXGI_FORMAT_AYUV = 100,
DXGI_FORMAT_Y410 = 101,
DXGI_FORMAT_Y416 = 102,
DXGI_FORMAT_NV12 = 103,
DXGI_FORMAT_P010 = 104,
DXGI_FORMAT_P016 = 105,
DXGI_FORMAT_420_OPAQUE = 106,
DXGI_FORMAT_YUY2 = 107,
DXGI_FORMAT_Y210 = 108,
DXGI_FORMAT_Y216 = 109,
DXGI_FORMAT_NV11 = 110,
DXGI_FORMAT_AI44 = 111,
DXGI_FORMAT_IA44 = 112,
DXGI_FORMAT_P8 = 113,
DXGI_FORMAT_A8P8 = 114,
DXGI_FORMAT_B4G4R4A4_UNORM = 115,
// Values 116-129 are not defined in this list.
DXGI_FORMAT_P208 = 130,
DXGI_FORMAT_V208 = 131,
DXGI_FORMAT_V408 = 132,
// Presumably present only to force a 32-bit wide underlying type - TODO confirm.
DXGI_FORMAT_FORCE_UINT = 0xffffffff
} DXGI_FORMAT;
// Kind of resource described by DDS_HEADER_DXT10::resourceDimension.
// NOTE(review): values appear to mirror Direct3D 10's D3D10_RESOURCE_DIMENSION - confirm before changing.
enum D3D10_RESOURCE_DIMENSION
{
D3D10_RESOURCE_DIMENSION_UNKNOWN = 0,
D3D10_RESOURCE_DIMENSION_BUFFER = 1,
D3D10_RESOURCE_DIMENSION_TEXTURE1D = 2,
D3D10_RESOURCE_DIMENSION_TEXTURE2D = 3,
D3D10_RESOURCE_DIMENSION_TEXTURE3D = 4
};
// Extended DDS header. Code that writes it dumps the struct to the file byte-for-byte, so the
// order and size of the fields are part of the file layout and must not be changed.
struct DDS_HEADER_DXT10
{
DXGI_FORMAT dxgiFormat; // format of the texture data
D3D10_RESOURCE_DIMENSION resourceDimension; // kind of resource (buffer, 1D/2D/3D texture)
uint32_t miscFlag;
uint32_t arraySize; // presumably the number of textures in the array - TODO confirm
uint32_t miscFlags2;
};

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,954 @@
// test.cpp - Command line example/test app
#ifdef _MSC_VER
#define _CRT_SECURE_NO_WARNINGS
#endif
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <algorithm>
#include <assert.h>
#include <time.h>
#include "bc7enc.h"
#include "lodepng.h"
#include "dds_defs.h"
#include "bc7decomp.h"
#include "../rgbcx.h"
// Largest value accepted by the -u command line option.
const int MAX_UBER_LEVEL = 5;
// Returns the magnitude of i (negates it when it is below zero).
inline int iabs(int i)
{
    return (i < 0) ? -i : i;
}
// Saturates a signed 32-bit value into the byte range: negative values give 0,
// values above 255 give 255, everything else is returned unchanged.
inline uint8_t clamp255(int32_t i)
{
    if (i < 0)
        return 0;
    if (i > 255)
        return 255;
    return (uint8_t)i;
}
// Limits value to the closed interval [low, high]; the lower bound is tested first.
template <typename S>
inline S clamp(S value, S low, S high)
{
    if (value < low)
        return low;
    if (value > high)
        return high;
    return value;
}
// Writes the command line help text to stderr.
// Always returns EXIT_FAILURE so callers can simply `return print_usage();`.
static int print_usage()
{
    // One entry per original output call; none of the strings contains a format specifier,
    // so they are emitted verbatim.
    static const char *const s_usage_text[] =
    {
        "bc7enc\n",
        "Reads PNG files (with or without alpha channels) and packs them to BC1-5 or BC7/BPTC (default) using\nmodes 1, 6 (opaque blocks) or modes 1, 5, 6, and 7 (alpha blocks).\n",
        "By default, a DX10 DDS file and a unpacked PNG file will be written to the current\ndirectory with the .dds/_unpacked.png/_unpacked_alpha.png suffixes.\n\n",
        "Usage: bc7enc [-apng_filename] [options] input_filename.png [compressed_output.dds] [unpacked_output.png]\n\n",
        "-apng_filename Load G channel of PNG file into alpha channel of source image\n",
        "-g Don't write unpacked output PNG files (this disables PSNR metrics too).\n",
        "-y Flip source image along Y axis before packing\n",
        "-o Write output files to the current directory\n",
        "-1 Encode to BC1. -u[0,5] controls quality vs. perf. tradeoff for RGB.\n",
        "-3 Encode to BC3. -u[0,5] controls quality vs. perf. tradeoff for RGB.\n",
        "-4 Encode to BC4\n",
        "-5 Encode to BC5\n",
        "\n",
        "-X# BC4/5: Set first color channel (defaults to 0 or red)\n",
        "-Y# BC4/5: Set second color channel (defaults to 1 or green)\n",
        "\n",
        "-l BC7: Use linear colorspace metrics instead of perceptual\n",
        "-uX BC1/3/7: Higher quality levels, X ranges from [0,4] for BC7, or [0,5] for BC1-3\n",
        "-pX BC7: Scan X partitions in mode 1, X ranges from [0,64], use 0 to disable mode 1 entirely (faster)\n",
        "\n",
        "-b BC1: Enable 3-color mode for blocks containing black or very dark pixels. (Important: engine/shader MUST ignore decoded texture alpha if this flag is enabled!)\n",
        "-c BC1: Disable 3-color mode for solid color blocks\n",
        "-n BC1: Encode/decode for NVidia GPU's\n",
        "-m BC1: Encode/decode for AMD GPU's\n",
        "-r BC1: Encode/decode using ideal BC1 formulas with rounding for 4-color block colors 2,3 (same as AMD Compressonator)\n",
        "-LX BC1: Set encoding level, where 0=fastest and 19=slowest but highest quality\n",
        "-f Force writing DX10-style DDS files (otherwise for BC1-5 it uses DX9-style DDS files)\n",
        "\nBy default, this tool encodes to BC1 without rounding 4-color block colors 2,3, which may not match the output of some software decoders.\n",
        "\nFor BC4 and BC5: Not all tools support reading DX9-style BC4/BC5 format files (or BC4/5 files at all). AMD Compressonator does.\n",
    };

    for (const char *pText : s_usage_text)
        fputs(pText, stderr);

    return EXIT_FAILURE;
}
// Four 8-bit channels kept in m_c[0..3]. The four-argument constructor and set() store their
// arguments in r, g, b, a order; the two-argument forms replicate one value into the first three
// channels and store the second in m_c[3].
struct color_quad_u8
{
    uint8_t m_c[4];

    // Builds a color from four explicit channel values.
    inline color_quad_u8(uint8_t r, uint8_t g, uint8_t b, uint8_t a)
    {
        m_c[0] = r;
        m_c[1] = g;
        m_c[2] = b;
        m_c[3] = a;
    }

    // Builds a grey color (y in the first three channels); also serves as the default
    // constructor, producing (0, 0, 0, 255).
    inline color_quad_u8(uint8_t y = 0, uint8_t a = 255)
    {
        m_c[0] = m_c[1] = m_c[2] = y;
        m_c[3] = a;
    }

    // Overwrites the first three channels with y and the fourth with a. Returns *this.
    inline color_quad_u8 &set(uint8_t y, uint8_t a = 255)
    {
        return set(y, y, y, a);
    }

    // Overwrites all four channels. Returns *this.
    inline color_quad_u8 &set(uint8_t r, uint8_t g, uint8_t b, uint8_t a)
    {
        m_c[0] = r;
        m_c[1] = g;
        m_c[2] = b;
        m_c[3] = a;
        return *this;
    }

    // Channel access; the index is only checked by assert().
    inline uint8_t &operator[] (uint32_t i)
    {
        assert(i < 4);
        return m_c[i];
    }

    inline uint8_t operator[] (uint32_t i) const
    {
        assert(i < 4);
        return m_c[i];
    }

    // Weighted sum of the first three channels using REC709 weightings in 16-bit fixed point,
    // with 32768 added for rounding before the shift.
    inline int get_luma() const
    {
        const uint32_t weighted = 13938U * m_c[0] + 46869U * m_c[1] + 4729U * m_c[2];
        return (weighted + 32768U) >> 16U;
    }
};
// Contiguous array of pixels; used as the pixel storage of image_u8.
typedef std::vector<color_quad_u8> color_quad_u8_vec;
class image_u8
{
public:
image_u8() :
m_width(0), m_height(0)
{
}
image_u8(uint32_t width, uint32_t height) :
m_width(width), m_height(height)
{
m_pixels.resize(width * height);
}
inline const color_quad_u8_vec &get_pixels() const { return m_pixels; }
inline color_quad_u8_vec &get_pixels() { return m_pixels; }
inline uint32_t width() const { return m_width; }
inline uint32_t height() const { return m_height; }
inline uint32_t total_pixels() const { return m_width * m_height; }
inline color_quad_u8 &operator()(uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + m_width * y]; }
inline const color_quad_u8 &operator()(uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + m_width * y]; }
image_u8& clear()
{
m_width = m_height = 0;
m_pixels.clear();
return *this;
}
image_u8& init(uint32_t width, uint32_t height)
{
clear();
m_width = width;
m_height = height;
m_pixels.resize(width * height);
return *this;
}
image_u8& set_all(const color_quad_u8 &p)
{
for (uint32_t i = 0; i < m_pixels.size(); i++)
m_pixels[i] = p;
return *this;
}
image_u8& crop(uint32_t new_width, uint32_t new_height)
{
if ((m_width == new_width) && (m_height == new_height))
return *this;
image_u8 new_image(new_width, new_height);
const uint32_t w = std::min(m_width, new_width);
const uint32_t h = std::min(m_height, new_height);
for (uint32_t y = 0; y < h; y++)
for (uint32_t x = 0; x < w; x++)
new_image(x, y) = (*this)(x, y);
return swap(new_image);
}
image_u8 &swap(image_u8 &other)
{
std::swap(m_width, other.m_width);
std::swap(m_height, other.m_height);
std::swap(m_pixels, other.m_pixels);
return *this;
}
inline void get_block(uint32_t bx, uint32_t by, uint32_t width, uint32_t height, color_quad_u8 *pPixels)
{
assert((bx * width + width) <= m_width);
assert((by * height + height) <= m_height);
for (uint32_t y = 0; y < height; y++)
memcpy(pPixels + y * width, &(*this)(bx * width, by * height + y), width * sizeof(color_quad_u8));
}
inline void set_block(uint32_t bx, uint32_t by, uint32_t width, uint32_t height, const color_quad_u8 *pPixels)
{
assert((bx * width + width) <= m_width);
assert((by * height + height) <= m_height);
for (uint32_t y = 0; y < height; y++)
memcpy(&(*this)(bx * width, by * height + y), pPixels + y * width, width * sizeof(color_quad_u8));
}
image_u8 &swizzle(uint32_t r, uint32_t g, uint32_t b, uint32_t a)
{
assert((r | g | b | a) <= 3);
for (uint32_t y = 0; y < m_height; y++)
{
for (uint32_t x = 0; x < m_width; x++)
{
color_quad_u8 tmp((*this)(x, y));
(*this)(x, y).set(tmp[r], tmp[g], tmp[b], tmp[a]);
}
}
return *this;
}
private:
color_quad_u8_vec m_pixels;
uint32_t m_width, m_height;
};
static bool load_png(const char *pFilename, image_u8 &img)
{
img.clear();
std::vector<unsigned char> pixels;
unsigned int w = 0, h = 0;
unsigned int e = lodepng::decode(pixels, w, h, pFilename);
if (e != 0)
{
fprintf(stderr, "Failed loading PNG file %s\n", pFilename);
return false;
}
img.init(w, h);
memcpy(&img.get_pixels()[0], &pixels[0], w * h * sizeof(uint32_t));
return true;
}
static bool save_png(const char *pFilename, const image_u8 &img, bool save_alpha)
{
const uint32_t w = img.width();
const uint32_t h = img.height();
std::vector<unsigned char> pixels;
if (save_alpha)
{
pixels.resize(w * h * sizeof(color_quad_u8));
memcpy(&pixels[0], &img.get_pixels()[0], w * h * sizeof(color_quad_u8));
}
else
{
pixels.resize(w * h * 3);
unsigned char *pDst = &pixels[0];
for (uint32_t y = 0; y < h; y++)
for (uint32_t x = 0; x < w; x++, pDst += 3)
pDst[0] = img(x, y)[0], pDst[1] = img(x, y)[1], pDst[2] = img(x, y)[2];
}
return lodepng::encode(pFilename, pixels, w, h, save_alpha ? LCT_RGBA : LCT_RGB) == 0;
}
// Error statistics between two images: maximum absolute error, mean error, mean squared error,
// root mean squared error and peak signal-to-noise ratio (in dB, limited to [0, 100]).
class image_metrics
{
public:
    double m_max, m_mean, m_mean_squared, m_root_mean_squared, m_peak_snr;

    image_metrics()
    {
        clear();
    }

    // Resets every statistic to zero.
    void clear()
    {
        m_max = 0.0;
        m_mean = 0.0;
        m_mean_squared = 0.0;
        m_root_mean_squared = 0.0;
        m_peak_snr = 0.0;
    }

    // Computes the statistics over the region common to both images (minimum width and height).
    //
    // first_channel: index of the first channel to compare (0..3).
    // num_channels:  number of consecutive channels to compare; 0 selects a comparison of the
    //                pixels' get_luma() values instead of individual channels.
    //
    // When the common region is empty all error values are 0 and the PSNR is 100 (previously this
    // case divided by zero and produced NaN values).
    void compute(const image_u8 &a, const image_u8 &b, uint32_t first_channel, uint32_t num_channels)
    {
        // When set, the sums are divided by the number of compared components rather than pixels.
        const bool average_component_error = true;

        const uint32_t width = std::min(a.width(), b.width());
        const uint32_t height = std::min(a.height(), b.height());

        assert((first_channel < 4U) && (first_channel + num_channels <= 4U));

        // Histogram approach originally due to Charles Bloom.
        // hist[d] counts how many compared values differ by exactly d.
        double hist[256] = {};

        for (uint32_t y = 0; y < height; y++)
        {
            for (uint32_t x = 0; x < width; x++)
            {
                const color_quad_u8 &ca = a(x, y);
                const color_quad_u8 &cb = b(x, y);

                if (!num_channels)
                {
                    hist[iabs(ca.get_luma() - cb.get_luma())]++;
                }
                else
                {
                    for (uint32_t c = 0; c < num_channels; c++)
                        hist[iabs(ca[first_channel + c] - cb[first_channel + c])]++;
                }
            }
        }

        m_max = 0;
        double sum = 0.0f, sum2 = 0.0f;
        for (uint32_t i = 0; i < 256; i++)
        {
            if (!hist[i])
                continue;

            m_max = std::max<double>(m_max, i);

            const double x = i * hist[i];
            sum += x;
            sum2 += i * x;
        }

        // See http://richg42.blogspot.com/2016/09/how-to-compute-psnr-from-old-berkeley.html
        double total_values = (double)width * (double)height;
        if (average_component_error)
            total_values *= clamp<uint32_t>(num_channels, 1, 4);

        if (total_values <= 0.0)
        {
            // Nothing was compared: report "no error" instead of dividing by zero.
            clear();
            m_peak_snr = 100.0f;
            return;
        }

        m_mean = clamp<double>(sum / total_values, 0.0f, 255.0f);
        m_mean_squared = clamp<double>(sum2 / total_values, 0.0f, 255.0f * 255.0f);
        m_root_mean_squared = sqrt(m_mean_squared);

        if (!m_root_mean_squared)
            m_peak_snr = 100.0f;
        else
            m_peak_snr = clamp<double>(log10(255.0f / m_root_mean_squared) * 20.0f, 0.0f, 100.0f);
    }
};
// Storage for one 8-byte compressed block (a single 64-bit word).
struct block8
{
uint64_t m_vals[1];
};
typedef std::vector<block8> block8_vec;
// Storage for one 16-byte compressed block (two 64-bit words).
struct block16
{
uint64_t m_vals[2];
};
typedef std::vector<block16> block16_vec;
// Writes compressed blocks to a DDS file.
//
// pFilename:         path of the file to create.
// width, height:     image dimensions stored in the header (the payload size is computed from
//                    the dimensions rounded up to multiples of 4).
// pBlocks:           the compressed block data.
// pixel_format_bpp:  bits per pixel of the compressed format.
// dxgi_format:       format of the blocks; BC1/BC3/BC4/BC5 UNORM get a FourCC-only header
//                    unless force_dx10_header is set, everything else gets a "DX10" FourCC
//                    followed by a DDS_HEADER_DXT10.
// srgb:              currently ignored.
// force_dx10_header: always write the DX10 extended header.
//
// Returns true when the whole file was written. Any failed write (not only a failed fclose) is
// reported on stderr and makes the function return false.
static bool save_dds(const char *pFilename, uint32_t width, uint32_t height, const void *pBlocks, uint32_t pixel_format_bpp, DXGI_FORMAT dxgi_format, bool srgb, bool force_dx10_header)
{
    (void)srgb;

    FILE *pFile = fopen(pFilename, "wb");
    if (!pFile)
    {
        fprintf(stderr, "Failed creating file %s!\n", pFilename);
        return false;
    }

    // Size in bytes of the block data: padded width * padded height * bits per pixel / 8.
    const uint32_t data_size = (((width + 3) & ~3) * ((height + 3) & ~3) * pixel_format_bpp) >> 3;

    DDSURFACEDESC2 desc;
    memset(&desc, 0, sizeof(desc));

    desc.dwSize = sizeof(desc);
    desc.dwFlags = DDSD_WIDTH | DDSD_HEIGHT | DDSD_PIXELFORMAT | DDSD_CAPS | DDSD_LINEARSIZE;

    desc.dwWidth = width;
    desc.dwHeight = height;

    desc.ddsCaps.dwCaps = DDSCAPS_TEXTURE;
    desc.ddpfPixelFormat.dwSize = sizeof(desc.ddpfPixelFormat);
    desc.ddpfPixelFormat.dwFlags |= DDPF_FOURCC;
    desc.ddpfPixelFormat.dwRGBBitCount = 0;

    // NOTE(review): DDSD_LINEARSIZE is set but the value is stored through lPitch; presumably
    // lPitch shares storage with dwLinearSize - confirm against the struct definition.
    desc.lPitch = data_size;

    // Pick the FourCC; formats without a legacy FourCC (or a forced DX10 header) use "DX10".
    bool write_dx10_header = true;
    if (!force_dx10_header)
    {
        switch (dxgi_format)
        {
        case DXGI_FORMAT_BC1_UNORM:
            desc.ddpfPixelFormat.dwFourCC = (uint32_t)PIXEL_FMT_FOURCC('D', 'X', 'T', '1');
            write_dx10_header = false;
            break;
        case DXGI_FORMAT_BC3_UNORM:
            desc.ddpfPixelFormat.dwFourCC = (uint32_t)PIXEL_FMT_FOURCC('D', 'X', 'T', '5');
            write_dx10_header = false;
            break;
        case DXGI_FORMAT_BC4_UNORM:
            desc.ddpfPixelFormat.dwFourCC = (uint32_t)PIXEL_FMT_FOURCC('A', 'T', 'I', '1');
            write_dx10_header = false;
            break;
        case DXGI_FORMAT_BC5_UNORM:
            desc.ddpfPixelFormat.dwFourCC = (uint32_t)PIXEL_FMT_FOURCC('A', 'T', 'I', '2');
            write_dx10_header = false;
            break;
        default:
            break;
        }
    }

    if (write_dx10_header)
        desc.ddpfPixelFormat.dwFourCC = (uint32_t)PIXEL_FMT_FOURCC('D', 'X', '1', '0');

    // File layout: magic, surface description, optional DX10 header, block data.
    bool ok = (fwrite("DDS ", 4, 1, pFile) == 1);
    ok = ok && (fwrite(&desc, sizeof(desc), 1, pFile) == 1);

    if (ok && write_dx10_header)
    {
        DDS_HEADER_DXT10 hdr10;
        memset(&hdr10, 0, sizeof(hdr10));

        // Not all tools support DXGI_FORMAT_BC7_UNORM_SRGB (like NVTT), but ddsview in DirectXTex pays attention to it. So not sure what to do here.
        // For best compatibility just write DXGI_FORMAT_BC7_UNORM.
        //hdr10.dxgiFormat = srgb ? DXGI_FORMAT_BC7_UNORM_SRGB : DXGI_FORMAT_BC7_UNORM;
        hdr10.dxgiFormat = dxgi_format;
        hdr10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE2D;
        hdr10.arraySize = 1;

        ok = (fwrite(&hdr10, sizeof(hdr10), 1, pFile) == 1);
    }

    // fwrite() returns 0 for a zero-sized element, so only write (and check) a non-empty payload.
    if (ok && data_size)
        ok = (fwrite(pBlocks, data_size, 1, pFile) == 1);

    // Always close the file; a failing close also means the data may not have reached the disk.
    if (fclose(pFile) == EOF)
        ok = false;

    if (!ok)
    {
        fprintf(stderr, "Failed writing to DDS file %s!\n", pFilename);
        return false;
    }

    return true;
}
// Removes the extension (the last '.' and everything after it) from the file name part of s.
//
// Only the final path component is examined: the search stops at the last '/', '\\' or ':'
// (the same separators strip_path() recognizes), so dots in directory names are left alone.
// For example "dir.v2/file" and "./file" are returned unchanged, whereas previously they were
// truncated to "dir" and "" respectively. Strings without an extension are not modified.
static void strip_extension(std::string &s)
{
    // Position of the last dot or path separator, whichever comes later in the string.
    const std::string::size_type pos = s.find_last_of("./\\:");

    if ((pos != std::string::npos) && (s[pos] == '.'))
        s.resize(pos);
}
// Removes any leading directory/drive part from s: everything up to and including the last
// '/', ':' or '\\' is erased. Strings containing none of these characters are left untouched.
static void strip_path(std::string& s)
{
    const std::string::size_type last_separator = s.find_last_of("/:\\");

    if (last_separator != std::string::npos)
        s.erase(0, last_separator + 1);
}
// Command line entry point.
//
// Parses the options, loads the source PNG (optionally replacing its alpha channel with the green
// channel of a second PNG), pads the image to a multiple of 4 in both dimensions, compresses it
// 4x4 block by block to the selected format and writes the result as a DDS file. Unless -g is
// given, the blocks are then decoded again, error metrics are printed and the decoded image is
// written as PNG files.
//
// Returns EXIT_SUCCESS when every output file was written, EXIT_FAILURE otherwise.
int main(int argc, char *argv[])
{
    if (argc < 2)
        return print_usage();

    std::string src_filename;
    std::string src_alpha_filename;
    std::string dds_output_filename;
    std::string png_output_filename;
    std::string png_alpha_output_filename;

    bool no_output_png = false;
    bool out_cur_dir = false;

    int uber_level = 0;
    int max_partitions_to_scan = BC7ENC_MAX_PARTITIONS1;
    bool perceptual = true;
    bool y_flip = false;
    uint32_t bc45_channel0 = 0;
    uint32_t bc45_channel1 = 1;

    rgbcx::bc1_approx_mode bc1_mode = rgbcx::bc1_approx_mode::cBC1Ideal;
    bool use_bc1_3color_mode = true;
    bool use_bc1_3color_mode_for_black = false;
    int bc1_quality_level = 2;

    DXGI_FORMAT dxgi_format = DXGI_FORMAT_BC7_UNORM;
    uint32_t pixel_format_bpp = 8;
    bool force_dx10_dds = false;

    // ---- Command line parsing ----
    for (int i = 1; i < argc; i++)
    {
        const char *pArg = argv[i];

        if (pArg[0] != '-')
        {
            // Positional arguments: source file, then DDS output, then PNG output.
            if (!src_filename.size())
                src_filename = pArg;
            else if (!dds_output_filename.size())
                dds_output_filename = pArg;
            else if (!png_output_filename.size())
                png_output_filename = pArg;
            else
            {
                fprintf(stderr, "Invalid argument: %s\n", pArg);
                return EXIT_FAILURE;
            }
            continue;
        }

        bool arg_ok = true;

        switch (pArg[1])
        {
        case '1':
            dxgi_format = DXGI_FORMAT_BC1_UNORM;
            pixel_format_bpp = 4;
            printf("Compressing to BC1\n");
            break;
        case '3':
            dxgi_format = DXGI_FORMAT_BC3_UNORM;
            pixel_format_bpp = 8;
            printf("Compressing to BC3\n");
            break;
        case '4':
            dxgi_format = DXGI_FORMAT_BC4_UNORM;
            pixel_format_bpp = 4;
            printf("Compressing to BC4\n");
            break;
        case '5':
            dxgi_format = DXGI_FORMAT_BC5_UNORM;
            pixel_format_bpp = 8;
            printf("Compressing to BC5\n");
            break;
        case 'y':
            y_flip = true;
            break;
        case 'a':
            src_alpha_filename = pArg + 2;
            break;
        case 'X':
        {
            // Range-check as a signed value before storing it in the unsigned channel index.
            const int chan = atoi(pArg + 2);
            if ((chan < 0) || (chan > 3))
                arg_ok = false;
            else
                bc45_channel0 = (uint32_t)chan;
            break;
        }
        case 'Y':
        {
            const int chan = atoi(pArg + 2);
            if ((chan < 0) || (chan > 3))
                arg_ok = false;
            else
                bc45_channel1 = (uint32_t)chan;
            break;
        }
        case 'f':
            force_dx10_dds = true;
            break;
        case 'u':
            uber_level = atoi(pArg + 2);
            if ((uber_level < 0) || (uber_level > MAX_UBER_LEVEL))
                arg_ok = false;
            break;
        case 'L':
            bc1_quality_level = atoi(pArg + 2);
            if (((int)bc1_quality_level < (int)rgbcx::MIN_LEVEL) || ((int)bc1_quality_level > (int)(rgbcx::MAX_LEVEL + 1)))
                arg_ok = false;
            break;
        case 'g':
            no_output_png = true;
            break;
        case 'l':
            perceptual = false;
            break;
        case 'p':
            max_partitions_to_scan = atoi(pArg + 2);
            if ((max_partitions_to_scan < 0) || (max_partitions_to_scan > BC7ENC_MAX_PARTITIONS1))
                arg_ok = false;
            break;
        case 'n':
            bc1_mode = rgbcx::bc1_approx_mode::cBC1NVidia;
            break;
        case 'm':
            bc1_mode = rgbcx::bc1_approx_mode::cBC1AMD;
            break;
        case 'r':
            bc1_mode = rgbcx::bc1_approx_mode::cBC1IdealRound4;
            break;
        case 'o':
            out_cur_dir = true;
            break;
        case 'b':
            use_bc1_3color_mode_for_black = true;
            break;
        case 'c':
            use_bc1_3color_mode = false;
            break;
        default:
            arg_ok = false;
            break;
        }

        if (!arg_ok)
        {
            fprintf(stderr, "Invalid argument: %s\n", pArg);
            return EXIT_FAILURE;
        }
    }

    // A 4x4 block holds 16 pixels.
    const uint32_t bytes_per_block = (16 * pixel_format_bpp) / 8;
    assert(bytes_per_block == 8 || bytes_per_block == 16);

    if (!src_filename.size())
    {
        fprintf(stderr, "No source filename specified!\n");
        return EXIT_FAILURE;
    }

    // ---- Derive output file names that were not given explicitly ----
    if (!dds_output_filename.size())
    {
        dds_output_filename = src_filename;
        strip_extension(dds_output_filename);
        if (out_cur_dir)
            strip_path(dds_output_filename);
        dds_output_filename += ".dds";
    }

    if (!png_output_filename.size())
    {
        png_output_filename = src_filename;
        strip_extension(png_output_filename);
        if (out_cur_dir)
            strip_path(png_output_filename);
        png_output_filename += "_unpacked.png";
    }

    png_alpha_output_filename = png_output_filename;
    strip_extension(png_alpha_output_filename);
    png_alpha_output_filename += "_alpha.png";

    // ---- Load the source image(s) ----
    image_u8 source_image;
    if (!load_png(src_filename.c_str(), source_image))
        return EXIT_FAILURE;

    printf("Source image: %s %ux%u\n", src_filename.c_str(), source_image.width(), source_image.height());

    if (src_alpha_filename.size())
    {
        image_u8 source_alpha_image;
        if (!load_png(src_alpha_filename.c_str(), source_alpha_image))
            return EXIT_FAILURE;

        printf("Source alpha image: %s %ux%u\n", src_alpha_filename.c_str(), source_alpha_image.width(), source_alpha_image.height());

        // The green channel of the second image becomes the alpha channel of the source image.
        const uint32_t w = std::min(source_alpha_image.width(), source_image.width());
        const uint32_t h = std::min(source_alpha_image.height(), source_image.height());

        for (uint32_t y = 0; y < h; y++)
            for (uint32_t x = 0; x < w; x++)
                source_image(x, y)[3] = source_alpha_image(x, y)[1];
    }

#if 0
    // HACK HACK
    for (uint32_t y = 0; y < source_image.height(); y++)
        for (uint32_t x = 0; x < source_image.width(); x++)
            source_image(x, y)[3] = 254;
#endif

    const uint32_t orig_width = source_image.width();
    const uint32_t orig_height = source_image.height();

    if (y_flip)
    {
        image_u8 temp;
        temp.init(orig_width, orig_height);

        for (uint32_t y = 0; y < orig_height; y++)
            for (uint32_t x = 0; x < orig_width; x++)
                temp(x, (orig_height - 1) - y) = source_image(x, y);

        temp.swap(source_image);
    }

    // Pad the image so both dimensions are multiples of the 4x4 block size.
    source_image.crop((source_image.width() + 3) & ~3, (source_image.height() + 3) & ~3);

    const uint32_t blocks_x = source_image.width() / 4;
    const uint32_t blocks_y = source_image.height() / 4;

    // Only one of the two arrays is used, depending on bytes_per_block.
    block16_vec packed_image16(blocks_x * blocks_y);
    block8_vec packed_image8(blocks_x * blocks_y);

    // ---- Encoder setup ----
    bc7enc_compress_block_params pack_params;
    bc7enc_compress_block_params_init(&pack_params);
    if (!perceptual)
        bc7enc_compress_block_params_init_linear_weights(&pack_params);
    pack_params.m_max_partitions_mode = max_partitions_to_scan;
    pack_params.m_uber_level = std::min(BC7ENC_MAX_UBER_LEVEL, uber_level);

    if (dxgi_format == DXGI_FORMAT_BC7_UNORM)
    {
        printf("Max mode 1 partitions: %u, uber level: %u, perceptual: %u\n", pack_params.m_max_partitions_mode, pack_params.m_uber_level, perceptual);
    }
    else
    {
        printf("Level: %u, use 3-color mode: %u, use 3-color mode for black: %u, bc1_mode: %u\n",
            bc1_quality_level, use_bc1_3color_mode, use_bc1_3color_mode_for_black, (int)bc1_mode);
    }

    bc7enc_compress_block_init();
    rgbcx::init(bc1_mode);

    // ---- Compression ----
    bool has_alpha = false;

    clock_t start_t = clock();

    // For BC7: counts blocks by the position of the lowest set bit in the block's first byte.
    uint32_t bc7_mode_hist[8];
    memset(bc7_mode_hist, 0, sizeof(bc7_mode_hist));

    for (uint32_t by = 0; by < blocks_y; by++)
    {
        for (uint32_t bx = 0; bx < blocks_x; bx++)
        {
            color_quad_u8 pixels[16];
            source_image.get_block(bx, by, 4, 4, pixels);

            if (!has_alpha)
            {
                for (uint32_t i = 0; i < 16; i++)
                {
                    if (pixels[i].m_c[3] < 255)
                    {
                        has_alpha = true;
                        break;
                    }
                }
            }

            const uint32_t block_index = bx + by * blocks_x;

            switch (dxgi_format)
            {
            case DXGI_FORMAT_BC1_UNORM:
            {
                block8* pBlock = &packed_image8[block_index];
                rgbcx::encode_bc1(bc1_quality_level, pBlock, &pixels[0].m_c[0], use_bc1_3color_mode, use_bc1_3color_mode_for_black);
                break;
            }
            case DXGI_FORMAT_BC3_UNORM:
            {
                block16* pBlock = &packed_image16[block_index];
                rgbcx::encode_bc3(bc1_quality_level, pBlock, &pixels[0].m_c[0]);
                break;
            }
            case DXGI_FORMAT_BC4_UNORM:
            {
                block8* pBlock = &packed_image8[block_index];
                rgbcx::encode_bc4(pBlock, &pixels[0].m_c[bc45_channel0], 4);
                break;
            }
            case DXGI_FORMAT_BC5_UNORM:
            {
                block16* pBlock = &packed_image16[block_index];
                rgbcx::encode_bc5(pBlock, &pixels[0].m_c[0], bc45_channel0, bc45_channel1, 4);
                break;
            }
            case DXGI_FORMAT_BC7_UNORM:
            {
                block16* pBlock = &packed_image16[block_index];
                bc7enc_compress_block(pBlock, pixels, &pack_params);

                const uint32_t mode = ((uint8_t *)pBlock)[0];
                for (uint32_t m = 0; m <= 7; m++)
                {
                    if (mode & (1 << m))
                    {
                        bc7_mode_hist[m]++;
                        break;
                    }
                }
                break;
            }
            default:
            {
                assert(0);
                break;
            }
            }
        }

        // Progress indicator: one dot every 128 block rows.
        if ((by & 127) == 0)
            printf(".");
    }

    clock_t end_t = clock();

    printf("\nTotal time: %f secs\n", (double)(end_t - start_t) / CLOCKS_PER_SEC);

    if (dxgi_format == DXGI_FORMAT_BC7_UNORM)
    {
        printf("BC7 mode histogram:\n");
        for (uint32_t i = 0; i < 8; i++)
            printf("%u: %u\n", i, bc7_mode_hist[i]);
    }

    if (has_alpha)
        printf("Source image had an alpha channel.\n");

    // ---- Write the DDS file ----
    bool failed = false;

    const void *pPacked_blocks = (bytes_per_block == 16) ? (const void *)packed_image16.data() : (const void *)packed_image8.data();
    if (!save_dds(dds_output_filename.c_str(), orig_width, orig_height, pPacked_blocks, pixel_format_bpp, dxgi_format, perceptual, force_dx10_dds))
        failed = true;
    else
        printf("Wrote DDS file %s\n", dds_output_filename.c_str());

    // ---- Decode again, report metrics, write PNGs ----
    if ((!no_output_png) && (png_output_filename.size()))
    {
        image_u8 unpacked_image(source_image.width(), source_image.height());

        bool punchthrough_flag = false;
        for (uint32_t by = 0; by < blocks_y; by++)
        {
            for (uint32_t bx = 0; bx < blocks_x; bx++)
            {
                void* pBlock = (bytes_per_block == 16) ? (void *)&packed_image16[bx + by * blocks_x] : (void*)&packed_image8[bx + by * blocks_x];

                // Start from opaque black so channels the decoder does not write have defined values.
                color_quad_u8 unpacked_pixels[16];
                for (uint32_t i = 0; i < 16; i++)
                    unpacked_pixels[i].set(0, 0, 0, 255);

                switch (dxgi_format)
                {
                case DXGI_FORMAT_BC1_UNORM:
                    rgbcx::unpack_bc1(pBlock, unpacked_pixels, true, bc1_mode);
                    break;
                case DXGI_FORMAT_BC3_UNORM:
                    if (!rgbcx::unpack_bc3(pBlock, unpacked_pixels, bc1_mode))
                        punchthrough_flag = true;
                    break;
                case DXGI_FORMAT_BC4_UNORM:
                    // Decoded into channel 0 regardless of which source channel was encoded.
                    rgbcx::unpack_bc4(pBlock, &unpacked_pixels[0][0], 4);
                    break;
                case DXGI_FORMAT_BC5_UNORM:
                    // Decoded into channels 0 and 1 regardless of which source channels were encoded.
                    rgbcx::unpack_bc5(pBlock, &unpacked_pixels[0][0], 0, 1, 4);
                    break;
                case DXGI_FORMAT_BC7_UNORM:
                    bc7decomp::unpack_bc7((const uint8_t*)pBlock, (bc7decomp::color_rgba*)unpacked_pixels);
                    break;
                default:
                    assert(0);
                    break;
                }

                unpacked_image.set_block(bx, by, 4, 4, unpacked_pixels);
            } // bx
        } // by

        if ((punchthrough_flag) && (dxgi_format == DXGI_FORMAT_BC3_UNORM))
            fprintf(stderr, "Warning: BC3 mode selected, but rgbcx::unpack_bc3() returned one or more blocks using 3-color mode!\n");

        if ((dxgi_format != DXGI_FORMAT_BC4_UNORM) && (dxgi_format != DXGI_FORMAT_BC5_UNORM))
        {
            image_metrics y_metrics;
            y_metrics.compute(source_image, unpacked_image, 0, 0);
            printf("Luma Max error: %3.0f RMSE: %f PSNR %03.02f dB\n", y_metrics.m_max, y_metrics.m_root_mean_squared, y_metrics.m_peak_snr);

            image_metrics rgb_metrics;
            rgb_metrics.compute(source_image, unpacked_image, 0, 3);
            printf("RGB Max error: %3.0f RMSE: %f PSNR %03.02f dB\n", rgb_metrics.m_max, rgb_metrics.m_root_mean_squared, rgb_metrics.m_peak_snr);

            image_metrics rgba_metrics;
            rgba_metrics.compute(source_image, unpacked_image, 0, 4);
            printf("RGBA Max error: %3.0f RMSE: %f PSNR %03.02f dB\n", rgba_metrics.m_max, rgba_metrics.m_root_mean_squared, rgba_metrics.m_peak_snr);
        }

        // Per-channel metrics. Each entry pairs a source channel with the channel of the unpacked
        // image that holds its decoded data. For BC4/BC5 these differ when -X/-Y select
        // non-default channels, because the blocks are always decoded into channels 0 (and 1);
        // comparing the same channel index on both sides would then measure a channel that was
        // never written.
        struct channel_pair
        {
            uint32_t m_source_chan;
            uint32_t m_unpacked_chan;
        };

        channel_pair chan_pairs[4];
        uint32_t num_chan_pairs = 0;

        if (dxgi_format == DXGI_FORMAT_BC4_UNORM)
        {
            chan_pairs[num_chan_pairs].m_source_chan = bc45_channel0;
            chan_pairs[num_chan_pairs].m_unpacked_chan = 0;
            num_chan_pairs++;
        }
        else if (dxgi_format == DXGI_FORMAT_BC5_UNORM)
        {
            chan_pairs[num_chan_pairs].m_source_chan = bc45_channel0;
            chan_pairs[num_chan_pairs].m_unpacked_chan = 0;
            num_chan_pairs++;

            chan_pairs[num_chan_pairs].m_source_chan = bc45_channel1;
            chan_pairs[num_chan_pairs].m_unpacked_chan = 1;
            num_chan_pairs++;
        }
        else
        {
            for (uint32_t chan = 0; chan < 4; chan++)
            {
                chan_pairs[num_chan_pairs].m_source_chan = chan;
                chan_pairs[num_chan_pairs].m_unpacked_chan = chan;
                num_chan_pairs++;
            }
        }

        for (uint32_t p = 0; p < num_chan_pairs; p++)
        {
            const uint32_t source_chan = chan_pairs[p].m_source_chan;
            const uint32_t unpacked_chan = chan_pairs[p].m_unpacked_chan;

            image_metrics c_metrics;
            if (source_chan == unpacked_chan)
            {
                c_metrics.compute(source_image, unpacked_image, source_chan, 1);
            }
            else
            {
                // compute() compares the same channel index in both images, so move the source
                // channel to the index the decoder wrote to in a temporary copy.
                uint32_t order[4] = { 0, 1, 2, 3 };
                order[unpacked_chan] = source_chan;

                image_u8 reference_image(source_image);
                reference_image.swizzle(order[0], order[1], order[2], order[3]);

                c_metrics.compute(reference_image, unpacked_image, unpacked_chan, 1);
            }

            static const char *s_chan_names[4] = { "Red ", "Green", "Blue ", "Alpha" };
            printf("%s Max error: %3.0f RMSE: %f PSNR %03.02f dB\n", s_chan_names[source_chan], c_metrics.m_max, c_metrics.m_root_mean_squared, c_metrics.m_peak_snr);
        }

        if (bc1_mode != rgbcx::bc1_approx_mode::cBC1Ideal)
            printf("Note: BC1/BC3 RGB decoding was done with the specified vendor's BC1 approximations.\n");

        if (!save_png(png_output_filename.c_str(), unpacked_image, false))
            failed = true;
        else
            printf("Wrote PNG file %s\n", png_output_filename.c_str());

        if (png_alpha_output_filename.size())
        {
            // Second PNG: the decoded alpha channel shown as a grey image.
            image_u8 unpacked_image_alpha(unpacked_image);
            for (uint32_t y = 0; y < unpacked_image_alpha.height(); y++)
                for (uint32_t x = 0; x < unpacked_image_alpha.width(); x++)
                    unpacked_image_alpha(x, y).set(unpacked_image_alpha(x, y)[3], 255);

            if (!save_png(png_alpha_output_filename.c_str(), unpacked_image_alpha, false))
                failed = true;
            else
                printf("Wrote PNG file %s\n", png_alpha_output_filename.c_str());
        }
    }

    return failed ? EXIT_FAILURE : EXIT_SUCCESS;
}
Loading…
Cancel
Save