diff --git a/src/nvtt/QuickCompressDXT.cpp b/src/nvtt/QuickCompressDXT.cpp
index e6203d3..a08e0d4 100644
--- a/src/nvtt/QuickCompressDXT.cpp
+++ b/src/nvtt/QuickCompressDXT.cpp
@@ -27,6 +27,7 @@
 #include
 
 #include "QuickCompressDXT.h"
+#include "SingleColorLookup.h"
 
 
 using namespace nv;
@@ -108,7 +109,7 @@ inline static void insetBBox(Vector3 * __restrict maxColor, Vector3 * __restrict
 	*minColor = clamp(*minColor + inset, 0.0f, 255.0f);
 }
 
-inline static uint16 roundAndExpand(Vector3 * v)
+inline static uint16 roundAndExpand(Vector3 * __restrict v)
 {
 	uint r = uint(clamp(v->x() * (31.0f / 255.0f), 0.0f, 31.0f) + 0.5f);
 	uint g = uint(clamp(v->y() * (63.0f / 255.0f), 0.0f, 63.0f) + 0.5f);
@@ -202,11 +203,11 @@ static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock)
 	for( int i = 0; i < 16; ++i )
 	{
 		const uint bits = dxtBlock->indices >> (2 * i);
-		
+
 		float beta = (bits & 1);
 		if (bits & 2) beta = (1 + beta) / 3.0f;
 		float alpha = 1.0f - beta;
-		
+
 		alpha2_sum += alpha * alpha;
 		beta2_sum += beta * beta;
 		alphabeta_sum += alpha * beta;
@@ -288,6 +289,124 @@ static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock)
 }*/
 
 
+// Least-squares refinement of the two endpoints of a DXT5 alpha block whose
+// per-texel indices are already chosen. Every texel is modelled as
+// alpha * alpha0 + beta * alpha1 with beta = 1 - alpha, the weights being
+// derived from the stored index, and the 2x2 normal equations are solved for
+// the endpoint pair that minimises the squared error against the alpha
+// channel of rgba. The block is left unchanged when the system is singular.
+static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block)
+{
+	float alpha2_sum = 0;
+	float beta2_sum = 0;
+	float alphabeta_sum = 0;
+	float alphax_sum = 0;
+	float betax_sum = 0;
+
+	for (int i = 0; i < 16; i++)
+	{
+		const uint idx = block->index(i);
+
+		// Weight of alpha0: indices 0 and 1 select the endpoints themselves,
+		// indices 2..7 step from alpha0 towards alpha1 in sevenths.
+		float alpha;
+		if (idx < 2) alpha = 1.0f - idx;
+		else alpha = (8.0f - idx) / 7.0f;
+
+		const float beta = 1 - alpha;
+		const float x = rgba.color(i).a;
+
+		alpha2_sum += alpha * alpha;
+		beta2_sum += beta * beta;
+		alphabeta_sum += alpha * beta;
+		alphax_sum += alpha * x;
+		betax_sum += beta * x;
+	}
+
+	// The determinant is zero when all 16 texels share the same weight (for
+	// example when every index is equal). Dividing by it would yield inf/NaN,
+	// and converting those to uint is undefined, so keep the block as it is.
+	// Any non-degenerate index set gives a determinant of at least 15/49.
+	const float det = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum;
+	if (det < 0.001f)
+	{
+		return;
+	}
+
+	const float factor = 1.0f / det;
+
+	const float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
+	const float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
+
+	// Clamp to the 8-bit range and round to nearest rather than truncating.
+	uint alpha0 = uint(min(max(a, 0.0f), 255.0f) + 0.5f);
+	uint alpha1 = uint(min(max(b, 0.0f), 255.0f) + 0.5f);
+
+	if (alpha0 < alpha1)
+	{
+		// Keep alpha0 as the larger endpoint and mirror every index so each
+		// texel still decodes to the same interpolated value.
+		swap(alpha0, alpha1);
+
+		for (int i = 0; i < 16; i++)
+		{
+			const uint idx = block->index(i);
+			if (idx < 2) block->setIndex(i, 1 - idx);
+			else block->setIndex(i, 9 - idx);
+		}
+	}
+	else if (alpha0 == alpha1)
+	{
+		// Both endpoints coincide, so every texel can use index 0.
+		for (int i = 0; i < 16; i++)
+		{
+			block->setIndex(i, 0);
+		}
+	}
+
+	block->alpha0 = alpha0;
+	block->alpha1 = alpha1;
+}
+
+
+
+
+// Single color compressor, based on:
+// https://mollyrocket.com/forums/viewtopic.php?t=392
+//
+// Encodes a block in which all 16 texels have the colour c. The OMatch tables
+// give, per 8-bit channel value, the endpoint pair whose one-third
+// interpolant approximates that value best: OMatch5 holds 5-bit codes for red
+// and blue, OMatch6 holds 6-bit codes for green.
+void QuickCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock)
+{
+	const uint r0 = OMatch5[c.r][0];
+	const uint r1 = OMatch5[c.r][1];
+	const uint g0 = OMatch6[c.g][0];
+	const uint g1 = OMatch6[c.g][1];
+	const uint b0 = OMatch5[c.b][0];
+	const uint b1 = OMatch5[c.b][1];
+
+	// DXT1 decodes in four-colour mode only while col0 > col1 as packed 5:6:5
+	// values (red in the top bits). Some table entries hold the larger code in
+	// the second slot, so the packed endpoints are compared and, if needed,
+	// stored swapped together with the mirrored index. Equal endpoints need no
+	// swap: index 2 then decodes to their average, which is the same colour.
+	const uint packed0 = (r0 << 11) | (g0 << 5) | b0;
+	const uint packed1 = (r1 << 11) | (g1 << 5) | b1;
+	const bool flip = packed0 < packed1;
+
+	dxtBlock->col0.r = flip ? r1 : r0;
+	dxtBlock->col0.g = flip ? g1 : g0;
+	dxtBlock->col0.b = flip ? b1 : b0;
+	dxtBlock->col1.r = flip ? r0 : r1;
+	dxtBlock->col1.g = flip ? g0 : g1;
+	dxtBlock->col1.b = flip ? b0 : b1;
+
+	// Index 2 is (2 * col0 + col1) / 3; index 3 is (col0 + 2 * col1) / 3.
+	dxtBlock->indices = flip ? 0xffffffff : 0xaaaaaaaa;
+}
+
 void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
 {
 	// read block
@@ -357,4 +476,56 @@ void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
 }
 
 
+// Stores the alpha of the 16 texels as explicit 4-bit values, keeping the
+// high nibble of each 8-bit alpha.
+// NOTE(review): nothing in this change defines what the uint result means; 0
+// is returned so that the value is at least defined. Consider returning void.
+uint QuickCompress::compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock)
+{
+	dxtBlock->alpha0 = rgba.color(0).a >> 4;
+	dxtBlock->alpha1 = rgba.color(1).a >> 4;
+	dxtBlock->alpha2 = rgba.color(2).a >> 4;
+	dxtBlock->alpha3 = rgba.color(3).a >> 4;
+	dxtBlock->alpha4 = rgba.color(4).a >> 4;
+	dxtBlock->alpha5 = rgba.color(5).a >> 4;
+	dxtBlock->alpha6 = rgba.color(6).a >> 4;
+	dxtBlock->alpha7 = rgba.color(7).a >> 4;
+	dxtBlock->alpha8 = rgba.color(8).a >> 4;
+	dxtBlock->alpha9 = rgba.color(9).a >> 4;
+	dxtBlock->alphaA = rgba.color(10).a >> 4;
+	dxtBlock->alphaB = rgba.color(11).a >> 4;
+	dxtBlock->alphaC = rgba.color(12).a >> 4;
+	dxtBlock->alphaD = rgba.color(13).a >> 4;
+	dxtBlock->alphaE = rgba.color(14).a >> 4;
+	dxtBlock->alphaF = rgba.color(15).a >> 4;
+
+	return 0;
+}
+
+// Compresses the colour part with compressDXT1 and the alpha part with
+// compressDXT3A. Always returns 0 (see the note on compressDXT3A).
+uint QuickCompress::compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock)
+{
+	compressDXT1(rgba, &dxtBlock->color);
+	compressDXT3A(rgba, &dxtBlock->alpha);
+
+	return 0;
+}
+
+// Not implemented yet: the alpha block is left untouched. Always returns 0.
+uint QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock)
+{
+	// @@ TODO
+	return 0;
+}
+
+// Compresses the colour part with compressDXT1; the alpha part goes through
+// compressDXT5A, which is still a stub. Always returns 0.
+uint QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock)
+{
+	compressDXT1(rgba, &dxtBlock->color);
+	compressDXT5A(rgba, &dxtBlock->alpha);
+
+	return 0;
+}
 
diff --git a/src/nvtt/QuickCompressDXT.h b/src/nvtt/QuickCompressDXT.h
index fcbaf71..1d5a544 100644
--- a/src/nvtt/QuickCompressDXT.h
+++ b/src/nvtt/QuickCompressDXT.h
@@ -30,11 +30,22 @@ namespace nv
 {
 	struct ColorBlock;
 	struct BlockDXT1;
+	struct BlockDXT3;
+	struct BlockDXT5;
+	struct AlphaBlockDXT3;
+	struct AlphaBlockDXT5;
 
 	namespace QuickCompress
 	{
+		void compressDXT1(const Color32 rgba, BlockDXT1 * dxtBlock);
 		void compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
 		void compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
+
+		uint compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock);
+		uint compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock);
+
+		uint compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock);
+		uint compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock);
 	}
 } // nv namespace
 
diff --git a/src/nvtt/SingleColorLookup.h b/src/nvtt/SingleColorLookup.h
new file mode 100644
index 0000000..1eab018
--- /dev/null
+++ b/src/nvtt/SingleColorLookup.h
@@ -0,0 +1,575 @@
+
+/*
+static int Mul8Bit(int a, int b)
+{
+	int t = a * b + 128;
+	return (t + (t >> 8)) >> 8;
+}
+
+static void PrepareOptTable(uint8 * Table, const uint8 * expand, int size)
+{
+	for (int i = 0; i < 256; i++)
+	{
+		int bestErr = 256;
+
+		for (int min = 0; min < size; min++)
+		{
+			for (int max = 0; max < size; max++)
+			{
+				int mine = expand[min];
+				int maxe = expand[max];
+				int err = abs(maxe + Mul8Bit(mine-maxe, 0x55) - i);
+
+				if (err < bestErr)
+				{
+					Table[i*2+0] = max;
+					Table[i*2+1] = min;
+					bestErr = err;
+				}
+			}
+		}
+	}
+}
+
+
+void initTables()
+{
+	uint8 Expand5[32];
+	uint8 Expand6[64];
+
+	for(sInt i=0;i<32;i++)
+		Expand5[i] = (i<<3)|(i>>2);
+
+	for(sInt i=0;i<64;i++)
+		Expand6[i] = (i<<2)|(i>>4);
+
+	PrepareOptTable(OMatch5, Expand5, 32)
+	PrepareOptTable(OMatch6, Expand6, 64)
+};
+*/
+
+// Single-colour endpoint lookup for 5-bit channels. OMatch5[v] holds the two
+// endpoint codes (first slot, second slot) chosen for the 8-bit value v; the
+// table appears to be the output of PrepareOptTable above with Expand5, which
+// minimises |e0 + (e1 - e0) / 3 - v| over the codes expanded to 8 bits.
+const static uint8 OMatch5[256][2] =
+{
+	{0x00, 0x00},
+	{0x00, 0x00},
+	{0x00, 0x01},
+	{0x00, 0x01},
+	{0x01, 0x00},
+	{0x01, 0x00},
+	{0x01, 0x00},
+	{0x01, 0x01},
+	{0x01, 0x01},
+	{0x01, 0x01},
+	{0x02, 0x00},
+	{0x02, 0x00},
+	{0x02, 0x00},
+	{0x02, 0x01},
+	{0x00, 0x05},
+	{0x03, 0x00},
+	{0x03, 0x00},
+	{0x03, 0x00},
+	{0x03, 0x01},
+	{0x03, 0x01},
+	{0x03, 0x01},
+	{0x03, 0x02},
+	{0x04, 0x00},
+	{0x04, 0x00},
+	{0x03, 0x03},
+	{0x04, 0x01},
+	{0x05, 0x00},
+	{0x05, 0x00},
+	{0x05, 0x00},
+	{0x05, 0x01},
+	{0x05, 0x01},
+	{0x05, 0x01},
+	{0x03, 0x06},
+	{0x06, 0x00},
+	{0x06, 0x00},
+	{0x06, 0x01},
+	{0x04, 0x05},
+	{0x07, 0x00},
+	{0x07, 0x00},
+	{0x07, 0x00},
+	{0x07, 0x01},
+	{0x07, 0x01},
+	{0x07, 0x01},
+	{0x07, 0x02},
+	{0x08, 0x00},
+	{0x08, 0x00},
+	{0x07, 0x03},
+	{0x08, 0x01},
+	{0x09, 0x00},
+	{0x09, 0x00},
+	{0x09, 0x00},
+	{0x09, 0x01},
+	{0x09, 0x01},
+	{0x09, 0x01},
+	{0x07, 0x06},
+	{0x0A, 0x00},
+	{0x0A, 0x00},
+	{0x0A, 0x01},
+	{0x08, 0x05},
+	{0x0B, 0x00},
+	{0x0B, 0x00},
+	{0x0B, 0x00},
+	{0x0B, 0x01},
+	{0x0B, 0x01},
+	{0x0B, 0x01},
+	{0x0B, 0x02},
+	{0x0C, 0x00},
+	{0x0C, 0x00},
+	{0x0B, 0x03},
+	{0x0C, 0x01},
+	{0x0D, 0x00},
+	{0x0D, 0x00},
+	{0x0D, 0x00},
+	{0x0D, 0x01},
+	{0x0D, 0x01},
+	{0x0D, 0x01},
+	{0x0B, 0x06},
+	{0x0E, 0x00},
+	{0x0E, 0x00},
+	{0x0E, 0x01},
+	{0x0C, 0x05},
+	{0x0F, 0x00},
+	{0x0F, 0x00},
+	{0x0F, 0x00},
+	{0x0F, 0x01},
+	{0x0F, 0x01},
+	{0x0F, 0x01},
+	{0x0F, 0x02},
+	{0x10, 0x00},
+	{0x10, 0x00},
+	{0x0F, 0x03},
+	{0x10, 0x01},
+	{0x11, 0x00},
+	{0x11, 0x00},
+	{0x11, 0x00},
+	{0x11, 0x01},
+	{0x11, 0x01},
+	{0x11, 0x01},
+	{0x0F, 0x06},
+	{0x12, 0x00},
+	{0x12, 0x00},
+	{0x12, 0x01},
+	{0x10, 0x05},
+	{0x13, 0x00},
+	{0x13, 0x00},
+	{0x13, 0x00},
+	{0x13, 0x01},
+	{0x13, 0x01},
+	{0x13, 0x01},
+	{0x13, 0x02},
+	{0x14, 0x00},
+	{0x14, 0x00},
+	{0x13, 0x03},
+	{0x14, 0x01},
+	{0x15, 0x00},
+	{0x15, 0x00},
+	{0x15, 0x00},
+	{0x15, 0x01},
+	{0x15, 0x01},
+	{0x15, 0x01},
+	{0x13, 0x06},
+	{0x16, 0x00},
+	{0x16, 0x00},
+	{0x16, 0x01},
+	{0x14, 0x05},
+	{0x17, 0x00},
+	{0x17, 0x00},
+	{0x17, 0x00},
+	{0x17, 0x01},
+	{0x17, 0x01},
+	{0x17, 0x01},
+	{0x17, 0x02},
+	{0x18, 0x00},
+	{0x18, 0x00},
+	{0x17, 0x03},
+	{0x18, 0x01},
+	{0x19, 0x00},
+	{0x19, 0x00},
+	{0x19, 0x00},
+	{0x19, 0x01},
+	{0x19, 0x01},
+	{0x19, 0x01},
+	{0x17, 0x06},
+	{0x1A, 0x00},
+	{0x1A, 0x00},
+	{0x1A, 0x01},
+	{0x18, 0x05},
+	{0x1B, 0x00},
+	{0x1B, 0x00},
+	{0x1B, 0x00},
+	{0x1B, 0x01},
+	{0x1B, 0x01},
+	{0x1B, 0x01},
+	{0x1B, 0x02},
+	{0x1C, 0x00},
+	{0x1C, 0x00},
+	{0x1B, 0x03},
+	{0x1C, 0x01},
+	{0x1D, 0x00},
+	{0x1D, 0x00},
+	{0x1D, 0x00},
+	{0x1D, 0x01},
+	{0x1D, 0x01},
+	{0x1D, 0x01},
+	{0x1B, 0x06},
+	{0x1E, 0x00},
+	{0x1E, 0x00},
+	{0x1E, 0x01},
+	{0x1C, 0x05},
+	{0x1F, 0x00},
+	{0x1F, 0x00},
+	{0x1F, 0x00},
+	{0x1F, 0x01},
+	{0x1F, 0x01},
+	{0x1F, 0x01},
+	{0x1F, 0x02},
+	{0x1E, 0x04},
+	{0x1F, 0x03},
+	{0x1F, 0x03},
+	{0x1C, 0x09},
+	{0x1F, 0x04},
+	{0x1F, 0x04},
+	{0x1F, 0x04},
+	{0x1F, 0x05},
+	{0x1F, 0x05},
+	{0x1F, 0x05},
+	{0x1F, 0x06},
+	{0x1E, 0x08},
+	{0x1F, 0x07},
+	{0x1F, 0x07},
+	{0x1C, 0x0D},
+	{0x1F, 0x08},
+	{0x1F, 0x08},
+	{0x1F, 0x08},
+	{0x1F, 0x09},
+	{0x1F, 0x09},
+	{0x1F, 0x09},
+	{0x1F, 0x0A},
+	{0x1E, 0x0C},
+	{0x1F, 0x0B},
+	{0x1F, 0x0B},
+	{0x1C, 0x11},
+	{0x1F, 0x0C},
+	{0x1F, 0x0C},
+	{0x1F, 0x0C},
+	{0x1F, 0x0D},
+	{0x1F, 0x0D},
+	{0x1F, 0x0D},
+	{0x1F, 0x0E},
+	{0x1E, 0x10},
+	{0x1F, 0x0F},
+	{0x1F, 0x0F},
+	{0x1C, 0x15},
+	{0x1F, 0x10},
+	{0x1F, 0x10},
+	{0x1F, 0x10},
+	{0x1F, 0x11},
+	{0x1F, 0x11},
+	{0x1F, 0x11},
+	{0x1F, 0x12},
+	{0x1E, 0x14},
+	{0x1F, 0x13},
+	{0x1F, 0x13},
+	{0x1C, 0x19},
+	{0x1F, 0x14},
+	{0x1F, 0x14},
+	{0x1F, 0x14},
+	{0x1F, 0x15},
+	{0x1F, 0x15},
+	{0x1F, 0x15},
+	{0x1F, 0x16},
+	{0x1E, 0x18},
+	{0x1F, 0x17},
+	{0x1F, 0x17},
+	{0x1C, 0x1D},
+	{0x1F, 0x18},
+	{0x1F, 0x18},
+	{0x1F, 0x18},
+	{0x1F, 0x19},
+	{0x1F, 0x19},
+	{0x1F, 0x19},
+	{0x1F, 0x1A},
+	{0x1E, 0x1C},
+	{0x1F, 0x1B},
+	{0x1F, 0x1B},
+	{0x1F, 0x1B},
+	{0x1F, 0x1C},
+	{0x1F, 0x1C},
+	{0x1F, 0x1C},
+	{0x1F, 0x1D},
+	{0x1F, 0x1D},
+	{0x1F, 0x1D},
+	{0x1F, 0x1E},
+	{0x1F, 0x1E},
+	{0x1F, 0x1F},
+	{0x1F, 0x1F},
+};
+
+// Same as OMatch5, for 6-bit channels (PrepareOptTable with Expand6).
+const static uint8 OMatch6[256][2] =
+{
+	{0x00, 0x00},
+	{0x00, 0x01},
+	{0x01, 0x00},
+	{0x01, 0x00},
+	{0x01, 0x01},
+	{0x02, 0x00},
+	{0x02, 0x00},
+	{0x02, 0x01},
+	{0x03, 0x00},
+	{0x03, 0x01},
+	{0x04, 0x00},
+	{0x04, 0x00},
+	{0x04, 0x01},
+	{0x05, 0x00},
+	{0x05, 0x00},
+	{0x05, 0x01},
+	{0x06, 0x00},
+	{0x06, 0x01},
+	{0x07, 0x00},
+	{0x07, 0x00},
+	{0x07, 0x01},
+	{0x08, 0x00},
+	{0x00, 0x10},
+	{0x08, 0x01},
+	{0x09, 0x00},
+	{0x09, 0x01},
+	{0x01, 0x11},
+	{0x0A, 0x00},
+	{0x0A, 0x01},
+	{0x0B, 0x00},
+	{0x03, 0x10},
+	{0x0B, 0x01},
+	{0x0C, 0x00},
+	{0x0C, 0x01},
+	{0x04, 0x11},
+	{0x0D, 0x00},
+	{0x0D, 0x01},
+	{0x0E, 0x00},
+	{0x06, 0x10},
+	{0x0E, 0x01},
+	{0x0F, 0x00},
+	{0x0F, 0x01},
+	{0x07, 0x11},
+	{0x10, 0x00},
+	{0x0F, 0x03},
+	{0x10, 0x01},
+	{0x11, 0x00},
+	{0x11, 0x01},
+	{0x0F, 0x06},
+	{0x12, 0x00},
+	{0x12, 0x01},
+	{0x13, 0x00},
+	{0x0F, 0x09},
+	{0x13, 0x01},
+	{0x14, 0x00},
+	{0x14, 0x01},
+	{0x0F, 0x0C},
+	{0x15, 0x00},
+	{0x15, 0x01},
+	{0x16, 0x00},
+	{0x0F, 0x0F},
+	{0x16, 0x01},
+	{0x17, 0x00},
+	{0x17, 0x01},
+	{0x0F, 0x12},
+	{0x18, 0x00},
+	{0x18, 0x01},
+	{0x19, 0x00},
+	{0x11, 0x10},
+	{0x19, 0x01},
+	{0x1A, 0x00},
+	{0x1A, 0x01},
+	{0x12, 0x11},
+	{0x1B, 0x00},
+	{0x1B, 0x01},
+	{0x1C, 0x00},
+	{0x14, 0x10},
+	{0x1C, 0x01},
+	{0x1D, 0x00},
+	{0x1D, 0x01},
+	{0x15, 0x11},
+	{0x1E, 0x00},
+	{0x1E, 0x01},
+	{0x1F, 0x00},
+	{0x17, 0x10},
+	{0x1F, 0x01},
+	{0x1F, 0x02},
+	{0x20, 0x00},
+	{0x20, 0x01},
+	{0x21, 0x00},
+	{0x1F, 0x05},
+	{0x21, 0x01},
+	{0x22, 0x00},
+	{0x22, 0x01},
+	{0x1F, 0x08},
+	{0x23, 0x00},
+	{0x23, 0x01},
+	{0x24, 0x00},
+	{0x1F, 0x0B},
+	{0x24, 0x01},
+	{0x25, 0x00},
+	{0x25, 0x01},
+	{0x1F, 0x0E},
+	{0x26, 0x00},
+	{0x26, 0x01},
+	{0x27, 0x00},
+	{0x1F, 0x11},
+	{0x27, 0x01},
+	{0x28, 0x00},
+	{0x28, 0x01},
+	{0x20, 0x11},
+	{0x29, 0x00},
+	{0x29, 0x01},
+	{0x2A, 0x00},
+	{0x22, 0x10},
+	{0x2A, 0x01},
+	{0x2B, 0x00},
+	{0x2B, 0x01},
+	{0x23, 0x11},
+	{0x2C, 0x00},
+	{0x2C, 0x01},
+	{0x2D, 0x00},
+	{0x25, 0x10},
+	{0x2D, 0x01},
+	{0x2E, 0x00},
+	{0x2E, 0x01},
+	{0x26, 0x11},
+	{0x2F, 0x00},
+	{0x2F, 0x01},
+	{0x2F, 0x02},
+	{0x30, 0x00},
+	{0x30, 0x01},
+	{0x2F, 0x04},
+	{0x31, 0x00},
+	{0x31, 0x01},
+	{0x32, 0x00},
+	{0x2F, 0x07},
+	{0x32, 0x01},
+	{0x33, 0x00},
+	{0x33, 0x01},
+	{0x2F, 0x0A},
+	{0x34, 0x00},
+	{0x34, 0x01},
+	{0x35, 0x00},
+	{0x2F, 0x0D},
+	{0x35, 0x01},
+	{0x36, 0x00},
+	{0x36, 0x01},
+	{0x2F, 0x10},
+	{0x37, 0x00},
+	{0x37, 0x01},
+	{0x38, 0x00},
+	{0x30, 0x10},
+	{0x38, 0x01},
+	{0x39, 0x00},
+	{0x39, 0x01},
+	{0x31, 0x11},
+	{0x3A, 0x00},
+	{0x3A, 0x01},
+	{0x3B, 0x00},
+	{0x33, 0x10},
+	{0x3B, 0x01},
+	{0x3C, 0x00},
+	{0x3C, 0x01},
+	{0x34, 0x11},
+	{0x3D, 0x00},
+	{0x3D, 0x01},
+	{0x3E, 0x00},
+	{0x36, 0x10},
+	{0x3E, 0x01},
+	{0x3F, 0x00},
+	{0x3F, 0x01},
+	{0x37, 0x11},
+	{0x3F, 0x02},
+	{0x3F, 0x03},
+	{0x3F, 0x04},
+	{0x39, 0x10},
+	{0x3F, 0x05},
+	{0x3F, 0x06},
+	{0x3F, 0x07},
+	{0x3A, 0x11},
+	{0x3F, 0x08},
+	{0x3F, 0x09},
+	{0x3F, 0x0A},
+	{0x3C, 0x10},
+	{0x3F, 0x0B},
+	{0x3F, 0x0C},
+	{0x3F, 0x0D},
+	{0x3D, 0x11},
+	{0x3F, 0x0E},
+	{0x3F, 0x0F},
+	{0x36, 0x21},
+	{0x3F, 0x10},
+	{0x3F, 0x11},
+	{0x3F, 0x12},
+	{0x38, 0x20},
+	{0x3F, 0x13},
+	{0x3F, 0x14},
+	{0x3F, 0x15},
+	{0x39, 0x21},
+	{0x3F, 0x16},
+	{0x3F, 0x17},
+	{0x3F, 0x18},
+	{0x3B, 0x20},
+	{0x3F, 0x19},
+	{0x3F, 0x1A},
+	{0x3F, 0x1B},
+	{0x3C, 0x21},
+	{0x3F, 0x1C},
+	{0x3F, 0x1D},
+	{0x3F, 0x1E},
+	{0x3E, 0x20},
+	{0x3F, 0x1F},
+	{0x3F, 0x20},
+	{0x37, 0x30},
+	{0x3F, 0x21},
+	{0x3F, 0x22},
+	{0x3F, 0x23},
+	{0x38, 0x31},
+	{0x3F, 0x24},
+	{0x3F, 0x25},
+	{0x3F, 0x26},
+	{0x3A, 0x30},
+	{0x3F, 0x27},
+	{0x3F, 0x28},
+	{0x3F, 0x29},
+	{0x3B, 0x31},
+	{0x3F, 0x2A},
+	{0x3F, 0x2B},
+	{0x3F, 0x2C},
+	{0x3D, 0x30},
+	{0x3F, 0x2D},
+	{0x3F, 0x2E},
+	{0x3F, 0x2F},
+	{0x3E, 0x31},
+	{0x3F, 0x30},
+	{0x3F, 0x31},
+	{0x3F, 0x31},
+	{0x3F, 0x32},
+	{0x3F, 0x33},
+	{0x3F, 0x34},
+	{0x3F, 0x34},
+	{0x3F, 0x35},
+	{0x3F, 0x36},
+	{0x3F, 0x37},
+	{0x3F, 0x37},
+	{0x3F, 0x38},
+	{0x3F, 0x39},
+	{0x3F, 0x3A},
+	{0x3F, 0x3A},
+	{0x3F, 0x3B},
+	{0x3F, 0x3C},
+	{0x3F, 0x3D},
+	{0x3F, 0x3D},
+	{0x3F, 0x3E},
+	{0x3F, 0x3F},
+};
+