diff --git a/src/nvtt/CompressDXT.cpp b/src/nvtt/CompressDXT.cpp index 06af8c2..355ac7e 100644 --- a/src/nvtt/CompressDXT.cpp +++ b/src/nvtt/CompressDXT.cpp @@ -97,7 +97,6 @@ void nv::fastCompressDXT1a(const Image * image, const OutputOptions::Private & o for (uint y = 0; y < h; y += 4) { for (uint x = 0; x < w; x += 4) { rgba.init(image, x, y); - QuickCompress::compressDXT1a(rgba, &block); if (outputOptions.outputHandler != NULL) { @@ -119,7 +118,7 @@ void nv::fastCompressDXT3(const Image * image, const nvtt::OutputOptions::Privat for (uint y = 0; y < h; y += 4) { for (uint x = 0; x < w; x += 4) { rgba.init(image, x, y); - compressBlock_BoundsRange(rgba, &block); + QuickCompress::compressDXT3(rgba, &block); if (outputOptions.outputHandler != NULL) { outputOptions.outputHandler->writeData(&block, sizeof(block)); @@ -140,7 +139,8 @@ void nv::fastCompressDXT5(const Image * image, const nvtt::OutputOptions::Privat for (uint y = 0; y < h; y += 4) { for (uint x = 0; x < w; x += 4) { rgba.init(image, x, y); - compressBlock_BoundsRange(rgba, &block); + //QuickCompress::compressDXT5(rgba, &block); // @@ Use fast version!! + nv::compressBlock_BoundsRange(rgba, &block); if (outputOptions.outputHandler != NULL) { outputOptions.outputHandler->writeData(&block, sizeof(block)); @@ -164,8 +164,9 @@ void nv::fastCompressDXT5n(const Image * image, const nvtt::OutputOptions::Priva // copy X coordinate to alpha channel and Y coordinate to green channel. rgba.swizzleDXT5n(); - - compressBlock_BoundsRange(rgba, &block); + + //QuickCompress::compressDXT5(rgba, &block); // @@ Use fast version!! + nv::compressBlock_BoundsRange(rgba, &block); if (outputOptions.outputHandler != NULL) { outputOptions.outputHandler->writeData(&block, sizeof(block)); @@ -286,7 +287,7 @@ void nv::compressDXT3(const Image * image, const OutputOptions::Private & output rgba.init(image, x, y); // Compress explicit alpha. - compressBlock(rgba, &block.alpha); + QuickCompress::compressDXT3A(rgba, &block.alpha); // Compress color. squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha); @@ -317,14 +318,13 @@ void nv::compressDXT5(const Image * image, const OutputOptions::Private & output rgba.init(image, x, y); // Compress alpha. - uint error; if (compressionOptions.quality == Quality_Highest) { - error = compressBlock_BruteForce(rgba, &block.alpha); + compressBlock_BruteForce(rgba, &block.alpha); } else { - error = compressBlock_Iterative(rgba, &block.alpha); + QuickCompress::compressDXT5A(rgba, &block.alpha); } // Compress color. @@ -359,10 +359,13 @@ void nv::compressDXT5n(const Image * image, const OutputOptions::Private & outpu rgba.swizzleDXT5n(); // Compress X. - uint error = compressBlock_Iterative(rgba, &block.alpha); if (compressionOptions.quality == Quality_Highest) { - error = compressBlock_BruteForce(rgba, &block.alpha); + compressBlock_BruteForce(rgba, &block.alpha); + } + else + { + QuickCompress::compressDXT5A(rgba, &block.alpha); } // Compress Y. @@ -384,23 +387,19 @@ void nv::compressBC4(const Image * image, const nvtt::OutputOptions::Private & o ColorBlock rgba; AlphaBlockDXT5 block; - uint totalError = 0; - for (uint y = 0; y < h; y += 4) { for (uint x = 0; x < w; x += 4) { rgba.init(image, x, y); - //error = compressBlock_BoundsRange(rgba, &block); - uint error = compressBlock_Iterative(rgba, &block); - if (compressionOptions.quality == Quality_Highest) { - // Try brute force algorithm. - error = compressBlock_BruteForce(rgba, &block); + compressBlock_BruteForce(rgba, &block); + } + else + { + QuickCompress::compressDXT5A(rgba, &block); } - - totalError += error; if (outputOptions.outputHandler != NULL) { outputOptions.outputHandler->writeData(&block, sizeof(block)); @@ -429,18 +428,15 @@ void nv::compressBC5(const Image * image, const nvtt::OutputOptions::Private & o ycolor.init(image, x, y); ycolor.splatY(); - // @@ Compute normal error, instead of separate xy errors. - uint xerror, yerror; - if (compressionOptions.quality == Quality_Highest) { - xerror = compressBlock_BruteForce(xcolor, &block.x); - yerror = compressBlock_BruteForce(ycolor, &block.y); + compressBlock_BruteForce(xcolor, &block.x); + compressBlock_BruteForce(ycolor, &block.y); } else { - xerror = compressBlock_Iterative(xcolor, &block.x); - yerror = compressBlock_Iterative(ycolor, &block.y); + QuickCompress::compressDXT5A(xcolor, &block.x); + QuickCompress::compressDXT5A(ycolor, &block.y); } if (outputOptions.outputHandler != NULL) { diff --git a/src/nvtt/FastCompressDXT.cpp b/src/nvtt/FastCompressDXT.cpp index 150290e..e27dd68 100644 --- a/src/nvtt/FastCompressDXT.cpp +++ b/src/nvtt/FastCompressDXT.cpp @@ -163,32 +163,6 @@ inline void vectorEnd() #endif -inline static uint paletteError(const ColorBlock & rgba, Color32 palette[4]) -{ - uint error = 0; - - const VectorColor vcolor0 = loadColor(palette[0]); - const VectorColor vcolor1 = loadColor(palette[1]); - const VectorColor vcolor2 = loadColor(palette[2]); - const VectorColor vcolor3 = loadColor(palette[3]); - - for(uint i = 0; i < 16; i++) { - const VectorColor vcolor = loadColor(rgba.color(i)); - - uint d0 = colorDistance(vcolor, vcolor0); - uint d1 = colorDistance(vcolor, vcolor1); - uint d2 = colorDistance(vcolor, vcolor2); - uint d3 = colorDistance(vcolor, vcolor3); - - error += min(min(d0, d1), min(d2, d3)); - } - - vectorEnd(); - return error; -} - - - inline static uint computeIndices(const ColorBlock & rgba, const Color32 palette[4]) { const VectorColor vcolor0 = loadColor(palette[0]); @@ -222,91 +196,6 @@ inline static uint computeIndices(const ColorBlock & rgba, const Color32 palette return indices; } -inline static uint computeIndicesAlpha(const ColorBlock & rgba, const Color32 palette[4]) -{ - const VectorColor vcolor0 = loadColor(palette[0]); - const VectorColor vcolor1 = loadColor(palette[1]); - const VectorColor vcolor2 = loadColor(palette[2]); - const VectorColor vcolor3 = loadColor(palette[3]); - - uint indices = 0; - for(int i = 0; i < 16; i++) { - const VectorColor vcolor = premultiplyAlpha(loadColor(rgba.color(i))); - - uint d0 = colorDistance(vcolor0, vcolor); - uint d1 = colorDistance(vcolor1, vcolor); - uint d2 = colorDistance(vcolor2, vcolor); - uint d3 = colorDistance(vcolor3, vcolor); - - uint b0 = d0 > d3; - uint b1 = d1 > d2; - uint b2 = d0 > d2; - uint b3 = d1 > d3; - uint b4 = d2 > d3; - - uint x0 = b1 & b2; - uint x1 = b0 & b3; - uint x2 = b0 & b4; - - indices |= (x2 | ((x0 | x1) << 1)) << (2 * i); - } - - vectorEnd(); - return indices; -} - - -inline static Color16 saturate16(int r, int g, int b) -{ - Color16 c; - c.r = clamp(0, 31, r); - c.g = clamp(0, 63, g); - c.b = clamp(0, 31, b); - return c; -} - - -// Compressor that uses the luminance axis. -void nv::compressBlock_LuminanceAxis(const ColorBlock & rgba, BlockDXT1 * block) -{ - Color32 c0, c1; - rgba.luminanceRange(&c0, &c1); - - block->col0 = toColor16(c0); - block->col1 = toColor16(c1); - - // Use 4 color mode only. - if (block->col0.u < block->col1.u) { - swap(block->col0.u, block->col1.u); - } - - Color32 palette[4]; - block->evaluatePalette4(palette); - - block->indices = computeIndices(rgba, palette); -} - - -// Compressor that uses diameter axis. -void nv::compressBlock_DiameterAxis(const ColorBlock & rgba, BlockDXT1 * block) -{ - Color32 c0, c1; - rgba.diameterRange(&c0, &c1); - - block->col0 = toColor16(c0); - block->col1 = toColor16(c1); - - // Use 4 color mode only. - if (block->col0.u < block->col1.u) { - swap(block->col0.u, block->col1.u); - } - - Color32 palette[4]; - block->evaluatePalette4(palette); - - block->indices = computeIndices(rgba, palette); -} - // Compressor that uses bounding box. void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block) @@ -330,513 +219,12 @@ void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block) block->indices = computeIndices(rgba, palette); } -// Compressor that uses bounding box and takes alpha into account. -void nv::compressBlock_BoundsRangeAlpha(const ColorBlock & rgba, BlockDXT1 * block) -{ - Color32 c0, c1; - rgba.boundsRange(&c1, &c0); - - if (rgba.hasAlpha()) - { - block->col0 = toColor16(c1); - block->col1 = toColor16(c0); - } - else - { - block->col0 = toColor16(c0); - block->col1 = toColor16(c1); - } - - Color32 palette[4]; - block->evaluatePalette(palette); - - block->indices = computeIndicesAlpha(rgba, palette); -} -// Compressor that tests all input color pairs. -void nv::compressBlock_TestAllPairs(const ColorBlock & rgba, BlockDXT1 * block) -{ - uint best_error = uint(-1); - Color16 best_col0, best_col1; - - Color32 palette[4]; - - // Test all color pairs. - for(uint i = 0; i < 16; i++) { - block->col0 = toColor16(rgba.color(i)); - - for(uint ii = 0; ii < 16; ii++) { - if( i != ii ) { - block->col1 = toColor16(rgba.color(ii)); - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - } - } - } - } - - block->col0 = best_col0; - block->col1 = best_col1; - block->evaluatePalette(palette); - - block->indices = computeIndices(rgba, palette); -} - - -// Improve palette iteratively using alternate 3d search as suggested by Dave Moore. -void nv::refineSolution_3dSearch(const ColorBlock & rgba, BlockDXT1 * block) -{ - Color32 palette[4]; - block->evaluatePalette(palette); - - uint best_error = paletteError(rgba, palette); - Color16 best_col0 = block->col0; - Color16 best_col1 = block->col1; - - const int W = 2; - - while(true) { - bool changed = false; - - const int r0 = best_col0.r; - const int g0 = best_col0.g; - const int b0 = best_col0.b; - - for(int z = -W; z <= W; z++) { - for(int y = -W; y <= W; y++) { - for(int x = -W; x <= W; x++) { - block->col0 = saturate16(r0 + x, g0 + y, b0 + z); - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - changed = true; - } - } - } - } - - const int r1 = best_col1.r; - const int g1 = best_col1.g; - const int b1 = best_col1.b; - - for(int z = -W; z <= W; z++) { - for(int y = -W; y <= W; y++) { - for(int x = -W; x <= W; x++) { - block->col1 = saturate16(r1 + x, g1 + y, b1 + z); - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - changed = true; - } - } - } - } - - if( !changed ) { - // Stop at local minima. - break; - } - } - - block->col0 = best_col0; - block->col1 = best_col1; - block->evaluatePalette(palette); - - block->indices = computeIndices(rgba, palette); -} - - -// Improve the palette iteratively using 6d search as suggested by Charles Bloom. -void nv::refineSolution_6dSearch(const ColorBlock & rgba, BlockDXT1 * block) -{ - Color32 palette[4]; - block->evaluatePalette(palette); - - uint best_error = paletteError(rgba, palette); - Color16 best_col0 = block->col0; - Color16 best_col1 = block->col1; - - const int W = 1; - - while(true) { - bool changed = false; - const int r0 = best_col0.r; - const int g0 = best_col0.g; - const int b0 = best_col0.b; - const int r1 = best_col1.r; - const int g1 = best_col1.g; - const int b1 = best_col1.b; - - for(int z0 = -W; z0 <= W; z0++) { - for(int y0 = -W; y0 <= W; y0++) { - for(int x0 = -W; x0 <= W; x0++) { - for(int z1 = -W; z1 <= W; z1++) { - for(int y1 = -W; y1 <= W; y1++) { - for(int x1 = -W; x1 <= W; x1++) { - - block->col0 = saturate16(r0 + x0, g0 + y0, b0 + z0); - block->col1 = saturate16(r1 + x1, g1 + y1, b1 + z1); - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - changed = true; - } - } - } - } - } - } - } - - if( !changed ) { - // Stop at local minima. - break; - } - } - - block->col0 = best_col0; - block->col1 = best_col1; - block->evaluatePalette(palette); - - block->indices = computeIndices(rgba, palette); -} -// Improve the palette iteratively using alternate 1d search as suggested by Walt Donovan. -void nv::refineSolution_1dSearch(const ColorBlock & rgba, BlockDXT1 * block) -{ - Color32 palette[4]; - block->evaluatePalette(palette); - - uint best_error = paletteError(rgba, palette); - Color16 best_col0 = block->col0; - Color16 best_col1 = block->col1; - - const int W = 4; - - while(true) { - bool changed = false; - - const int r0 = best_col0.r; - const int g0 = best_col0.g; - const int b0 = best_col0.b; - - for(int z = -W; z <= W; z++) { - block->col0.b = clamp(b0 + z, 0, 31); - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - changed = true; - } - } - - for(int y = -W; y <= W; y++) { - block->col0.g = clamp(g0 + y, 0, 63); - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - changed = true; - } - } - - for(int x = -W; x <= W; x++) { - block->col0.r = clamp(r0 + x, 0, 31); - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - changed = true; - } - } - - - const int r1 = best_col1.r; - const int g1 = best_col1.g; - const int b1 = best_col1.b; - - for(int z = -W; z <= W; z++) { - block->col1.b = clamp(b1 + z, 0, 31); - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - changed = true; - } - } - - for(int y = -W; y <= W; y++) { - block->col1.g = clamp(g1 + y, 0, 63); - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - changed = true; - } - } - - for(int x = -W; x <= W; x++) { - block->col1.r = clamp(r1 + x, 0, 31); - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - changed = true; - } - } - - if( !changed ) { - // Stop at local minima. - break; - } - } - - block->col0 = best_col0; - block->col1 = best_col1; - block->evaluatePalette(palette); - - block->indices = computeIndices(rgba, palette); -} - -static uint computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block) -{ - Color32 colors[4]; - block->evaluatePalette4(colors); - - uint totalError = 0; - - for (uint i = 0; i < 16; i++) - { - uint8 green = rgba.color(i).g; - - uint besterror = 256*256; - uint best; - for(uint p = 0; p < 4; p++) - { - int d = colors[p].g - green; - uint error = d * d; - - if (error < besterror) - { - besterror = error; - best = p; - } - } - - totalError += besterror; - } - - return totalError; -} - -// Brute force compressor for DXT5n -void nv::compressGreenBlock_BruteForce(const ColorBlock & rgba, BlockDXT1 * block) -{ - nvDebugCheck(block != NULL); - - uint8 ming = 63; - uint8 maxg = 0; - - // Get min/max green. - for (uint i = 0; i < 16; i++) - { - uint8 green = rgba.color(i).g >> 2; - ming = min(ming, green); - maxg = max(maxg, green); - } - - block->col0.r = 31; - block->col1.r = 31; - block->col0.g = maxg; - block->col1.g = ming; - block->col0.b = 0; - block->col1.b = 0; - - if (maxg - ming > 4) - { - int besterror = computeGreenError(rgba, block); - int bestg0 = maxg; - int bestg1 = ming; - - for (int g0 = ming+5; g0 < maxg; g0++) - { - for (int g1 = ming; g1 < g0-4; g1++) - { - if ((maxg-g0) + (g1-ming) > besterror) - continue; - - block->col0.g = g0; - block->col1.g = g1; - int error = computeGreenError(rgba, block); - - if (error < besterror) - { - besterror = error; - bestg0 = g0; - bestg1 = g1; - } - } - } - - block->col0.g = bestg0; - block->col1.g = bestg1; - } - - Color32 palette[4]; - block->evaluatePalette(palette); - block->indices = computeIndices(rgba, palette); -} - - - -uint nv::blockError(const ColorBlock & rgba, const BlockDXT1 & block) -{ - Color32 palette[4]; - block.evaluatePalette(palette); - - VectorColor vcolors[4]; - vcolors[0] = loadColor(palette[0]); - vcolors[1] = loadColor(palette[1]); - vcolors[2] = loadColor(palette[2]); - vcolors[3] = loadColor(palette[3]); - - uint error = 0; - for(uint i = 0; i < 16; i++) { - const VectorColor vcolor = loadColor(rgba.color(i)); - - int idx = (block.indices >> (2 * i)) & 3; - - uint d = colorDistance(vcolor, vcolors[idx]); - error += d; - } - - //nvDebugCheck(error == paletteError(rgba, palette)); - - vectorEnd(); - return error; -} - - -uint nv::blockError(const ColorBlock & rgba, const AlphaBlockDXT5 & block) -{ - uint8 palette[8]; - block.evaluatePalette(palette); - - uint8 indices[16]; - block.indices(indices); - - uint error = 0; - for(uint i = 0; i < 16; i++) { - int d = palette[indices[i]] - rgba.color(i).a; - error += uint(d * d); - } - - return error; -} - - - -void nv::optimizeEndPoints(const ColorBlock & rgba, BlockDXT1 * block) -{ - float alpha2_sum = 0.0f; - float beta2_sum = 0.0f; - float alphabeta_sum = 0.0f; - Vector3 alphax_sum(zero); - Vector3 betax_sum(zero); - - for( int i = 0; i < 16; ++i ) - { - const uint bits = block->indices >> (2 * i); - - float beta = float(bits & 1); - if (bits & 2) beta = (1 + beta) / 3.0f; - float alpha = 1.0f - beta; - - const Vector3 x = toVector4(rgba.color(i)).xyz(); - - alpha2_sum += alpha * alpha; - beta2_sum += beta * beta; - alphabeta_sum += alpha * beta; - alphax_sum += alpha * x; - betax_sum += beta * x; - } - - float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - - Vector3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - Vector3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; - - Vector3 zero(0, 0, 0); - Vector3 one(1, 1, 1); - a = min(one, max(zero, a)); - b = min(one, max(zero, b)); - - BlockDXT1 B; - - // Round a,b to 565. - B.col0.r = uint16(a.x() * 31); - B.col0.g = uint16(a.y() * 63); - B.col0.b = uint16(a.z() * 31); - B.col1.r = uint16(b.x() * 31); - B.col1.g = uint16(b.y() * 63); - B.col1.b = uint16(b.z() * 31); - B.indices = block->indices; - - // Force 4 color mode. - if (B.col0.u < B.col1.u) - { - swap(B.col0.u, B.col1.u); - B.indices ^= 0x55555555; - } - else if (B.col0.u == B.col1.u) - { - block->indices = 0; - } - - if (blockError(rgba, B) < blockError(rgba, *block)) - { - *block = B; - } -} // Encode DXT3 block. void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT3 * block) @@ -1064,161 +452,5 @@ uint nv::compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * bloc return computeAlphaIndices(rgba, block); } -static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block) -{ - float alpha2_sum = 0; - float beta2_sum = 0; - float alphabeta_sum = 0; - float alphax_sum = 0; - float betax_sum = 0; - - for (int i = 0; i < 16; i++) - { - uint idx = block->index(i); - float alpha; - if (idx < 2) alpha = 1.0f - idx; - else alpha = (8.0f - idx) / 7.0f; - - float beta = 1 - alpha; - - alpha2_sum += alpha * alpha; - beta2_sum += beta * beta; - alphabeta_sum += alpha * beta; - alphax_sum += alpha * rgba.color(i).a; - betax_sum += beta * rgba.color(i).a; - } - - const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - - float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; - - uint alpha0 = uint(min(max(a, 0.0f), 255.0f)); - uint alpha1 = uint(min(max(b, 0.0f), 255.0f)); - - if (alpha0 < alpha1) - { - swap(alpha0, alpha1); - - // Flip indices: - for (int i = 0; i < 16; i++) - { - uint idx = block->index(i); - if (idx < 2) block->setIndex(i, 1 - idx); - else block->setIndex(i, 9 - idx); - } - } - else if (alpha0 == alpha1) - { - for (int i = 0; i < 16; i++) - { - block->setIndex(i, 0); - } - } - - block->alpha0 = alpha0; - block->alpha1 = alpha1; -} -static void optimizeAlpha6(const ColorBlock & rgba, AlphaBlockDXT5 * block) -{ - float alpha2_sum = 0; - float beta2_sum = 0; - float alphabeta_sum = 0; - float alphax_sum = 0; - float betax_sum = 0; - - for (int i = 0; i < 16; i++) - { - uint8 x = rgba.color(i).a; - if (x == 0 || x == 255) continue; - - uint bits = block->index(i); - if (bits == 6 || bits == 7) continue; - - float alpha; - if (bits == 0) alpha = 1.0f; - else if (bits == 1) alpha = 0.0f; - else alpha = (6.0f - block->index(i)) / 5.0f; - - float beta = 1 - alpha; - - alpha2_sum += alpha * alpha; - beta2_sum += beta * beta; - alphabeta_sum += alpha * beta; - alphax_sum += alpha * x; - betax_sum += beta * x; - } - - const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - - float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; - - uint alpha0 = uint(min(max(a, 0.0f), 255.0f)); - uint alpha1 = uint(min(max(b, 0.0f), 255.0f)); - - if (alpha0 > alpha1) - { - swap(alpha0, alpha1); - } - - block->alpha0 = alpha0; - block->alpha1 = alpha1; -} - - - -static bool sameIndices(const AlphaBlockDXT5 & block0, const AlphaBlockDXT5 & block1) -{ - const uint64 mask = ~uint64(0xFFFF); - return (block0.u | mask) == (block1.u | mask); -} - - -uint nv::compressBlock_Iterative(const ColorBlock & rgba, AlphaBlockDXT5 * resultblock) -{ - uint8 alpha0 = 0; - uint8 alpha1 = 255; - - // Get min/max alpha. - for (uint i = 0; i < 16; i++) - { - uint8 alpha = rgba.color(i).a; - alpha0 = max(alpha0, alpha); - alpha1 = min(alpha1, alpha); - } - - AlphaBlockDXT5 block; - block.alpha0 = alpha0 - (alpha0 - alpha1) / 34; - block.alpha1 = alpha1 + (alpha0 - alpha1) / 34; - uint besterror = computeAlphaIndices(rgba, &block); - - AlphaBlockDXT5 bestblock = block; - - while(true) - { - optimizeAlpha8(rgba, &block); - uint error = computeAlphaIndices(rgba, &block); - - if (error >= besterror) - { - // No improvement, stop. - break; - } - if (sameIndices(block, bestblock)) - { - bestblock = block; - break; - } - - besterror = error; - bestblock = block; - }; - - // Copy best block to result; - *resultblock = bestblock; - - return besterror; -} diff --git a/src/nvtt/FastCompressDXT.h b/src/nvtt/FastCompressDXT.h index 6ffe23a..4eca83d 100644 --- a/src/nvtt/FastCompressDXT.h +++ b/src/nvtt/FastCompressDXT.h @@ -38,37 +38,37 @@ namespace nv // Color compression: // Compressor that uses the extremes of the luminance axis. - void compressBlock_DiameterAxis(const ColorBlock & rgba, BlockDXT1 * block); +// void compressBlock_DiameterAxis(const ColorBlock & rgba, BlockDXT1 * block); // Compressor that uses the extremes of the luminance axis. - void compressBlock_LuminanceAxis(const ColorBlock & rgba, BlockDXT1 * block); +// void compressBlock_LuminanceAxis(const ColorBlock & rgba, BlockDXT1 * block); // Compressor that uses bounding box. void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block); // Compressor that uses bounding box and takes alpha into account. - void compressBlock_BoundsRangeAlpha(const ColorBlock & rgba, BlockDXT1 * block); +// void compressBlock_BoundsRangeAlpha(const ColorBlock & rgba, BlockDXT1 * block); // Simple, but slow compressor that tests all color pairs. - void compressBlock_TestAllPairs(const ColorBlock & rgba, BlockDXT1 * block); +// void compressBlock_TestAllPairs(const ColorBlock & rgba, BlockDXT1 * block); // Brute force 6d search along the best fit axis. - void compressBlock_AnalyzeBestFitAxis(const ColorBlock & rgba, BlockDXT1 * block); +// void compressBlock_AnalyzeBestFitAxis(const ColorBlock & rgba, BlockDXT1 * block); // Spatial greedy search. - void refineSolution_1dSearch(const ColorBlock & rgba, BlockDXT1 * block); - void refineSolution_3dSearch(const ColorBlock & rgba, BlockDXT1 * block); - void refineSolution_6dSearch(const ColorBlock & rgba, BlockDXT1 * block); +// void refineSolution_1dSearch(const ColorBlock & rgba, BlockDXT1 * block); +// void refineSolution_3dSearch(const ColorBlock & rgba, BlockDXT1 * block); +// void refineSolution_6dSearch(const ColorBlock & rgba, BlockDXT1 * block); // Brute force compressor for DXT5n - void compressGreenBlock_BruteForce(const ColorBlock & rgba, BlockDXT1 * block); +// void compressGreenBlock_BruteForce(const ColorBlock & rgba, BlockDXT1 * block); // Minimize error of the endpoints. - void optimizeEndPoints(const ColorBlock & rgba, BlockDXT1 * block); +// void optimizeEndPoints(const ColorBlock & rgba, BlockDXT1 * block); - uint blockError(const ColorBlock & rgba, const BlockDXT1 & block); - uint blockError(const ColorBlock & rgba, const AlphaBlockDXT5 & block); +// uint blockError(const ColorBlock & rgba, const BlockDXT1 & block); +// uint blockError(const ColorBlock & rgba, const AlphaBlockDXT5 & block); // Alpha compression: void compressBlock(const ColorBlock & rgba, AlphaBlockDXT3 * block); @@ -77,7 +77,7 @@ namespace nv uint compressBlock_BoundsRange(const ColorBlock & rgba, AlphaBlockDXT5 * block); uint compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * block); - uint compressBlock_Iterative(const ColorBlock & rgba, AlphaBlockDXT5 * block); +// uint compressBlock_Iterative(const ColorBlock & rgba, AlphaBlockDXT5 * block); } // nv namespace diff --git a/src/nvtt/QuickCompressDXT.cpp b/src/nvtt/QuickCompressDXT.cpp index fda8165..5fe51ac 100644 --- a/src/nvtt/QuickCompressDXT.cpp +++ b/src/nvtt/QuickCompressDXT.cpp @@ -288,62 +288,219 @@ static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock) dxtBlock->indices = computeIndices3(block, a, b); }*/ - -static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block) +namespace { - float alpha2_sum = 0; - float beta2_sum = 0; - float alphabeta_sum = 0; - float alphax_sum = 0; - float betax_sum = 0; - - for (int i = 0; i < 16; i++) + static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block) { - uint idx = block->index(i); - float alpha; - if (idx < 2) alpha = 1.0f - idx; - else alpha = (8.0f - idx) / 7.0f; - - float beta = 1 - alpha; - - alpha2_sum += alpha * alpha; - beta2_sum += beta * beta; - alphabeta_sum += alpha * beta; - alphax_sum += alpha * rgba.color(i).a; - betax_sum += beta * rgba.color(i).a; + nvDebugCheck(block != NULL); + + int palette[4]; + palette[0] = (block->col0.g << 2) | (block->col0.g >> 4); + palette[1] = (block->col1.g << 2) | (block->col1.g >> 4); + palette[2] = (2 * palette[0] + palette[1]) / 3; + palette[3] = (2 * palette[1] + palette[0]) / 3; + + int totalError = 0; + + for (int i = 0; i < 16; i++) + { + const int green = rgba.color(i).g; + + int error = abs(green - palette[0]); + error = min(error, abs(green - palette[1])); + error = min(error, abs(green - palette[2])); + error = min(error, abs(green - palette[3])); + + totalError += error; + } + + return totalError; } - const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - - float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; - - uint alpha0 = uint(min(max(a, 0.0f), 255.0f)); - uint alpha1 = uint(min(max(b, 0.0f), 255.0f)); - - if (alpha0 < alpha1) + static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4]) { - swap(alpha0, alpha1); + const int color0 = palette[0].g; + const int color1 = palette[1].g; + const int color2 = palette[2].g; + const int color3 = palette[3].g; - // Flip indices: + uint indices = 0; + for (int i = 0; i < 16; i++) + { + const int color = rgba.color(i).g; + + uint d0 = abs(color0 - color); + uint d1 = abs(color1 - color); + uint d2 = abs(color2 - color); + uint d3 = abs(color3 - color); + + uint b0 = d0 > d3; + uint b1 = d1 > d2; + uint b2 = d0 > d2; + uint b3 = d1 > d3; + uint b4 = d2 > d3; + + uint x0 = b1 & b2; + uint x1 = b0 & b3; + uint x2 = b0 & b4; + + indices |= (x2 | ((x0 | x1) << 1)) << (2 * i); + } + + return indices; + } + +} // namespace + +namespace +{ + + static uint computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block) + { + uint8 alphas[8]; + block->evaluatePalette(alphas); + + uint totalError = 0; + + for (uint i = 0; i < 16; i++) + { + uint8 alpha = rgba.color(i).a; + + uint besterror = 256*256; + uint best = 8; + for(uint p = 0; p < 8; p++) + { + int d = alphas[p] - alpha; + uint error = d * d; + + if (error < besterror) + { + besterror = error; + best = p; + } + } + nvDebugCheck(best < 8); + + totalError += besterror; + block->setIndex(i, best); + } + + return totalError; + } + + static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block) + { + float alpha2_sum = 0; + float beta2_sum = 0; + float alphabeta_sum = 0; + float alphax_sum = 0; + float betax_sum = 0; + for (int i = 0; i < 16; i++) { uint idx = block->index(i); - if (idx < 2) block->setIndex(i, 1 - idx); - else block->setIndex(i, 9 - idx); + float alpha; + if (idx < 2) alpha = 1.0f - idx; + else alpha = (8.0f - idx) / 7.0f; + + float beta = 1 - alpha; + + alpha2_sum += alpha * alpha; + beta2_sum += beta * beta; + alphabeta_sum += alpha * beta; + alphax_sum += alpha * rgba.color(i).a; + betax_sum += beta * rgba.color(i).a; } - } - else if (alpha0 == alpha1) - { - for (int i = 0; i < 16; i++) + + const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + + float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; + float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; + + uint alpha0 = uint(min(max(a, 0.0f), 255.0f)); + uint alpha1 = uint(min(max(b, 0.0f), 255.0f)); + + if (alpha0 < alpha1) { - block->setIndex(i, 0); + swap(alpha0, alpha1); + + // Flip indices: + for (int i = 0; i < 16; i++) + { + uint idx = block->index(i); + if (idx < 2) block->setIndex(i, 1 - idx); + else block->setIndex(i, 9 - idx); + } } + else if (alpha0 == alpha1) + { + for (int i = 0; i < 16; i++) + { + block->setIndex(i, 0); + } + } + + block->alpha0 = alpha0; + block->alpha1 = alpha1; } - block->alpha0 = alpha0; - block->alpha1 = alpha1; -} + /* + static void optimizeAlpha6(const ColorBlock & rgba, AlphaBlockDXT5 * block) + { + float alpha2_sum = 0; + float beta2_sum = 0; + float alphabeta_sum = 0; + float alphax_sum = 0; + float betax_sum = 0; + + for (int i = 0; i < 16; i++) + { + uint8 x = rgba.color(i).a; + if (x == 0 || x == 255) continue; + + uint bits = block->index(i); + if (bits == 6 || bits == 7) continue; + + float alpha; + if (bits == 0) alpha = 1.0f; + else if (bits == 1) alpha = 0.0f; + else alpha = (6.0f - block->index(i)) / 5.0f; + + float beta = 1 - alpha; + + alpha2_sum += alpha * alpha; + beta2_sum += beta * beta; + alphabeta_sum += alpha * beta; + alphax_sum += alpha * x; + betax_sum += beta * x; + } + + const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + + float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; + float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; + + uint alpha0 = uint(min(max(a, 0.0f), 255.0f)); + uint alpha1 = uint(min(max(b, 0.0f), 255.0f)); + + if (alpha0 > alpha1) + { + swap(alpha0, alpha1); + } + + block->alpha0 = alpha0; + block->alpha1 = alpha1; + } + */ + + static bool sameIndices(const AlphaBlockDXT5 & block0, const AlphaBlockDXT5 & block1) + { + const uint64 mask = ~uint64(0xFFFF); + return (block0.u | mask) == (block1.u | mask); + } + +} // namespace + @@ -436,66 +593,6 @@ void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock) } -static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block) -{ - nvDebugCheck(block != NULL); - - int palette[4]; - palette[0] = (block->col0.g << 2) | (block->col0.g >> 4); - palette[1] = (block->col1.g << 2) | (block->col1.g >> 4); - palette[2] = (2 * palette[0] + palette[1]) / 3; - palette[3] = (2 * palette[1] + palette[0]) / 3; - - int totalError = 0; - - for (int i = 0; i < 16; i++) - { - const int green = rgba.color(i).g; - - int error = abs(green - palette[0]); - error = min(error, abs(green - palette[1])); - error = min(error, abs(green - palette[2])); - error = min(error, abs(green - palette[3])); - - totalError += error; - } - - return totalError; -} - -static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4]) -{ - const int color0 = palette[0].g; - const int color1 = palette[1].g; - const int color2 = palette[2].g; - const int color3 = palette[3].g; - - uint indices = 0; - for (int i = 0; i < 16; i++) - { - const int color = rgba.color(i).g; - - uint d0 = abs(color0 - color); - uint d1 = abs(color1 - color); - uint d2 = abs(color2 - color); - uint d3 = abs(color3 - color); - - uint b0 = d0 > d3; - uint b1 = d1 > d2; - uint b2 = d0 > d2; - uint b3 = d1 > d3; - uint b4 = d2 > d3; - - uint x0 = b1 & b2; - uint x1 = b0 & b3; - uint x2 = b0 & b4; - - indices |= (x2 | ((x0 | x1) << 1)) << (2 * i); - } - - return indices; -} - // Brute force green channel compressor void QuickCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block) { @@ -558,6 +655,7 @@ void QuickCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block) void QuickCompress::compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock) { + // @@ Round instead of truncate. When rounding take into account bit expansion. dxtBlock->alpha0 = rgba.color(0).a >> 4; dxtBlock->alpha1 = rgba.color(1).a >> 4; dxtBlock->alpha2 = rgba.color(2).a >> 4; @@ -582,9 +680,49 @@ void QuickCompress::compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock) compressDXT3A(rgba, &dxtBlock->alpha); } + void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock) { - // @@ TODO + uint8 alpha0 = 0; + uint8 alpha1 = 255; + + // Get min/max alpha. + for (uint i = 0; i < 16; i++) + { + uint8 alpha = rgba.color(i).a; + alpha0 = max(alpha0, alpha); + alpha1 = min(alpha1, alpha); + } + + AlphaBlockDXT5 block; + block.alpha0 = alpha0 - (alpha0 - alpha1) / 34; + block.alpha1 = alpha1 + (alpha0 - alpha1) / 34; + uint besterror = computeAlphaIndices(rgba, &block); + + AlphaBlockDXT5 bestblock = block; + + while(true) + { + optimizeAlpha8(rgba, &block); + uint error = computeAlphaIndices(rgba, &block); + + if (error >= besterror) + { + // No improvement, stop. + break; + } + if (sameIndices(block, bestblock)) + { + bestblock = block; + break; + } + + besterror = error; + bestblock = block; + }; + + // Copy best block to result; + *dxtBlock = bestblock; } void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock) diff --git a/src/nvtt/cuda/CudaCompressDXT.cpp b/src/nvtt/cuda/CudaCompressDXT.cpp index 2231953..65af159 100644 --- a/src/nvtt/cuda/CudaCompressDXT.cpp +++ b/src/nvtt/cuda/CudaCompressDXT.cpp @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include "CudaCompressDXT.h" #include "CudaUtils.h" @@ -230,7 +230,7 @@ void CudaCompressor::compressDXT3(const Image * image, const OutputOptions::Priv for (uint i = 0; i < count; i++) { ColorBlock rgba(blockLinearImage + (bn + i) * 16); - compressBlock(rgba, alphaBlocks + i); + QuickCompress::compressDXT3A(rgba, alphaBlocks + i); } // Check for errors. @@ -314,7 +314,7 @@ void CudaCompressor::compressDXT5(const Image * image, const OutputOptions::Priv for (uint i = 0; i < count; i++) { ColorBlock rgba(blockLinearImage + (bn + i) * 16); - compressBlock_Iterative(rgba, alphaBlocks + i); + QuickCompress::compressDXT5A(rgba, alphaBlocks + i); } // Check for errors.