// Review notes for this patch. Commentary only: the diff text below is unchanged.
//
// src/nvtt/QuickCompressDXT.cpp -- QuickCompress::compressDXT1(Color32, BlockDXT1 *)
//   After the visible col1 assignments and `indices = 0xaaaaaaaa`, the added code swaps
//   col0.u and col1.u when col0.u < col1.u and XORs indices with 0x55555555
//   (0xaaaaaaaa ^ 0x55555555 == 0xffffffff).
//   NOTE(review): presumably this keeps col0 >= col1 so the block decodes in the DXT1
//   four-colour mode -- confirm against the DXT1/BC1 format description.
//
// src/nvtt/cuda/CompressKernel.cu -- saveSingleColorBlockDXT1
//   The unconditional store is replaced by a branch: when color0 < color1 the two
//   16-bit values trade places in result[bid].x and result[bid].y becomes 0xffffffff;
//   otherwise the previous values ((color1 << 16) | color0, 0xaaaaaaaa) are stored.
//   The swapped index word equals the one the CPU path above produces.
//
// src/nvtt/cuda/CompressKernel.cu -- compressDXT1 kernel
//   __syncthreads() moves from after the `if (sameColor) { ... return; }` block to
//   directly after loadColorBlock(). In the old order, threads taking the sameColor
//   return never reached the barrier.
//   NOTE(review): presumably the barrier is also needed before sameColor is read --
//   confirm how loadColorBlock writes it.
//
// src/nvtt/tests/stress.cpp
//   WIDTH and HEIGHT go from 1024 to 2048 (4x the pixel count), the hard-coded
//   1024 literals in main() are replaced by WIDTH, HEIGHT and INPUT_SIZE, and the
//   active format switches from Format_CTX1 to Format_DXT1 by moving the comment marker.
//   NOTE(review): the buffer declarations are outside this hunk -- confirm they are
//   sized from INPUT_SIZE / OUTPUT_SIZE.
diff --git a/src/nvtt/QuickCompressDXT.cpp b/src/nvtt/QuickCompressDXT.cpp index 78ee6b8..c4e0abc 100644 --- a/src/nvtt/QuickCompressDXT.cpp +++ b/src/nvtt/QuickCompressDXT.cpp @@ -359,6 +359,12 @@ void QuickCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock) dxtBlock->col1.g = OMatch6[c.g][1]; dxtBlock->col1.b = OMatch5[c.b][1]; dxtBlock->indices = 0xaaaaaaaa; + + if (dxtBlock->col0.u < dxtBlock->col1.u) + { + swap(dxtBlock->col0.u, dxtBlock->col1.u); + dxtBlock->indices ^= 0x55555555; + } } void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock) diff --git a/src/nvtt/cuda/CompressKernel.cu b/src/nvtt/cuda/CompressKernel.cu index 42c3144..08c331a 100644 --- a/src/nvtt/cuda/CompressKernel.cu +++ b/src/nvtt/cuda/CompressKernel.cu @@ -1012,8 +1012,16 @@ __device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result) ushort color0 = (OMatch5[r][0] << 11) | (OMatch6[g][0] << 5) | OMatch5[b][0]; ushort color1 = (OMatch5[r][1] << 11) | (OMatch6[g][1] << 5) | OMatch5[b][1]; - result[bid].x = (color1 << 16) | color0; - result[bid].y = 0xaaaaaaaa; + if (color0 < color1) + { + result[bid].x = (color0 << 16) | color1; + result[bid].y = 0xffffffff; + } + else + { + result[bid].x = (color1 << 16) | color0; + result[bid].y = 0xaaaaaaaa; + } } @@ -1029,14 +1037,14 @@ __global__ void compressDXT1(const uint * permutations, const uint * image, uint loadColorBlock(image, colors, sums, xrefs, &sameColor); + __syncthreads(); + if (sameColor) { if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result); return; } - __syncthreads(); - ushort bestStart, bestEnd; uint bestPermutation; diff --git a/src/nvtt/tests/stress.cpp b/src/nvtt/tests/stress.cpp index 526bcfa..ea86d1b 100644 --- a/src/nvtt/tests/stress.cpp +++ b/src/nvtt/tests/stress.cpp @@ -31,8 +31,8 @@ #define FRAME_COUNT 1000 -#define WIDTH 1024 -#define HEIGHT 1024 +#define WIDTH 2048 +#define HEIGHT 2048 #define INPUT_SIZE (WIDTH*HEIGHT) #define OUTPUT_SIZE (WIDTH*HEIGHT/16*2) @@ 
-180,20 +180,20 @@ int main(int argc, char *argv[]) //precomp(); nvtt::InputOptions inputOptions; - inputOptions.setTextureLayout(nvtt::TextureType_2D, 1024, 1024); + inputOptions.setTextureLayout(nvtt::TextureType_2D, WIDTH, HEIGHT); - for (int i = 0; i < 1024 * 1024; i++) + for (int i = 0; i < INPUT_SIZE; i++) { s_input[i] = rand(); } - inputOptions.setMipmapData(s_input, 1024, 1024); + inputOptions.setMipmapData(s_input, WIDTH, HEIGHT); inputOptions.setMipmapGeneration(false); nvtt::CompressionOptions compressionOptions; -// compressionOptions.setFormat(nvtt::Format_DXT1); + compressionOptions.setFormat(nvtt::Format_DXT1); // compressionOptions.setFormat(nvtt::Format_DXT1n); - compressionOptions.setFormat(nvtt::Format_CTX1); +// compressionOptions.setFormat(nvtt::Format_CTX1); nvtt::OutputOptions outputOptions; outputOptions.setOutputHeader(false);