From 49409e9d92e93d2e423843edda6261c7d9e57d20 Mon Sep 17 00:00:00 2001 From: castano Date: Mon, 19 Jan 2009 10:42:31 +0000 Subject: [PATCH] Cleanup color rounding and expansion. --- src/nvtt/cuda/CompressKernel.cu | 44 +++++++++++++-------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/src/nvtt/cuda/CompressKernel.cu b/src/nvtt/cuda/CompressKernel.cu index f2846f8..4762d58 100644 --- a/src/nvtt/cuda/CompressKernel.cu +++ b/src/nvtt/cuda/CompressKernel.cu @@ -305,7 +305,6 @@ __device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sum //////////////////////////////////////////////////////////////////////////////// inline __device__ float3 roundAndExpand565(float3 v, ushort * w) { -#if 0 uint x = __float2uint_rn(__saturatef(v.x) * 31.0f); uint y = __float2uint_rn(__saturatef(v.y) * 63.0f); uint z = __float2uint_rn(__saturatef(v.z) * 31.0f); @@ -313,35 +312,26 @@ inline __device__ float3 roundAndExpand565(float3 v, ushort * w) v.x = __uint2float_rn(x) * 1.0f / 31.0f; v.y = __uint2float_rn(y) * 1.0f / 63.0f; v.z = __uint2float_rn(z) * 1.0f / 31.0f; -#else - v.x = rintf(__saturatef(v.x) * 31.0f); - v.y = rintf(__saturatef(v.y) * 63.0f); - v.z = rintf(__saturatef(v.z) * 31.0f); - *w = ((ushort)v.x << 11) | ((ushort)v.y << 5) | (ushort)v.z; - v.x *= 1.0f / 31.0f; - v.y *= 1.0f / 63.0f; - v.z *= 1.0f / 31.0f; -#endif return v; } inline __device__ float2 roundAndExpand56(float2 v, ushort * w) { - v.x = rintf(__saturatef(v.x) * 31.0f); - v.y = rintf(__saturatef(v.y) * 63.0f); - *w = ((ushort)v.x << 11) | ((ushort)v.y << 5); - v.x *= 1.0f / 31.0f; - v.y *= 1.0f / 63.0f; + uint x = __float2uint_rn(__saturatef(v.x) * 31.0f); + uint y = __float2uint_rn(__saturatef(v.y) * 63.0f); + *w = (x << 11) | (y << 5); + v.x = __uint2float_rn(x) * 1.0f / 31.0f; + v.y = __uint2float_rn(y) * 1.0f / 63.0f; return v; } inline __device__ float2 roundAndExpand88(float2 v, ushort * w) { - v.x = rintf(__saturatef(v.x) * 255.0f); - v.y = rintf(__saturatef(v.y) * 255.0f); - *w = ((ushort)v.x << 8) | ((ushort)v.y); - v.x *= 1.0f / 255.0f; - v.y *= 1.0f / 255.0f; + uint x = __float2uint_rn(__saturatef(v.x) * 255.0f); + uint y = __float2uint_rn(__saturatef(v.y) * 255.0f); + *w = (x << 8) | y; + v.x = __uint2float_rn(x) * 1.0f / 255.0f; + v.y = __uint2float_rn(y) * 1.0f / 255.0f; return v; } @@ -1551,13 +1541,13 @@ __global__ void compressDXT5(const uint * permutations, const uint * image, uint /*__device__ void evaluatePalette(uint alpha0, uint alpha1, uint alphas[8]) { - alpha[0] = alpha0; - alpha[1] = alpha1; - alpha[2] = (6 * alpha[0] + 1 * alpha[1]) / 7; // bit code 010 - alpha[3] = (5 * alpha[0] + 2 * alpha[1]) / 7; // bit code 011 - alpha[4] = (4 * alpha[0] + 3 * alpha[1]) / 7; // bit code 100 - alpha[5] = (3 * alpha[0] + 4 * alpha[1]) / 7; // bit code 101 - alpha[6] = (2 * alpha[0] + 5 * alpha[1]) / 7; // bit code 110 + alpha[0] = alpha0; + alpha[1] = alpha1; + alpha[2] = (6 * alpha[0] + 1 * alpha[1]) / 7; // bit code 010 + alpha[3] = (5 * alpha[0] + 2 * alpha[1]) / 7; // bit code 011 + alpha[4] = (4 * alpha[0] + 3 * alpha[1]) / 7; // bit code 100 + alpha[5] = (3 * alpha[0] + 4 * alpha[1]) / 7; // bit code 101 + alpha[6] = (2 * alpha[0] + 5 * alpha[1]) / 7; // bit code 110 alpha[7] = (1 * alpha[0] + 6 * alpha[1]) / 7; // bit code 111 }