From 49409e9d92e93d2e423843edda6261c7d9e57d20 Mon Sep 17 00:00:00 2001
From: castano <castano@95f4ed2b-212e-0410-8b90-d31948207fce>
Date: Mon, 19 Jan 2009 10:42:31 +0000
Subject: [PATCH] Clean up color rounding and expansion.

---
 src/nvtt/cuda/CompressKernel.cu | 44 +++++++++++++--------------------
 1 file changed, 17 insertions(+), 27 deletions(-)

diff --git a/src/nvtt/cuda/CompressKernel.cu b/src/nvtt/cuda/CompressKernel.cu
index f2846f8..4762d58 100644
--- a/src/nvtt/cuda/CompressKernel.cu
+++ b/src/nvtt/cuda/CompressKernel.cu
@@ -305,7 +305,6 @@ __device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sum
 ////////////////////////////////////////////////////////////////////////////////
 inline __device__ float3 roundAndExpand565(float3 v, ushort * w)
 {
-#if 0
 	uint x = __float2uint_rn(__saturatef(v.x) * 31.0f);
 	uint y = __float2uint_rn(__saturatef(v.y) * 63.0f);
 	uint z = __float2uint_rn(__saturatef(v.z) * 31.0f);
@@ -313,35 +312,26 @@ inline __device__ float3 roundAndExpand565(float3 v, ushort * w)
 	v.x = __uint2float_rn(x) * 1.0f / 31.0f;
 	v.y = __uint2float_rn(y) * 1.0f / 63.0f;
 	v.z = __uint2float_rn(z) * 1.0f / 31.0f;
-#else
-	v.x = rintf(__saturatef(v.x) * 31.0f);
-	v.y = rintf(__saturatef(v.y) * 63.0f);
-	v.z = rintf(__saturatef(v.z) * 31.0f);
-	*w = ((ushort)v.x << 11) | ((ushort)v.y << 5) | (ushort)v.z;
-	v.x *= 1.0f / 31.0f;
-	v.y *= 1.0f / 63.0f;
-	v.z *= 1.0f / 31.0f;
-#endif
 	return v;
 }
 
 inline __device__ float2 roundAndExpand56(float2 v, ushort * w)
 {
-	v.x = rintf(__saturatef(v.x) * 31.0f);
-	v.y = rintf(__saturatef(v.y) * 63.0f);
-	*w = ((ushort)v.x << 11) | ((ushort)v.y << 5);
-	v.x *= 1.0f / 31.0f;
-	v.y *= 1.0f / 63.0f;
+	uint x = __float2uint_rn(__saturatef(v.x) * 31.0f);
+	uint y = __float2uint_rn(__saturatef(v.y) * 63.0f);
+	*w = (x << 11) | (y << 5);
+	v.x = __uint2float_rn(x) * 1.0f / 31.0f;
+	v.y = __uint2float_rn(y) * 1.0f / 63.0f;
 	return v;
 }
 
 inline __device__ float2 roundAndExpand88(float2 v, ushort * w)
 {
-	v.x = rintf(__saturatef(v.x) * 255.0f);
-	v.y = rintf(__saturatef(v.y) * 255.0f);
-	*w = ((ushort)v.x << 8) | ((ushort)v.y);
-	v.x *= 1.0f / 255.0f;
-	v.y *= 1.0f / 255.0f;
+	uint x = __float2uint_rn(__saturatef(v.x) * 255.0f);
+	uint y = __float2uint_rn(__saturatef(v.y) * 255.0f);
+	*w = (x << 8) | y;
+	v.x = __uint2float_rn(x) * 1.0f / 255.0f;
+	v.y = __uint2float_rn(y) * 1.0f / 255.0f;
 	return v;
 }
 
@@ -1551,13 +1541,13 @@ __global__ void compressDXT5(const uint * permutations, const uint * image, uint
 
 /*__device__ void evaluatePalette(uint alpha0, uint alpha1, uint alphas[8])
 {
-	alpha[0] = alpha0;
-	alpha[1] = alpha1;
-	alpha[2] = (6 * alpha[0] + 1 * alpha[1]) / 7;	// bit code 010
-	alpha[3] = (5 * alpha[0] + 2 * alpha[1]) / 7;	// bit code 011
-	alpha[4] = (4 * alpha[0] + 3 * alpha[1]) / 7;	// bit code 100
-	alpha[5] = (3 * alpha[0] + 4 * alpha[1]) / 7;	// bit code 101
-	alpha[6] = (2 * alpha[0] + 5 * alpha[1]) / 7;	// bit code 110
+	alpha[0] = alpha0;
+	alpha[1] = alpha1;
+	alpha[2] = (6 * alpha[0] + 1 * alpha[1]) / 7;	// bit code 010
+	alpha[3] = (5 * alpha[0] + 2 * alpha[1]) / 7;	// bit code 011
+	alpha[4] = (4 * alpha[0] + 3 * alpha[1]) / 7;	// bit code 100
+	alpha[5] = (3 * alpha[0] + 4 * alpha[1]) / 7;	// bit code 101
+	alpha[6] = (2 * alpha[0] + 5 * alpha[1]) / 7;	// bit code 110
 	alpha[7] = (1 * alpha[0] + 6 * alpha[1]) / 7;	// bit code 111
 }