Clean up color rounding and expansion.

2009-01-19 10:42:31 +00:00
parent f753cc9702
commit 49409e9d92
1 changed files with 17 additions and 27 deletions
--- a/src/nvtt/cuda/CompressKernel.cu
+++ b/src/nvtt/cuda/CompressKernel.cu
@ -305,7 +305,6 @@ __device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sum
 ////////////////////////////////////////////////////////////////////////////////
 inline __device__ float3 roundAndExpand565(float3 v, ushort * w)
 {
 #if 0
 	uint x = __float2uint_rn(__saturatef(v.x) * 31.0f);
 	uint y = __float2uint_rn(__saturatef(v.y) * 63.0f);
 	uint z = __float2uint_rn(__saturatef(v.z) * 31.0f);
@ -313,35 +312,26 @@ inline __device__ float3 roundAndExpand565(float3 v, ushort * w)
 	v.x = __uint2float_rn(x) * 1.0f / 31.0f;
 	v.y = __uint2float_rn(y) * 1.0f / 63.0f;
 	v.z = __uint2float_rn(z) * 1.0f / 31.0f;
 #else
 	v.x = rintf(__saturatef(v.x) * 31.0f);
 	v.y = rintf(__saturatef(v.y) * 63.0f);
 	v.z = rintf(__saturatef(v.z) * 31.0f);
 	*w = ((ushort)v.x << 11) | ((ushort)v.y << 5) | (ushort)v.z;
 	v.x *= 1.0f / 31.0f;
 	v.y *= 1.0f / 63.0f;
 	v.z *= 1.0f / 31.0f;
 #endif
 	return v;
 }
 inline __device__ float2 roundAndExpand56(float2 v, ushort * w)
 {
-	v.x = rintf(__saturatef(v.x) * 31.0f);
+	uint x = __float2uint_rn(__saturatef(v.x) * 31.0f);
-	v.y = rintf(__saturatef(v.y) * 63.0f);
+	uint y = __float2uint_rn(__saturatef(v.y) * 63.0f);
-	*w = ((ushort)v.x << 11) | ((ushort)v.y << 5);
+	*w = (x << 11) | (y << 5);
-	v.x *= 1.0f / 31.0f;
+	v.x = __uint2float_rn(x) * 1.0f / 31.0f;
-	v.y *= 1.0f / 63.0f;
+	v.y = __uint2float_rn(y) * 1.0f / 63.0f;
 	return v;
 }
 inline __device__ float2 roundAndExpand88(float2 v, ushort * w)
 {
-	v.x = rintf(__saturatef(v.x) * 255.0f);
+	uint x = __float2uint_rn(__saturatef(v.x) * 255.0f);
-	v.y = rintf(__saturatef(v.y) * 255.0f);
+	uint y = __float2uint_rn(__saturatef(v.y) * 255.0f);
-	*w = ((ushort)v.x << 8) | ((ushort)v.y);
+	*w = (x << 8) | y;
-	v.x *= 1.0f / 255.0f;
+	v.x = __uint2float_rn(x) * 1.0f / 255.0f;
-	v.y *= 1.0f / 255.0f;
+	v.y = __uint2float_rn(y) * 1.0f / 255.0f;
 	return v;
 }
@ -1551,13 +1541,13 @@ __global__ void compressDXT5(const uint * permutations, const uint * image, uint
 /*__device__ void evaluatePalette(uint alpha0, uint alpha1, uint alphas[8])
 {
-	alpha[0] = alpha0;
+	alpha[0] = alpha0;
-	alpha[1] = alpha1;
+	alpha[1] = alpha1;
-	alpha[2] = (6 * alpha[0] + 1 * alpha[1]) / 7;	// bit code 010
+	alpha[2] = (6 * alpha[0] + 1 * alpha[1]) / 7;	// bit code 010
-	alpha[3] = (5 * alpha[0] + 2 * alpha[1]) / 7;	// bit code 011
+	alpha[3] = (5 * alpha[0] + 2 * alpha[1]) / 7;	// bit code 011
-	alpha[4] = (4 * alpha[0] + 3 * alpha[1]) / 7;	// bit code 100
+	alpha[4] = (4 * alpha[0] + 3 * alpha[1]) / 7;	// bit code 100
-	alpha[5] = (3 * alpha[0] + 4 * alpha[1]) / 7;	// bit code 101
+	alpha[5] = (3 * alpha[0] + 4 * alpha[1]) / 7;	// bit code 101
-	alpha[6] = (2 * alpha[0] + 5 * alpha[1]) / 7;	// bit code 110
+	alpha[6] = (2 * alpha[0] + 5 * alpha[1]) / 7;	// bit code 110
 	alpha[7] = (1 * alpha[0] + 6 * alpha[1]) / 7;	// bit code 111
 }