Use tabs instead of spaces.

2008-01-18 23:51:20 +00:00 · 2008-01-18 23:51:20 +00:00 · 64020a4cb7
commit 64020a4cb7
parent c8ac20ee0f
1 changed file with 241 additions and 241 deletions
--- a/src/nvtt/cuda/CompressKernel.cu
+++ b/src/nvtt/cuda/CompressKernel.cu
@@ -189,14 +189,14 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
 ////////////////////////////////////////////////////////////////////////////////
 inline __device__ float3 roundAndExpand(float3 v, ushort * w)
 {
-    v.x = rintf(__saturatef(v.x) * 31.0f);
+	v.x = rintf(__saturatef(v.x) * 31.0f);
-    v.y = rintf(__saturatef(v.y) * 63.0f);
+	v.y = rintf(__saturatef(v.y) * 63.0f);
-    v.z = rintf(__saturatef(v.z) * 31.0f);
+	v.z = rintf(__saturatef(v.z) * 31.0f);
-    *w = ((ushort)v.x << 11) | ((ushort)v.y << 5) | (ushort)v.z;
+	*w = ((ushort)v.x << 11) | ((ushort)v.y << 5) | (ushort)v.z;
-    v.x *= 0.03227752766457f; // approximate integer bit expansion.
+	v.x *= 0.03227752766457f; // approximate integer bit expansion.
-    v.y *= 0.01583151765563f;
+	v.y *= 0.01583151765563f;
-    v.z *= 0.03227752766457f;
+	v.z *= 0.03227752766457f;
-    return v;
+	return v;
 }
@@ -205,82 +205,82 @@ inline __device__ float3 roundAndExpand(float3 v, ushort * w)
 ////////////////////////////////////////////////////////////////////////////////
 __device__ float evalPermutation4(const float3 * colors, uint permutation, ushort * start, ushort * end)
 {
-    // Compute endpoints using least squares.
+	// Compute endpoints using least squares.
-    float alpha2_sum = 0.0f;
+	float alpha2_sum = 0.0f;
-    float beta2_sum = 0.0f;
+	float beta2_sum = 0.0f;
-    float alphabeta_sum = 0.0f;
+	float alphabeta_sum = 0.0f;
-    float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f);
+	float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f);
-    float3 betax_sum = make_float3(0.0f, 0.0f, 0.0f);
+	float3 betax_sum = make_float3(0.0f, 0.0f, 0.0f);
-    // Compute alpha & beta for this permutation.
+	// Compute alpha & beta for this permutation.
-    for (int i = 0; i < 16; i++)
+	for (int i = 0; i < 16; i++)
-    {
+	{
 		const uint bits = permutation >> (2*i);
-        float beta = (bits & 1);
+		float beta = (bits & 1);
-        if (bits & 2) beta = (1 + beta) / 3.0f;
+		if (bits & 2) beta = (1 + beta) / 3.0f;
-        float alpha = 1.0f - beta;
+		float alpha = 1.0f - beta;
 		alpha2_sum += alpha * alpha;
 		beta2_sum += beta * beta;
 		alphabeta_sum += alpha * beta;
-        alphax_sum += alpha * colors[i];
+		alphax_sum += alpha * colors[i];
-        betax_sum += beta * colors[i];
+		betax_sum += beta * colors[i];
-    }
+	}
-    const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
+	const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
-    float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
+	float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
-    float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
+	float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
-    // Round a, b to the closest 5-6-5 color and expand...
+	// Round a, b to the closest 5-6-5 color and expand...
-    a = roundAndExpand(a, start);
+	a = roundAndExpand(a, start);
-    b = roundAndExpand(b, end);
+	b = roundAndExpand(b, end);
-    // compute the error
+	// compute the error
-    float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
+	float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
-    return dot(e, kColorMetricSqr);
+	return dot(e, kColorMetricSqr);
 }
 __device__ float evalPermutation3(const float3 * colors, uint permutation, ushort * start, ushort * end)
 {
-    // Compute endpoints using least squares.
+	// Compute endpoints using least squares.
-    float alpha2_sum = 0.0f;
+	float alpha2_sum = 0.0f;
-    float beta2_sum = 0.0f;
+	float beta2_sum = 0.0f;
-    float alphabeta_sum = 0.0f;
+	float alphabeta_sum = 0.0f;
-    float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f);
+	float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f);
-    float3 betax_sum = make_float3(0.0f, 0.0f, 0.0f);
+	float3 betax_sum = make_float3(0.0f, 0.0f, 0.0f);
-    // Compute alpha & beta for this permutation.
+	// Compute alpha & beta for this permutation.
-    for (int i = 0; i < 16; i++)
+	for (int i = 0; i < 16; i++)
-    {
+	{
-        const uint bits = permutation >> (2*i);
+		const uint bits = permutation >> (2*i);
-        float beta = (bits & 1);
+		float beta = (bits & 1);
-        if (bits & 2) beta = 0.5f;
+		if (bits & 2) beta = 0.5f;
-        float alpha = 1.0f - beta;
+		float alpha = 1.0f - beta;
-        alpha2_sum += alpha * alpha;
+		alpha2_sum += alpha * alpha;
-        beta2_sum += beta * beta;
+		beta2_sum += beta * beta;
-        alphabeta_sum += alpha * beta;
+		alphabeta_sum += alpha * beta;
-        alphax_sum += alpha * colors[i];
+		alphax_sum += alpha * colors[i];
-        betax_sum += beta * colors[i];
+		betax_sum += beta * colors[i];
-    }
+	}
-    const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
+	const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
-    float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
+	float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
-    float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
+	float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
-    // Round a, b to the closest 5-6-5 color and expand...
+	// Round a, b to the closest 5-6-5 color and expand...
-    a = roundAndExpand(a, start);
+	a = roundAndExpand(a, start);
-    b = roundAndExpand(b, end);
+	b = roundAndExpand(b, end);
-    // compute the error
+	// compute the error
-    float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
+	float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
-    return dot(e, kColorMetricSqr);
+	return dot(e, kColorMetricSqr);
 }
 __constant__ const float alphaTable4[4] = { 9.0f, 0.0f, 6.0f, 3.0f };
@@ -290,114 +290,114 @@ __constant__ const uint prods3[4] = { 0x040000,0x000400,0x040101,0x010401 };
 __device__ float evalPermutation4(const float3 * colors, float3 color_sum, uint permutation, ushort * start, ushort * end)
 {
-    // Compute endpoints using least squares.
+	// Compute endpoints using least squares.
-    float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f);
+	float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f);
 	uint akku = 0;
-    // Compute alpha & beta for this permutation.
+	// Compute alpha & beta for this permutation.
 	#pragma unroll
-    for (int i = 0; i < 16; i++)
+	for (int i = 0; i < 16; i++)
-    {
+	{
 		const uint bits = permutation >> (2*i);
 		alphax_sum += alphaTable4[bits & 3] * colors[i];
-	    akku += prods4[bits & 3];
+		akku += prods4[bits & 3];
-    }
+	}
 	float alpha2_sum = float(akku >> 16);
 	float beta2_sum = float((akku >> 8) & 0xff);
 	float alphabeta_sum = float(akku & 0xff);
 	float3 betax_sum = 9.0f * color_sum - alphax_sum;
-    const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
+	const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
-    float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
+	float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
-    float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
+	float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
-    // Round a, b to the closest 5-6-5 color and expand...
+	// Round a, b to the closest 5-6-5 color and expand...
-    a = roundAndExpand(a, start);
+	a = roundAndExpand(a, start);
-    b = roundAndExpand(b, end);
+	b = roundAndExpand(b, end);
-    // compute the error
+	// compute the error
-    float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
+	float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
-    return (1.0f / 9.0f) * dot(e, kColorMetricSqr);
+	return (1.0f / 9.0f) * dot(e, kColorMetricSqr);
 }
 __device__ float evalPermutation3(const float3 * colors, float3 color_sum, uint permutation, ushort * start, ushort * end)
 {
-    // Compute endpoints using least squares.
+	// Compute endpoints using least squares.
-    float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f);
+	float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f);
 	uint akku = 0;
-    // Compute alpha & beta for this permutation.
+	// Compute alpha & beta for this permutation.
 	#pragma unroll
-    for (int i = 0; i < 16; i++)
+	for (int i = 0; i < 16; i++)
-    {
+	{
-        const uint bits = permutation >> (2*i);
+		const uint bits = permutation >> (2*i);
 		alphax_sum += alphaTable3[bits & 3] * colors[i];
-	    akku += prods3[bits & 3];
+		akku += prods3[bits & 3];
-    }
+	}
 	float alpha2_sum = float(akku >> 16);
 	float beta2_sum = float((akku >> 8) & 0xff);
 	float alphabeta_sum = float(akku & 0xff);
 	float3 betax_sum = 4.0f * color_sum - alphax_sum;
-    const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
+	const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
-    float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
+	float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
-    float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
+	float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
-    // Round a, b to the closest 5-6-5 color and expand...
+	// Round a, b to the closest 5-6-5 color and expand...
-    a = roundAndExpand(a, start);
+	a = roundAndExpand(a, start);
-    b = roundAndExpand(b, end);
+	b = roundAndExpand(b, end);
-    // compute the error
+	// compute the error
-    float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
+	float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
-    return (1.0f / 4.0f) * dot(e, kColorMetricSqr);
+	return (1.0f / 4.0f) * dot(e, kColorMetricSqr);
 }
 __device__ float evalPermutation4(const float3 * colors, const float * weights, float3 color_sum, uint permutation, ushort * start, ushort * end)
 {
-    // Compute endpoints using least squares.
+	// Compute endpoints using least squares.
-    float alpha2_sum = 0.0f;
+	float alpha2_sum = 0.0f;
-    float beta2_sum = 0.0f;
+	float beta2_sum = 0.0f;
-    float alphabeta_sum = 0.0f;
+	float alphabeta_sum = 0.0f;
-    float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f);
+	float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f);
-    // Compute alpha & beta for this permutation.
+	// Compute alpha & beta for this permutation.
-    for (int i = 0; i < 16; i++)
+	for (int i = 0; i < 16; i++)
-    {
+	{
-        const uint bits = permutation >> (2*i);
+		const uint bits = permutation >> (2*i);
-        float beta = (bits & 1);
+		float beta = (bits & 1);
-        if (bits & 2) beta = (1 + beta) / 3.0f;
+		if (bits & 2) beta = (1 + beta) / 3.0f;
-        float alpha = 1.0f - beta;
+		float alpha = 1.0f - beta;
 		alpha2_sum += alpha * alpha * weights[i];
-        beta2_sum += beta * beta * weights[i];
+		beta2_sum += beta * beta * weights[i];
-        alphabeta_sum += alpha * beta * weights[i];
+		alphabeta_sum += alpha * beta * weights[i];
-        alphax_sum += alpha * colors[i];
+		alphax_sum += alpha * colors[i];
-    }
+	}
 	float3 betax_sum = color_sum - alphax_sum;
-    const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
+	const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
-    float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
+	float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
-    float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
+	float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
-    // Round a, b to the closest 5-6-5 color and expand...
+	// Round a, b to the closest 5-6-5 color and expand...
-    a = roundAndExpand(a, start);
+	a = roundAndExpand(a, start);
-    b = roundAndExpand(b, end);
+	b = roundAndExpand(b, end);
-    // compute the error
+	// compute the error
-    float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
+	float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
-    return dot(e, kColorMetricSqr);
+	return dot(e, kColorMetricSqr);
 }
 /*
@@ -454,55 +454,55 @@ __device__ void evalAllPermutations(const float3 * colors, float3 colorSum, cons
 	__shared__ uint s_permutations[160];
-    for(int i = 0; i < 16; i++)
+	for(int i = 0; i < 16; i++)
-    {
+	{
 		int pidx = idx + NUM_THREADS * i;
 		if (pidx >= 992) break;
-        ushort start, end;
+		ushort start, end;
-        uint permutation = permutations[pidx];
+		uint permutation = permutations[pidx];
-        if (pidx < 160) s_permutations[pidx] = permutation;
+		if (pidx < 160) s_permutations[pidx] = permutation;
-        float error = evalPermutation4(colors, colorSum, permutation, &start, &end);
+		float error = evalPermutation4(colors, colorSum, permutation, &start, &end);
-        if (error < bestError)
+		if (error < bestError)
-        {
+		{
-            bestError = error;
+			bestError = error;
-            bestPermutation = permutation;
+			bestPermutation = permutation;
-            bestStart = start;
+			bestStart = start;
-            bestEnd = end;
+			bestEnd = end;
-        }
+		}
-    }
+	}
-    if (bestStart < bestEnd)
+	if (bestStart < bestEnd)
-    {
+	{
-        swap(bestEnd, bestStart);
+		swap(bestEnd, bestStart);
-        bestPermutation ^= 0x55555555;    // Flip indices.
+		bestPermutation ^= 0x55555555;	// Flip indices.
-    }
+	}
-    for(int i = 0; i < 3; i++)
+	for(int i = 0; i < 3; i++)
-    {
+	{
 		int pidx = idx + NUM_THREADS * i;
-        if (pidx >= 160) break;
+		if (pidx >= 160) break;
-        ushort start, end;
+		ushort start, end;
-        uint permutation = s_permutations[pidx];
+		uint permutation = s_permutations[pidx];
-        float error = evalPermutation3(colors, colorSum, permutation, &start, &end);
+		float error = evalPermutation3(colors, colorSum, permutation, &start, &end);
-        if (error < bestError)
+		if (error < bestError)
-        {
+		{
-            bestError = error;
+			bestError = error;
-            bestPermutation = permutation;
+			bestPermutation = permutation;
-            bestStart = start;
+			bestStart = start;
-            bestEnd = end;
+			bestEnd = end;
-            if (bestStart > bestEnd)
+			if (bestStart > bestEnd)
-            {
+			{
-                swap(bestEnd, bestStart);
+				swap(bestEnd, bestStart);
-                bestPermutation ^= (~bestPermutation >> 1) & 0x55555555;    // Flip indices.
+				bestPermutation ^= (~bestPermutation >> 1) & 0x55555555;	// Flip indices.
-            }
+			}
-        }
+		}
-    }
+	}
 	errors[idx] = bestError;
 }
@@ -516,55 +516,55 @@ __device__ void evalAllPermutations(const float3 * colors, const float * weights
 	__shared__ uint s_permutations[160];
-    for(int i = 0; i < 16; i++)
+	for(int i = 0; i < 16; i++)
-    {
+	{
 		int pidx = idx + NUM_THREADS * i;
 		if (pidx >= 992) break;
-        ushort start, end;
+		ushort start, end;
-        uint permutation = permutations[pidx];
+		uint permutation = permutations[pidx];
-        if (pidx < 160) s_permutations[pidx] = permutation;
+		if (pidx < 160) s_permutations[pidx] = permutation;
-        float error = evalPermutation4(colors, weights, permutation, &start, &end);
+		float error = evalPermutation4(colors, weights, permutation, &start, &end);
-        if (error < bestError)
+		if (error < bestError)
-        {
+		{
-            bestError = error;
+			bestError = error;
-            bestPermutation = permutation;
+			bestPermutation = permutation;
-            bestStart = start;
+			bestStart = start;
-            bestEnd = end;
+			bestEnd = end;
-        }
+		}
-    }
+	}
-    if (bestStart < bestEnd)
+	if (bestStart < bestEnd)
-    {
+	{
-        swap(bestEnd, bestStart);
+		swap(bestEnd, bestStart);
-        bestPermutation ^= 0x55555555;    // Flip indices.
+		bestPermutation ^= 0x55555555;	// Flip indices.
-    }
+	}
-    for(int i = 0; i < 3; i++)
+	for(int i = 0; i < 3; i++)
-    {
+	{
 		int pidx = idx + NUM_THREADS * i;
-        if (pidx >= 160) break;
+		if (pidx >= 160) break;
-        ushort start, end;
+		ushort start, end;
-        uint permutation = s_permutations[pidx];
+		uint permutation = s_permutations[pidx];
-        float error = evalPermutation3(colors, weights, permutation, &start, &end);
+		float error = evalPermutation3(colors, weights, permutation, &start, &end);
-        if (error < bestError)
+		if (error < bestError)
-        {
+		{
-            bestError = error;
+			bestError = error;
-            bestPermutation = permutation;
+			bestPermutation = permutation;
-            bestStart = start;
+			bestStart = start;
-            bestEnd = end;
+			bestEnd = end;
-            if (bestStart > bestEnd)
+			if (bestStart > bestEnd)
-            {
+			{
-                swap(bestEnd, bestStart);
+				swap(bestEnd, bestStart);
-                bestPermutation ^= (~bestPermutation >> 1) & 0x55555555;    // Flip indices.
+				bestPermutation ^= (~bestPermutation >> 1) & 0x55555555;	// Flip indices.
-            }
+			}
-        }
+		}
-    }
+	}
 	errors[idx] = bestError;
 }
@@ -576,30 +576,30 @@ __device__ void evalLevel4Permutations(const float3 * colors, const float * weig
 	float bestError = FLT_MAX;
-    for(int i = 0; i < 16; i++)
+	for(int i = 0; i < 16; i++)
-    {
+	{
 		int pidx = idx + NUM_THREADS * i;
 		if (pidx >= 992) break;
-        ushort start, end;
+		ushort start, end;
-        uint permutation = permutations[pidx];
+		uint permutation = permutations[pidx];
-        float error = evalPermutation4(colors, weights, colorSum, permutation, &start, &end);
+		float error = evalPermutation4(colors, weights, colorSum, permutation, &start, &end);
-        if (error < bestError)
+		if (error < bestError)
-        {
+		{
-            bestError = error;
+			bestError = error;
-            bestPermutation = permutation;
+			bestPermutation = permutation;
-            bestStart = start;
+			bestStart = start;
-            bestEnd = end;
+			bestEnd = end;
-        }
+		}
-    }
+	}
-    if (bestStart < bestEnd)
+	if (bestStart < bestEnd)
-    {
+	{
-        swap(bestEnd, bestStart);
+		swap(bestEnd, bestStart);
-        bestPermutation ^= 0x55555555;    // Flip indices.
+		bestPermutation ^= 0x55555555;	// Flip indices.
-    }
+	}
 	errors[idx] = bestError;
 }
@@ -812,7 +812,7 @@ __device__ float computeError(const float weights[16], uchar a0, uchar a1)
 inline __device__ uchar roundAndExpand(float a)
 {
-    return rintf(__saturatef(a) * 255.0f);
+	return rintf(__saturatef(a) * 255.0f);
 }
 */
 /*
@@ -833,16 +833,16 @@ __device__ void optimizeAlpha8(const float alphas[16], uchar & a0, uchar & a1)
 		float beta = 1 - alpha;
-        alpha2_sum += alpha * alpha;
+		alpha2_sum += alpha * alpha;
-        beta2_sum += beta * beta;
+		beta2_sum += beta * beta;
-        alphabeta_sum += alpha * beta;
+		alphabeta_sum += alpha * beta;
-        alphax_sum += alpha * alphas[i];
+		alphax_sum += alpha * alphas[i];
-        betax_sum += beta * alphas[i];
+		betax_sum += beta * alphas[i];
 	}
-    const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
+	const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
-    float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
+	float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
 	float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
 	a0 = roundAndExpand(a);