Use tabs instead of spaces.

This commit is contained in:
castano 2008-01-18 23:51:20 +00:00
parent c8ac20ee0f
commit 64020a4cb7

View File

@ -189,14 +189,14 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Quantizes a color to 5-6-5 bits and returns the quantized color as floats.
//
// v: input color; every channel is clamped to [0,1] by __saturatef before scaling.
// w: receives the packed 16-bit value (x in bits 15..11, y in bits 10..5, z in bits 4..0).
// Returns the rounded channel levels multiplied by the per-channel constants below.
inline __device__ float3 roundAndExpand(float3 v, ushort * w)
{
	// Nearest integer level per channel: 0..31 for x and z, 0..63 for y.
	const float level_x = rintf(__saturatef(v.x) * 31.0f);
	const float level_y = rintf(__saturatef(v.y) * 63.0f);
	const float level_z = rintf(__saturatef(v.z) * 31.0f);

	*w = ((ushort)level_x << 11) | ((ushort)level_y << 5) | (ushort)level_z;

	// Approximate integer bit expansion.
	return make_float3(
		level_x * 0.03227752766457f,
		level_y * 0.01583151765563f,
		level_z * 0.03227752766457f);
}
@ -205,82 +205,82 @@ inline __device__ float3 roundAndExpand(float3 v, ushort * w)
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Least-squares endpoint fit for one 4-color index assignment.
//
// colors:      the 16 texel colors of the block.
// permutation: 2 bits per texel (texel i uses bits 2i and 2i+1). Bit 0 alone selects
//              beta = 0 or 1; when bit 1 is set, beta = (1 + bit0) / 3.
// start, end:  receive the 5-6-5 packed endpoints (written by roundAndExpand).
// Returns dot(e, kColorMetricSqr), where e is the expanded squared error of the
// quantized endpoints without the constant sum-of-squared-colors term.
//
// NOTE(review): the determinant below is zero when every texel has the same beta;
// there is no guard for that case here -- confirm the permutation table or callers
// make this acceptable.
__device__ float evalPermutation4(const float3 * colors, uint permutation, ushort * start, ushort * end)
{
	float sumAA = 0.0f;
	float sumBB = 0.0f;
	float sumAB = 0.0f;
	float3 sumAX = make_float3(0.0f, 0.0f, 0.0f);
	float3 sumBX = make_float3(0.0f, 0.0f, 0.0f);

	// Accumulate the normal-equation terms over all 16 texels.
	for (int texel = 0; texel < 16; texel++)
	{
		const uint index = permutation >> (2*texel);
		const float lowBit = (index & 1);
		const float beta = (index & 2) ? (1 + lowBit) / 3.0f : lowBit;
		const float alpha = 1.0f - beta;

		sumAA += alpha * alpha;
		sumBB += beta * beta;
		sumAB += alpha * beta;
		sumAX += alpha * colors[texel];
		sumBX += beta * colors[texel];
	}

	// Solve the 2x2 system for the two endpoints.
	const float invDet = 1.0f / (sumAA * sumBB - sumAB * sumAB);
	float3 endpointA = (sumAX * sumBB - sumBX * sumAB) * invDet;
	float3 endpointB = (sumBX * sumAA - sumAX * sumAB) * invDet;

	// Snap both endpoints to the closest 5-6-5 color and expand them back.
	endpointA = roundAndExpand(endpointA, start);
	endpointB = roundAndExpand(endpointB, end);

	// Error of the quantized endpoints.
	float3 e = endpointA * endpointA * sumAA + endpointB * endpointB * sumBB + 2.0f * (endpointA * endpointB * sumAB - endpointA * sumAX - endpointB * sumBX);

	return dot(e, kColorMetricSqr);
}
// Least-squares endpoint fit for one 3-color index assignment.
//
// colors:      the 16 texel colors of the block.
// permutation: 2 bits per texel (texel i uses bits 2i and 2i+1). Bit 0 alone selects
//              beta = 0 or 1; whenever bit 1 is set, beta = 0.5.
// start, end:  receive the 5-6-5 packed endpoints (written by roundAndExpand).
// Returns dot(e, kColorMetricSqr), where e is the expanded squared error of the
// quantized endpoints without the constant sum-of-squared-colors term.
//
// NOTE(review): the determinant below is zero when every texel has the same beta;
// there is no guard for that case here -- confirm the permutation table or callers
// make this acceptable.
__device__ float evalPermutation3(const float3 * colors, uint permutation, ushort * start, ushort * end)
{
	float sumAA = 0.0f;
	float sumBB = 0.0f;
	float sumAB = 0.0f;
	float3 sumAX = make_float3(0.0f, 0.0f, 0.0f);
	float3 sumBX = make_float3(0.0f, 0.0f, 0.0f);

	// Accumulate the normal-equation terms over all 16 texels.
	for (int texel = 0; texel < 16; texel++)
	{
		const uint index = permutation >> (2*texel);
		const float lowBit = (index & 1);
		const float beta = (index & 2) ? 0.5f : lowBit;
		const float alpha = 1.0f - beta;

		sumAA += alpha * alpha;
		sumBB += beta * beta;
		sumAB += alpha * beta;
		sumAX += alpha * colors[texel];
		sumBX += beta * colors[texel];
	}

	// Solve the 2x2 system for the two endpoints.
	const float invDet = 1.0f / (sumAA * sumBB - sumAB * sumAB);
	float3 endpointA = (sumAX * sumBB - sumBX * sumAB) * invDet;
	float3 endpointB = (sumBX * sumAA - sumAX * sumAB) * invDet;

	// Snap both endpoints to the closest 5-6-5 color and expand them back.
	endpointA = roundAndExpand(endpointA, start);
	endpointB = roundAndExpand(endpointB, end);

	// Error of the quantized endpoints.
	float3 e = endpointA * endpointA * sumAA + endpointB * endpointB * sumBB + 2.0f * (endpointA * endpointB * sumAB - endpointA * sumAX - endpointB * sumBX);

	return dot(e, kColorMetricSqr);
}
// Alpha weight looked up by 2-bit texel index in the table-driven evalPermutation4.
// The entries equal 9 x (1, 0, 2/3, 1/3).
__constant__ const float alphaTable4[4] =
{
	9.0f, 0.0f, 6.0f, 3.0f
};
@ -290,114 +290,114 @@ __constant__ const uint prods3[4] = { 0x040000,0x000400,0x040101,0x010401 };
// Table-driven least-squares endpoint fit for one 4-color index assignment.
//
// colors:      the 16 texel colors of the block.
// color_sum:   used to derive the beta-weighted sum as 9 * color_sum - alpha-weighted sum;
//              presumably the sum of colors[0..15] -- TODO confirm against the caller.
// permutation: 2 bits per texel (texel i uses bits 2i and 2i+1), used as an index
//              into alphaTable4 and prods4.
// start, end:  receive the 5-6-5 packed endpoints (written by roundAndExpand).
// Returns (1/9) * dot(e, kColorMetricSqr).
//
// NOTE(review): no guard against a zero determinant in the reciprocal below.
__device__ float evalPermutation4(const float3 * colors, float3 color_sum, uint permutation, ushort * start, ushort * end)
{
	float3 sumAX = make_float3(0.0f, 0.0f, 0.0f);
	uint packedSums = 0;

	// Accumulate the alpha-weighted colors and the packed scalar sums.
	#pragma unroll
	for (int texel = 0; texel < 16; texel++)
	{
		const uint index = (permutation >> (2*texel)) & 3;
		sumAX += alphaTable4[index] * colors[texel];
		packedSums += prods4[index];
	}

	// Unpack the accumulator: alpha^2 above bit 16, beta^2 in bits 8..15, alpha*beta in bits 0..7.
	const float sumAA = float(packedSums >> 16);
	const float sumBB = float((packedSums >> 8) & 0xff);
	const float sumAB = float(packedSums & 0xff);
	const float3 sumBX = 9.0f * color_sum - sumAX;

	// Solve the 2x2 system for the two endpoints.
	const float invDet = 1.0f / (sumAA * sumBB - sumAB * sumAB);
	float3 endpointA = (sumAX * sumBB - sumBX * sumAB) * invDet;
	float3 endpointB = (sumBX * sumAA - sumAX * sumAB) * invDet;

	// Snap both endpoints to the closest 5-6-5 color and expand them back.
	endpointA = roundAndExpand(endpointA, start);
	endpointB = roundAndExpand(endpointB, end);

	// Error of the quantized endpoints.
	float3 e = endpointA * endpointA * sumAA + endpointB * endpointB * sumBB + 2.0f * (endpointA * endpointB * sumAB - endpointA * sumAX - endpointB * sumBX);

	return (1.0f / 9.0f) * dot(e, kColorMetricSqr);
}
// Table-driven least-squares endpoint fit for one 3-color index assignment.
//
// colors:      the 16 texel colors of the block.
// color_sum:   used to derive the beta-weighted sum as 4 * color_sum - alpha-weighted sum;
//              presumably the sum of colors[0..15] -- TODO confirm against the caller.
// permutation: 2 bits per texel (texel i uses bits 2i and 2i+1), used as an index
//              into alphaTable3 and prods3.
// start, end:  receive the 5-6-5 packed endpoints (written by roundAndExpand).
// Returns (1/4) * dot(e, kColorMetricSqr).
//
// NOTE(review): no guard against a zero determinant in the reciprocal below.
__device__ float evalPermutation3(const float3 * colors, float3 color_sum, uint permutation, ushort * start, ushort * end)
{
	float3 sumAX = make_float3(0.0f, 0.0f, 0.0f);
	uint packedSums = 0;

	// Accumulate the alpha-weighted colors and the packed scalar sums.
	#pragma unroll
	for (int texel = 0; texel < 16; texel++)
	{
		const uint index = (permutation >> (2*texel)) & 3;
		sumAX += alphaTable3[index] * colors[texel];
		packedSums += prods3[index];
	}

	// Unpack the accumulator: alpha^2 above bit 16, beta^2 in bits 8..15, alpha*beta in bits 0..7.
	const float sumAA = float(packedSums >> 16);
	const float sumBB = float((packedSums >> 8) & 0xff);
	const float sumAB = float(packedSums & 0xff);
	const float3 sumBX = 4.0f * color_sum - sumAX;

	// Solve the 2x2 system for the two endpoints.
	const float invDet = 1.0f / (sumAA * sumBB - sumAB * sumAB);
	float3 endpointA = (sumAX * sumBB - sumBX * sumAB) * invDet;
	float3 endpointB = (sumBX * sumAA - sumAX * sumAB) * invDet;

	// Snap both endpoints to the closest 5-6-5 color and expand them back.
	endpointA = roundAndExpand(endpointA, start);
	endpointB = roundAndExpand(endpointB, end);

	// Error of the quantized endpoints.
	float3 e = endpointA * endpointA * sumAA + endpointB * endpointB * sumBB + 2.0f * (endpointA * endpointB * sumAB - endpointA * sumAX - endpointB * sumBX);

	return (1.0f / 4.0f) * dot(e, kColorMetricSqr);
}
// Weighted least-squares endpoint fit for one 4-color index assignment.
//
// colors:      the 16 texel colors of the block. They are NOT multiplied by weights[]
//              here; presumably they arrive pre-weighted -- TODO confirm against the caller.
// weights:     per-texel weights applied to the alpha^2, beta^2 and alpha*beta sums.
// color_sum:   used to derive the beta-weighted sum as color_sum - alpha-weighted sum;
//              presumably the sum of colors[0..15] -- TODO confirm against the caller.
// permutation: 2 bits per texel (texel i uses bits 2i and 2i+1). Bit 0 alone selects
//              beta = 0 or 1; when bit 1 is set, beta = (1 + bit0) / 3.
// start, end:  receive the 5-6-5 packed endpoints (written by roundAndExpand).
// Returns dot(e, kColorMetricSqr).
//
// NOTE(review): no guard against a zero determinant in the reciprocal below.
__device__ float evalPermutation4(const float3 * colors, const float * weights, float3 color_sum, uint permutation, ushort * start, ushort * end)
{
	float sumAA = 0.0f;
	float sumBB = 0.0f;
	float sumAB = 0.0f;
	float3 sumAX = make_float3(0.0f, 0.0f, 0.0f);

	// Accumulate the weighted normal-equation terms over all 16 texels.
	for (int texel = 0; texel < 16; texel++)
	{
		const uint index = permutation >> (2*texel);
		const float lowBit = (index & 1);
		const float beta = (index & 2) ? (1 + lowBit) / 3.0f : lowBit;
		const float alpha = 1.0f - beta;
		const float weight = weights[texel];

		sumAA += alpha * alpha * weight;
		sumBB += beta * beta * weight;
		sumAB += alpha * beta * weight;
		sumAX += alpha * colors[texel];
	}

	// beta = 1 - alpha, so the beta-weighted sum is the remainder of color_sum.
	const float3 sumBX = color_sum - sumAX;

	// Solve the 2x2 system for the two endpoints.
	const float invDet = 1.0f / (sumAA * sumBB - sumAB * sumAB);
	float3 endpointA = (sumAX * sumBB - sumBX * sumAB) * invDet;
	float3 endpointB = (sumBX * sumAA - sumAX * sumAB) * invDet;

	// Snap both endpoints to the closest 5-6-5 color and expand them back.
	endpointA = roundAndExpand(endpointA, start);
	endpointB = roundAndExpand(endpointB, end);

	// Error of the quantized endpoints.
	float3 e = endpointA * endpointA * sumAA + endpointB * endpointB * sumBB + 2.0f * (endpointA * endpointB * sumAB - endpointA * sumAX - endpointB * sumBX);

	return dot(e, kColorMetricSqr);
}
/* /*
@ -454,55 +454,55 @@ __device__ void evalAllPermutations(const float3 * colors, float3 colorSum, cons
__shared__ uint s_permutations[160]; __shared__ uint s_permutations[160];
for(int i = 0; i < 16; i++) for(int i = 0; i < 16; i++)
{ {
int pidx = idx + NUM_THREADS * i; int pidx = idx + NUM_THREADS * i;
if (pidx >= 992) break; if (pidx >= 992) break;
ushort start, end; ushort start, end;
uint permutation = permutations[pidx]; uint permutation = permutations[pidx];
if (pidx < 160) s_permutations[pidx] = permutation; if (pidx < 160) s_permutations[pidx] = permutation;
float error = evalPermutation4(colors, colorSum, permutation, &start, &end); float error = evalPermutation4(colors, colorSum, permutation, &start, &end);
if (error < bestError) if (error < bestError)
{ {
bestError = error; bestError = error;
bestPermutation = permutation; bestPermutation = permutation;
bestStart = start; bestStart = start;
bestEnd = end; bestEnd = end;
} }
} }
if (bestStart < bestEnd) if (bestStart < bestEnd)
{ {
swap(bestEnd, bestStart); swap(bestEnd, bestStart);
bestPermutation ^= 0x55555555; // Flip indices. bestPermutation ^= 0x55555555; // Flip indices.
} }
for(int i = 0; i < 3; i++) for(int i = 0; i < 3; i++)
{ {
int pidx = idx + NUM_THREADS * i; int pidx = idx + NUM_THREADS * i;
if (pidx >= 160) break; if (pidx >= 160) break;
ushort start, end; ushort start, end;
uint permutation = s_permutations[pidx]; uint permutation = s_permutations[pidx];
float error = evalPermutation3(colors, colorSum, permutation, &start, &end); float error = evalPermutation3(colors, colorSum, permutation, &start, &end);
if (error < bestError) if (error < bestError)
{ {
bestError = error; bestError = error;
bestPermutation = permutation; bestPermutation = permutation;
bestStart = start; bestStart = start;
bestEnd = end; bestEnd = end;
if (bestStart > bestEnd) if (bestStart > bestEnd)
{ {
swap(bestEnd, bestStart); swap(bestEnd, bestStart);
bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices. bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices.
} }
} }
} }
errors[idx] = bestError; errors[idx] = bestError;
} }
@ -516,55 +516,55 @@ __device__ void evalAllPermutations(const float3 * colors, const float * weights
__shared__ uint s_permutations[160]; __shared__ uint s_permutations[160];
for(int i = 0; i < 16; i++) for(int i = 0; i < 16; i++)
{ {
int pidx = idx + NUM_THREADS * i; int pidx = idx + NUM_THREADS * i;
if (pidx >= 992) break; if (pidx >= 992) break;
ushort start, end; ushort start, end;
uint permutation = permutations[pidx]; uint permutation = permutations[pidx];
if (pidx < 160) s_permutations[pidx] = permutation; if (pidx < 160) s_permutations[pidx] = permutation;
float error = evalPermutation4(colors, weights, permutation, &start, &end); float error = evalPermutation4(colors, weights, permutation, &start, &end);
if (error < bestError) if (error < bestError)
{ {
bestError = error; bestError = error;
bestPermutation = permutation; bestPermutation = permutation;
bestStart = start; bestStart = start;
bestEnd = end; bestEnd = end;
} }
} }
if (bestStart < bestEnd) if (bestStart < bestEnd)
{ {
swap(bestEnd, bestStart); swap(bestEnd, bestStart);
bestPermutation ^= 0x55555555; // Flip indices. bestPermutation ^= 0x55555555; // Flip indices.
} }
for(int i = 0; i < 3; i++) for(int i = 0; i < 3; i++)
{ {
int pidx = idx + NUM_THREADS * i; int pidx = idx + NUM_THREADS * i;
if (pidx >= 160) break; if (pidx >= 160) break;
ushort start, end; ushort start, end;
uint permutation = s_permutations[pidx]; uint permutation = s_permutations[pidx];
float error = evalPermutation3(colors, weights, permutation, &start, &end); float error = evalPermutation3(colors, weights, permutation, &start, &end);
if (error < bestError) if (error < bestError)
{ {
bestError = error; bestError = error;
bestPermutation = permutation; bestPermutation = permutation;
bestStart = start; bestStart = start;
bestEnd = end; bestEnd = end;
if (bestStart > bestEnd) if (bestStart > bestEnd)
{ {
swap(bestEnd, bestStart); swap(bestEnd, bestStart);
bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices. bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices.
} }
} }
} }
errors[idx] = bestError; errors[idx] = bestError;
} }
@ -576,30 +576,30 @@ __device__ void evalLevel4Permutations(const float3 * colors, const float * weig
float bestError = FLT_MAX; float bestError = FLT_MAX;
for(int i = 0; i < 16; i++) for(int i = 0; i < 16; i++)
{ {
int pidx = idx + NUM_THREADS * i; int pidx = idx + NUM_THREADS * i;
if (pidx >= 992) break; if (pidx >= 992) break;
ushort start, end; ushort start, end;
uint permutation = permutations[pidx]; uint permutation = permutations[pidx];
float error = evalPermutation4(colors, weights, colorSum, permutation, &start, &end); float error = evalPermutation4(colors, weights, colorSum, permutation, &start, &end);
if (error < bestError) if (error < bestError)
{ {
bestError = error; bestError = error;
bestPermutation = permutation; bestPermutation = permutation;
bestStart = start; bestStart = start;
bestEnd = end; bestEnd = end;
} }
} }
if (bestStart < bestEnd) if (bestStart < bestEnd)
{ {
swap(bestEnd, bestStart); swap(bestEnd, bestStart);
bestPermutation ^= 0x55555555; // Flip indices. bestPermutation ^= 0x55555555; // Flip indices.
} }
errors[idx] = bestError; errors[idx] = bestError;
} }
@ -812,7 +812,7 @@ __device__ float computeError(const float weights[16], uchar a0, uchar a1)
// Clamps a to [0,1], scales it to 0..255 and rounds to the nearest integer level.
// (Line comments only: this definition sits inside a block-commented region.)
inline __device__ uchar roundAndExpand(float a)
{
	const float scaled = __saturatef(a) * 255.0f;
	return rintf(scaled);
}
*/ */
/* /*
@ -833,16 +833,16 @@ __device__ void optimizeAlpha8(const float alphas[16], uchar & a0, uchar & a1)
float beta = 1 - alpha; float beta = 1 - alpha;
alpha2_sum += alpha * alpha; alpha2_sum += alpha * alpha;
beta2_sum += beta * beta; beta2_sum += beta * beta;
alphabeta_sum += alpha * beta; alphabeta_sum += alpha * beta;
alphax_sum += alpha * alphas[i]; alphax_sum += alpha * alphas[i];
betax_sum += beta * alphas[i]; betax_sum += beta * alphas[i];
} }
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
a0 = roundAndExpand(a); a0 = roundAndExpand(a);