Use tabs instead of spaces.
This commit is contained in:
parent
c8ac20ee0f
commit
64020a4cb7
@ -189,14 +189,14 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Quantizes a color to 5-6-5 precision.
//
// v: input color; each component is clamped to [0, 1] before quantization.
// w: output; receives the packed 16-bit value (x in bits 11..15, y in bits
//    5..10, z in bits 0..4).
//
// Returns the quantized levels rescaled by per-channel constants that
// approximate integer bit expansion.
inline __device__ float3 roundAndExpand(float3 v, ushort * w)
{
	// Clamp, scale to the channel's maximum level (31 / 63 / 31) and round.
	const float levelX = rintf(__saturatef(v.x) * 31.0f);
	const float levelY = rintf(__saturatef(v.y) * 63.0f);
	const float levelZ = rintf(__saturatef(v.z) * 31.0f);

	// Pack the three integer levels into one 5-6-5 word.
	*w = ((ushort)levelX << 11) | ((ushort)levelY << 5) | (ushort)levelZ;

	// approximate integer bit expansion.
	return make_float3(levelX * 0.03227752766457f,
	                   levelY * 0.01583151765563f,
	                   levelZ * 0.03227752766457f);
}
|
||||||
|
|
||||||
|
|
||||||
@ -205,82 +205,82 @@ inline __device__ float3 roundAndExpand(float3 v, ushort * w)
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Evaluates one 4-color index assignment for a block of 16 colors.
//
// colors:      the 16 colors of the block.
// permutation: 16 two-bit indices, texel 0 in the lowest bits.
// start, end:  outputs; receive the fitted endpoints packed as 5-6-5.
//
// Fits both endpoints by least squares, quantizes them with roundAndExpand
// and returns the fit error weighted by kColorMetricSqr. The error is the
// expansion of sum((alpha*a + beta*b - x)^2) without the sum(x^2) term, so it
// is only meaningful for comparing permutations of the same block.
//
// NOTE(review): the determinant is zero when every texel uses the same index
// (e.g. permutation == 0) and the reciprocal is not guarded.
__device__ float evalPermutation4(const float3 * colors, uint permutation, ushort * start, ushort * end)
{
	// Accumulators for the 2x2 normal equations.
	float3 sumAlphaX = make_float3(0.0f, 0.0f, 0.0f);
	float3 sumBetaX = make_float3(0.0f, 0.0f, 0.0f);
	float sumAlpha2 = 0.0f;
	float sumBeta2 = 0.0f;
	float sumAlphaBeta = 0.0f;

	// Consume one 2-bit index per texel, starting from the low bits.
	uint packed = permutation;
	for (int texel = 0; texel < 16; ++texel, packed >>= 2)
	{
		// Index 0 -> beta 0, 1 -> beta 1, 2 -> beta 1/3, 3 -> beta 2/3.
		float beta = (packed & 1) ? 1.0f : 0.0f;
		if (packed & 2)
		{
			beta = (1.0f + beta) / 3.0f;
		}
		const float alpha = 1.0f - beta;
		const float3 color = colors[texel];

		sumAlpha2 += alpha * alpha;
		sumBeta2 += beta * beta;
		sumAlphaBeta += alpha * beta;
		sumAlphaX += alpha * color;
		sumBetaX += beta * color;
	}

	// Solve the normal equations (Cramer's rule).
	const float invDet = 1.0f / (sumAlpha2 * sumBeta2 - sumAlphaBeta * sumAlphaBeta);

	float3 endA = (sumAlphaX * sumBeta2 - sumBetaX * sumAlphaBeta) * invDet;
	float3 endB = (sumBetaX * sumAlpha2 - sumAlphaX * sumAlphaBeta) * invDet;

	// Snap both endpoints to the closest 5-6-5 color and expand them again.
	endA = roundAndExpand(endA, start);
	endB = roundAndExpand(endB, end);

	// Per-channel error of the quantized endpoints.
	const float3 residual = endA * endA * sumAlpha2 + endB * endB * sumBeta2 + 2.0f * (endA * endB * sumAlphaBeta - endA * sumAlphaX - endB * sumBetaX);

	return dot(residual, kColorMetricSqr);
}
|
||||||
|
|
||||||
// Evaluates one 3-color index assignment for a block of 16 colors.
//
// colors:      the 16 colors of the block.
// permutation: 16 two-bit indices, texel 0 in the lowest bits.
// start, end:  outputs; receive the fitted endpoints packed as 5-6-5.
//
// Same least-squares fit as the 4-color variant, except that any index with
// bit 1 set selects the midpoint (beta = 0.5). Returns the fit error weighted
// by kColorMetricSqr, without the sum(x^2) term.
//
// NOTE(review): the determinant is zero when every texel uses the same index
// and the reciprocal is not guarded.
__device__ float evalPermutation3(const float3 * colors, uint permutation, ushort * start, ushort * end)
{
	// Accumulators for the 2x2 normal equations.
	float3 sumAlphaX = make_float3(0.0f, 0.0f, 0.0f);
	float3 sumBetaX = make_float3(0.0f, 0.0f, 0.0f);
	float sumAlpha2 = 0.0f;
	float sumBeta2 = 0.0f;
	float sumAlphaBeta = 0.0f;

	// Consume one 2-bit index per texel, starting from the low bits.
	uint packed = permutation;
	for (int texel = 0; texel < 16; ++texel, packed >>= 2)
	{
		// Index 0 -> beta 0, 1 -> beta 1, 2 and 3 -> beta 1/2.
		float beta = (packed & 1) ? 1.0f : 0.0f;
		if (packed & 2)
		{
			beta = 0.5f;
		}
		const float alpha = 1.0f - beta;
		const float3 color = colors[texel];

		sumAlpha2 += alpha * alpha;
		sumBeta2 += beta * beta;
		sumAlphaBeta += alpha * beta;
		sumAlphaX += alpha * color;
		sumBetaX += beta * color;
	}

	// Solve the normal equations (Cramer's rule).
	const float invDet = 1.0f / (sumAlpha2 * sumBeta2 - sumAlphaBeta * sumAlphaBeta);

	float3 endA = (sumAlphaX * sumBeta2 - sumBetaX * sumAlphaBeta) * invDet;
	float3 endB = (sumBetaX * sumAlpha2 - sumAlphaX * sumAlphaBeta) * invDet;

	// Snap both endpoints to the closest 5-6-5 color and expand them again.
	endA = roundAndExpand(endA, start);
	endB = roundAndExpand(endB, end);

	// Per-channel error of the quantized endpoints.
	const float3 residual = endA * endA * sumAlpha2 + endB * endB * sumBeta2 + 2.0f * (endA * endB * sumAlphaBeta - endA * sumAlphaX - endB * sumBetaX);

	return dot(residual, kColorMetricSqr);
}
|
||||||
|
|
||||||
// Alpha weight for each 2-bit index in 4-color mode, used by the color_sum
// variant of evalPermutation4. The values match the weights { 1, 0, 2/3, 1/3 }
// of the unoptimized variant multiplied by 9; that function compensates with
// 9 * color_sum and a final 1/9 factor.
__constant__ const float alphaTable4[4] = { 9.0f, 0.0f, 6.0f, 3.0f };
|
__constant__ const float alphaTable4[4] = { 9.0f, 0.0f, 6.0f, 3.0f };
|
||||||
@ -290,114 +290,114 @@ __constant__ const uint prods3[4] = { 0x040000,0x000400,0x040101,0x010401 };
|
|||||||
|
|
||||||
// Table-driven evaluation of one 4-color index assignment.
//
// colors:      the 16 colors of the block.
// color_sum:   used as the sum of all 16 colors (beta sums are derived as
//              9 * color_sum - alpha sums).
// permutation: 16 two-bit indices, texel 0 in the lowest bits.
// start, end:  outputs; receive the fitted endpoints packed as 5-6-5.
//
// The scalar sums are accumulated as integers packed into one word by prods4:
// alpha^2 above bit 16, beta^2 in bits 8..15 and alpha*beta in bits 0..7.
// The final 1/9 factor undoes the scale of the tables.
//
// NOTE(review): the determinant is zero when every texel uses the same index
// and the reciprocal is not guarded.
__device__ float evalPermutation4(const float3 * colors, float3 color_sum, uint permutation, ushort * start, ushort * end)
{
	float3 sumAlphaX = make_float3(0.0f, 0.0f, 0.0f);
	uint packedProducts = 0;

	// Look up the weight and the packed products of every texel's index.
	#pragma unroll
	for (int texel = 0; texel < 16; ++texel)
	{
		const uint index = (permutation >> (texel * 2)) & 3;

		sumAlphaX += alphaTable4[index] * colors[texel];
		packedProducts += prods4[index];
	}

	// Unpack the three integer sums.
	const float sumAlpha2 = float(packedProducts >> 16);
	const float sumBeta2 = float((packedProducts >> 8) & 0xff);
	const float sumAlphaBeta = float(packedProducts & 0xff);

	// Derive the beta-weighted sum from the total instead of accumulating it.
	const float3 sumBetaX = 9.0f * color_sum - sumAlphaX;

	// Solve the normal equations (Cramer's rule).
	const float invDet = 1.0f / (sumAlpha2 * sumBeta2 - sumAlphaBeta * sumAlphaBeta);

	float3 endA = (sumAlphaX * sumBeta2 - sumBetaX * sumAlphaBeta) * invDet;
	float3 endB = (sumBetaX * sumAlpha2 - sumAlphaX * sumAlphaBeta) * invDet;

	// Snap both endpoints to the closest 5-6-5 color and expand them again.
	endA = roundAndExpand(endA, start);
	endB = roundAndExpand(endB, end);

	// Per-channel error of the quantized endpoints (still scaled by 9).
	const float3 residual = endA * endA * sumAlpha2 + endB * endB * sumBeta2 + 2.0f * (endA * endB * sumAlphaBeta - endA * sumAlphaX - endB * sumBetaX);

	return (1.0f / 9.0f) * dot(residual, kColorMetricSqr);
}
|
||||||
|
|
||||||
// Table-driven evaluation of one 3-color index assignment.
//
// colors:      the 16 colors of the block.
// color_sum:   used as the sum of all 16 colors (beta sums are derived as
//              4 * color_sum - alpha sums).
// permutation: 16 two-bit indices, texel 0 in the lowest bits.
// start, end:  outputs; receive the fitted endpoints packed as 5-6-5.
//
// The scalar sums are accumulated as integers packed into one word by prods3:
// alpha^2 above bit 16, beta^2 in bits 8..15 and alpha*beta in bits 0..7.
// The final 1/4 factor presumably undoes the scale of alphaTable3 / prods3
// (verify against the table definitions).
//
// NOTE(review): the determinant is zero when every texel uses the same index
// and the reciprocal is not guarded.
__device__ float evalPermutation3(const float3 * colors, float3 color_sum, uint permutation, ushort * start, ushort * end)
{
	float3 sumAlphaX = make_float3(0.0f, 0.0f, 0.0f);
	uint packedProducts = 0;

	// Look up the weight and the packed products of every texel's index.
	#pragma unroll
	for (int texel = 0; texel < 16; ++texel)
	{
		const uint index = (permutation >> (texel * 2)) & 3;

		sumAlphaX += alphaTable3[index] * colors[texel];
		packedProducts += prods3[index];
	}

	// Unpack the three integer sums.
	const float sumAlpha2 = float(packedProducts >> 16);
	const float sumBeta2 = float((packedProducts >> 8) & 0xff);
	const float sumAlphaBeta = float(packedProducts & 0xff);

	// Derive the beta-weighted sum from the total instead of accumulating it.
	const float3 sumBetaX = 4.0f * color_sum - sumAlphaX;

	// Solve the normal equations (Cramer's rule).
	const float invDet = 1.0f / (sumAlpha2 * sumBeta2 - sumAlphaBeta * sumAlphaBeta);

	float3 endA = (sumAlphaX * sumBeta2 - sumBetaX * sumAlphaBeta) * invDet;
	float3 endB = (sumBetaX * sumAlpha2 - sumAlphaX * sumAlphaBeta) * invDet;

	// Snap both endpoints to the closest 5-6-5 color and expand them again.
	endA = roundAndExpand(endA, start);
	endB = roundAndExpand(endB, end);

	// Per-channel error of the quantized endpoints (still scaled by 4).
	const float3 residual = endA * endA * sumAlpha2 + endB * endB * sumBeta2 + 2.0f * (endA * endB * sumAlphaBeta - endA * sumAlphaX - endB * sumBetaX);

	return (1.0f / 4.0f) * dot(residual, kColorMetricSqr);
}
|
||||||
|
|
||||||
// Weighted evaluation of one 4-color index assignment.
//
// colors:      the 16 colors of the block; they are accumulated without the
//              weight, so they are presumably already multiplied by their
//              weights (TODO confirm against the caller).
// weights:     16 per-texel weights applied to the alpha/beta products.
// color_sum:   used as the sum of all entries of 'colors' (beta sums are
//              derived as color_sum - alpha sums).
// permutation: 16 two-bit indices, texel 0 in the lowest bits.
// start, end:  outputs; receive the fitted endpoints packed as 5-6-5.
//
// Returns the fit error weighted by kColorMetricSqr, without the constant
// term that does not depend on the endpoints.
//
// NOTE(review): the reciprocal of the determinant is not guarded against a
// zero determinant (e.g. every texel using the same index).
__device__ float evalPermutation4(const float3 * colors, const float * weights, float3 color_sum, uint permutation, ushort * start, ushort * end)
{
	// Accumulators for the 2x2 normal equations.
	float3 sumAlphaX = make_float3(0.0f, 0.0f, 0.0f);
	float sumAlpha2 = 0.0f;
	float sumBeta2 = 0.0f;
	float sumAlphaBeta = 0.0f;

	// Consume one 2-bit index per texel, starting from the low bits.
	uint packed = permutation;
	for (int texel = 0; texel < 16; ++texel, packed >>= 2)
	{
		// Index 0 -> beta 0, 1 -> beta 1, 2 -> beta 1/3, 3 -> beta 2/3.
		float beta = (packed & 1) ? 1.0f : 0.0f;
		if (packed & 2)
		{
			beta = (1.0f + beta) / 3.0f;
		}
		const float alpha = 1.0f - beta;
		const float weight = weights[texel];

		sumAlpha2 += alpha * alpha * weight;
		sumBeta2 += beta * beta * weight;
		sumAlphaBeta += alpha * beta * weight;
		sumAlphaX += alpha * colors[texel];
	}

	// Derive the beta-weighted sum from the total instead of accumulating it.
	const float3 sumBetaX = color_sum - sumAlphaX;

	// Solve the normal equations (Cramer's rule).
	const float invDet = 1.0f / (sumAlpha2 * sumBeta2 - sumAlphaBeta * sumAlphaBeta);

	float3 endA = (sumAlphaX * sumBeta2 - sumBetaX * sumAlphaBeta) * invDet;
	float3 endB = (sumBetaX * sumAlpha2 - sumAlphaX * sumAlphaBeta) * invDet;

	// Snap both endpoints to the closest 5-6-5 color and expand them again.
	endA = roundAndExpand(endA, start);
	endB = roundAndExpand(endB, end);

	// Per-channel error of the quantized endpoints.
	const float3 residual = endA * endA * sumAlpha2 + endB * endB * sumBeta2 + 2.0f * (endA * endB * sumAlphaBeta - endA * sumAlphaX - endB * sumBetaX);

	return dot(residual, kColorMetricSqr);
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -454,55 +454,55 @@ __device__ void evalAllPermutations(const float3 * colors, float3 colorSum, cons
|
|||||||
|
|
||||||
__shared__ uint s_permutations[160];
|
__shared__ uint s_permutations[160];
|
||||||
|
|
||||||
for(int i = 0; i < 16; i++)
|
for(int i = 0; i < 16; i++)
|
||||||
{
|
{
|
||||||
int pidx = idx + NUM_THREADS * i;
|
int pidx = idx + NUM_THREADS * i;
|
||||||
if (pidx >= 992) break;
|
if (pidx >= 992) break;
|
||||||
|
|
||||||
ushort start, end;
|
ushort start, end;
|
||||||
uint permutation = permutations[pidx];
|
uint permutation = permutations[pidx];
|
||||||
if (pidx < 160) s_permutations[pidx] = permutation;
|
if (pidx < 160) s_permutations[pidx] = permutation;
|
||||||
|
|
||||||
float error = evalPermutation4(colors, colorSum, permutation, &start, &end);
|
float error = evalPermutation4(colors, colorSum, permutation, &start, &end);
|
||||||
|
|
||||||
if (error < bestError)
|
if (error < bestError)
|
||||||
{
|
{
|
||||||
bestError = error;
|
bestError = error;
|
||||||
bestPermutation = permutation;
|
bestPermutation = permutation;
|
||||||
bestStart = start;
|
bestStart = start;
|
||||||
bestEnd = end;
|
bestEnd = end;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bestStart < bestEnd)
|
if (bestStart < bestEnd)
|
||||||
{
|
{
|
||||||
swap(bestEnd, bestStart);
|
swap(bestEnd, bestStart);
|
||||||
bestPermutation ^= 0x55555555; // Flip indices.
|
bestPermutation ^= 0x55555555; // Flip indices.
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int i = 0; i < 3; i++)
|
for(int i = 0; i < 3; i++)
|
||||||
{
|
{
|
||||||
int pidx = idx + NUM_THREADS * i;
|
int pidx = idx + NUM_THREADS * i;
|
||||||
if (pidx >= 160) break;
|
if (pidx >= 160) break;
|
||||||
|
|
||||||
ushort start, end;
|
ushort start, end;
|
||||||
uint permutation = s_permutations[pidx];
|
uint permutation = s_permutations[pidx];
|
||||||
float error = evalPermutation3(colors, colorSum, permutation, &start, &end);
|
float error = evalPermutation3(colors, colorSum, permutation, &start, &end);
|
||||||
|
|
||||||
if (error < bestError)
|
if (error < bestError)
|
||||||
{
|
{
|
||||||
bestError = error;
|
bestError = error;
|
||||||
bestPermutation = permutation;
|
bestPermutation = permutation;
|
||||||
bestStart = start;
|
bestStart = start;
|
||||||
bestEnd = end;
|
bestEnd = end;
|
||||||
|
|
||||||
if (bestStart > bestEnd)
|
if (bestStart > bestEnd)
|
||||||
{
|
{
|
||||||
swap(bestEnd, bestStart);
|
swap(bestEnd, bestStart);
|
||||||
bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices.
|
bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
errors[idx] = bestError;
|
errors[idx] = bestError;
|
||||||
}
|
}
|
||||||
@ -516,55 +516,55 @@ __device__ void evalAllPermutations(const float3 * colors, const float * weights
|
|||||||
|
|
||||||
__shared__ uint s_permutations[160];
|
__shared__ uint s_permutations[160];
|
||||||
|
|
||||||
for(int i = 0; i < 16; i++)
|
for(int i = 0; i < 16; i++)
|
||||||
{
|
{
|
||||||
int pidx = idx + NUM_THREADS * i;
|
int pidx = idx + NUM_THREADS * i;
|
||||||
if (pidx >= 992) break;
|
if (pidx >= 992) break;
|
||||||
|
|
||||||
ushort start, end;
|
ushort start, end;
|
||||||
uint permutation = permutations[pidx];
|
uint permutation = permutations[pidx];
|
||||||
if (pidx < 160) s_permutations[pidx] = permutation;
|
if (pidx < 160) s_permutations[pidx] = permutation;
|
||||||
|
|
||||||
float error = evalPermutation4(colors, weights, permutation, &start, &end);
|
float error = evalPermutation4(colors, weights, permutation, &start, &end);
|
||||||
|
|
||||||
if (error < bestError)
|
if (error < bestError)
|
||||||
{
|
{
|
||||||
bestError = error;
|
bestError = error;
|
||||||
bestPermutation = permutation;
|
bestPermutation = permutation;
|
||||||
bestStart = start;
|
bestStart = start;
|
||||||
bestEnd = end;
|
bestEnd = end;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bestStart < bestEnd)
|
if (bestStart < bestEnd)
|
||||||
{
|
{
|
||||||
swap(bestEnd, bestStart);
|
swap(bestEnd, bestStart);
|
||||||
bestPermutation ^= 0x55555555; // Flip indices.
|
bestPermutation ^= 0x55555555; // Flip indices.
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int i = 0; i < 3; i++)
|
for(int i = 0; i < 3; i++)
|
||||||
{
|
{
|
||||||
int pidx = idx + NUM_THREADS * i;
|
int pidx = idx + NUM_THREADS * i;
|
||||||
if (pidx >= 160) break;
|
if (pidx >= 160) break;
|
||||||
|
|
||||||
ushort start, end;
|
ushort start, end;
|
||||||
uint permutation = s_permutations[pidx];
|
uint permutation = s_permutations[pidx];
|
||||||
float error = evalPermutation3(colors, weights, permutation, &start, &end);
|
float error = evalPermutation3(colors, weights, permutation, &start, &end);
|
||||||
|
|
||||||
if (error < bestError)
|
if (error < bestError)
|
||||||
{
|
{
|
||||||
bestError = error;
|
bestError = error;
|
||||||
bestPermutation = permutation;
|
bestPermutation = permutation;
|
||||||
bestStart = start;
|
bestStart = start;
|
||||||
bestEnd = end;
|
bestEnd = end;
|
||||||
|
|
||||||
if (bestStart > bestEnd)
|
if (bestStart > bestEnd)
|
||||||
{
|
{
|
||||||
swap(bestEnd, bestStart);
|
swap(bestEnd, bestStart);
|
||||||
bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices.
|
bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
errors[idx] = bestError;
|
errors[idx] = bestError;
|
||||||
}
|
}
|
||||||
@ -576,30 +576,30 @@ __device__ void evalLevel4Permutations(const float3 * colors, const float * weig
|
|||||||
|
|
||||||
float bestError = FLT_MAX;
|
float bestError = FLT_MAX;
|
||||||
|
|
||||||
for(int i = 0; i < 16; i++)
|
for(int i = 0; i < 16; i++)
|
||||||
{
|
{
|
||||||
int pidx = idx + NUM_THREADS * i;
|
int pidx = idx + NUM_THREADS * i;
|
||||||
if (pidx >= 992) break;
|
if (pidx >= 992) break;
|
||||||
|
|
||||||
ushort start, end;
|
ushort start, end;
|
||||||
uint permutation = permutations[pidx];
|
uint permutation = permutations[pidx];
|
||||||
|
|
||||||
float error = evalPermutation4(colors, weights, colorSum, permutation, &start, &end);
|
float error = evalPermutation4(colors, weights, colorSum, permutation, &start, &end);
|
||||||
|
|
||||||
if (error < bestError)
|
if (error < bestError)
|
||||||
{
|
{
|
||||||
bestError = error;
|
bestError = error;
|
||||||
bestPermutation = permutation;
|
bestPermutation = permutation;
|
||||||
bestStart = start;
|
bestStart = start;
|
||||||
bestEnd = end;
|
bestEnd = end;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bestStart < bestEnd)
|
if (bestStart < bestEnd)
|
||||||
{
|
{
|
||||||
swap(bestEnd, bestStart);
|
swap(bestEnd, bestStart);
|
||||||
bestPermutation ^= 0x55555555; // Flip indices.
|
bestPermutation ^= 0x55555555; // Flip indices.
|
||||||
}
|
}
|
||||||
|
|
||||||
errors[idx] = bestError;
|
errors[idx] = bestError;
|
||||||
}
|
}
|
||||||
@ -812,7 +812,7 @@ __device__ float computeError(const float weights[16], uchar a0, uchar a1)
|
|||||||
|
|
||||||
// Converts a normalized value to an 8-bit level: clamps 'a' to [0, 1],
// scales it to 0..255 and rounds it with rintf.
// NOTE(review): the surrounding diff context shows a block-comment terminator
// right after this function, so it may sit inside a disabled region; only
// line comments are used here for that reason. Confirm before relying on it.
inline __device__ uchar roundAndExpand(float a)
{
	const float level = rintf(__saturatef(a) * 255.0f);
	return (uchar)level;
}
|
||||||
*/
|
*/
|
||||||
/*
|
/*
|
||||||
@ -833,16 +833,16 @@ __device__ void optimizeAlpha8(const float alphas[16], uchar & a0, uchar & a1)
|
|||||||
|
|
||||||
float beta = 1 - alpha;
|
float beta = 1 - alpha;
|
||||||
|
|
||||||
alpha2_sum += alpha * alpha;
|
alpha2_sum += alpha * alpha;
|
||||||
beta2_sum += beta * beta;
|
beta2_sum += beta * beta;
|
||||||
alphabeta_sum += alpha * beta;
|
alphabeta_sum += alpha * beta;
|
||||||
alphax_sum += alpha * alphas[i];
|
alphax_sum += alpha * alphas[i];
|
||||||
betax_sum += beta * alphas[i];
|
betax_sum += beta * alphas[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
|
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
|
||||||
|
|
||||||
float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
|
float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
|
||||||
float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
|
float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
|
||||||
|
|
||||||
a0 = roundAndExpand(a);
|
a0 = roundAndExpand(a);
|
||||||
|
Loading…
Reference in New Issue
Block a user