|
|
|
@ -220,7 +220,7 @@ __device__ float evalPermutation4(const float3 * colors, uint permutation, ushor
|
|
|
|
|
alphax_sum += alpha * colors[i];
|
|
|
|
|
betax_sum += beta * colors[i];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
|
|
|
|
|
|
|
|
|
|
float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
|
|
|
|
@ -689,33 +689,33 @@ __global__ void compressWeighted(const uint * permutations, const uint * image,
|
|
|
|
|
|
|
|
|
|
// Total squared quantization error of 16 alpha values against the 8-entry
// palette generated by the endpoints a0 and a1.
//
// The palette consists of the two endpoints plus six values interpolated
// between them in steps of 1/7. For each input value the squared distance to
// the closest palette entry is taken, and those distances are summed.
//
// weights  16 input alpha values.
//          NOTE(review): assumed to be on the same scale as a0/a1 (the
//          endpoints are used unscaled) -- confirm against the caller.
// a0, a1   palette endpoints.
//
// Returns the sum over all 16 inputs of the squared distance to the nearest
// palette entry; always >= 0, and 0 only when every input matches an entry.
//
// The earlier version took the minimum of the *signed* differences
// (palette - alpha). That always selected the smallest palette value and let
// positive and negative terms cancel in the sum, so the result did not
// measure how well the palette fits the inputs.
__device__ float computeError(const float weights[16], uchar a0, uchar a1)
{
    // Endpoints first and last, interpolated entries in between.
    float palette[8];
    palette[0] = a0;
    palette[1] = (6.0f/7.0f * a0 + 1.0f/7.0f * a1);
    palette[2] = (5.0f/7.0f * a0 + 2.0f/7.0f * a1);
    palette[3] = (4.0f/7.0f * a0 + 3.0f/7.0f * a1);
    palette[4] = (3.0f/7.0f * a0 + 4.0f/7.0f * a1);
    palette[5] = (2.0f/7.0f * a0 + 5.0f/7.0f * a1);
    palette[6] = (1.0f/7.0f * a0 + 6.0f/7.0f * a1);
    palette[7] = a1;

    float total = 0.0f;

    for (uint i = 0; i < 16; i++)
    {
        const float alpha = weights[i];

        // Squared distance is sign-independent, so min() really selects the
        // nearest palette entry.
        float delta = palette[0] - alpha;
        float best = delta * delta;

        for (uint p = 1; p < 8; p++)
        {
            delta = palette[p] - alpha;
            best = min(best, delta * delta);
        }

        total += best;
    }

    return total;
}
|
|
|
|
|
|
|
|
|
|
inline __device__ uchar roundAndExpand(float a)
|
|
|
|
@ -726,35 +726,35 @@ inline __device__ uchar roundAndExpand(float a)
|
|
|
|
|
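/* NOTE(review): disabled code. The body below reads index[i], but `index` is
   neither a parameter nor a local of optimizeAlpha8 -- presumably why this is
   commented out; confirm where the indices should come from before re-enabling.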
|
|
|
|
|
__device__ void optimizeAlpha8(const float alphas[16], uchar & a0, uchar & a1)
|
|
|
|
|
{
|
|
|
|
|
float alpha2_sum = 0;
|
|
|
|
|
float beta2_sum = 0;
|
|
|
|
|
float alphabeta_sum = 0;
|
|
|
|
|
float alphax_sum = 0;
|
|
|
|
|
float betax_sum = 0;
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < 16; i++)
|
|
|
|
|
{
|
|
|
|
|
uint idx = index[i];
|
|
|
|
|
float alpha;
|
|
|
|
|
if (idx < 2) alpha = 1.0f - idx;
|
|
|
|
|
else alpha = (8.0f - idx) / 7.0f;
|
|
|
|
|
|
|
|
|
|
float beta = 1 - alpha;
|
|
|
|
|
|
|
|
|
|
alpha2_sum += alpha * alpha;
|
|
|
|
|
beta2_sum += beta * beta;
|
|
|
|
|
alphabeta_sum += alpha * beta;
|
|
|
|
|
alphax_sum += alpha * alphas[i];
|
|
|
|
|
betax_sum += beta * alphas[i];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
|
|
|
|
|
|
|
|
|
|
float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
|
|
|
|
|
float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
|
|
|
|
|
|
|
|
|
|
a0 = roundAndExpand(a);
|
|
|
|
|
a1 = roundAndExpand(b);
|
|
|
|
|
float alpha2_sum = 0;
|
|
|
|
|
float beta2_sum = 0;
|
|
|
|
|
float alphabeta_sum = 0;
|
|
|
|
|
float alphax_sum = 0;
|
|
|
|
|
float betax_sum = 0;
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < 16; i++)
|
|
|
|
|
{
|
|
|
|
|
uint idx = index[i];
|
|
|
|
|
float alpha;
|
|
|
|
|
if (idx < 2) alpha = 1.0f - idx;
|
|
|
|
|
else alpha = (8.0f - idx) / 7.0f;
|
|
|
|
|
|
|
|
|
|
float beta = 1 - alpha;
|
|
|
|
|
|
|
|
|
|
alpha2_sum += alpha * alpha;
|
|
|
|
|
beta2_sum += beta * beta;
|
|
|
|
|
alphabeta_sum += alpha * beta;
|
|
|
|
|
alphax_sum += alpha * alphas[i];
|
|
|
|
|
betax_sum += beta * alphas[i];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
|
|
|
|
|
|
|
|
|
|
float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
|
|
|
|
|
float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
|
|
|
|
|
|
|
|
|
|
a0 = roundAndExpand(a);
|
|
|
|
|
a1 = roundAndExpand(b);
|
|
|
|
|
}
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|