Minor cleanups and some experiments.

This commit is contained in:
Ignacio 2020-03-23 10:03:19 -07:00
parent 97723db794
commit 9009962054

View File

@ -38,6 +38,46 @@ ClusterFit::ClusterFit()
{ {
} }
/*
// find minimum and maximum colors based on bounding box in color space
inline static void fit_colors_bbox(const Vector3 * colors, int count, Vector3 * restrict c0, Vector3 * restrict c1)
{
*c0 = Vector3(0);
*c1 = Vector3(1);
for (int i = 0; i < count; i++) {
*c0 = max(*c0, colors[i]);
*c1 = min(*c1, colors[i]);
}
}
inline static void select_diagonal(const Vector3 * colors, int count, Vector3 * restrict c0, Vector3 * restrict c1)
{
Vector3 center = (*c0 + *c1) * 0.5f;
Vector2 covariance = Vector2(0);
for (int i = 0; i < count; i++) {
Vector3 t = colors[i] - center;
covariance += t.xy() * t.z;
}
float x0 = c0->x;
float y0 = c0->y;
float x1 = c1->x;
float y1 = c1->y;
if (covariance.x < 0) {
swap(x0, x1);
}
if (covariance.y < 0) {
swap(y0, y1);
}
c0->set(x0, y0, c0->z);
c1->set(x1, y1, c1->z);
}
*/
void ClusterFit::setColorSet(const Vector3 * colors, const float * weights, int count) void ClusterFit::setColorSet(const Vector3 * colors, const float * weights, int count)
{ {
// initialise the best error // initialise the best error
@ -54,6 +94,14 @@ void ClusterFit::setColorSet(const Vector3 * colors, const float * weights, int
Vector3 principal = Fit::computePrincipalComponent_PowerMethod(count, colors, weights, metric); Vector3 principal = Fit::computePrincipalComponent_PowerMethod(count, colors, weights, metric);
//Vector3 principal = Fit::computePrincipalComponent_EigenSolver(count, colors, weights, metric); //Vector3 principal = Fit::computePrincipalComponent_EigenSolver(count, colors, weights, metric);
/*// This approximation produces slightly lower quality:
Vector3 c0, c1;
fit_colors_bbox(colors, count, &c0, &c1);
select_diagonal(colors, count, &c0, &c1);
if (c0 != c1) {
principal = normalize(c1 - c0);
}*/
// build the list of values // build the list of values
int order[16]; int order[16];
float dps[16]; float dps[16];
@ -145,8 +193,6 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
SimdVector x0 = zero; SimdVector x0 = zero;
int b0 = 0, b1 = 0;
// check all possible clusters for this total order // check all possible clusters for this total order
for( int c0 = 0; c0 <= count; c0++) for( int c0 = 0; c0 <= count; c0++)
{ {
@ -197,8 +243,6 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
besterror = error; besterror = error;
beststart = a; beststart = a;
bestend = b; bestend = b;
b0 = c0;
b1 = c1;
} }
x1 += m_weighted[c0+c1]; x1 += m_weighted[c0+c1];
@ -242,7 +286,6 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
SimdVector besterror = SimdVector( FLT_MAX ); SimdVector besterror = SimdVector( FLT_MAX );
SimdVector x0 = zero; SimdVector x0 = zero;
int b0 = 0, b1 = 0, b2 = 0;
// check all possible clusters for this total order // check all possible clusters for this total order
for( int c0 = 0; c0 <= count; c0++) for( int c0 = 0; c0 <= count; c0++)
@ -283,23 +326,15 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
b = truncate( multiplyAdd( grid, b, half ) ) * gridrcp; b = truncate( multiplyAdd( grid, b, half ) ) * gridrcp;
// compute the error (we skip the constant xxsum) // compute the error (we skip the constant xxsum)
// error = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
SimdVector e1 = multiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); SimdVector e1 = multiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
SimdVector e2 = negativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); SimdVector e2 = negativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
SimdVector e3 = negativeMultiplySubtract( b, betax_sum, e2 ); SimdVector e3 = negativeMultiplySubtract( b, betax_sum, e2 );
SimdVector e4 = multiplyAdd( two, e3, e1 ); SimdVector e4 = multiplyAdd( two, e3, e1 );
#if 1
// apply the metric to the error term // apply the metric to the error term
SimdVector e5 = e4 * m_metricSqr; SimdVector e5 = e4 * m_metricSqr;
SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ(); SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ();
#else
// @@ Is there a horizontal max SIMD instruction?
SimdVector error = e4.splatX() + e4.splatY() + e4.splatZ();
error *= two;
error += max(max(e4.splatX(), e4.splatY()), e4.splatZ());
error -= min(min(e4.splatX(), e4.splatY()), e4.splatZ());
#endif
// keep the solution if it wins // keep the solution if it wins
if (compareAnyLessThan(error, besterror)) if (compareAnyLessThan(error, besterror))
@ -307,9 +342,6 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
besterror = error; besterror = error;
beststart = a; beststart = a;
bestend = b; bestend = b;
b0 = c0;
b1 = c1;
b2 = c2;
} }
x2 += m_weighted[c0+c1+c2]; x2 += m_weighted[c0+c1+c2];
@ -338,27 +370,29 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
#else #else
static const float midpoints5[32] = {
0.015686f, 0.047059f, 0.078431f, 0.111765f, 0.145098f, 0.176471f, 0.207843f, 0.241176f, 0.274510f, 0.305882f, 0.337255f, 0.370588f, 0.403922f, 0.435294f, 0.466667f, 0.5f,
0.533333f, 0.564706f, 0.596078f, 0.629412f, 0.662745f, 0.694118f, 0.725490f, 0.758824f, 0.792157f, 0.823529f, 0.854902f, 0.888235f, 0.921569f, 0.952941f, 0.984314f, 1.0f
};
static const float midpoints6[64] = {
0.007843f, 0.023529f, 0.039216f, 0.054902f, 0.070588f, 0.086275f, 0.101961f, 0.117647f, 0.133333f, 0.149020f, 0.164706f, 0.180392f, 0.196078f, 0.211765f, 0.227451f, 0.245098f,
0.262745f, 0.278431f, 0.294118f, 0.309804f, 0.325490f, 0.341176f, 0.356863f, 0.372549f, 0.388235f, 0.403922f, 0.419608f, 0.435294f, 0.450980f, 0.466667f, 0.482353f, 0.500000f,
0.517647f, 0.533333f, 0.549020f, 0.564706f, 0.580392f, 0.596078f, 0.611765f, 0.627451f, 0.643137f, 0.658824f, 0.674510f, 0.690196f, 0.705882f, 0.721569f, 0.737255f, 0.754902f,
0.772549f, 0.788235f, 0.803922f, 0.819608f, 0.835294f, 0.850980f, 0.866667f, 0.882353f, 0.898039f, 0.913725f, 0.929412f, 0.945098f, 0.960784f, 0.976471f, 0.992157f, 1.0f
};
// This is the ideal way to round, but it's too expensive to do this in the inner loop.
inline Vector3 round565(const Vector3 & v) { inline Vector3 round565(const Vector3 & v) {
uint r = ftoi_trunc(v.x * 31.0f); const Vector3 grid(31.0f, 63.0f, 31.0f);
float r0 = float(((r+0) << 3) | ((r+0) >> 2)); const Vector3 gridrcp(1.0f / 31.0f, 1.0f / 63.0f, 1.0f / 31.0f);
float r1 = float(((r+1) << 3) | ((r+1) >> 2));
if (fabs(v.x - r1) < fabs(v.x - r0)) r = min(r+1, 31U);
r = (r << 3) | (r >> 2);
uint g = ftoi_trunc(v.y * 63.0f); Vector3 q = floor(grid * v);
float g0 = float(((g+0) << 2) | ((g+0) >> 4)); q.x += (v.x > midpoints5[int(q.x)]);
float g1 = float(((g+1) << 2) | ((g+1) >> 4)); q.y += (v.y > midpoints6[int(q.y)]);
if (fabs(v.y - g1) < fabs(v.y - g0)) g = min(g+1, 63U); q.z += (v.z > midpoints5[int(q.z)]);
g = (g << 2) | (g >> 4); q *= gridrcp;
return q;
uint b = ftoi_trunc(v.z * 31.0f);
float b0 = float(((b+0) << 3) | ((b+0) >> 2));
float b1 = float(((b+1) << 3) | ((b+1) >> 2));
if (fabs(v.z - b1) < fabs(v.z - b0)) b = min(b+1, 31U);
b = (b << 3) | (b >> 2);
return Vector3(float(r)/255, float(g)/255, float(b)/255);
} }
bool ClusterFit::compress3(Vector3 * start, Vector3 * end) bool ClusterFit::compress3(Vector3 * start, Vector3 * end)
@ -406,24 +440,6 @@ bool ClusterFit::compress3(Vector3 * start, Vector3 * end)
a = floor(grid * a + 0.5f) * gridrcp; a = floor(grid * a + 0.5f) * gridrcp;
b = floor(grid * b + 0.5f) * gridrcp; b = floor(grid * b + 0.5f) * gridrcp;
#else #else
//int ar = ftoi_round(31 * a.x); ar = (ar << 3) | (ar >> 2); a.x = float(ar) / 255.0f;
//int ag = ftoi_round(63 * a.y); ar = (ag << 2) | (ag >> 4); a.y = float(ag) / 255.0f;
//int ab = ftoi_round(31 * a.z); ar = (ab << 3) | (ab >> 2); a.z = float(ab) / 255.0f;
//int br = ftoi_round(31 * b.x); br = (br << 3) | (br >> 2); b.x = float(br) / 255.0f;
//int bg = ftoi_round(63 * b.y); br = (bg << 2) | (bg >> 4); b.y = float(bg) / 255.0f;
//int bb = ftoi_round(31 * b.z); br = (bb << 3) | (bb >> 2); b.z = float(bb) / 255.0f;
/*a = floor(a * grid + 0.5f);
a.x = (a.x * 8 + floorf(a.x / 4)) / 255.0f;
a.y = (a.y * 4 + floorf(a.y / 16)) / 255.0f;
a.z = (a.z * 8 + floorf(a.z / 4)) / 255.0f;
b = floor(b * grid + 0.5f);
b.x = (b.x * 8 + floorf(b.x / 4)) / 255.0f;
b.y = (b.y * 4 + floorf(b.y / 16)) / 255.0f;
b.z = (b.z * 8 + floorf(b.z / 4)) / 255.0f;*/
a = round565(a); a = round565(a);
b = round565(b); b = round565(b);
#endif #endif
@ -512,29 +528,10 @@ bool ClusterFit::compress4(Vector3 * start, Vector3 * end)
// clamp to the grid // clamp to the grid
a = clamp(a, 0, 1); a = clamp(a, 0, 1);
b = clamp(b, 0, 1); b = clamp(b, 0, 1);
#if 0 #if 1
a = floor(a * grid + 0.5f) * gridrcp; a = floor(a * grid + 0.5f) * gridrcp;
b = floor(b * grid + 0.5f) * gridrcp; b = floor(b * grid + 0.5f) * gridrcp;
#else #else
//int ar = ftoi_round(31 * a.x); ar = (ar << 3) | (ar >> 2); a.x = float(ar) / 255.0f;
//int ag = ftoi_round(63 * a.y); ar = (ag << 2) | (ag >> 4); a.y = float(ag) / 255.0f;
//int ab = ftoi_round(31 * a.z); ar = (ab << 3) | (ab >> 2); a.z = float(ab) / 255.0f;
//int br = ftoi_round(31 * b.x); br = (br << 3) | (br >> 2); b.x = float(br) / 255.0f;
//int bg = ftoi_round(63 * b.y); br = (bg << 2) | (bg >> 4); b.y = float(bg) / 255.0f;
//int bb = ftoi_round(31 * b.z); br = (bb << 3) | (bb >> 2); b.z = float(bb) / 255.0f;
/*
a = floor(a * grid + 0.5f);
a.x = (a.x * 8 + floorf(a.x / 4)) / 255.0f;
a.y = (a.y * 4 + floorf(a.y / 16)) / 255.0f;
a.z = (a.z * 8 + floorf(a.z / 4)) / 255.0f;
b = floor(b * grid + 0.5f);
b.x = (b.x * 8 + floorf(b.x / 4)) / 255.0f;
b.y = (b.y * 4 + floorf(b.y / 16)) / 255.0f;
b.z = (b.z * 8 + floorf(b.z / 4)) / 255.0f;
*/
a = round565(a); a = round565(a);
b = round565(b); b = round565(b);
#endif #endif