661 lines
21 KiB
C++
661 lines
21 KiB
C++
/* -----------------------------------------------------------------------------

    Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
    Copyright (c) 2006 Ignacio Castano icastano@nvidia.com

    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:

    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

   -------------------------------------------------------------------------- */
|
|
|
|
#include "ClusterFit.h"
|
|
#include "nvmath/Fitting.h"
|
|
#include "nvmath/Vector.inl"
|
|
#include "nvmath/ftoi.h"
|
|
#include "nvimage/ColorBlock.h"
|
|
|
|
#include <float.h> // FLT_MAX
|
|
|
|
using namespace nv;
|
|
|
|
// Creates a fitter without assigning any state: the constructor body is empty.
// The members read by compress3()/compress4() are written by setColorWeights()
// and setColorSet().
ClusterFit::ClusterFit() {}
|
|
|
|
#if 0 // @@ Deprecate. Do not use color set directly.
// Disabled overload, compiled out by the surrounding #if 0. It loads the fitter
// from a ColorSet (colorCount / colors / weights) and otherwise performs the
// same steps as the array-based setColorSet: reset the best error, order the
// colors along the principal axis, and cache the weighted colors and their sums.
void ClusterFit::setColorSet(const ColorSet * set)
{
    // Start from the largest possible error so any finite candidate wins.
#if NVTT_USE_SIMD
    m_besterror = SimdVector( FLT_MAX );
    Vector3 metric = m_metric.toVector3();
#else
    m_besterror = FLT_MAX;
    Vector3 metric = m_metric;
#endif

    m_count = set->colorCount;

    // Keep only the xyz part of every color.
    Vector3 rgb[16];
    for (uint k = 0; k < m_count; k++)
    {
        rgb[k] = set->colors[k].xyz();
    }

    // Alternative solver: Fit::computePrincipalComponent_EigenSolver(m_count, rgb, set->weights, metric)
    Vector3 axis = Fit::computePrincipalComponent_PowerMethod(m_count, rgb, set->weights, metric);

    // Project every color onto the axis and remember where it came from.
    int sortedIndex[16];
    float projection[16];
    for (uint k = 0; k < m_count; ++k)
    {
        sortedIndex[k] = k;
        projection[k] = dot(rgb[k], axis);
    }

    // Insertion sort by projection; equal projections keep their input order.
    for (uint k = 1; k < m_count; ++k)
    {
        uint slot = k;
        while (slot > 0 && projection[slot] < projection[slot - 1])
        {
            swap(projection[slot], projection[slot - 1]);
            swap(sortedIndex[slot], sortedIndex[slot - 1]);
            --slot;
        }
    }

    // Clear the accumulators before summing the weighted colors.
#if NVTT_USE_SIMD
    m_xxsum = SimdVector( 0.0f );
    m_xsum = SimdVector( 0.0f );
#else
    m_xxsum = Vector3(0.0f);
    m_xsum = Vector3(0.0f);
    m_wsum = 0.0f;
#endif

    for (uint k = 0; k < m_count; ++k)
    {
        const int src = sortedIndex[k];
#if NVTT_USE_SIMD
        // Pack (color, 1) and scale by the weight, so the w lane holds the weight itself.
        NV_ALIGN_16 Vector4 packed(rgb[src], 1);
        m_weighted[k] = SimdVector(packed.component) * SimdVector(set->weights[src]);
        m_xxsum += m_weighted[k] * m_weighted[k];
        m_xsum += m_weighted[k];
#else
        m_weighted[k] = rgb[src] * set->weights[src];
        m_xxsum += m_weighted[k] * m_weighted[k];
        m_xsum += m_weighted[k];
        m_weights[k] = set->weights[src];
        m_wsum += m_weights[k];
#endif
    }
}
#endif // 0
|
|
|
|
|
|
void ClusterFit::setColorSet(const Vector3 * colors, const float * weights, int count)
|
|
{
|
|
// initialise the best error
|
|
#if NVTT_USE_SIMD
|
|
m_besterror = SimdVector( FLT_MAX );
|
|
Vector3 metric = m_metric.toVector3();
|
|
#else
|
|
m_besterror = FLT_MAX;
|
|
Vector3 metric = m_metric;
|
|
#endif
|
|
|
|
m_count = count;
|
|
|
|
Vector3 principal = Fit::computePrincipalComponent_PowerMethod(count, colors, weights, metric);
|
|
//Vector3 principal = Fit::computePrincipalComponent_EigenSolver(count, colors, weights, metric);
|
|
|
|
// build the list of values
|
|
int order[16];
|
|
float dps[16];
|
|
for (uint i = 0; i < m_count; ++i)
|
|
{
|
|
dps[i] = dot(colors[i], principal);
|
|
order[i] = i;
|
|
}
|
|
|
|
// stable sort
|
|
for (uint i = 0; i < m_count; ++i)
|
|
{
|
|
for (uint j = i; j > 0 && dps[j] < dps[j - 1]; --j)
|
|
{
|
|
swap(dps[j], dps[j - 1]);
|
|
swap(order[j], order[j - 1]);
|
|
}
|
|
}
|
|
|
|
// weight all the points
|
|
#if NVTT_USE_SIMD
|
|
m_xxsum = SimdVector( 0.0f );
|
|
m_xsum = SimdVector( 0.0f );
|
|
#else
|
|
m_xxsum = Vector3(0.0f);
|
|
m_xsum = Vector3(0.0f);
|
|
m_wsum = 0.0f;
|
|
#endif
|
|
|
|
for (uint i = 0; i < m_count; ++i)
|
|
{
|
|
int p = order[i];
|
|
#if NVTT_USE_SIMD
|
|
NV_ALIGN_16 Vector4 tmp(colors[p], 1);
|
|
m_weighted[i] = SimdVector(tmp.component) * SimdVector(weights[p]);
|
|
m_xxsum += m_weighted[i] * m_weighted[i];
|
|
m_xsum += m_weighted[i];
|
|
#else
|
|
m_weighted[i] = colors[p] * weights[p];
|
|
m_xxsum += m_weighted[i] * m_weighted[i];
|
|
m_xsum += m_weighted[i];
|
|
m_weights[i] = weights[p];
|
|
m_wsum += m_weights[i];
|
|
#endif
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// Sets the per-channel error metric from the xyz part of `w` and caches its
// component-wise square in m_metricSqr. The fourth component of `w` is ignored;
// the SIMD build stores 1 in the fourth lane instead.
void ClusterFit::setColorWeights(Vector4::Arg w)
{
#if NVTT_USE_SIMD
    NV_ALIGN_16 Vector4 padded(w.xyz(), 1);
    m_metric = SimdVector(padded.component);
#else
    m_metric = w.xyz();
#endif

    m_metricSqr = m_metric * m_metric;
}
|
|
|
|
float ClusterFit::bestError() const
|
|
{
|
|
#if NVTT_USE_SIMD
|
|
SimdVector x = m_xxsum * m_metricSqr;
|
|
SimdVector error = m_besterror + x.splatX() + x.splatY() + x.splatZ();
|
|
return error.toFloat();
|
|
#else
|
|
return m_besterror + dot(m_xxsum, m_metricSqr);
|
|
#endif
|
|
|
|
}
|
|
|
|
#if NVTT_USE_SIMD
|
|
|
|
// Three-level cluster fit (SIMD build).
//
// Tries every split of the ordered colors into three consecutive clusters of
// sizes c0, c1 and count-c0-c1, which get interpolation weights
// (alpha, beta) = (1, 0), (1/2, 1/2) and (0, 1). For each split the two
// endpoints are solved from a 2x2 linear system, clamped to [0, 1], snapped to
// the 5:6:5 grid and scored with the squared metric.
//
//   start, end - receive the best endpoints, written only when this call beats
//                the error already stored in m_besterror.
//
// Returns true when the endpoints were written (m_besterror is updated too),
// false otherwise.
//
// The xyz lanes of each m_weighted entry hold weight * color and the w lane
// holds the weight (see setColorSet), so one vector operation accumulates both.
bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
{
    const int count = m_count;
    const SimdVector one = SimdVector(1.0f);
    const SimdVector zero = SimdVector(0.0f);
    // xyz: interpolation weight 1/2; w: its square, applied to the summed weights.
    const SimdVector half(0.5f, 0.5f, 0.5f, 0.25f);
    const SimdVector two = SimdVector(2.0);
    const SimdVector grid( 31.0f, 63.0f, 31.0f, 0.0f );
    const SimdVector gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );

    // best candidate found by this call
    SimdVector beststart = SimdVector( 0.0f );
    SimdVector bestend = SimdVector( 0.0f );
    SimdVector besterror = SimdVector( FLT_MAX );

    // running sum of the first cluster
    SimdVector x0 = zero;

    // check all possible clusters for this total order
    for( int c0 = 0; c0 <= count; c0++)
    {
        // running sum of the middle cluster
        SimdVector x1 = zero;

        for( int c1 = 0; c1 <= count-c0; c1++)
        {
            // whatever is left belongs to the last cluster
            const SimdVector x2 = m_xsum - x1 - x0;

            //Vector3 alphax_sum = x0 + x1 * 0.5f;
            //float alpha2_sum = w0 + w1 * 0.25f;
            const SimdVector alphax_sum = multiplyAdd(x1, half, x0); // alphax_sum, alpha2_sum
            const SimdVector alpha2_sum = alphax_sum.splatW();

            //const Vector3 betax_sum = x2 + x1 * 0.5f;
            //const float beta2_sum = w2 + w1 * 0.25f;
            const SimdVector betax_sum = multiplyAdd(x1, half, x2); // betax_sum, beta2_sum
            const SimdVector beta2_sum = betax_sum.splatW();

            //const float alphabeta_sum = w1 * 0.25f;
            const SimdVector alphabeta_sum = (x1 * half).splatW(); // alphabeta_sum

            // const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
            const SimdVector factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );

            SimdVector a = negativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
            SimdVector b = negativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;

            // clamp to the grid
            a = min( one, max( zero, a ) );
            b = min( one, max( zero, b ) );
            a = truncate( multiplyAdd( grid, a, half ) ) * gridrcp;
            b = truncate( multiplyAdd( grid, b, half ) ) * gridrcp;

            // compute the error (we skip the constant xxsum)
            SimdVector e1 = multiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
            SimdVector e2 = negativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
            SimdVector e3 = negativeMultiplySubtract( b, betax_sum, e2 );
            SimdVector e4 = multiplyAdd( two, e3, e1 );

            // apply the metric to the error term
            SimdVector e5 = e4 * m_metricSqr;
            SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ();

            // keep the solution if it wins
            // NOTE(review): compareAnyLessThan presumably tests lane-wise; confirm in the SIMD header.
            if( compareAnyLessThan( error, besterror ) )
            {
                besterror = error;
                beststart = a;
                bestend = b;
            }

            // Grow the middle cluster. On the last iteration c0+c1 == count, an
            // entry setColorSet never wrote and whose sum would never be used,
            // so it is not read.
            if( c0+c1 < count )
            {
                x1 += m_weighted[c0+c1];
            }
        }

        // Grow the first cluster; same reasoning for c0 == count.
        if( c0 < count )
        {
            x0 += m_weighted[c0];
        }
    }

    // save the block if necessary
    if( compareAnyLessThan( besterror, m_besterror ) )
    {
        *start = beststart.toVector3();
        *end = bestend.toVector3();

        // save the error
        m_besterror = besterror;

        return true;
    }

    return false;
}
|
|
|
|
// Four-level cluster fit (SIMD build).
//
// Tries every split of the ordered colors into four consecutive clusters of
// sizes c0, c1, c2 and count-c0-c1-c2, which get interpolation weights
// (alpha, beta) = (1, 0), (2/3, 1/3), (1/3, 2/3) and (0, 1). For each split the
// two endpoints are solved from a 2x2 linear system, clamped to [0, 1], snapped
// to the 5:6:5 grid and scored with the squared metric.
//
//   start, end - receive the best endpoints, written only when this call beats
//                the error already stored in m_besterror.
//
// Returns true when the endpoints were written (m_besterror is updated too),
// false otherwise.
//
// The xyz lanes of each m_weighted entry hold weight * color and the w lane
// holds the weight (see setColorSet), so one vector operation accumulates both.
bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
{
    const int count = m_count;
    const SimdVector one = SimdVector(1.0f);
    const SimdVector zero = SimdVector(0.0f);
    const SimdVector half = SimdVector(0.5f);
    const SimdVector two = SimdVector(2.0);
    // xyz: interpolation weight; w: its square, applied to the summed weights.
    const SimdVector onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
    const SimdVector twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
    const SimdVector twonineths = SimdVector( 2.0f/9.0f );
    const SimdVector grid( 31.0f, 63.0f, 31.0f, 0.0f );
    const SimdVector gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );

    // best candidate found by this call
    SimdVector beststart = SimdVector( 0.0f );
    SimdVector bestend = SimdVector( 0.0f );
    SimdVector besterror = SimdVector( FLT_MAX );

    // running sum of the first cluster
    SimdVector x0 = zero;

    // check all possible clusters for this total order
    for( int c0 = 0; c0 <= count; c0++)
    {
        // running sum of the second cluster
        SimdVector x1 = zero;

        for( int c1 = 0; c1 <= count-c0; c1++)
        {
            // running sum of the third cluster
            SimdVector x2 = zero;

            for( int c2 = 0; c2 <= count-c0-c1; c2++)
            {
                // whatever is left belongs to the last cluster
                const SimdVector x3 = m_xsum - x2 - x1 - x0;

                //const Vector3 alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
                //const float alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
                const SimdVector alphax_sum = multiplyAdd(x2, onethird, multiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum
                const SimdVector alpha2_sum = alphax_sum.splatW();

                //const Vector3 betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f);
                //const float beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
                const SimdVector betax_sum = multiplyAdd(x2, twothirds, multiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum
                const SimdVector beta2_sum = betax_sum.splatW();

                //const float alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
                const SimdVector alphabeta_sum = twonineths*( x1 + x2 ).splatW(); // alphabeta_sum

                //const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
                const SimdVector factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );

                SimdVector a = negativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
                SimdVector b = negativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;

                // clamp to the grid
                a = min( one, max( zero, a ) );
                b = min( one, max( zero, b ) );
                a = truncate( multiplyAdd( grid, a, half ) ) * gridrcp;
                b = truncate( multiplyAdd( grid, b, half ) ) * gridrcp;

                // compute the error (we skip the constant xxsum)
                SimdVector e1 = multiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
                SimdVector e2 = negativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
                SimdVector e3 = negativeMultiplySubtract( b, betax_sum, e2 );
                SimdVector e4 = multiplyAdd( two, e3, e1 );

#if 1
                // apply the metric to the error term
                SimdVector e5 = e4 * m_metricSqr;
                SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ();
#else
                // Disabled alternative error measure, kept for reference.
                // @@ Is there a horizontal max SIMD instruction?
                SimdVector error = e4.splatX() + e4.splatY() + e4.splatZ();
                error *= two;
                error += max(max(e4.splatX(), e4.splatY()), e4.splatZ());
                error -= min(min(e4.splatX(), e4.splatY()), e4.splatZ());
#endif

                // keep the solution if it wins
                // NOTE(review): compareAnyLessThan presumably tests lane-wise; confirm in the SIMD header.
                if (compareAnyLessThan(error, besterror))
                {
                    besterror = error;
                    beststart = a;
                    bestend = b;
                }

                // Grow the third cluster. On the last iteration the index equals
                // count, an entry setColorSet never wrote and whose sum would
                // never be used, so it is not read.
                if( c0+c1+c2 < count )
                {
                    x2 += m_weighted[c0+c1+c2];
                }
            }

            // Grow the second cluster; same reasoning for c0+c1 == count.
            if( c0+c1 < count )
            {
                x1 += m_weighted[c0+c1];
            }
        }

        // Grow the first cluster; same reasoning for c0 == count.
        if( c0 < count )
        {
            x0 += m_weighted[c0];
        }
    }

    // save the block if necessary
    if (compareAnyLessThan(besterror, m_besterror))
    {
        *start = beststart.toVector3();
        *end = bestend.toVector3();

        // save the error
        m_besterror = besterror;

        return true;
    }

    return false;
}
|
|
|
|
#else
|
|
|
|
// Quantizes a color with components in [0, 1] to 5:6:5 bits and returns the
// bit-expanded 8-bit value of each channel divided by 255.
//
// Each channel is first truncated to its 5- or 6-bit level. The level above is
// chosen instead when its expanded 8-bit value lies closer to the input.
//
// The closeness test has to run in 8-bit units: the candidates r0/r1 (and
// g0/g1, b0/b1) are expanded values in [0, 255], so the input is scaled by 255
// before comparing. Comparing the raw [0, 1] component against them can never
// select the upper level for in-range input, which reduces the function to
// plain truncation.
inline Vector3 round565(const Vector3 & v) {
    // red: 5 bits, expanded to 8 by replicating the top 3 bits
    const float red8 = v.x * 255.0f;
    uint r = ftoi_trunc(v.x * 31.0f);
    float r0 = float(((r+0) << 3) | ((r+0) >> 2));
    float r1 = float(((r+1) << 3) | ((r+1) >> 2));
    if (fabs(red8 - r1) < fabs(red8 - r0)) r = min(r+1, 31U);
    r = (r << 3) | (r >> 2);

    // green: 6 bits, expanded to 8 by replicating the top 2 bits
    const float green8 = v.y * 255.0f;
    uint g = ftoi_trunc(v.y * 63.0f);
    float g0 = float(((g+0) << 2) | ((g+0) >> 4));
    float g1 = float(((g+1) << 2) | ((g+1) >> 4));
    if (fabs(green8 - g1) < fabs(green8 - g0)) g = min(g+1, 63U);
    g = (g << 2) | (g >> 4);

    // blue: 5 bits, same expansion as red
    const float blue8 = v.z * 255.0f;
    uint b = ftoi_trunc(v.z * 31.0f);
    float b0 = float(((b+0) << 3) | ((b+0) >> 2));
    float b1 = float(((b+1) << 3) | ((b+1) >> 2));
    if (fabs(blue8 - b1) < fabs(blue8 - b0)) b = min(b+1, 31U);
    b = (b << 3) | (b >> 2);

    return Vector3(float(r)/255, float(g)/255, float(b)/255);
}
|
|
|
|
// Three-level cluster fit (scalar build).
//
// Tries every split of the ordered colors into three consecutive clusters of
// sizes c0, c1 and count-c0-c1, which get interpolation weights
// (alpha, beta) = (1, 0), (1/2, 1/2) and (0, 1). For each split the two
// endpoints are solved from a 2x2 linear system, clamped to [0, 1], snapped to
// the 5:6:5 grid and scored with the squared metric.
//
//   start, end - receive the best endpoints, written only when this call beats
//                the error already stored in m_besterror.
//
// Returns true when the endpoints were written (m_besterror is updated too),
// false otherwise.
bool ClusterFit::compress3(Vector3 * start, Vector3 * end)
{
    const uint count = m_count;
    const Vector3 grid( 31.0f, 63.0f, 31.0f );
    const Vector3 gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );

    // best candidate found by this call
    Vector3 beststart( 0.0f );
    Vector3 bestend( 0.0f );
    float besterror = FLT_MAX;

    // running weighted-color sum and weight sum of the first cluster
    Vector3 x0(0.0f);
    float w0 = 0.0f;

    // check all possible clusters for this total order
    for (uint c0 = 0; c0 <= count; c0++)
    {
        // running sums of the middle cluster
        Vector3 x1(0.0f);
        float w1 = 0.0f;

        for (uint c1 = 0; c1 <= count-c0; c1++)
        {
            // whatever weight is left belongs to the last cluster
            float w2 = m_wsum - w0 - w1;

            // These factors could be entirely precomputed.
            float const alpha2_sum = w0 + w1 * 0.25f;
            float const beta2_sum = w2 + w1 * 0.25f;
            float const alphabeta_sum = w1 * 0.25f;
            float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);

            Vector3 const alphax_sum = x0 + x1 * 0.5f;
            Vector3 const betax_sum = m_xsum - alphax_sum;

            Vector3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor;
            Vector3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor;

            // clamp to the grid
            a = clamp(a, 0, 1);
            b = clamp(b, 0, 1);
#if 1
            // round to the nearest 5:6:5 level
            a = floor(grid * a + 0.5f) * gridrcp;
            b = floor(grid * b + 0.5f) * gridrcp;
#else
            // disabled alternative: quantize against the bit-expanded 8-bit values
            a = round565(a);
            b = round565(b);
#endif

            // compute the error
            Vector3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );

            // apply the metric to the error term
            float error = dot(e1, m_metricSqr);

            // keep the solution if it wins
            if (error < besterror)
            {
                besterror = error;
                beststart = a;
                bestend = b;
            }

            // Grow the middle cluster. On the last iteration c0+c1 == count, an
            // entry setColorSet never wrote and whose sums would never be used,
            // so it is not read.
            if (c0+c1 < count)
            {
                x1 += m_weighted[c0+c1];
                w1 += m_weights[c0+c1];
            }
        }

        // Grow the first cluster; same reasoning for c0 == count.
        if (c0 < count)
        {
            x0 += m_weighted[c0];
            w0 += m_weights[c0];
        }
    }

    // save the block if necessary
    if( besterror < m_besterror )
    {
        *start = beststart;
        *end = bestend;

        // save the error
        m_besterror = besterror;

        return true;
    }

    return false;
}
|
|
|
|
// Four-level cluster fit (scalar build).
//
// Tries every split of the ordered colors into four consecutive clusters of
// sizes c0, c1, c2 and count-c0-c1-c2, which get interpolation weights
// (alpha, beta) = (1, 0), (2/3, 1/3), (1/3, 2/3) and (0, 1). For each split the
// two endpoints are solved from a 2x2 linear system, clamped to [0, 1],
// quantized with round565() and scored with the squared metric.
//
//   start, end - receive the best endpoints, written only when this call beats
//                the error already stored in m_besterror.
//
// Returns true when the endpoints were written (m_besterror is updated too),
// false otherwise.
bool ClusterFit::compress4(Vector3 * start, Vector3 * end)
{
    const uint count = m_count;
    // grid/gridrcp are only referenced by the disabled rounding variant below.
    const Vector3 grid( 31.0f, 63.0f, 31.0f );
    const Vector3 gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );

    // best candidate found by this call
    Vector3 beststart( 0.0f );
    Vector3 bestend( 0.0f );
    float besterror = FLT_MAX;

    // running weighted-color sum and weight sum of the first cluster
    Vector3 x0(0.0f);
    float w0 = 0.0f;

    // check all possible clusters for this total order
    for (uint c0 = 0; c0 <= count; c0++)
    {
        // running sums of the second cluster
        Vector3 x1(0.0f);
        float w1 = 0.0f;

        for (uint c1 = 0; c1 <= count-c0; c1++)
        {
            // running sums of the third cluster
            Vector3 x2(0.0f);
            float w2 = 0.0f;

            for (uint c2 = 0; c2 <= count-c0-c1; c2++)
            {
                // whatever weight is left belongs to the last cluster
                float w3 = m_wsum - w0 - w1 - w2;

                float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
                float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
                float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
                float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);

                Vector3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
                Vector3 const betax_sum = m_xsum - alphax_sum;

                Vector3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
                Vector3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;

                // clamp to the grid
                a = clamp(a, 0, 1);
                b = clamp(b, 0, 1);
#if 0
                // disabled alternative: round to the nearest 5:6:5 level
                a = floor(a * grid + 0.5f) * gridrcp;
                b = floor(b * grid + 0.5f) * gridrcp;
#else
                // quantize against the bit-expanded 8-bit values
                a = round565(a);
                b = round565(b);
#endif
                // @@ It would be much more accurate to evaluate the error exactly.

                // compute the error
                Vector3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );

                // apply the metric to the error term
                float error = dot( e1, m_metricSqr );

                // keep the solution if it wins
                if (error < besterror)
                {
                    besterror = error;
                    beststart = a;
                    bestend = b;
                }

                // Grow the third cluster. On the last iteration the index equals
                // count, an entry setColorSet never wrote and whose sums would
                // never be used, so it is not read.
                if (c0+c1+c2 < count)
                {
                    x2 += m_weighted[c0+c1+c2];
                    w2 += m_weights[c0+c1+c2];
                }
            }

            // Grow the second cluster; same reasoning for c0+c1 == count.
            if (c0+c1 < count)
            {
                x1 += m_weighted[c0+c1];
                w1 += m_weights[c0+c1];
            }
        }

        // Grow the first cluster; same reasoning for c0 == count.
        if (c0 < count)
        {
            x0 += m_weighted[c0];
            w0 += m_weights[c0];
        }
    }

    // save the block if necessary
    if (besterror < m_besterror)
    {
        *start = beststart;
        *end = bestend;

        // save the error
        m_besterror = besterror;

        return true;
    }

    return false;
}
|
|
|
|
#endif // NVTT_USE_SIMD
|