Cleanup clusterfit.

This commit is contained in:
castano 2011-01-19 07:27:04 +00:00
parent 4f098c4ff9
commit 95b5e1decd
2 changed files with 418 additions and 526 deletions

View File

@ -1,27 +1,27 @@
/* ----------------------------------------------------------------------------- /* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2006 Ignacio Castano icastano@nvidia.com Copyright (c) 2006 Ignacio Castano icastano@nvidia.com
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including "Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish, without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to permit persons to whom the Software is furnished to do so, subject to
the following conditions: the following conditions:
The above copyright notice and this permission notice shall be included The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software. in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */ -------------------------------------------------------------------------- */
#include "ClusterFit.h" #include "ClusterFit.h"
@ -38,91 +38,90 @@ ClusterFit::ClusterFit()
void ClusterFit::setColourSet(const ColorSet * set) void ClusterFit::setColourSet(const ColorSet * set)
{ {
// initialise the best error // initialise the best error
#if NVTT_USE_SIMD #if NVTT_USE_SIMD
m_besterror = SimdVector( FLT_MAX ); m_besterror = SimdVector( FLT_MAX );
Vector3 metric = m_metric.toVector3(); Vector3 metric = m_metric.toVector3();
#else #else
m_besterror = FLT_MAX; m_besterror = FLT_MAX;
Vector3 metric = m_metric; Vector3 metric = m_metric;
#endif #endif
// cache some values // cache some values
count = set->count; m_count = set->count;
Vector3 values[16]; Vector3 values[16];
for (uint i = 0; i < count; i++) for (uint i = 0; i < m_count; i++)
{ {
values[i] = set->colors[i].xyz(); values[i] = set->colors[i].xyz();
} }
Vector3 principle = Fit::computePrincipalComponent(count, values, set->weights, metric);
Vector3 principle = Fit::computePrincipalComponent(m_count, values, set->weights, metric);
// build the list of values // build the list of values
float dps[16]; int order[16];
for (uint i = 0; i < count; ++i) float dps[16];
{ for (uint i = 0; i < m_count; ++i)
dps[i] = dot(values[i], principle); {
m_order[i] = i; dps[i] = dot(values[i], principle);
} order[i] = i;
}
// stable sort
for (uint i = 0; i < count; ++i) // stable sort
{ for (uint i = 0; i < m_count; ++i)
for (uint j = i; j > 0 && dps[j] < dps[j - 1]; --j) {
{ for (uint j = i; j > 0 && dps[j] < dps[j - 1]; --j)
swap( dps[j], dps[j - 1] ); {
swap( m_order[j], m_order[j - 1] ); swap(dps[j], dps[j - 1]);
} swap(order[j], order[j - 1]);
} }
}
// weight all the points
// weight all the points
#if NVTT_USE_SIMD #if NVTT_USE_SIMD
SimdVector const* unweighted = set->GetPointsSimd(); m_xxsum = SimdVector( 0.0f );
SimdVector const* weights = set->GetWeightsSimd(); m_xsum = SimdVector( 0.0f );
m_xxsum = SimdVector( 0.0f );
m_xsum = SimdVector( 0.0f );
#else #else
Vector3 const* unweighted = values; m_xsum = Vector3(0.0f);
float const* weights = set->weights; m_wsum = 0.0f;
m_xxsum = Vector3(0.0f);
m_xsum = Vector3(0.0f);
m_wsum = 0.0f;
#endif #endif
for (uint i = 0; i < count; ++i) for (uint i = 0; i < m_count; ++i)
{ {
int p = m_order[i]; int p = order[i];
m_weighted[i] = weights[p] * unweighted[p]; #if NVTT_USE_SIMD
m_xxsum += m_weighted[i] * m_weighted[i]; m_weighted[i] = SimdVector(Vector4(set->weights[p] * values[p], set->weights[p]));
m_xsum += m_weighted[i]; m_xxsum += m_weighted[i] * m_weighted[i];
#if !NVTT_USE_SIMD m_xsum += m_weighted[i];
m_weights[i] = weights[p]; #else
m_wsum += m_weights[i]; m_weighted[i] = values[p];
m_xxsum += m_weighted[i] * m_weighted[i];
m_xsum += m_weighted[i];
m_weights[i] = set->weights[p];
m_wsum += m_weights[i];
#endif #endif
} }
} }
void ClusterFit::setMetric(Vector4::Arg w) void ClusterFit::setMetric(Vector4::Arg w)
{ {
#if NVTT_USE_SIMD #if NVTT_USE_SIMD
m_metric = SimdVector(w); m_metric = SimdVector(Vector4(w.xyz(), 1));
#else #else
m_metric = w.xyz(); m_metric = w.xyz();
#endif #endif
m_metricSqr = m_metric * m_metric; m_metricSqr = m_metric * m_metric;
} }
float ClusterFit::bestError() const float ClusterFit::bestError() const
{ {
#if NVTT_USE_SIMD #if NVTT_USE_SIMD
SimdVector x = m_xxsum * m_metricSqr; SimdVector x = m_xxsum * m_metricSqr;
SimdVector error = m_besterror + x.splatX() + x.splatY() + x.splatZ(); SimdVector error = m_besterror + x.splatX() + x.splatY() + x.splatZ();
return error.toFloat(); return error.toFloat();
#else #else
return m_besterror + dot(m_xxsum, m_metricSqr); return m_besterror + dot(m_xxsum, m_metricSqr);
#endif #endif
} }
@ -131,251 +130,199 @@ float ClusterFit::bestError() const
bool ClusterFit::compress3( Vector3 * start, Vector3 * end ) bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
{ {
int const count = m_colours->count; int const count = m_count;
SimdVector const one = SimdVector(1.0f); SimdVector const one = SimdVector(1.0f);
SimdVector const zero = SimdVector(0.0f); SimdVector const zero = SimdVector(0.0f);
SimdVector const half(0.5f, 0.5f, 0.5f, 0.25f); SimdVector const half(0.5f, 0.5f, 0.5f, 0.25f);
SimdVector const two = SimdVector(2.0); SimdVector const two = SimdVector(2.0);
SimdVector const grid( 31.0f, 63.0f, 31.0f, 0.0f ); SimdVector const grid( 31.0f, 63.0f, 31.0f, 0.0f );
SimdVector const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); SimdVector const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// declare variables
SimdVector beststart = SimdVector( 0.0f );
SimdVector bestend = SimdVector( 0.0f );
SimdVector besterror = SimdVector( FLT_MAX );
SimdVector x0 = zero; // declare variables
SimdVector beststart = SimdVector( 0.0f );
SimdVector bestend = SimdVector( 0.0f );
SimdVector besterror = SimdVector( FLT_MAX );
SimdVector x0 = zero;
int b0 = 0, b1 = 0; int b0 = 0, b1 = 0;
// check all possible clusters for this total order // check all possible clusters for this total order
for( int c0 = 0; c0 <= count; c0++) for( int c0 = 0; c0 <= count; c0++)
{ {
SimdVector x1 = zero; SimdVector x1 = zero;
for( int c1 = 0; c1 <= count-c0; c1++)
{
SimdVector const x2 = m_xsum - x1 - x0;
//Vector3 const alphax_sum = x0 + x1 * 0.5f;
//float const alpha2_sum = w0 + w1 * 0.25f;
SimdVector const alphax_sum = multiplyAdd(x1, half, x0); // alphax_sum, alpha2_sum
SimdVector const alpha2_sum = alphax_sum.splatW();
//Vector3 const betax_sum = x2 + x1 * 0.5f;
//float const beta2_sum = w2 + w1 * 0.25f;
SimdVector const betax_sum = multiplyAdd(x1, half, x2); // betax_sum, beta2_sum
SimdVector const beta2_sum = betax_sum.splatW();
//float const alphabeta_sum = w1 * 0.25f;
SimdVector const alphabeta_sum = (x1 * half).splatW(); // alphabeta_sum
// float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
SimdVector const factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
SimdVector a = negativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
SimdVector b = negativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
// clamp to the grid
a = min( one, max( zero, a ) );
b = min( one, max( zero, b ) );
a = truncate( multiplyAdd( grid, a, half ) ) * gridrcp;
b = truncate( multiplyAdd( grid, b, half ) ) * gridrcp;
// compute the error (we skip the constant xxsum)
SimdVector e1 = multiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
SimdVector e2 = negativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
SimdVector e3 = negativeMultiplySubtract( b, betax_sum, e2 );
SimdVector e4 = multiplyAdd( two, e3, e1 );
// apply the metric to the error term for( int c1 = 0; c1 <= count-c0; c1++)
SimdVector e5 = e4 * m_metricSqr; {
SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ(); SimdVector const x2 = m_xsum - x1 - x0;
// keep the solution if it wins //Vector3 const alphax_sum = x0 + x1 * 0.5f;
if( compareAnyLessThan( error, besterror ) ) //float const alpha2_sum = w0 + w1 * 0.25f;
{ SimdVector const alphax_sum = multiplyAdd(x1, half, x0); // alphax_sum, alpha2_sum
besterror = error; SimdVector const alpha2_sum = alphax_sum.splatW();
beststart = a;
bestend = b; //Vector3 const betax_sum = x2 + x1 * 0.5f;
b0 = c0; //float const beta2_sum = w2 + w1 * 0.25f;
b1 = c1; SimdVector const betax_sum = multiplyAdd(x1, half, x2); // betax_sum, beta2_sum
} SimdVector const beta2_sum = betax_sum.splatW();
x1 += m_weighted[c0+c1]; //float const alphabeta_sum = w1 * 0.25f;
} SimdVector const alphabeta_sum = (x1 * half).splatW(); // alphabeta_sum
x0 += m_weighted[c0]; // float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
SimdVector const factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
SimdVector a = negativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
SimdVector b = negativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
// clamp to the grid
a = min( one, max( zero, a ) );
b = min( one, max( zero, b ) );
a = truncate( multiplyAdd( grid, a, half ) ) * gridrcp;
b = truncate( multiplyAdd( grid, b, half ) ) * gridrcp;
// compute the error (we skip the constant xxsum)
SimdVector e1 = multiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
SimdVector e2 = negativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
SimdVector e3 = negativeMultiplySubtract( b, betax_sum, e2 );
SimdVector e4 = multiplyAdd( two, e3, e1 );
// apply the metric to the error term
SimdVector e5 = e4 * m_metricSqr;
SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ();
// keep the solution if it wins
if( compareAnyLessThan( error, besterror ) )
{
besterror = error;
beststart = a;
bestend = b;
b0 = c0;
b1 = c1;
}
x1 += m_weighted[c0+c1];
} }
// save the block if necessary x0 += m_weighted[c0];
if( compareAnyLessThan( besterror, m_besterror ) ) }
{
// compute indices from cluster sizes.
/*u8 bestindices[16];
{
int i = 0;
for(; i < b0; i++) {
bestindices[i] = 0;
}
for(; i < b0+b1; i++) {
bestindices[i] = 2;
}
for(; i < count; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < count; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices );
// save the block // save the block if necessary
WriteColourBlock3( beststart.toVector3(), bestend.toVector3(), bestindices, block );*/ if( compareAnyLessThan( besterror, m_besterror ) )
{
*start = beststart.toVector3(); *start = beststart.toVector3();
*end = bestend.toVector3(); *end = bestend.toVector3();
// save the error // save the error
m_besterror = besterror; m_besterror = besterror;
return true; return true;
} }
return false; return false;
} }
bool ClusterFit::compress4( Vector3 * start, Vector3 * end ) bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
{ {
int const count = m_colours->count; int const count = m_count;
SimdVector const one = SimdVector(1.0f); SimdVector const one = SimdVector(1.0f);
SimdVector const zero = SimdVector(0.0f); SimdVector const zero = SimdVector(0.0f);
SimdVector const half = SimdVector(0.5f); SimdVector const half = SimdVector(0.5f);
SimdVector const two = SimdVector(2.0); SimdVector const two = SimdVector(2.0);
SimdVector const onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f ); SimdVector const onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
SimdVector const twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f ); SimdVector const twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
SimdVector const twonineths = SimdVector( 2.0f/9.0f ); SimdVector const twonineths = SimdVector( 2.0f/9.0f );
SimdVector const grid( 31.0f, 63.0f, 31.0f, 0.0f ); SimdVector const grid( 31.0f, 63.0f, 31.0f, 0.0f );
SimdVector const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); SimdVector const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// declare variables
SimdVector beststart = SimdVector( 0.0f );
SimdVector bestend = SimdVector( 0.0f );
SimdVector besterror = SimdVector( FLT_MAX );
SimdVector x0 = zero; // declare variables
int b0 = 0, b1 = 0, b2 = 0; SimdVector beststart = SimdVector( 0.0f );
SimdVector bestend = SimdVector( 0.0f );
SimdVector besterror = SimdVector( FLT_MAX );
// check all possible clusters for this total order SimdVector x0 = zero;
for( int c0 = 0; c0 <= count; c0++) int b0 = 0, b1 = 0, b2 = 0;
{
SimdVector x1 = zero; // check all possible clusters for this total order
for( int c0 = 0; c0 <= count; c0++)
for( int c1 = 0; c1 <= count-c0; c1++) {
{ SimdVector x1 = zero;
SimdVector x2 = zero;
for( int c1 = 0; c1 <= count-c0; c1++)
for( int c2 = 0; c2 <= count-c0-c1; c2++) {
{ SimdVector x2 = zero;
SimdVector const x3 = m_xsum - x2 - x1 - x0;
for( int c2 = 0; c2 <= count-c0-c1; c2++)
//Vector3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f); {
//float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f); SimdVector const x3 = m_xsum - x2 - x1 - x0;
//Vector3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
//float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
SimdVector const alphax_sum = multiplyAdd(x2, onethird, multiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum SimdVector const alphax_sum = multiplyAdd(x2, onethird, multiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum
SimdVector const alpha2_sum = alphax_sum.splatW(); SimdVector const alpha2_sum = alphax_sum.splatW();
//Vector3 const betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f);
//float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
SimdVector const betax_sum = multiplyAdd(x2, twothirds, multiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum
SimdVector const beta2_sum = betax_sum.splatW();
//float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
SimdVector const alphabeta_sum = twonineths*( x1 + x2 ).splatW(); // alphabeta_sum
// float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
SimdVector const factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
SimdVector a = negativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
SimdVector b = negativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
// clamp to the grid
a = min( one, max( zero, a ) );
b = min( one, max( zero, b ) );
a = truncate( multiplyAdd( grid, a, half ) ) * gridrcp;
b = truncate( multiplyAdd( grid, b, half ) ) * gridrcp;
// compute the error (we skip the constant xxsum)
SimdVector e1 = multiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
SimdVector e2 = negativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
SimdVector e3 = negativeMultiplySubtract( b, betax_sum, e2 );
SimdVector e4 = multiplyAdd( two, e3, e1 );
// apply the metric to the error term //Vector3 const betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f);
SimdVector e5 = e4 * m_metricSqr; //float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ(); SimdVector const betax_sum = multiplyAdd(x2, twothirds, multiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum
SimdVector const beta2_sum = betax_sum.splatW();
// keep the solution if it wins
if( compareAnyLessThan( error, besterror ) ) //float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
{ SimdVector const alphabeta_sum = twonineths*( x1 + x2 ).splatW(); // alphabeta_sum
besterror = error;
beststart = a; // float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
bestend = b; SimdVector const factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
b0 = c0;
b1 = c1; SimdVector a = negativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
b2 = c2; SimdVector b = negativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
}
// clamp to the grid
x2 += m_weighted[c0+c1+c2]; a = min( one, max( zero, a ) );
} b = min( one, max( zero, b ) );
a = truncate( multiplyAdd( grid, a, half ) ) * gridrcp;
x1 += m_weighted[c0+c1]; b = truncate( multiplyAdd( grid, b, half ) ) * gridrcp;
// compute the error (we skip the constant xxsum)
SimdVector e1 = multiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
SimdVector e2 = negativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
SimdVector e3 = negativeMultiplySubtract( b, betax_sum, e2 );
SimdVector e4 = multiplyAdd( two, e3, e1 );
// apply the metric to the error term
SimdVector e5 = e4 * m_metricSqr;
SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ();
// keep the solution if it wins
if( compareAnyLessThan( error, besterror ) )
{
besterror = error;
beststart = a;
bestend = b;
b0 = c0;
b1 = c1;
b2 = c2;
} }
x0 += m_weighted[c0]; x2 += m_weighted[c0+c1+c2];
}
x1 += m_weighted[c0+c1];
} }
// save the block if necessary x0 += m_weighted[c0];
if( compareAnyLessThan( besterror, m_besterror ) ) }
{
/*// compute indices from cluster sizes.
u8 bestindices[16];
{
int i = 0;
for(; i < b0; i++) {
bestindices[i] = 0;
}
for(; i < b0+b1; i++) {
bestindices[i] = 2;
}
for(; i < b0+b1+b2; i++) {
bestindices[i] = 3;
}
for(; i < count; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < count; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices );
// save the block
WriteColourBlock4( beststart.toVector3(), bestend.toVector3(), bestindices, block );*/
// save the block if necessary
if( compareAnyLessThan( besterror, m_besterror ) )
{
*start = beststart.toVector3(); *start = beststart.toVector3();
*end = bestend.toVector3(); *end = bestend.toVector3();
// save the error // save the error
m_besterror = besterror; m_besterror = besterror;
return true; return true;
} }
return false; return false;
} }
@ -384,231 +331,181 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
bool ClusterFit::compress3(Vector3 * start, Vector3 * end) bool ClusterFit::compress3(Vector3 * start, Vector3 * end)
{ {
const Vector3 one( 1.0f ); int const count = m_count;
const Vector3 zero( 0.0f ); const Vector3 one( 1.0f );
const Vector3 half( 0.5f ); const Vector3 zero( 0.0f );
const Vector3 half( 0.5f );
const Vector3 grid( 31.0f, 63.0f, 31.0f ); const Vector3 grid( 31.0f, 63.0f, 31.0f );
const Vector3 gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); const Vector3 gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
// declare variables // declare variables
Vector3 beststart( 0.0f ); Vector3 beststart( 0.0f );
Vector3 bestend( 0.0f ); Vector3 bestend( 0.0f );
float besterror = FLT_MAX; float besterror = FLT_MAX;
Vector3 x0(0.0f); Vector3 x0(0.0f);
float w0 = 0.0f; float w0 = 0.0f;
int b0 = 0, b1 = 0;
// check all possible clusters for this total order int b0 = 0, b1 = 0;
for (uint c0 = 0; c0 <= count; c0++)
{
Vector3 x1(0.0f);
float w1 = 0.0f;
for (uint c1 = 0; c1 <= count-c0; c1++)
{
float w2 = m_wsum - w0 - w1;
// These factors could be entirely precomputed.
float const alpha2_sum = w0 + w1 * 0.25f;
float const beta2_sum = w2 + w1 * 0.25f;
float const alphabeta_sum = w1 * 0.25f;
float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
Vector3 const alphax_sum = x0 + x1 * 0.5f;
Vector3 const betax_sum = m_xsum - alphax_sum;
Vector3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor;
Vector3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor;
// clamp to the grid
a = min(one, max(zero, a));
b = min(one, max(zero, b));
a = floor(grid * a + half) * gridrcp;
b = floor(grid * b + half) * gridrcp;
// compute the error
Vector3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
// apply the metric to the error term
float error = dot(e1, m_metricSqr);
// keep the solution if it wins
if (error < besterror)
{
besterror = error;
beststart = a;
bestend = b;
b0 = c0;
b1 = c1;
}
x1 += m_weighted[c0+c1];
w1 += m_weights[c0+c1];
}
x0 += m_weighted[c0];
w0 += m_weights[c0];
}
// save the block if necessary // check all possible clusters for this total order
if( besterror < m_besterror ) for (uint c0 = 0; c0 <= count; c0++)
{ {
/*// compute indices from cluster sizes. Vector3 x1(0.0f);
u8 bestindices[16]; float w1 = 0.0f;
{
int i = 0;
for(; i < b0; i++) {
bestindices[i] = 0;
}
for(; i < b0+b1; i++) {
bestindices[i] = 2;
}
for(; i < count; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < count; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices );
// save the block for (uint c1 = 0; c1 <= count-c0; c1++)
WriteColourBlock3( beststart, bestend, bestindices, block );*/ {
float w2 = m_wsum - w0 - w1;
// These factors could be entirely precomputed.
float const alpha2_sum = w0 + w1 * 0.25f;
float const beta2_sum = w2 + w1 * 0.25f;
float const alphabeta_sum = w1 * 0.25f;
float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
Vector3 const alphax_sum = x0 + x1 * 0.5f;
Vector3 const betax_sum = m_xsum - alphax_sum;
Vector3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor;
Vector3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor;
// clamp to the grid
a = min(one, max(zero, a));
b = min(one, max(zero, b));
a = floor(grid * a + half) * gridrcp;
b = floor(grid * b + half) * gridrcp;
// compute the error
Vector3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
// apply the metric to the error term
float error = dot(e1, m_metricSqr);
// keep the solution if it wins
if (error < besterror)
{
besterror = error;
beststart = a;
bestend = b;
b0 = c0;
b1 = c1;
}
x1 += m_weighted[c0+c1];
w1 += m_weights[c0+c1];
}
x0 += m_weighted[c0];
w0 += m_weights[c0];
}
// save the block if necessary
if( besterror < m_besterror )
{
*start = beststart; *start = beststart;
*end = bestend; *end = bestend;
// save the error // save the error
m_besterror = besterror; m_besterror = besterror;
return true; return true;
} }
return false; return false;
} }
bool ClusterFit::compress4(Vector3 * start, Vector3 * end) bool ClusterFit::compress4(Vector3 * start, Vector3 * end)
{ {
Vector3 const one( 1.0f ); int const count = m_count;
Vector3 const zero( 0.0f ); Vector3 const one( 1.0f );
Vector3 const half( 0.5f ); Vector3 const zero( 0.0f );
Vector3 const grid( 31.0f, 63.0f, 31.0f ); Vector3 const half( 0.5f );
Vector3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); Vector3 const grid( 31.0f, 63.0f, 31.0f );
Vector3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
// declare variables // declare variables
Vector3 beststart( 0.0f ); Vector3 beststart( 0.0f );
Vector3 bestend( 0.0f ); Vector3 bestend( 0.0f );
float besterror = FLT_MAX; float besterror = FLT_MAX;
Vector3 x0(0.0f); Vector3 x0(0.0f);
float w0 = 0.0f; float w0 = 0.0f;
int b0 = 0, b1 = 0, b2 = 0; int b0 = 0, b1 = 0, b2 = 0;
// check all possible clusters for this total order // check all possible clusters for this total order
for (uint c0 = 0; c0 <= count; c0++) for (uint c0 = 0; c0 <= count; c0++)
{ {
Vector3 x1(0.0f); Vector3 x1(0.0f);
float w1 = 0.0f; float w1 = 0.0f;
for (uint c1 = 0; c1 <= count-c0; c1++)
{
Vector3 x2(0.0f);
float w2 = 0.0f;
for (uint c2 = 0; c2 <= count-c0-c1; c2++)
{
float w3 = m_wsum - w0 - w1 - w2;
float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
Vector3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
Vector3 const betax_sum = m_xsum - alphax_sum;
Vector3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
Vector3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
// clamp to the grid
a = min( one, max( zero, a ) );
b = min( one, max( zero, b ) );
a = floor( grid*a + half )*gridrcp;
b = floor( grid*b + half )*gridrcp;
// compute the error
Vector3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
// apply the metric to the error term
float error = dot( e1, m_metricSqr );
// keep the solution if it wins
if( error < besterror )
{
besterror = error;
beststart = a;
bestend = b;
b0 = c0;
b1 = c1;
b2 = c2;
}
x2 += m_weighted[c0+c1+c2];
w2 += m_weights[c0+c1+c2];
}
x1 += m_weighted[c0+c1];
w1 += m_weights[c0+c1];
}
x0 += m_weighted[c0];
w0 += m_weights[c0];
}
// save the block if necessary for (uint c1 = 0; c1 <= count-c0; c1++)
if( besterror < m_besterror ) {
{ Vector3 x2(0.0f);
/*// compute indices from cluster sizes. float w2 = 0.0f;
u8 bestindices[16];
{
int i = 0;
for(; i < b0; i++) {
bestindices[i] = 0;
}
for(; i < b0+b1; i++) {
bestindices[i] = 2;
}
for(; i < b0+b1+b2; i++) {
bestindices[i] = 3;
}
for(; i < count; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < count; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices ); for (uint c2 = 0; c2 <= count-c0-c1; c2++)
{
// save the block float w3 = m_wsum - w0 - w1 - w2;
WriteColourBlock4( beststart, bestend, bestindices, block );*/
float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
Vector3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
Vector3 const betax_sum = m_xsum - alphax_sum;
Vector3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
Vector3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
// clamp to the grid
a = min( one, max( zero, a ) );
b = min( one, max( zero, b ) );
a = floor( grid*a + half )*gridrcp;
b = floor( grid*b + half )*gridrcp;
// compute the error
Vector3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
// apply the metric to the error term
float error = dot( e1, m_metricSqr );
// keep the solution if it wins
if( error < besterror )
{
besterror = error;
beststart = a;
bestend = b;
b0 = c0;
b1 = c1;
b2 = c2;
}
x2 += m_weighted[c0+c1+c2];
w2 += m_weights[c0+c1+c2];
}
x1 += m_weighted[c0+c1];
w1 += m_weights[c0+c1];
}
x0 += m_weighted[c0];
w0 += m_weights[c0];
}
// save the block if necessary
if( besterror < m_besterror )
{
*start = beststart; *start = beststart;
*end = bestend; *end = bestend;
// save the error // save the error
m_besterror = besterror; m_besterror = besterror;
return true; return true;
} }
return false; return false;
} }

View File

@ -1,26 +1,26 @@
/* ----------------------------------------------------------------------------- /* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2006 Ignacio Castano icastano@nvidia.com Copyright (c) 2006 Ignacio Castano icastano@nvidia.com
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including "Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish, without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to permit persons to whom the Software is furnished to do so, subject to
the following conditions: the following conditions:
The above copyright notice and this permission notice shall be included The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software. in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */ -------------------------------------------------------------------------- */
@ -39,42 +39,37 @@ namespace nv {
class ClusterFit class ClusterFit
{ {
public: public:
ClusterFit(); ClusterFit();
void setColourSet(const ColorSet * set);
void setColourSet(const ColorSet * set);
void setMetric(Vector4::Arg w); void setMetric(Vector4::Arg w);
float bestError() const; float bestError() const;
bool compress3(Vector3 * start, Vector3 * end);
bool compress4(Vector3 * start, Vector3 * end);
bool compress3(Vector3 * start, Vector3 * end);
bool compress4(Vector3 * start, Vector3 * end);
private: private:
uint count; uint m_count;
//ColorSet const* m_colours;
Vector3 m_principle;
#if NVTT_USE_SIMD #if NVTT_USE_SIMD
SimdVector m_weighted[16]; SimdVector m_weighted[16]; // color | weight
SimdVector m_metric; SimdVector m_metric; // vec3
SimdVector m_metricSqr; SimdVector m_metricSqr; // vec3
SimdVector m_xxsum; SimdVector m_xxsum; // color | weight
SimdVector m_xsum; SimdVector m_xsum; // color | weight (wsum)
SimdVector m_besterror; SimdVector m_besterror; // scalar
#else #else
Vector3 m_weighted[16]; Vector3 m_weighted[16];
float m_weights[16]; float m_weights[16];
Vector3 m_metric; Vector3 m_metric;
Vector3 m_metricSqr; Vector3 m_metricSqr;
Vector3 m_xxsum; Vector3 m_xxsum;
Vector3 m_xsum; Vector3 m_xsum;
float m_wsum; float m_wsum;
float m_besterror; float m_besterror;
#endif #endif
int m_order[16];
}; };
} // nv namespace } // nv namespace