From 0bb3011f7fba06f5ec36e48d7d473d8c71faf75e Mon Sep 17 00:00:00 2001
From: castano <castano@95f4ed2b-212e-0410-8b90-d31948207fce>
Date: Fri, 12 Nov 2010 03:35:28 +0000
Subject: [PATCH] Add cluster fit implementation based on squish.

---
 src/nvtt/ClusterFit.cpp | 614 ++++++++++++++++++++++++++++++++++++++++
 src/nvtt/ClusterFit.h   |  82 ++++++
 2 files changed, 696 insertions(+)
 create mode 100644 src/nvtt/ClusterFit.cpp
 create mode 100644 src/nvtt/ClusterFit.h

diff --git a/src/nvtt/ClusterFit.cpp b/src/nvtt/ClusterFit.cpp
new file mode 100644
index 0000000..7f21813
--- /dev/null
+++ b/src/nvtt/ClusterFit.cpp
@@ -0,0 +1,614 @@
+/* -----------------------------------------------------------------------------
+
+	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+	Copyright (c) 2006 Ignacio Castano                      icastano@nvidia.com
+
+	Permission is hereby granted, free of charge, to any person obtaining
+	a copy of this software and associated documentation files (the 
+	"Software"), to	deal in the Software without restriction, including
+	without limitation the rights to use, copy, modify, merge, publish,
+	distribute, sublicense, and/or sell copies of the Software, and to 
+	permit persons to whom the Software is furnished to do so, subject to 
+	the following conditions:
+
+	The above copyright notice and this permission notice shall be included
+	in all copies or substantial portions of the Software.
+
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
+	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
+	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
+	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+	
+   -------------------------------------------------------------------------- */
+
+#include "ClusterFit.h"
+#include "nvmath/Fitting.h"
+#include "nvimage/ColorBlock.h"
+
+using namespace nv;
+
+ClusterFit::ClusterFit()	// Start in a defined state: empty colour set, uniform metric, zero sums, no best error yet.
+	: count(0), m_metric(1.0f), m_metricSqr(1.0f), m_xxsum(0.0f), m_xsum(0.0f), m_besterror(FLT_MAX)
+{}	// The per-point arrays (and m_wsum in the scalar build) are still filled in only by setColourSet().
+
+void ClusterFit::setColourSet(const ColorSet * set)
+{
+	// reset the best error (any finite candidate found later wins) and fetch the metric as a Vector3
+#if !NVTT_USE_SIMD
+	m_besterror = FLT_MAX;
+	Vector3 metric = m_metric;
+#else
+	m_besterror = SimdVector( FLT_MAX );
+	Vector3 metric = m_metric.toVector3();
+#endif
+	count = set->count;	// number of colours in this set
+	// keep only the xyz part of each colour (the local buffers hold at most 16 entries)
+	Vector3 rgb[16];
+	for (uint k = 0; k < count; ++k)
+		rgb[k] = set->colors[k].xyz();
+	Vector3 const axis = Fit::computePrincipalComponent(count, rgb, set->weights, metric);
+
+	// project every colour onto the axis; m_order starts as the identity permutation
+	float proj[16];
+	for (uint k = 0; k < count; ++k)
+	{
+		proj[k] = dot(rgb[k], axis);
+		m_order[k] = k;
+	}
+
+	// stable insertion sort by projection, carrying each key's original index along
+	for (uint k = 1; k < count; ++k)
+	{
+		float const key = proj[k];
+		int const origin = m_order[k];
+		uint slot = k;
+		for (; slot > 0 && key < proj[slot - 1]; --slot)
+		{
+			proj[slot] = proj[slot - 1];
+			m_order[slot] = m_order[slot - 1];
+		}
+		proj[slot] = key;
+		m_order[slot] = origin;
+	}
+
+	// pick the unweighted points and their weights, and clear the running sums
+#if !NVTT_USE_SIMD
+	Vector3 const* unweighted = rgb;
+	float const* weights = set->weights;
+	m_xxsum = Vector3(0.0f);
+	m_xsum = Vector3(0.0f);
+	m_wsum = 0.0f;
+#else
+	SimdVector const* unweighted = set->GetPointsSimd();
+	SimdVector const* weights = set->GetWeightsSimd();
+	m_xxsum = SimdVector( 0.0f );
+	m_xsum = SimdVector( 0.0f );
+#endif
+
+	// store each point scaled by its weight, in sorted order, while accumulating the totals
+	for (uint k = 0; k < count; ++k)
+	{
+		int const src = m_order[k];
+		m_weighted[k] = weights[src] * unweighted[src];
+		m_xxsum += m_weighted[k] * m_weighted[k];
+		m_xsum += m_weighted[k];
+#if !NVTT_USE_SIMD
+		m_weights[k] = weights[src];
+		m_wsum += m_weights[k];
+#endif
+	}
+}
+
+
+void ClusterFit::setMetric(Vector4::Arg w)
+{
+#if !NVTT_USE_SIMD
+	m_metric = w.xyz();	// scalar build: keep the xyz channel weights only
+#else
+	m_metric = SimdVector(w);	// SIMD build: hand the whole Vector4 to SimdVector
+#endif
+	m_metricSqr = m_metric * m_metric;	// cache the product of the metric with itself for the error terms
+}
+
+// Best error found so far plus the constant m_xxsum term, weighted by the squared metric.
+float ClusterFit::bestError() const
+{
+#if !NVTT_USE_SIMD
+	float const constantTerm = dot(m_xxsum, m_metricSqr);
+	return m_besterror + constantTerm;
+#else
+	SimdVector const xx = m_xxsum * m_metricSqr;
+	return ( m_besterror + xx.splatX() + xx.splatY() + xx.splatZ() ).toFloat();
+#endif
+}
+
+#if NVTT_USE_SIMD
+
+// SIMD search over every split of the sorted points into three runs (start, midpoint, end).
+// Stores the winning endpoints in *start / *end and returns true only if they beat m_besterror.
+bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
+{
+	// use the point count cached by setColourSet(); ClusterFit declares no m_colours member
+	int const count = static_cast<int>( this->count );
+	SimdVector const one = SimdVector(1.0f);
+	SimdVector const zero = SimdVector(0.0f);
+	SimdVector const half(0.5f, 0.5f, 0.5f, 0.25f);	// w lane 0.25 gives w1*0.25 (assumes each point's w lane holds its weight - TODO confirm)
+	SimdVector const two = SimdVector(2.0);
+	SimdVector const grid( 31.0f, 63.0f, 31.0f, 0.0f );
+	SimdVector const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
+	// declare variables
+	SimdVector beststart = SimdVector( 0.0f );
+	SimdVector bestend = SimdVector( 0.0f );
+	SimdVector besterror = SimdVector( FLT_MAX );
+	SimdVector x0 = zero;
+	int b0 = 0, b1 = 0;	// winning run lengths; only the disabled index code below reads them
+
+	// check all possible clusters for this total order
+	for( int c0 = 0; c0 <= count; c0++)
+	{
+		SimdVector x1 = zero;
+		for( int c1 = 0; c1 <= count-c0; c1++)
+		{
+			SimdVector const x2 = m_xsum - x1 - x0;
+
+			//Vector3 const alphax_sum = x0 + x1 * 0.5f;
+			//float const alpha2_sum = w0 + w1 * 0.25f;
+			SimdVector const alphax_sum = multiplyAdd(x1, half, x0); // alphax_sum, alpha2_sum
+			SimdVector const alpha2_sum = alphax_sum.splatW();
+
+			//Vector3 const betax_sum = x2 + x1 * 0.5f;
+			//float const beta2_sum = w2 + w1 * 0.25f;
+			SimdVector const betax_sum = multiplyAdd(x1, half, x2); // betax_sum, beta2_sum
+			SimdVector const beta2_sum = betax_sum.splatW();
+
+			//float const alphabeta_sum = w1 * 0.25f;
+			SimdVector const alphabeta_sum = (x1 * half).splatW(); // alphabeta_sum
+
+			// float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
+			SimdVector const factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
+
+			SimdVector a = negativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
+			SimdVector b = negativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
+
+			// clamp to the grid
+			a = min( one, max( zero, a ) );
+			b = min( one, max( zero, b ) );
+			a = truncate( multiplyAdd( grid, a, half ) ) * gridrcp;
+			b = truncate( multiplyAdd( grid, b, half ) ) * gridrcp;
+
+			// compute the error (we skip the constant xxsum)
+			SimdVector e1 = multiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
+			SimdVector e2 = negativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
+			SimdVector e3 = negativeMultiplySubtract( b, betax_sum, e2 );
+			SimdVector e4 = multiplyAdd( two, e3, e1 );
+
+			// apply the metric to the error term
+			SimdVector e5 = e4 * m_metricSqr;
+			SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ();
+
+			// keep the solution if it wins
+			if( compareAnyLessThan( error, besterror ) )
+			{
+				besterror = error;
+				beststart = a;
+				bestend = b;
+				b0 = c0;
+				b1 = c1;
+			}
+
+			// grow the middle run; on the last pass c0+c1 == count, so there is no point left to read
+			if( c0+c1 < count )
+				x1 += m_weighted[c0+c1];
+		}
+		// likewise, m_weighted[count] must not be read once the first run holds every point
+		if( c0 < count )
+			x0 += m_weighted[c0];
+	}
+
+	// save the block if necessary
+	if( compareAnyLessThan( besterror, m_besterror ) )
+	{
+		// compute indices from cluster sizes.
+		/*u8 bestindices[16];
+		{
+			int i = 0;
+			for(; i < b0; i++) {
+				bestindices[i] = 0;
+			}
+			for(; i < b0+b1; i++) {
+				bestindices[i] = 2;
+			}
+			for(; i < count; i++) {
+				bestindices[i] = 1;
+			}
+		}
+		
+		// remap the indices
+		u8 ordered[16];
+		for( int i = 0; i < count; ++i )
+			ordered[m_order[i]] = bestindices[i];
+		
+		m_colours->RemapIndices( ordered, bestindices );
+
+		// save the block
+		WriteColourBlock3( beststart.toVector3(), bestend.toVector3(), bestindices, block );*/
+
+		*start = beststart.toVector3();
+		*end = bestend.toVector3();
+		// save the error
+		m_besterror = besterror;
+		return true;
+	}
+
+	return false;
+}
+
+// SIMD search over every split of the sorted points into four runs (start, 1/3, 2/3, end).
+// Stores the winning endpoints in *start / *end and returns true only if they beat m_besterror.
+bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
+{
+	// use the point count cached by setColourSet(); ClusterFit declares no m_colours member
+	int const count = static_cast<int>( this->count );
+	SimdVector const one = SimdVector(1.0f);
+	SimdVector const zero = SimdVector(0.0f);
+	SimdVector const half = SimdVector(0.5f);
+	SimdVector const two = SimdVector(2.0);
+	SimdVector const onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
+	SimdVector const twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
+	SimdVector const twonineths = SimdVector( 2.0f/9.0f );
+	SimdVector const grid( 31.0f, 63.0f, 31.0f, 0.0f );
+	SimdVector const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
+	// declare variables
+	SimdVector beststart = SimdVector( 0.0f );
+	SimdVector bestend = SimdVector( 0.0f );
+	SimdVector besterror = SimdVector( FLT_MAX );
+	SimdVector x0 = zero;
+	int b0 = 0, b1 = 0, b2 = 0;	// winning run lengths; only the disabled index code below reads them
+
+	// check all possible clusters for this total order
+	for( int c0 = 0; c0 <= count; c0++)
+	{
+		SimdVector x1 = zero;
+		for( int c1 = 0; c1 <= count-c0; c1++)
+		{
+			SimdVector x2 = zero;
+			for( int c2 = 0; c2 <= count-c0-c1; c2++)
+			{
+				SimdVector const x3 = m_xsum - x2 - x1 - x0;
+				//Vector3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
+				//float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
+				SimdVector const alphax_sum = multiplyAdd(x2, onethird, multiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum
+				SimdVector const alpha2_sum = alphax_sum.splatW();
+
+				//Vector3 const betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f);
+				//float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
+				SimdVector const betax_sum = multiplyAdd(x2, twothirds, multiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum
+				SimdVector const beta2_sum = betax_sum.splatW();
+
+				//float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
+				SimdVector const alphabeta_sum = twonineths*( x1 + x2 ).splatW(); // alphabeta_sum
+
+				// float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
+				SimdVector const factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
+
+				SimdVector a = negativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
+				SimdVector b = negativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
+
+				// clamp to the grid
+				a = min( one, max( zero, a ) );
+				b = min( one, max( zero, b ) );
+				a = truncate( multiplyAdd( grid, a, half ) ) * gridrcp;
+				b = truncate( multiplyAdd( grid, b, half ) ) * gridrcp;
+
+				// compute the error (we skip the constant xxsum)
+				SimdVector e1 = multiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
+				SimdVector e2 = negativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
+				SimdVector e3 = negativeMultiplySubtract( b, betax_sum, e2 );
+				SimdVector e4 = multiplyAdd( two, e3, e1 );
+
+				// apply the metric to the error term
+				SimdVector e5 = e4 * m_metricSqr;
+				SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ();
+
+				// keep the solution if it wins
+				if( compareAnyLessThan( error, besterror ) )
+				{
+					besterror = error;
+					beststart = a;
+					bestend = b;
+					b0 = c0;
+					b1 = c1;
+					b2 = c2;
+				}
+
+				// grow the third run; on the last pass c0+c1+c2 == count, so there is no point left to read
+				if( c0+c1+c2 < count )
+					x2 += m_weighted[c0+c1+c2];
+			}
+			// same guard for the second run: never read m_weighted[count]
+			if( c0+c1 < count )
+				x1 += m_weighted[c0+c1];
+		}
+		// and for the first run
+		if( c0 < count )
+			x0 += m_weighted[c0];
+	}
+
+	// save the block if necessary
+	if( compareAnyLessThan( besterror, m_besterror ) )
+	{
+		/*// compute indices from cluster sizes.
+		u8 bestindices[16];
+		{
+			int i = 0;
+			for(; i < b0; i++) {
+				bestindices[i] = 0;
+			}
+			for(; i < b0+b1; i++) {
+				bestindices[i] = 2;
+			}
+			for(; i < b0+b1+b2; i++) {
+				bestindices[i] = 3;
+			}
+			for(; i < count; i++) {
+				bestindices[i] = 1;
+			}
+		}
+		
+		// remap the indices
+		u8 ordered[16];
+		for( int i = 0; i < count; ++i )
+			ordered[m_order[i]] = bestindices[i];
+		
+        m_colours->RemapIndices( ordered, bestindices );
+
+		// save the block
+		WriteColourBlock4( beststart.toVector3(), bestend.toVector3(), bestindices, block );*/
+
+		*start = beststart.toVector3();
+		*end = bestend.toVector3();
+		// save the error
+		m_besterror = besterror;
+		return true;
+	}
+
+	return false;
+}
+
+#else
+
+// Scalar search over every split of the sorted points into three runs (start, midpoint, end).
+// Stores the winning endpoints in *start / *end and returns true only if they beat m_besterror.
+bool ClusterFit::compress3(Vector3 * start, Vector3 * end)
+{
+	const Vector3 one( 1.0f );
+	const Vector3 zero( 0.0f );
+	const Vector3 half( 0.5f );
+	const Vector3 grid( 31.0f, 63.0f, 31.0f );
+	const Vector3 gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
+
+	// declare variables
+	Vector3 beststart( 0.0f );
+	Vector3 bestend( 0.0f );
+	float besterror = FLT_MAX;
+	Vector3 x0(0.0f);
+	float w0 = 0.0f;
+	int b0 = 0, b1 = 0;	// winning run lengths; only the disabled index code below reads them
+
+	// check all possible clusters for this total order
+	for (uint c0 = 0; c0 <= count; c0++)
+	{
+		Vector3 x1(0.0f);
+		float w1 = 0.0f;
+		for (uint c1 = 0; c1 <= count-c0; c1++)
+		{
+			float w2 = m_wsum - w0 - w1;
+			// These factors could be entirely precomputed.
+			float const alpha2_sum = w0 + w1 * 0.25f;
+			float const beta2_sum = w2 + w1 * 0.25f;
+			float const alphabeta_sum = w1 * 0.25f;
+			float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
+
+			Vector3 const alphax_sum = x0 + x1 * 0.5f;
+			Vector3 const betax_sum = m_xsum - alphax_sum;
+			Vector3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor;
+			Vector3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor;
+
+			// clamp to the grid
+			a = min(one, max(zero, a));
+			b = min(one, max(zero, b));
+			a = floor(grid * a + half) * gridrcp;
+			b = floor(grid * b + half) * gridrcp;
+
+			// compute the error
+			Vector3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
+			// apply the metric to the error term
+			float error = dot(e1, m_metricSqr);
+			// keep the solution if it wins
+			if (error < besterror)
+			{
+				besterror = error;
+				beststart = a;
+				bestend = b;
+				b0 = c0;
+				b1 = c1;
+			}
+
+			// grow the middle run; on the last pass c0+c1 == count, so there is no point left to read
+			if (c0+c1 < count)
+			{
+				x1 += m_weighted[c0+c1];
+				w1 += m_weights[c0+c1];
+			}
+		}
+
+		if (c0 < count)	// likewise, never read m_weighted[count] / m_weights[count]
+		{
+			x0 += m_weighted[c0];
+			w0 += m_weights[c0];
+		}
+	}
+
+	// save the block if necessary
+	if( besterror < m_besterror )
+	{
+		/*// compute indices from cluster sizes.
+		u8 bestindices[16];
+		{
+			int i = 0;
+			for(; i < b0; i++) {
+				bestindices[i] = 0;
+			}
+			for(; i < b0+b1; i++) {
+				bestindices[i] = 2;
+			}
+			for(; i < count; i++) {
+				bestindices[i] = 1;
+			}
+		}
+		
+		// remap the indices
+		u8 ordered[16];
+		for( int i = 0; i < count; ++i )
+			ordered[m_order[i]] = bestindices[i];
+		
+        m_colours->RemapIndices( ordered, bestindices );
+
+		// save the block
+		WriteColourBlock3( beststart, bestend, bestindices, block );*/
+
+		*start = beststart;
+		*end = bestend;
+		// save the error
+		m_besterror = besterror;
+		return true;
+	}
+
+	return false;
+}
+
+// Scalar search over every split of the sorted points into four runs (start, 1/3, 2/3, end).
+// Stores the winning endpoints in *start / *end and returns true only if they beat m_besterror.
+bool ClusterFit::compress4(Vector3 * start, Vector3 * end)
+{
+	Vector3 const one( 1.0f );
+	Vector3 const zero( 0.0f );
+	Vector3 const half( 0.5f );
+	Vector3 const grid( 31.0f, 63.0f, 31.0f );
+	Vector3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
+
+	// declare variables
+	Vector3 beststart( 0.0f );
+	Vector3 bestend( 0.0f );
+	float besterror = FLT_MAX;
+	Vector3 x0(0.0f);
+	float w0 = 0.0f;
+	int b0 = 0, b1 = 0, b2 = 0;	// winning run lengths; only the disabled index code below reads them
+
+	// check all possible clusters for this total order
+	for (uint c0 = 0; c0 <= count; c0++)
+	{
+		Vector3 x1(0.0f);
+		float w1 = 0.0f;
+		for (uint c1 = 0; c1 <= count-c0; c1++)
+		{
+			Vector3 x2(0.0f);
+			float w2 = 0.0f;
+			for (uint c2 = 0; c2 <= count-c0-c1; c2++)
+			{
+				float w3 = m_wsum - w0 - w1 - w2;
+				float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
+				float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
+				float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
+				float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
+				Vector3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
+				Vector3 const betax_sum = m_xsum - alphax_sum;
+				Vector3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
+				Vector3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
+
+				// clamp to the grid
+				a = min( one, max( zero, a ) );
+				b = min( one, max( zero, b ) );
+				a = floor( grid*a + half )*gridrcp;
+				b = floor( grid*b + half )*gridrcp;
+
+				// compute the error
+				Vector3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
+				// apply the metric to the error term
+				float error = dot( e1, m_metricSqr );
+				// keep the solution if it wins
+				if( error < besterror )
+				{
+					besterror = error;
+					beststart = a;
+					bestend = b;
+					b0 = c0;
+					b1 = c1;
+					b2 = c2;
+				}
+				// grow the third run; on the last pass c0+c1+c2 == count, so there is no point left to read
+				if (c0+c1+c2 < count)
+				{
+					x2 += m_weighted[c0+c1+c2];
+					w2 += m_weights[c0+c1+c2];
+				}
+			}
+
+			if (c0+c1 < count)	// same guard for the second run: never read index count
+			{
+				x1 += m_weighted[c0+c1];
+				w1 += m_weights[c0+c1];
+			}
+		}
+
+		if (c0 < count)	// and for the first run
+		{
+			x0 += m_weighted[c0];
+			w0 += m_weights[c0];
+		}
+	}
+
+	// save the block if necessary
+	if( besterror < m_besterror )
+	{
+		/*// compute indices from cluster sizes.
+		u8 bestindices[16];
+		{
+			int i = 0;
+			for(; i < b0; i++) {
+				bestindices[i] = 0;
+			}
+			for(; i < b0+b1; i++) {
+				bestindices[i] = 2;
+			}
+			for(; i < b0+b1+b2; i++) {
+				bestindices[i] = 3;
+			}
+			for(; i < count; i++) {
+				bestindices[i] = 1;
+			}
+		}
+		
+		// remap the indices
+		u8 ordered[16];
+		for( int i = 0; i < count; ++i )
+			ordered[m_order[i]] = bestindices[i];
+
+        m_colours->RemapIndices( ordered, bestindices );
+		
+		// save the block
+		WriteColourBlock4( beststart, bestend, bestindices, block );*/
+		*start = beststart;
+		*end = bestend;
+		// save the error
+		m_besterror = besterror;
+		return true;
+	}
+
+	return false;
+}
+
+#endif // NVTT_USE_SIMD
diff --git a/src/nvtt/ClusterFit.h b/src/nvtt/ClusterFit.h
new file mode 100644
index 0000000..af3f84d
--- /dev/null
+++ b/src/nvtt/ClusterFit.h
@@ -0,0 +1,82 @@
+/* -----------------------------------------------------------------------------
+
+	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+	Copyright (c) 2006 Ignacio Castano                      icastano@nvidia.com
+
+	Permission is hereby granted, free of charge, to any person obtaining
+	a copy of this software and associated documentation files (the 
+	"Software"), to	deal in the Software without restriction, including
+	without limitation the rights to use, copy, modify, merge, publish,
+	distribute, sublicense, and/or sell copies of the Software, and to 
+	permit persons to whom the Software is furnished to do so, subject to 
+	the following conditions:
+
+	The above copyright notice and this permission notice shall be included
+	in all copies or substantial portions of the Software.
+
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
+	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
+	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
+	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+	
+   -------------------------------------------------------------------------- */
+   
+#ifndef NVTT_CLUSTERFIT_H
+#define NVTT_CLUSTERFIT_H
+
+#define NVTT_USE_SIMD 0	// 0 builds the scalar Vector3 code; non-zero selects the SimdVector code guarded by #if NVTT_USE_SIMD
+
+#include "nvmath/SimdVector.h"
+#include "nvmath/Vector.h"
+
+namespace nv {
+
+    struct ColorSet;
+
+    class ClusterFit	// Endpoint search for one set of up to 16 weighted colours (cluster fit based on squish).
+    {
+    public:
+	    ClusterFit();
+	    // Sorts the set's colours along their principal axis, caches the weighted points/sums, resets the best error.
+	    void setColourSet(const ColorSet * set);
+	    // setMetric() stores the channel weights; setColourSet() reads them, so set the metric first.
+        void setMetric(Vector4::Arg w);
+	    float bestError() const;	// best error so far plus the constant m_xxsum term, weighted by the squared metric
+	    // Each returns true, and writes *start / *end, only when it finds a fit better than the current best.
+	    bool compress3(Vector3 * start, Vector3 * end);
+	    bool compress4(Vector3 * start, Vector3 * end);
+    	
+    private:
+
+        uint count;	// number of colours in the current set, copied from ColorSet::count by setColourSet()
+	    //ColorSet const* m_colours;
+
+        Vector3 m_principle;	// not referenced by ClusterFit.cpp
+
+    #if NVTT_USE_SIMD
+        SimdVector m_weighted[16];	// points multiplied by their weight, in sorted order
+	    SimdVector m_metric;	// metric passed to setMetric()
+	    SimdVector m_metricSqr;	// m_metric * m_metric
+	    SimdVector m_xxsum;	// sum of m_weighted[i] * m_weighted[i]
+	    SimdVector m_xsum;	// sum of m_weighted[i]
+	    SimdVector m_besterror;	// lowest error accepted since the last setColourSet()
+    #else
+	    Vector3 m_weighted[16];	// points multiplied by their weight, in sorted order
+	    float m_weights[16];	// the weights themselves, in the same sorted order
+	    Vector3 m_metric;	// xyz of the metric passed to setMetric()
+	    Vector3 m_metricSqr;	// m_metric * m_metric
+	    Vector3 m_xxsum;	// sum of m_weighted[i] * m_weighted[i]
+	    Vector3 m_xsum;	// sum of m_weighted[i]
+	    float m_wsum;	// sum of m_weights[i]
+	    float m_besterror;	// lowest error accepted since the last setColourSet()
+    #endif
+	    // m_order[i] is the index, within the source ColorSet, of the i-th point after sorting.
+	    int m_order[16];
+    };
+
+} // nv namespace
+
+#endif // NVTT_CLUSTERFIT_H