Review notes (free text ahead of the first "diff --git" header):
  * This patch was recovered from a copy whose line breaks had been flattened;
    blank context lines and indentation are a best-effort reconstruction.
  * The header name on the bare "#include" context line was missing from that
    copy and has been left as found; restore it before applying.
  * Relative to the original patch, the scalar path also drops the local NUM
    (its only visible use was the loop bound replaced here) and the new
    definitions carry short comments; hunk line counts were adjusted to match.
    The post-image blob id on the "index" line predates these adjustments.

diff --git a/src/nvtt/squish/maths.cpp b/src/nvtt/squish/maths.cpp
index 87b4cd9..6e4d2ba 100644
--- a/src/nvtt/squish/maths.cpp
+++ b/src/nvtt/squish/maths.cpp
@@ -24,6 +24,7 @@
    -------------------------------------------------------------------------- */
 
 #include "maths.h"
+#include "simd.h"
 #include
 
 namespace squish {
@@ -60,12 +61,45 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weight
 }
 
 
+// Number of multiply-and-rescale steps run by ComputePrincipleComponent.
+#define POWER_ITERATION_COUNT 8
+
+#if SQUISH_USE_SIMD
+
+// Approximates the principal component of a symmetric 3x3 matrix by power
+// iteration: starting from VEC4_CONST( 1.0f ), the vector is multiplied by
+// the matrix and scaled by the reciprocal of its largest xyz component,
+// POWER_ITERATION_COUNT times over.
+// NOTE(review): nothing here guards against that largest component being
+// zero before Reciprocal is applied -- confirm callers tolerate the result.
+Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
+{
+	Vec4 const row0( matrix[0], matrix[1], matrix[2], 0.0f );
+	Vec4 const row1( matrix[1], matrix[3], matrix[4], 0.0f );
+	Vec4 const row2( matrix[2], matrix[4], matrix[5], 0.0f );
+	Vec4 v = VEC4_CONST( 1.0f );
+	for( int i = 0; i < POWER_ITERATION_COUNT; ++i )
+	{
+		// matrix multiply
+		Vec4 w = row0*v.SplatX();
+		w = MultiplyAdd(row1, v.SplatY(), w);
+		w = MultiplyAdd(row2, v.SplatZ(), w);
+
+		// get max component from xyz in all channels
+		Vec4 a = Max(w.SplatX(), Max(w.SplatY(), w.SplatZ()));
+
+		// divide through and advance
+		v = w*Reciprocal(a);
+	}
+	return v.GetVec3();
+}
+
+#else
+
 Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
 {
-	const int NUM = 8;
-
 	Vec3 v(1, 1, 1);
-	for(int i = 0; i < NUM; i++) {
+	for (int i = 0; i < POWER_ITERATION_COUNT; i++) {
 		float x = v.X() * matrix[0] + v.Y() * matrix[1] + v.Z() * matrix[2];
 		float y = v.X() * matrix[1] + v.Y() * matrix[3] + v.Z() * matrix[4];
 		float z = v.X() * matrix[2] + v.Y() * matrix[4] + v.Z() * matrix[5];
@@ -82,5 +116,6 @@ Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
 
 	return v;
 }
+#endif
 
 } // namespace squish