From a5faf517382944ba68f2c0cd2e27bff467cd887e Mon Sep 17 00:00:00 2001 From: castano Date: Sat, 4 Jul 2009 19:33:55 +0000 Subject: [PATCH] Add simd power solver. --- src/nvtt/squish/maths.cpp | 202 +++++++------------------------------- 1 file changed, 35 insertions(+), 167 deletions(-) diff --git a/src/nvtt/squish/maths.cpp b/src/nvtt/squish/maths.cpp index 4243e7e..35934a3 100644 --- a/src/nvtt/squish/maths.cpp +++ b/src/nvtt/squish/maths.cpp @@ -24,6 +24,7 @@ -------------------------------------------------------------------------- */ #include "maths.h" +#include "simd.h" #include namespace nvsquish { @@ -59,189 +60,56 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weight return covariance; } -#if 1 + +#define POWER_ITERATION_COUNT 8 + +#if SQUISH_USE_SIMD Vec3 ComputePrincipleComponent( Sym3x3 const& matrix ) { - const int NUM = 8; + Vec4 const row0( matrix[0], matrix[1], matrix[2], 0.0f ); + Vec4 const row1( matrix[1], matrix[3], matrix[4], 0.0f ); + Vec4 const row2( matrix[2], matrix[4], matrix[5], 0.0f ); + Vec4 v = VEC4_CONST( 1.0f ); + for( int i = 0; i < POWER_ITERATION_COUNT; ++i ) + { + // matrix multiply + Vec4 w = row0*v.SplatX(); + w = MultiplyAdd(row1, v.SplatY(), w); + w = MultiplyAdd(row2, v.SplatZ(), w); + // get max component from xyz in all channels + Vec4 a = Max(w.SplatX(), Max(w.SplatY(), w.SplatZ())); + + // divide through and advance + v = w*Reciprocal(a); + } + return v.GetVec3(); +} + +#else + +Vec3 ComputePrincipleComponent( Sym3x3 const& matrix ) +{ Vec3 v(1, 1, 1); - for (int i = 0; i < NUM; i++) - { + for (int i = 0; i < POWER_ITERATION_COUNT; i++) + { float x = v.X() * matrix[0] + v.Y() * matrix[1] + v.Z() * matrix[2]; float y = v.X() * matrix[1] + v.Y() * matrix[3] + v.Z() * matrix[4]; float z = v.X() * matrix[2] + v.Y() * matrix[4] + v.Z() * matrix[5]; float norm = std::max(std::max(x, y), z); - float iv = 1.0f / norm; + if (norm == 0.0f) { // @@ I think this is not necessary in this case!! + return Vec3(0.0f); + } + v = Vec3(x*iv, y*iv, z*iv); } return v; } -#else - -static Vec3 GetMultiplicity1Evector( Sym3x3 const& matrix, float evalue ) -{ - // compute M - Sym3x3 m; - m[0] = matrix[0] - evalue; - m[1] = matrix[1]; - m[2] = matrix[2]; - m[3] = matrix[3] - evalue; - m[4] = matrix[4]; - m[5] = matrix[5] - evalue; - - // compute U - Sym3x3 u; - u[0] = m[3]*m[5] - m[4]*m[4]; - u[1] = m[2]*m[4] - m[1]*m[5]; - u[2] = m[1]*m[4] - m[2]*m[3]; - u[3] = m[0]*m[5] - m[2]*m[2]; - u[4] = m[1]*m[2] - m[4]*m[0]; - u[5] = m[0]*m[3] - m[1]*m[1]; - - // find the largest component - float mc = std::fabs( u[0] ); - int mi = 0; - for( int i = 1; i < 6; ++i ) - { - float c = std::fabs( u[i] ); - if( c > mc ) - { - mc = c; - mi = i; - } - } - - // pick the column with this component - switch( mi ) - { - case 0: - return Vec3( u[0], u[1], u[2] ); - - case 1: - case 3: - return Vec3( u[1], u[3], u[4] ); - - default: - return Vec3( u[2], u[4], u[5] ); - } -} - -static Vec3 GetMultiplicity2Evector( Sym3x3 const& matrix, float evalue ) -{ - // compute M - Sym3x3 m; - m[0] = matrix[0] - evalue; - m[1] = matrix[1]; - m[2] = matrix[2]; - m[3] = matrix[3] - evalue; - m[4] = matrix[4]; - m[5] = matrix[5] - evalue; - - // find the largest component - float mc = std::fabs( m[0] ); - int mi = 0; - for( int i = 1; i < 6; ++i ) - { - float c = std::fabs( m[i] ); - if( c > mc ) - { - mc = c; - mi = i; - } - } - - // pick the first eigenvector based on this index - switch( mi ) - { - case 0: - case 1: - return Vec3( -m[1], m[0], 0.0f ); - - case 2: - return Vec3( m[2], 0.0f, -m[0] ); - - case 3: - case 4: - return Vec3( 0.0f, -m[4], m[3] ); - - default: - return Vec3( 0.0f, -m[5], m[4] ); - } -} - -Vec3 ComputePrincipleComponent( Sym3x3 const& matrix ) -{ - // compute the cubic coefficients - float c0 = matrix[0]*matrix[3]*matrix[5] - + 2.0f*matrix[1]*matrix[2]*matrix[4] - - matrix[0]*matrix[4]*matrix[4] - - matrix[3]*matrix[2]*matrix[2] - - matrix[5]*matrix[1]*matrix[1]; - float c1 = matrix[0]*matrix[3] + matrix[0]*matrix[5] + matrix[3]*matrix[5] - - matrix[1]*matrix[1] - matrix[2]*matrix[2] - matrix[4]*matrix[4]; - float c2 = matrix[0] + matrix[3] + matrix[5]; - - // compute the quadratic coefficients - float a = c1 - ( 1.0f/3.0f )*c2*c2; - float b = ( -2.0f/27.0f )*c2*c2*c2 + ( 1.0f/3.0f )*c1*c2 - c0; - - // compute the root count check - float Q = 0.25f*b*b + ( 1.0f/27.0f )*a*a*a; - - // test the multiplicity - if( FLT_EPSILON < Q ) - { - // only one root, which implies we have a multiple of the identity - return Vec3( 1.0f ); - } - else if( Q < -FLT_EPSILON ) - { - // three distinct roots - float theta = std::atan2( std::sqrt( -Q ), -0.5f*b ); - float rho = std::sqrt( 0.25f*b*b - Q ); - - float rt = std::pow( rho, 1.0f/3.0f ); - float ct = std::cos( theta/3.0f ); - float st = std::sin( theta/3.0f ); - - float l1 = ( 1.0f/3.0f )*c2 + 2.0f*rt*ct; - float l2 = ( 1.0f/3.0f )*c2 - rt*( ct + ( float )sqrt( 3.0f )*st ); - float l3 = ( 1.0f/3.0f )*c2 - rt*( ct - ( float )sqrt( 3.0f )*st ); - - // pick the larger - if( std::fabs( l2 ) > std::fabs( l1 ) ) - l1 = l2; - if( std::fabs( l3 ) > std::fabs( l1 ) ) - l1 = l3; - - // get the eigenvector - return GetMultiplicity1Evector( matrix, l1 ); - } - else // if( -FLT_EPSILON <= Q && Q <= FLT_EPSILON ) - { - // two roots - float rt; - if( b < 0.0f ) - rt = -std::pow( -0.5f*b, 1.0f/3.0f ); - else - rt = std::pow( 0.5f*b, 1.0f/3.0f ); - - float l1 = ( 1.0f/3.0f )*c2 + rt; // repeated - float l2 = ( 1.0f/3.0f )*c2 - 2.0f*rt; - - // get the eigenvector - if( std::fabs( l1 ) > std::fabs( l2 ) ) - return GetMultiplicity2Evector( matrix, l1 ); - else - return GetMultiplicity1Evector( matrix, l2 ); - } -} #endif - -} // namespace squish +} // namespace nvsquish