diff --git a/src/nvtt/squish/fastclusterfit.cpp b/src/nvtt/squish/fastclusterfit.cpp index 8ae8ab5..3bb9fbe 100644 --- a/src/nvtt/squish/fastclusterfit.cpp +++ b/src/nvtt/squish/fastclusterfit.cpp @@ -129,6 +129,8 @@ void FastClusterFit::Compress3( void* block ) Vec4 const zero = VEC4_CONST(0.0f); Vec4 const half = VEC4_CONST(0.5f); Vec4 const two = VEC4_CONST(2.0); + Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); + Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); // declare variables Vec4 beststart = VEC4_CONST( 0.0f ); @@ -160,25 +162,22 @@ void FastClusterFit::Compress3( void* block ) Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor; - // clamp the output to [0, 1] + // clamp to the grid a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); - - // clamp to the grid - Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); - Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f ); a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp; b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp; - // compute the error - Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum ); - Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); - Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 ); - + // compute the error (we skip the constant xxsum) + Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); + Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); + Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 ); + Vec4 e4 = MultiplyAdd( two, e3, e1 ); + // apply the metric to the error term - Vec4 e4 = e3 * m_metricSqr; - Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ(); - + Vec4 e5 = e4 * m_metricSqr; + Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ(); + // keep the solution if it wins if( CompareAnyLessThan( error, besterror ) ) { @@ -274,7 +273,7 @@ void FastClusterFit::Compress4( void* block ) Vec4 const factor = constants.SplatW(); i++; - Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird); + Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0)); Vec4 const betax_sum = m_xsum - alphax_sum; Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; @@ -286,18 +285,19 @@ void FastClusterFit::Compress4( void* block ) // clamp to the grid Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); - Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f ); + Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp; b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp; - // compute the error - Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum ); - Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); - Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 ); - + // compute the error (we skip the constant xxsum) + Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); + Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); + Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 ); + Vec4 e4 = MultiplyAdd( two, e3, e1 ); + // apply the metric to the error term - Vec4 e4 = e3 * m_metricSqr; - Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ(); + Vec4 e5 = e4 * m_metricSqr; + Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ(); // keep the solution if it wins if( CompareAnyLessThan( error, besterror ) ) @@ -370,6 +370,12 @@ void FastClusterFit::Compress4( void* block ) void FastClusterFit::Compress3( void* block ) { + Vec3 const one( 1.0f ); + Vec3 const zero( 0.0f ); + Vec3 const half( 0.5f ); + Vec3 const grid( 31.0f, 63.0f, 31.0f ); + Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); + // declare variables Vec3 beststart( 0.0f ); Vec3 bestend( 0.0f ); @@ -399,16 +405,9 @@ void FastClusterFit::Compress3( void* block ) Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor; Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor; - // clamp the output to [0, 1] - Vec3 const one( 1.0f ); - Vec3 const zero( 0.0f ); + // clamp to the grid a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); - - // clamp to the grid - Vec3 const grid( 31.0f, 63.0f, 31.0f ); - Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f ); - Vec3 const half( 0.5f ); a = Floor( grid*a + half )*gridrcp; b = Floor( grid*b + half )*gridrcp; @@ -477,6 +476,12 @@ void FastClusterFit::Compress3( void* block ) void FastClusterFit::Compress4( void* block ) { + Vec3 const one( 1.0f ); + Vec3 const zero( 0.0f ); + Vec3 const half( 0.5f ); + Vec3 const grid( 31.0f, 63.0f, 31.0f ); + Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); + // declare variables Vec3 beststart( 0.0f ); Vec3 bestend( 0.0f ); @@ -511,16 +516,9 @@ void FastClusterFit::Compress4( void* block ) Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor; Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor; - // clamp the output to [0, 1] - Vec3 const one( 1.0f ); - Vec3 const zero( 0.0f ); + // clamp to the grid a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); - - // clamp to the grid - Vec3 const grid( 31.0f, 63.0f, 31.0f ); - Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f ); - Vec3 const half( 0.5f ); a = Floor( grid*a + half )*gridrcp; b = Floor( grid*b + half )*gridrcp; diff --git a/src/nvtt/squish/maths.cpp b/src/nvtt/squish/maths.cpp index 87b4cd9..829b45d 100644 --- a/src/nvtt/squish/maths.cpp +++ b/src/nvtt/squish/maths.cpp @@ -59,28 +59,189 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weight return covariance; } +#if 1 Vec3 ComputePrincipleComponent( Sym3x3 const& matrix ) { const int NUM = 8; Vec3 v(1, 1, 1); - for(int i = 0; i < NUM; i++) { + for (int i = 0; i < NUM; i++) + { float x = v.X() * matrix[0] + v.Y() * matrix[1] + v.Z() * matrix[2]; float y = v.X() * matrix[1] + v.Y() * matrix[3] + v.Z() * matrix[4]; float z = v.X() * matrix[2] + v.Y() * matrix[4] + v.Z() * matrix[5]; float norm = std::max(std::max(x, y), z); + float iv = 1.0f / norm; - if (norm == 0.0f) { // @@ I think this is not necessary in this case!! - return Vec3(0.0f); - } - v = Vec3(x*iv, y*iv, z*iv); } return v; } +#else + +static Vec3 GetMultiplicity1Evector( Sym3x3 const& matrix, float evalue ) +{ + // compute M + Sym3x3 m; + m[0] = matrix[0] - evalue; + m[1] = matrix[1]; + m[2] = matrix[2]; + m[3] = matrix[3] - evalue; + m[4] = matrix[4]; + m[5] = matrix[5] - evalue; + + // compute U + Sym3x3 u; + u[0] = m[3]*m[5] - m[4]*m[4]; + u[1] = m[2]*m[4] - m[1]*m[5]; + u[2] = m[1]*m[4] - m[2]*m[3]; + u[3] = m[0]*m[5] - m[2]*m[2]; + u[4] = m[1]*m[2] - m[4]*m[0]; + u[5] = m[0]*m[3] - m[1]*m[1]; + + // find the largest component + float mc = std::fabs( u[0] ); + int mi = 0; + for( int i = 1; i < 6; ++i ) + { + float c = std::fabs( u[i] ); + if( c > mc ) + { + mc = c; + mi = i; + } + } + + // pick the column with this component + switch( mi ) + { + case 0: + return Vec3( u[0], u[1], u[2] ); + + case 1: + case 3: + return Vec3( u[1], u[3], u[4] ); + + default: + return Vec3( u[2], u[4], u[5] ); + } +} + +static Vec3 GetMultiplicity2Evector( Sym3x3 const& matrix, float evalue ) +{ + // compute M + Sym3x3 m; + m[0] = matrix[0] - evalue; + m[1] = matrix[1]; + m[2] = matrix[2]; + m[3] = matrix[3] - evalue; + m[4] = matrix[4]; + m[5] = matrix[5] - evalue; + + // find the largest component + float mc = std::fabs( m[0] ); + int mi = 0; + for( int i = 1; i < 6; ++i ) + { + float c = std::fabs( m[i] ); + if( c > mc ) + { + mc = c; + mi = i; + } + } + + // pick the first eigenvector based on this index + switch( mi ) + { + case 0: + case 1: + return Vec3( -m[1], m[0], 0.0f ); + + case 2: + return Vec3( m[2], 0.0f, -m[0] ); + + case 3: + case 4: + return Vec3( 0.0f, -m[4], m[3] ); + + default: + return Vec3( 0.0f, -m[5], m[4] ); + } +} + +Vec3 ComputePrincipleComponent( Sym3x3 const& matrix ) +{ + // compute the cubic coefficients + float c0 = matrix[0]*matrix[3]*matrix[5] + + 2.0f*matrix[1]*matrix[2]*matrix[4] + - matrix[0]*matrix[4]*matrix[4] + - matrix[3]*matrix[2]*matrix[2] + - matrix[5]*matrix[1]*matrix[1]; + float c1 = matrix[0]*matrix[3] + matrix[0]*matrix[5] + matrix[3]*matrix[5] + - matrix[1]*matrix[1] - matrix[2]*matrix[2] - matrix[4]*matrix[4]; + float c2 = matrix[0] + matrix[3] + matrix[5]; + + // compute the quadratic coefficients + float a = c1 - ( 1.0f/3.0f )*c2*c2; + float b = ( -2.0f/27.0f )*c2*c2*c2 + ( 1.0f/3.0f )*c1*c2 - c0; + + // compute the root count check + float Q = 0.25f*b*b + ( 1.0f/27.0f )*a*a*a; + + // test the multiplicity + if( FLT_EPSILON < Q ) + { + // only one root, which implies we have a multiple of the identity + return Vec3( 1.0f ); + } + else if( Q < -FLT_EPSILON ) + { + // three distinct roots + float theta = std::atan2( std::sqrt( -Q ), -0.5f*b ); + float rho = std::sqrt( 0.25f*b*b - Q ); + + float rt = std::pow( rho, 1.0f/3.0f ); + float ct = std::cos( theta/3.0f ); + float st = std::sin( theta/3.0f ); + + float l1 = ( 1.0f/3.0f )*c2 + 2.0f*rt*ct; + float l2 = ( 1.0f/3.0f )*c2 - rt*( ct + ( float )sqrt( 3.0f )*st ); + float l3 = ( 1.0f/3.0f )*c2 - rt*( ct - ( float )sqrt( 3.0f )*st ); + + // pick the larger + if( std::fabs( l2 ) > std::fabs( l1 ) ) + l1 = l2; + if( std::fabs( l3 ) > std::fabs( l1 ) ) + l1 = l3; + + // get the eigenvector + return GetMultiplicity1Evector( matrix, l1 ); + } + else // if( -FLT_EPSILON <= Q && Q <= FLT_EPSILON ) + { + // two roots + float rt; + if( b < 0.0f ) + rt = -std::pow( -0.5f*b, 1.0f/3.0f ); + else + rt = std::pow( 0.5f*b, 1.0f/3.0f ); + + float l1 = ( 1.0f/3.0f )*c2 + rt; // repeated + float l2 = ( 1.0f/3.0f )*c2 - 2.0f*rt; + + // get the eigenvector + if( std::fabs( l1 ) > std::fabs( l2 ) ) + return GetMultiplicity2Evector( matrix, l1 ); + else + return GetMultiplicity1Evector( matrix, l2 ); + } +} +#endif + } // namespace squish