Merge optimizations from official squish release.
This commit is contained in:
@ -129,6 +129,8 @@ void FastClusterFit::Compress3( void* block )
|
||||
Vec4 const zero = VEC4_CONST(0.0f);
|
||||
Vec4 const half = VEC4_CONST(0.5f);
|
||||
Vec4 const two = VEC4_CONST(2.0);
|
||||
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
|
||||
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
|
||||
|
||||
// declare variables
|
||||
Vec4 beststart = VEC4_CONST( 0.0f );
|
||||
@ -160,25 +162,22 @@ void FastClusterFit::Compress3( void* block )
|
||||
Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
|
||||
Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
|
||||
|
||||
// clamp the output to [0, 1]
|
||||
// clamp to the grid
|
||||
a = Min( one, Max( zero, a ) );
|
||||
b = Min( one, Max( zero, b ) );
|
||||
|
||||
// clamp to the grid
|
||||
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
|
||||
Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
|
||||
a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
|
||||
b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
|
||||
|
||||
// compute the error
|
||||
Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
|
||||
Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
|
||||
Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
|
||||
|
||||
// compute the error (we skip the constant xxsum)
|
||||
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
|
||||
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
|
||||
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
|
||||
Vec4 e4 = MultiplyAdd( two, e3, e1 );
|
||||
|
||||
// apply the metric to the error term
|
||||
Vec4 e4 = e3 * m_metricSqr;
|
||||
Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
|
||||
|
||||
Vec4 e5 = e4 * m_metricSqr;
|
||||
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
|
||||
|
||||
// keep the solution if it wins
|
||||
if( CompareAnyLessThan( error, besterror ) )
|
||||
{
|
||||
@ -274,7 +273,7 @@ void FastClusterFit::Compress4( void* block )
|
||||
Vec4 const factor = constants.SplatW();
|
||||
i++;
|
||||
|
||||
Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird);
|
||||
Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0));
|
||||
Vec4 const betax_sum = m_xsum - alphax_sum;
|
||||
|
||||
Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
|
||||
@ -286,18 +285,19 @@ void FastClusterFit::Compress4( void* block )
|
||||
|
||||
// clamp to the grid
|
||||
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
|
||||
Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
|
||||
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
|
||||
a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
|
||||
b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
|
||||
|
||||
// compute the error
|
||||
Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
|
||||
Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
|
||||
Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
|
||||
|
||||
// compute the error (we skip the constant xxsum)
|
||||
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
|
||||
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
|
||||
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
|
||||
Vec4 e4 = MultiplyAdd( two, e3, e1 );
|
||||
|
||||
// apply the metric to the error term
|
||||
Vec4 e4 = e3 * m_metricSqr;
|
||||
Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
|
||||
Vec4 e5 = e4 * m_metricSqr;
|
||||
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
|
||||
|
||||
// keep the solution if it wins
|
||||
if( CompareAnyLessThan( error, besterror ) )
|
||||
@ -370,6 +370,12 @@ void FastClusterFit::Compress4( void* block )
|
||||
|
||||
void FastClusterFit::Compress3( void* block )
|
||||
{
|
||||
Vec3 const one( 1.0f );
|
||||
Vec3 const zero( 0.0f );
|
||||
Vec3 const half( 0.5f );
|
||||
Vec3 const grid( 31.0f, 63.0f, 31.0f );
|
||||
Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
|
||||
|
||||
// declare variables
|
||||
Vec3 beststart( 0.0f );
|
||||
Vec3 bestend( 0.0f );
|
||||
@ -399,16 +405,9 @@ void FastClusterFit::Compress3( void* block )
|
||||
Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor;
|
||||
Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor;
|
||||
|
||||
// clamp the output to [0, 1]
|
||||
Vec3 const one( 1.0f );
|
||||
Vec3 const zero( 0.0f );
|
||||
// clamp to the grid
|
||||
a = Min( one, Max( zero, a ) );
|
||||
b = Min( one, Max( zero, b ) );
|
||||
|
||||
// clamp to the grid
|
||||
Vec3 const grid( 31.0f, 63.0f, 31.0f );
|
||||
Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
|
||||
Vec3 const half( 0.5f );
|
||||
a = Floor( grid*a + half )*gridrcp;
|
||||
b = Floor( grid*b + half )*gridrcp;
|
||||
|
||||
@ -477,6 +476,12 @@ void FastClusterFit::Compress3( void* block )
|
||||
|
||||
void FastClusterFit::Compress4( void* block )
|
||||
{
|
||||
Vec3 const one( 1.0f );
|
||||
Vec3 const zero( 0.0f );
|
||||
Vec3 const half( 0.5f );
|
||||
Vec3 const grid( 31.0f, 63.0f, 31.0f );
|
||||
Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
|
||||
|
||||
// declare variables
|
||||
Vec3 beststart( 0.0f );
|
||||
Vec3 bestend( 0.0f );
|
||||
@ -511,16 +516,9 @@ void FastClusterFit::Compress4( void* block )
|
||||
Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
|
||||
Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
|
||||
|
||||
// clamp the output to [0, 1]
|
||||
Vec3 const one( 1.0f );
|
||||
Vec3 const zero( 0.0f );
|
||||
// clamp to the grid
|
||||
a = Min( one, Max( zero, a ) );
|
||||
b = Min( one, Max( zero, b ) );
|
||||
|
||||
// clamp to the grid
|
||||
Vec3 const grid( 31.0f, 63.0f, 31.0f );
|
||||
Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
|
||||
Vec3 const half( 0.5f );
|
||||
a = Floor( grid*a + half )*gridrcp;
|
||||
b = Floor( grid*b + half )*gridrcp;
|
||||
|
||||
|
Reference in New Issue
Block a user