|
|
|
@ -136,6 +136,8 @@ void WeightedClusterFit::Compress3( void* block )
|
|
|
|
|
Vec4 const zero = VEC4_CONST(0.0f);
|
|
|
|
|
Vec4 const half(0.5f, 0.5f, 0.5f, 0.25f);
|
|
|
|
|
Vec4 const two = VEC4_CONST(2.0);
|
|
|
|
|
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
|
|
|
|
|
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
|
|
|
|
|
|
|
|
|
|
// declare variables
|
|
|
|
|
Vec4 beststart = VEC4_CONST( 0.0f );
|
|
|
|
@ -174,24 +176,21 @@ void WeightedClusterFit::Compress3( void* block )
|
|
|
|
|
Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
|
|
|
|
|
Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
|
|
|
|
|
|
|
|
|
|
// clamp the output to [0, 1]
|
|
|
|
|
// clamp to the grid
|
|
|
|
|
a = Min( one, Max( zero, a ) );
|
|
|
|
|
b = Min( one, Max( zero, b ) );
|
|
|
|
|
|
|
|
|
|
// clamp to the grid
|
|
|
|
|
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
|
|
|
|
|
Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
|
|
|
|
|
a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
|
|
|
|
|
b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
|
|
|
|
|
|
|
|
|
|
// compute the error
|
|
|
|
|
Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
|
|
|
|
|
Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
|
|
|
|
|
Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
|
|
|
|
|
// compute the error (we skip the constant xxsum)
|
|
|
|
|
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
|
|
|
|
|
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
|
|
|
|
|
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
|
|
|
|
|
Vec4 e4 = MultiplyAdd( two, e3, e1 );
|
|
|
|
|
|
|
|
|
|
// apply the metric to the error term
|
|
|
|
|
Vec4 e4 = e3 * m_metricSqr;
|
|
|
|
|
Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
|
|
|
|
|
Vec4 e5 = e4 * m_metricSqr;
|
|
|
|
|
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
|
|
|
|
|
|
|
|
|
|
// keep the solution if it wins
|
|
|
|
|
if( CompareAnyLessThan( error, besterror ) )
|
|
|
|
@ -222,7 +221,7 @@ void WeightedClusterFit::Compress3( void* block )
|
|
|
|
|
for(; i < b0+b1; i++) {
|
|
|
|
|
bestindices[i] = 2;
|
|
|
|
|
}
|
|
|
|
|
for(; i < 16; i++) {
|
|
|
|
|
for(; i < count; i++) {
|
|
|
|
|
bestindices[i] = 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
@ -232,7 +231,7 @@ void WeightedClusterFit::Compress3( void* block )
|
|
|
|
|
for( int i = 0; i < count; ++i )
|
|
|
|
|
ordered[m_order[i]] = bestindices[i];
|
|
|
|
|
|
|
|
|
|
m_colours->RemapIndices( ordered, bestindices ); // Set alpha indices.
|
|
|
|
|
m_colours->RemapIndices( ordered, bestindices );
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// save the block
|
|
|
|
@ -252,6 +251,9 @@ void WeightedClusterFit::Compress4( void* block )
|
|
|
|
|
Vec4 const two = VEC4_CONST(2.0);
|
|
|
|
|
Vec4 const onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
|
|
|
|
|
Vec4 const twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
|
|
|
|
|
Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
|
|
|
|
|
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
|
|
|
|
|
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
|
|
|
|
|
|
|
|
|
|
// declare variables
|
|
|
|
|
Vec4 beststart = VEC4_CONST( 0.0f );
|
|
|
|
@ -276,16 +278,16 @@ void WeightedClusterFit::Compress4( void* block )
|
|
|
|
|
|
|
|
|
|
//Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
|
|
|
|
|
//float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
|
|
|
|
|
Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird); // alphax_sum, alpha2_sum
|
|
|
|
|
Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum
|
|
|
|
|
Vec4 const alpha2_sum = alphax_sum.SplatW();
|
|
|
|
|
|
|
|
|
|
//Vec3 const betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f);
|
|
|
|
|
//float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
|
|
|
|
|
Vec4 const betax_sum = x3 + MultiplyAdd(x2, twothirds, x1 * onethird); // betax_sum, beta2_sum
|
|
|
|
|
Vec4 const betax_sum = MultiplyAdd(x2, twothirds, MultiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum
|
|
|
|
|
Vec4 const beta2_sum = betax_sum.SplatW();
|
|
|
|
|
|
|
|
|
|
//float const alphabeta_sum = w1 * (2.0f/9.0f) + w2 * (2.0f/9.0f);
|
|
|
|
|
Vec4 const alphabeta_sum = two * (x1 * onethird + x2 * onethird).SplatW(); // alphabeta_sum
|
|
|
|
|
//float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
|
|
|
|
|
Vec4 const alphabeta_sum = twonineths*( x1 + x2 ).SplatW(); // alphabeta_sum
|
|
|
|
|
|
|
|
|
|
// float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
|
|
|
|
|
Vec4 const factor = Reciprocal( NegativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
|
|
|
|
@ -293,24 +295,21 @@ void WeightedClusterFit::Compress4( void* block )
|
|
|
|
|
Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
|
|
|
|
|
Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
|
|
|
|
|
|
|
|
|
|
// clamp the output to [0, 1]
|
|
|
|
|
// clamp to the grid
|
|
|
|
|
a = Min( one, Max( zero, a ) );
|
|
|
|
|
b = Min( one, Max( zero, b ) );
|
|
|
|
|
|
|
|
|
|
// clamp to the grid
|
|
|
|
|
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
|
|
|
|
|
Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
|
|
|
|
|
a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
|
|
|
|
|
b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
|
|
|
|
|
|
|
|
|
|
// compute the error
|
|
|
|
|
Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
|
|
|
|
|
Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
|
|
|
|
|
Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
|
|
|
|
|
// compute the error (we skip the constant xxsum)
|
|
|
|
|
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
|
|
|
|
|
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
|
|
|
|
|
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
|
|
|
|
|
Vec4 e4 = MultiplyAdd( two, e3, e1 );
|
|
|
|
|
|
|
|
|
|
// apply the metric to the error term
|
|
|
|
|
Vec4 e4 = e3 * m_metricSqr;
|
|
|
|
|
Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
|
|
|
|
|
Vec4 e5 = e4 * m_metricSqr;
|
|
|
|
|
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
|
|
|
|
|
|
|
|
|
|
// keep the solution if it wins
|
|
|
|
|
if( CompareAnyLessThan( error, besterror ) )
|
|
|
|
@ -348,7 +347,7 @@ void WeightedClusterFit::Compress4( void* block )
|
|
|
|
|
for(; i < b0+b1+b2; i++) {
|
|
|
|
|
bestindices[i] = 3;
|
|
|
|
|
}
|
|
|
|
|
for(; i < 16; i++) {
|
|
|
|
|
for(; i < count; i++) {
|
|
|
|
|
bestindices[i] = 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
@ -358,8 +357,10 @@ void WeightedClusterFit::Compress4( void* block )
|
|
|
|
|
for( int i = 0; i < count; ++i )
|
|
|
|
|
ordered[m_order[i]] = bestindices[i];
|
|
|
|
|
|
|
|
|
|
m_colours->RemapIndices( ordered, bestindices );
|
|
|
|
|
|
|
|
|
|
// save the block
|
|
|
|
|
WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), ordered, block );
|
|
|
|
|
WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
|
|
|
|
|
|
|
|
|
|
// save the error
|
|
|
|
|
m_besterror = besterror;
|
|
|
|
@ -370,6 +371,13 @@ void WeightedClusterFit::Compress4( void* block )
|
|
|
|
|
|
|
|
|
|
void WeightedClusterFit::Compress3( void* block )
|
|
|
|
|
{
|
|
|
|
|
int const count = m_colours->GetCount();
|
|
|
|
|
Vec3 const one( 1.0f );
|
|
|
|
|
Vec3 const zero( 0.0f );
|
|
|
|
|
Vec3 const half( 0.5f );
|
|
|
|
|
Vec3 const grid( 31.0f, 63.0f, 31.0f );
|
|
|
|
|
Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
|
|
|
|
|
|
|
|
|
|
// declare variables
|
|
|
|
|
Vec3 beststart( 0.0f );
|
|
|
|
|
Vec3 bestend( 0.0f );
|
|
|
|
@ -381,12 +389,12 @@ void WeightedClusterFit::Compress3( void* block )
|
|
|
|
|
int b0 = 0, b1 = 0;
|
|
|
|
|
|
|
|
|
|
// check all possible clusters for this total order
|
|
|
|
|
for( int c0 = 0; c0 <= 16; c0++)
|
|
|
|
|
for( int c0 = 0; c0 <= count; c0++)
|
|
|
|
|
{
|
|
|
|
|
Vec3 x1(0.0f);
|
|
|
|
|
float w1 = 0.0f;
|
|
|
|
|
|
|
|
|
|
for( int c1 = 0; c1 <= 16-c0; c1++)
|
|
|
|
|
for( int c1 = 0; c1 <= count-c0; c1++)
|
|
|
|
|
{
|
|
|
|
|
float w2 = m_wsum - w0 - w1;
|
|
|
|
|
|
|
|
|
@ -402,16 +410,9 @@ void WeightedClusterFit::Compress3( void* block )
|
|
|
|
|
Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor;
|
|
|
|
|
Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor;
|
|
|
|
|
|
|
|
|
|
// clamp the output to [0, 1]
|
|
|
|
|
Vec3 const one( 1.0f );
|
|
|
|
|
Vec3 const zero( 0.0f );
|
|
|
|
|
// clamp to the grid
|
|
|
|
|
a = Min( one, Max( zero, a ) );
|
|
|
|
|
b = Min( one, Max( zero, b ) );
|
|
|
|
|
|
|
|
|
|
// clamp to the grid
|
|
|
|
|
Vec3 const grid( 31.0f, 63.0f, 31.0f );
|
|
|
|
|
Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
|
|
|
|
|
Vec3 const half( 0.5f );
|
|
|
|
|
a = Floor( grid*a + half )*gridrcp;
|
|
|
|
|
b = Floor( grid*b + half )*gridrcp;
|
|
|
|
|
|
|
|
|
@ -452,18 +453,20 @@ void WeightedClusterFit::Compress3( void* block )
|
|
|
|
|
for(; i < b0+b1; i++) {
|
|
|
|
|
bestindices[i] = 2;
|
|
|
|
|
}
|
|
|
|
|
for(; i < 16; i++) {
|
|
|
|
|
for(; i < count; i++) {
|
|
|
|
|
bestindices[i] = 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// remap the indices
|
|
|
|
|
u8 ordered[16];
|
|
|
|
|
for( int i = 0; i < 16; ++i )
|
|
|
|
|
for( int i = 0; i < count; ++i )
|
|
|
|
|
ordered[m_order[i]] = bestindices[i];
|
|
|
|
|
|
|
|
|
|
m_colours->RemapIndices( ordered, bestindices );
|
|
|
|
|
|
|
|
|
|
// save the block
|
|
|
|
|
WriteColourBlock3( beststart, bestend, ordered, block );
|
|
|
|
|
WriteColourBlock3( beststart, bestend, bestindices, block );
|
|
|
|
|
|
|
|
|
|
// save the error
|
|
|
|
|
m_besterror = besterror;
|
|
|
|
@ -472,6 +475,13 @@ void WeightedClusterFit::Compress3( void* block )
|
|
|
|
|
|
|
|
|
|
void WeightedClusterFit::Compress4( void* block )
|
|
|
|
|
{
|
|
|
|
|
int const count = m_colours->GetCount();
|
|
|
|
|
Vec3 const one( 1.0f );
|
|
|
|
|
Vec3 const zero( 0.0f );
|
|
|
|
|
Vec3 const half( 0.5f );
|
|
|
|
|
Vec3 const grid( 31.0f, 63.0f, 31.0f );
|
|
|
|
|
Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
|
|
|
|
|
|
|
|
|
|
// declare variables
|
|
|
|
|
Vec3 beststart( 0.0f );
|
|
|
|
|
Vec3 bestend( 0.0f );
|
|
|
|
@ -482,17 +492,17 @@ void WeightedClusterFit::Compress4( void* block )
|
|
|
|
|
int b0 = 0, b1 = 0, b2 = 0;
|
|
|
|
|
|
|
|
|
|
// check all possible clusters for this total order
|
|
|
|
|
for( int c0 = 0; c0 <= 16; c0++)
|
|
|
|
|
for( int c0 = 0; c0 <= count; c0++)
|
|
|
|
|
{
|
|
|
|
|
Vec3 x1(0.0f);
|
|
|
|
|
float w1 = 0.0f;
|
|
|
|
|
|
|
|
|
|
for( int c1 = 0; c1 <= 16-c0; c1++)
|
|
|
|
|
for( int c1 = 0; c1 <= count-c0; c1++)
|
|
|
|
|
{
|
|
|
|
|
Vec3 x2(0.0f);
|
|
|
|
|
float w2 = 0.0f;
|
|
|
|
|
|
|
|
|
|
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
|
|
|
|
|
for( int c2 = 0; c2 <= count-c0-c1; c2++)
|
|
|
|
|
{
|
|
|
|
|
float w3 = m_wsum - w0 - w1 - w2;
|
|
|
|
|
|
|
|
|
@ -507,16 +517,9 @@ void WeightedClusterFit::Compress4( void* block )
|
|
|
|
|
Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
|
|
|
|
|
Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
|
|
|
|
|
|
|
|
|
|
// clamp the output to [0, 1]
|
|
|
|
|
Vec3 const one( 1.0f );
|
|
|
|
|
Vec3 const zero( 0.0f );
|
|
|
|
|
// clamp to the grid
|
|
|
|
|
a = Min( one, Max( zero, a ) );
|
|
|
|
|
b = Min( one, Max( zero, b ) );
|
|
|
|
|
|
|
|
|
|
// clamp to the grid
|
|
|
|
|
Vec3 const grid( 31.0f, 63.0f, 31.0f );
|
|
|
|
|
Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
|
|
|
|
|
Vec3 const half( 0.5f );
|
|
|
|
|
a = Floor( grid*a + half )*gridrcp;
|
|
|
|
|
b = Floor( grid*b + half )*gridrcp;
|
|
|
|
|
|
|
|
|
@ -565,18 +568,20 @@ void WeightedClusterFit::Compress4( void* block )
|
|
|
|
|
for(; i < b0+b1+b2; i++) {
|
|
|
|
|
bestindices[i] = 3;
|
|
|
|
|
}
|
|
|
|
|
for(; i < 16; i++) {
|
|
|
|
|
for(; i < count; i++) {
|
|
|
|
|
bestindices[i] = 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// remap the indices
|
|
|
|
|
u8 ordered[16];
|
|
|
|
|
for( int i = 0; i < 16; ++i )
|
|
|
|
|
for( int i = 0; i < count; ++i )
|
|
|
|
|
ordered[m_order[i]] = bestindices[i];
|
|
|
|
|
|
|
|
|
|
m_colours->RemapIndices( ordered, bestindices );
|
|
|
|
|
|
|
|
|
|
// save the block
|
|
|
|
|
WriteColourBlock4( beststart, bestend, ordered, block );
|
|
|
|
|
WriteColourBlock4( beststart, bestend, bestindices, block );
|
|
|
|
|
|
|
|
|
|
// save the error
|
|
|
|
|
m_besterror = besterror;
|
|
|
|
|