Replace broken compressor with the latest version from trunk.

2.0.7
castano 15 years ago
parent 09ad232142
commit f817d49872

@ -1,28 +1,28 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2006 Ignacio Castano icastano@nvidia.com
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2006 Ignacio Castano icastano@nvidia.com
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "weightedclusterfit.h"
#include "colourset.h"
@ -32,12 +32,12 @@
namespace squish {
// Default constructor. The body is empty, so nothing is set up here; the
// fit state is filled in later by SetColourSet() and SetMetric().
// NOTE(review): the definition is listed twice in this view, presumably the
// before/after lines of a whitespace-only change -- confirm which copy is live.
WeightedClusterFit::WeightedClusterFit()
{
}
WeightedClusterFit::WeightedClusterFit()
{
}
void WeightedClusterFit::SetColourSet( ColourSet const* colours, int flags )
{
void WeightedClusterFit::SetColourSet( ColourSet const* colours, int flags )
{
ColourFit::SetColourSet( colours, flags );
// initialise the best error
@ -102,21 +102,21 @@ void WeightedClusterFit::SetColourSet( ColourSet const* colours, int flags )
m_wsum += m_weights[i];
#endif
}
}
}
// Stores the metric (r, g, b) and its product with itself for later use when
// weighting error terms.
// Parameters r, g, b: presumably the weights applied to the red, green and
// blue channels -- TODO confirm against the callers.
// NOTE(review): the signature and the braces appear twice in this view,
// presumably the before/after lines of a whitespace-only change -- confirm
// which copy is live before compiling.
void WeightedClusterFit::SetMetric(float r, float g, float b)
{
void WeightedClusterFit::SetMetric(float r, float g, float b)
{
#if SQUISH_USE_SIMD
// SIMD build: the metric is a Vec4 whose fourth component is set to 0.
m_metric = Vec4(r, g, b, 0);
#else
// Scalar build: the metric is a plain Vec3.
m_metric = Vec3(r, g, b);
#endif
// Cache metric * metric once here; GetBestError() and the SIMD
// Compress3()/Compress4() multiply their error terms by m_metricSqr.
m_metricSqr = m_metric * m_metric;
}
}
float WeightedClusterFit::GetBestError() const
{
float WeightedClusterFit::GetBestError() const
{
#if SQUISH_USE_SIMD
Vec4 x = m_xxsum * m_metricSqr;
Vec4 error = m_besterror + x.SplatX() + x.SplatY() + x.SplatZ();
@ -125,17 +125,19 @@ float WeightedClusterFit::GetBestError() const
return m_besterror + Dot(m_xxsum, m_metricSqr);
#endif
}
}
#if SQUISH_USE_SIMD
void WeightedClusterFit::Compress3( void* block )
{
void WeightedClusterFit::Compress3( void* block )
{
int const count = m_colours->GetCount();
Vec4 const one = VEC4_CONST(1.0f);
Vec4 const zero = VEC4_CONST(0.0f);
Vec4 const half(0.5f, 0.5f, 0.5f, 0.25f);
Vec4 const two = VEC4_CONST(2.0);
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// declare variables
Vec4 beststart = VEC4_CONST( 0.0f );
@ -174,24 +176,21 @@ void WeightedClusterFit::Compress3( void* block )
Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
// clamp the output to [0, 1]
// clamp to the grid
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
// compute the error
Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
// compute the error (we skip the constant xxsum)
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
Vec4 e4 = MultiplyAdd( two, e3, e1 );
// apply the metric to the error term
Vec4 e4 = e3 * m_metricSqr;
Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
Vec4 e5 = e4 * m_metricSqr;
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
// keep the solution if it wins
if( CompareAnyLessThan( error, besterror ) )
@ -222,7 +221,7 @@ void WeightedClusterFit::Compress3( void* block )
for(; i < b0+b1; i++) {
bestindices[i] = 2;
}
for(; i < 16; i++) {
for(; i < count; i++) {
bestindices[i] = 1;
}
}
@ -232,7 +231,7 @@ void WeightedClusterFit::Compress3( void* block )
for( int i = 0; i < count; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices ); // Set alpha indices.
m_colours->RemapIndices( ordered, bestindices );
// save the block
@ -241,10 +240,10 @@ void WeightedClusterFit::Compress3( void* block )
// save the error
m_besterror = besterror;
}
}
}
void WeightedClusterFit::Compress4( void* block )
{
void WeightedClusterFit::Compress4( void* block )
{
int const count = m_colours->GetCount();
Vec4 const one = VEC4_CONST(1.0f);
Vec4 const zero = VEC4_CONST(0.0f);
@ -252,6 +251,9 @@ void WeightedClusterFit::Compress4( void* block )
Vec4 const two = VEC4_CONST(2.0);
Vec4 const onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
Vec4 const twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// declare variables
Vec4 beststart = VEC4_CONST( 0.0f );
@ -276,16 +278,16 @@ void WeightedClusterFit::Compress4( void* block )
//Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
//float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird); // alphax_sum, alpha2_sum
Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum
Vec4 const alpha2_sum = alphax_sum.SplatW();
//Vec3 const betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f);
//float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
Vec4 const betax_sum = x3 + MultiplyAdd(x2, twothirds, x1 * onethird); // betax_sum, beta2_sum
Vec4 const betax_sum = MultiplyAdd(x2, twothirds, MultiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum
Vec4 const beta2_sum = betax_sum.SplatW();
//float const alphabeta_sum = w1 * (2.0f/9.0f) + w2 * (2.0f/9.0f);
Vec4 const alphabeta_sum = two * (x1 * onethird + x2 * onethird).SplatW(); // alphabeta_sum
//float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
Vec4 const alphabeta_sum = twonineths*( x1 + x2 ).SplatW(); // alphabeta_sum
// float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
Vec4 const factor = Reciprocal( NegativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
@ -293,24 +295,21 @@ void WeightedClusterFit::Compress4( void* block )
Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
// clamp the output to [0, 1]
// clamp to the grid
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
// compute the error
Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
// compute the error (we skip the constant xxsum)
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
Vec4 e4 = MultiplyAdd( two, e3, e1 );
// apply the metric to the error term
Vec4 e4 = e3 * m_metricSqr;
Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
Vec4 e5 = e4 * m_metricSqr;
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
// keep the solution if it wins
if( CompareAnyLessThan( error, besterror ) )
@ -348,7 +347,7 @@ void WeightedClusterFit::Compress4( void* block )
for(; i < b0+b1+b2; i++) {
bestindices[i] = 3;
}
for(; i < 16; i++) {
for(; i < count; i++) {
bestindices[i] = 1;
}
}
@ -358,18 +357,27 @@ void WeightedClusterFit::Compress4( void* block )
for( int i = 0; i < count; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices );
// save the block
WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), ordered, block );
WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
// save the error
m_besterror = besterror;
}
}
}
#else
void WeightedClusterFit::Compress3( void* block )
{
void WeightedClusterFit::Compress3( void* block )
{
int const count = m_colours->GetCount();
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
Vec3 const half( 0.5f );
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
// declare variables
Vec3 beststart( 0.0f );
Vec3 bestend( 0.0f );
@ -381,12 +389,12 @@ void WeightedClusterFit::Compress3( void* block )
int b0 = 0, b1 = 0;
// check all possible clusters for this total order
for( int c0 = 0; c0 <= 16; c0++)
for( int c0 = 0; c0 <= count; c0++)
{
Vec3 x1(0.0f);
float w1 = 0.0f;
for( int c1 = 0; c1 <= 16-c0; c1++)
for( int c1 = 0; c1 <= count-c0; c1++)
{
float w2 = m_wsum - w0 - w1;
@ -402,16 +410,9 @@ void WeightedClusterFit::Compress3( void* block )
Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor;
Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor;
// clamp the output to [0, 1]
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
// clamp to the grid
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
Vec3 const half( 0.5f );
a = Floor( grid*a + half )*gridrcp;
b = Floor( grid*b + half )*gridrcp;
@ -452,26 +453,35 @@ void WeightedClusterFit::Compress3( void* block )
for(; i < b0+b1; i++) {
bestindices[i] = 2;
}
for(; i < 16; i++) {
for(; i < count; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < 16; ++i )
for( int i = 0; i < count; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices );
// save the block
WriteColourBlock3( beststart, bestend, ordered, block );
WriteColourBlock3( beststart, bestend, bestindices, block );
// save the error
m_besterror = besterror;
}
}
}
void WeightedClusterFit::Compress4( void* block )
{
int const count = m_colours->GetCount();
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
Vec3 const half( 0.5f );
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
void WeightedClusterFit::Compress4( void* block )
{
// declare variables
Vec3 beststart( 0.0f );
Vec3 bestend( 0.0f );
@ -482,17 +492,17 @@ void WeightedClusterFit::Compress4( void* block )
int b0 = 0, b1 = 0, b2 = 0;
// check all possible clusters for this total order
for( int c0 = 0; c0 <= 16; c0++)
for( int c0 = 0; c0 <= count; c0++)
{
Vec3 x1(0.0f);
float w1 = 0.0f;
for( int c1 = 0; c1 <= 16-c0; c1++)
for( int c1 = 0; c1 <= count-c0; c1++)
{
Vec3 x2(0.0f);
float w2 = 0.0f;
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
for( int c2 = 0; c2 <= count-c0-c1; c2++)
{
float w3 = m_wsum - w0 - w1 - w2;
@ -507,16 +517,9 @@ void WeightedClusterFit::Compress4( void* block )
Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
// clamp the output to [0, 1]
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
// clamp to the grid
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
Vec3 const half( 0.5f );
a = Floor( grid*a + half )*gridrcp;
b = Floor( grid*b + half )*gridrcp;
@ -565,23 +568,25 @@ void WeightedClusterFit::Compress4( void* block )
for(; i < b0+b1+b2; i++) {
bestindices[i] = 3;
}
for(; i < 16; i++) {
for(; i < count; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < 16; ++i )
for( int i = 0; i < count; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices );
// save the block
WriteColourBlock4( beststart, bestend, ordered, block );
WriteColourBlock4( beststart, bestend, bestindices, block );
// save the error
m_besterror = besterror;
}
}
}
#endif

Loading…
Cancel
Save