Minor optimizations.

Fix DXT1a support.
This commit is contained in:
castano 2008-02-04 10:01:43 +00:00
parent 42da2d63ce
commit 7b433f08f3
6 changed files with 56 additions and 29 deletions

View File

@ -43,10 +43,11 @@ void ColourFit::Compress( void* block )
bool isDxt1 = ( ( m_flags & kDxt1 ) != 0 ); bool isDxt1 = ( ( m_flags & kDxt1 ) != 0 );
if( isDxt1 ) if( isDxt1 )
{ {
Compress4( block ); Compress3( block );
if( !m_colours->IsTransparent() ) if( !m_colours->IsTransparent() )
{ {
Compress3( block ); Compress4( block );
} }
} }
else else

View File

@ -43,16 +43,16 @@ ColourSet::ColourSet( u8 const* rgba, int flags, bool createMinimalSet/*=false*/
// create the minimal set // create the minimal set
for( int i = 0; i < 16; ++i ) for( int i = 0; i < 16; ++i )
{ {
// check for transparent pixels when using dxt1
if( isDxt1 && rgba[4*i + 3] == 0 )
{
m_remap[i] = -1;
m_transparent = true;
if (createMinimalSet) continue;
}
if (createMinimalSet) if (createMinimalSet)
{ {
// check for transparent pixels when using dxt1
if( isDxt1 && rgba[4*i + 3] == 0 )
{
m_remap[i] = -1;
m_transparent = true;
continue;
}
// loop over previous points for a match // loop over previous points for a match
for( int j = 0;; ++j ) for( int j = 0;; ++j )
{ {
@ -100,6 +100,17 @@ ColourSet::ColourSet( u8 const* rgba, int flags, bool createMinimalSet/*=false*/
} }
else else
{ {
// check for transparent pixels when using dxt1
if( isDxt1 && rgba[4*i + 3] == 0 )
{
m_remap[i] = -1;
m_transparent = true;
}
else
{
m_remap[i] = m_count;
}
// normalise coordinates to [0,1] // normalise coordinates to [0,1]
float x = ( float )rgba[4*i + 2] / 255.0f; float x = ( float )rgba[4*i + 2] / 255.0f;
float y = ( float )rgba[4*i + 1] / 255.0f; float y = ( float )rgba[4*i + 1] / 255.0f;
@ -111,7 +122,6 @@ ColourSet::ColourSet( u8 const* rgba, int flags, bool createMinimalSet/*=false*/
// add the point // add the point
m_points[m_count] = Vec3( x, y, z ); m_points[m_count] = Vec3( x, y, z );
m_weights[m_count] = ( weightByAlpha ? w : 1.0f ); m_weights[m_count] = ( weightByAlpha ? w : 1.0f );
m_remap[i] = m_count;
// advance // advance
++m_count; ++m_count;

View File

@ -104,8 +104,8 @@ struct Precomp {
float factor; float factor;
}; };
static Precomp s_threeElement[153]; static SQUISH_ALIGN_16 Precomp s_threeElement[153];
static Precomp s_fourElement[969]; static SQUISH_ALIGN_16 Precomp s_fourElement[969];
void FastClusterFit::DoPrecomputation() void FastClusterFit::DoPrecomputation()
{ {
@ -229,11 +229,12 @@ void FastClusterFit::Compress3( void* block )
x1 = zero; x1 = zero;
for( int c1 = 0; c1 <= 16-c0; c1++) for( int c1 = 0; c1 <= 16-c0; c1++)
{ {
Vec4 const alpha2_sum = VEC4_CONST(s_threeElement[i].alpha2_sum); Vec4 const constants = Vec4((const float *)&s_threeElement[i]);
Vec4 const beta2_sum = VEC4_CONST(s_threeElement[i].beta2_sum); Vec4 const alpha2_sum = constants.SplatX();
Vec4 const alphabeta_sum = VEC4_CONST(s_threeElement[i].alphabeta_sum); Vec4 const beta2_sum = constants.SplatY();
Vec4 const factor = VEC4_CONST(s_threeElement[i].factor); Vec4 const alphabeta_sum = constants.SplatZ();
Vec4 const factor = constants.SplatW();
i++; i++;
Vec4 const alphax_sum = MultiplyAdd(half, x1, x0); Vec4 const alphax_sum = MultiplyAdd(half, x1, x0);
@ -310,6 +311,8 @@ void FastClusterFit::Compress3( void* block )
for( int i = 0; i < 16; ++i ) for( int i = 0; i < 16; ++i )
ordered[m_order[i]] = bestindices[i]; ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices ); // Set alpha indices.
// save the block // save the block
WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), ordered, block ); WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), ordered, block );
@ -347,10 +350,11 @@ void FastClusterFit::Compress4( void* block )
for( int c2 = 0; c2 <= 16-c0-c1; c2++) for( int c2 = 0; c2 <= 16-c0-c1; c2++)
{ {
Vec4 const alpha2_sum = VEC4_CONST(s_fourElement[i].alpha2_sum); Vec4 const constants = Vec4((const float *)&s_fourElement[i]);
Vec4 const beta2_sum = VEC4_CONST(s_fourElement[i].beta2_sum); Vec4 const alpha2_sum = constants.SplatX();
Vec4 const alphabeta_sum = VEC4_CONST(s_fourElement[i].alphabeta_sum); Vec4 const beta2_sum = constants.SplatY();
Vec4 const factor = VEC4_CONST(s_fourElement[i].factor); Vec4 const alphabeta_sum = constants.SplatZ();
Vec4 const factor = constants.SplatW();
i++; i++;
Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird); Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird);

View File

@ -35,6 +35,12 @@
#define SQUISH_SSE_SPLAT( a ) \ #define SQUISH_SSE_SPLAT( a ) \
( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) ) ( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) )
// SQUISH_ALIGN_16: prefix a declaration with this to request 16-byte alignment.
// Compilers that define __GNUC__ get the attribute spelling; every other
// compiler is given the __declspec(align) spelling.
#if defined( __GNUC__ )
#define SQUISH_ALIGN_16 __attribute__ ((__aligned__ (16)))
#else
#define SQUISH_ALIGN_16 __declspec(align(16))
#endif
namespace squish { namespace squish {
#define VEC4_CONST( X ) Vec4( _mm_set1_ps( X ) ) #define VEC4_CONST( X ) Vec4( _mm_set1_ps( X ) )
@ -55,7 +61,12 @@ public:
m_v = arg.m_v; m_v = arg.m_v;
return *this; return *this;
} }
// Construct from four consecutive floats starting at v.
// The load is done with _mm_load_ps, the aligned SSE load, so v must point
// to 16-byte aligned storage (e.g. data declared with SQUISH_ALIGN_16).
Vec4( const float * v )
{
m_v = _mm_load_ps( v );
}
Vec4( float x, float y, float z, float w ) Vec4( float x, float y, float z, float w )
{ {
m_v = _mm_setr_ps( x, y, z, w ); m_v = _mm_setr_ps( x, y, z, w );
@ -63,11 +74,7 @@ public:
Vec3 GetVec3() const Vec3 GetVec3() const
{ {
#ifdef __GNUC__ SQUISH_ALIGN_16 float c[4];
__attribute__ ((__aligned__ (16))) float c[4];
#else
__declspec(align(16)) float c[4];
#endif
_mm_store_ps( c, m_v ); _mm_store_ps( c, m_v );
return Vec3( c[0], c[1], c[2] ); return Vec3( c[0], c[1], c[2] );
} }

View File

@ -29,6 +29,8 @@
#include <altivec.h> #include <altivec.h>
#undef bool #undef bool
// Prefix a declaration with this to request 16-byte alignment (attribute syntax).
#define SQUISH_ALIGN_16 __attribute__ ((__aligned__ (16)))
namespace squish { namespace squish {
#define VEC4_CONST( X ) Vec4( ( vector float )( X ) ) #define VEC4_CONST( X ) Vec4( ( vector float )( X ) )

View File

@ -231,8 +231,11 @@ void WeightedClusterFit::Compress3( void* block )
for( int i = 0; i < 16; ++i ) for( int i = 0; i < 16; ++i )
ordered[m_order[i]] = bestindices[i]; ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices ); // Set alpha indices.
// save the block // save the block
WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), ordered, block ); WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
// save the error // save the error
m_besterror = besterror; m_besterror = besterror;