Minor optimizations.

Fix DXT1a support.
2.0
castano 17 years ago
parent 42da2d63ce
commit 7b433f08f3

@ -43,10 +43,11 @@ void ColourFit::Compress( void* block )
bool isDxt1 = ( ( m_flags & kDxt1 ) != 0 );
if( isDxt1 )
{
Compress4( block );
Compress3( block );
if( !m_colours->IsTransparent() )
{
Compress3( block );
Compress4( block );
}
}
else

@ -43,16 +43,16 @@ ColourSet::ColourSet( u8 const* rgba, int flags, bool createMinimalSet/*=false*/
// create the minimal set
for( int i = 0; i < 16; ++i )
{
// check for transparent pixels when using dxt1
if( isDxt1 && rgba[4*i + 3] == 0 )
{
m_remap[i] = -1;
m_transparent = true;
if (createMinimalSet) continue;
}
if (createMinimalSet)
{
// check for transparent pixels when using dxt1
if( isDxt1 && rgba[4*i + 3] == 0 )
{
m_remap[i] = -1;
m_transparent = true;
continue;
}
// loop over previous points for a match
for( int j = 0;; ++j )
{
@ -100,6 +100,17 @@ ColourSet::ColourSet( u8 const* rgba, int flags, bool createMinimalSet/*=false*/
}
else
{
// check for transparent pixels when using dxt1
if( isDxt1 && rgba[4*i + 3] == 0 )
{
m_remap[i] = -1;
m_transparent = true;
}
else
{
m_remap[i] = m_count;
}
// normalise coordinates to [0,1]
float x = ( float )rgba[4*i + 2] / 255.0f;
float y = ( float )rgba[4*i + 1] / 255.0f;
@ -111,7 +122,6 @@ ColourSet::ColourSet( u8 const* rgba, int flags, bool createMinimalSet/*=false*/
// add the point
m_points[m_count] = Vec3( x, y, z );
m_weights[m_count] = ( weightByAlpha ? w : 1.0f );
m_remap[i] = m_count;
// advance
++m_count;

@ -104,8 +104,8 @@ struct Precomp {
float factor;
};
static Precomp s_threeElement[153];
static Precomp s_fourElement[969];
static SQUISH_ALIGN_16 Precomp s_threeElement[153];
static SQUISH_ALIGN_16 Precomp s_fourElement[969];
void FastClusterFit::DoPrecomputation()
{
@ -229,11 +229,12 @@ void FastClusterFit::Compress3( void* block )
x1 = zero;
for( int c1 = 0; c1 <= 16-c0; c1++)
{
Vec4 const alpha2_sum = VEC4_CONST(s_threeElement[i].alpha2_sum);
Vec4 const beta2_sum = VEC4_CONST(s_threeElement[i].beta2_sum);
Vec4 const alphabeta_sum = VEC4_CONST(s_threeElement[i].alphabeta_sum);
Vec4 const factor = VEC4_CONST(s_threeElement[i].factor);
{
Vec4 const constants = Vec4((const float *)&s_threeElement[i]);
Vec4 const alpha2_sum = constants.SplatX();
Vec4 const beta2_sum = constants.SplatY();
Vec4 const alphabeta_sum = constants.SplatZ();
Vec4 const factor = constants.SplatW();
i++;
Vec4 const alphax_sum = MultiplyAdd(half, x1, x0);
@ -310,6 +311,8 @@ void FastClusterFit::Compress3( void* block )
for( int i = 0; i < 16; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices ); // Set alpha indices.
// save the block
WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), ordered, block );
@ -347,10 +350,11 @@ void FastClusterFit::Compress4( void* block )
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
{
Vec4 const alpha2_sum = VEC4_CONST(s_fourElement[i].alpha2_sum);
Vec4 const beta2_sum = VEC4_CONST(s_fourElement[i].beta2_sum);
Vec4 const alphabeta_sum = VEC4_CONST(s_fourElement[i].alphabeta_sum);
Vec4 const factor = VEC4_CONST(s_fourElement[i].factor);
Vec4 const constants = Vec4((const float *)&s_fourElement[i]);
Vec4 const alpha2_sum = constants.SplatX();
Vec4 const beta2_sum = constants.SplatY();
Vec4 const alphabeta_sum = constants.SplatZ();
Vec4 const factor = constants.SplatW();
i++;
Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird);

@ -35,6 +35,12 @@
#define SQUISH_SSE_SPLAT( a ) \
( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) )
// SQUISH_ALIGN_16: prefix a variable declaration with this to request
// 16-byte alignment. Expands to the GCC-style __aligned__ attribute when
// __GNUC__ is defined, and to the __declspec(align(16)) form otherwise.
#ifdef __GNUC__
# define SQUISH_ALIGN_16 __attribute__ ((__aligned__ (16)))
#else
# define SQUISH_ALIGN_16 __declspec(align(16))
#endif
namespace squish {
#define VEC4_CONST( X ) Vec4( _mm_set1_ps( X ) )
@ -55,7 +61,12 @@ public:
m_v = arg.m_v;
return *this;
}
// Construct from four consecutive floats starting at v.
// NOTE: this uses _mm_load_ps (the aligned SSE load), so v must be
// 16-byte aligned; declare the source storage with a 16-byte alignment
// specifier before passing its address here.
Vec4( const float * v )
{
m_v = _mm_load_ps( v );
}
Vec4( float x, float y, float z, float w )
{
m_v = _mm_setr_ps( x, y, z, w );
@ -63,11 +74,7 @@ public:
Vec3 GetVec3() const
{
#ifdef __GNUC__
__attribute__ ((__aligned__ (16))) float c[4];
#else
__declspec(align(16)) float c[4];
#endif
SQUISH_ALIGN_16 float c[4];
_mm_store_ps( c, m_v );
return Vec3( c[0], c[1], c[2] );
}

@ -29,6 +29,8 @@
#include <altivec.h>
#undef bool
#define SQUISH_ALIGN_16 __attribute__ ((__aligned__ (16)))
namespace squish {
#define VEC4_CONST( X ) Vec4( ( vector float )( X ) )

@ -231,8 +231,11 @@ void WeightedClusterFit::Compress3( void* block )
for( int i = 0; i < 16; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices ); // Set alpha indices.
// save the block
WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), ordered, block );
WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
// save the error
m_besterror = besterror;

Loading…
Cancel
Save