Minor optimizations.
Fix DXT1a support.
This commit is contained in:
parent
42da2d63ce
commit
7b433f08f3
@ -43,10 +43,11 @@ void ColourFit::Compress( void* block )
|
||||
bool isDxt1 = ( ( m_flags & kDxt1 ) != 0 );
|
||||
if( isDxt1 )
|
||||
{
|
||||
Compress4( block );
|
||||
Compress3( block );
|
||||
|
||||
if( !m_colours->IsTransparent() )
|
||||
{
|
||||
Compress3( block );
|
||||
Compress4( block );
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -43,16 +43,16 @@ ColourSet::ColourSet( u8 const* rgba, int flags, bool createMinimalSet/*=false*/
|
||||
// create the minimal set
|
||||
for( int i = 0; i < 16; ++i )
|
||||
{
|
||||
// check for transparent pixels when using dxt1
|
||||
if( isDxt1 && rgba[4*i + 3] == 0 )
|
||||
{
|
||||
m_remap[i] = -1;
|
||||
m_transparent = true;
|
||||
if (createMinimalSet) continue;
|
||||
}
|
||||
|
||||
if (createMinimalSet)
|
||||
{
|
||||
// check for transparent pixels when using dxt1
|
||||
if( isDxt1 && rgba[4*i + 3] == 0 )
|
||||
{
|
||||
m_remap[i] = -1;
|
||||
m_transparent = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// loop over previous points for a match
|
||||
for( int j = 0;; ++j )
|
||||
{
|
||||
@ -100,6 +100,17 @@ ColourSet::ColourSet( u8 const* rgba, int flags, bool createMinimalSet/*=false*/
|
||||
}
|
||||
else
|
||||
{
|
||||
// check for transparent pixels when using dxt1
|
||||
if( isDxt1 && rgba[4*i + 3] == 0 )
|
||||
{
|
||||
m_remap[i] = -1;
|
||||
m_transparent = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_remap[i] = m_count;
|
||||
}
|
||||
|
||||
// normalise coordinates to [0,1]
|
||||
float x = ( float )rgba[4*i + 2] / 255.0f;
|
||||
float y = ( float )rgba[4*i + 1] / 255.0f;
|
||||
@ -111,7 +122,6 @@ ColourSet::ColourSet( u8 const* rgba, int flags, bool createMinimalSet/*=false*/
|
||||
// add the point
|
||||
m_points[m_count] = Vec3( x, y, z );
|
||||
m_weights[m_count] = ( weightByAlpha ? w : 1.0f );
|
||||
m_remap[i] = m_count;
|
||||
|
||||
// advance
|
||||
++m_count;
|
||||
|
@ -104,8 +104,8 @@ struct Precomp {
|
||||
float factor;
|
||||
};
|
||||
|
||||
// Tables of per-partition constants, indexed sequentially (i++) by the nested
// count loops in Compress3 (s_threeElement) and Compress4 (s_fourElement).
// The SQUISH_ALIGN_16 variants request 16-byte alignment; Compress3/Compress4
// load an entry with Vec4( const float * ), which uses the aligned
// _mm_load_ps. NOTE(review): that load is only valid for every entry if
// sizeof(Precomp) is a multiple of 16 -- confirm against the struct definition.
static Precomp s_threeElement[153];
|
||||
static Precomp s_fourElement[969];
|
||||
static SQUISH_ALIGN_16 Precomp s_threeElement[153];
|
||||
static SQUISH_ALIGN_16 Precomp s_fourElement[969];
|
||||
|
||||
void FastClusterFit::DoPrecomputation()
|
||||
{
|
||||
@ -229,11 +229,12 @@ void FastClusterFit::Compress3( void* block )
|
||||
x1 = zero;
|
||||
|
||||
for( int c1 = 0; c1 <= 16-c0; c1++)
|
||||
{
|
||||
Vec4 const alpha2_sum = VEC4_CONST(s_threeElement[i].alpha2_sum);
|
||||
Vec4 const beta2_sum = VEC4_CONST(s_threeElement[i].beta2_sum);
|
||||
Vec4 const alphabeta_sum = VEC4_CONST(s_threeElement[i].alphabeta_sum);
|
||||
Vec4 const factor = VEC4_CONST(s_threeElement[i].factor);
|
||||
{
|
||||
Vec4 const constants = Vec4((const float *)&s_threeElement[i]);
|
||||
Vec4 const alpha2_sum = constants.SplatX();
|
||||
Vec4 const beta2_sum = constants.SplatY();
|
||||
Vec4 const alphabeta_sum = constants.SplatZ();
|
||||
Vec4 const factor = constants.SplatW();
|
||||
i++;
|
||||
|
||||
Vec4 const alphax_sum = MultiplyAdd(half, x1, x0);
|
||||
@ -310,6 +311,8 @@ void FastClusterFit::Compress3( void* block )
|
||||
for( int i = 0; i < 16; ++i )
|
||||
ordered[m_order[i]] = bestindices[i];
|
||||
|
||||
m_colours->RemapIndices( ordered, bestindices ); // Set alpha indices.
|
||||
|
||||
// save the block
|
||||
WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), ordered, block );
|
||||
|
||||
@ -347,10 +350,11 @@ void FastClusterFit::Compress4( void* block )
|
||||
|
||||
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
|
||||
{
|
||||
Vec4 const alpha2_sum = VEC4_CONST(s_fourElement[i].alpha2_sum);
|
||||
Vec4 const beta2_sum = VEC4_CONST(s_fourElement[i].beta2_sum);
|
||||
Vec4 const alphabeta_sum = VEC4_CONST(s_fourElement[i].alphabeta_sum);
|
||||
Vec4 const factor = VEC4_CONST(s_fourElement[i].factor);
|
||||
Vec4 const constants = Vec4((const float *)&s_fourElement[i]);
|
||||
Vec4 const alpha2_sum = constants.SplatX();
|
||||
Vec4 const beta2_sum = constants.SplatY();
|
||||
Vec4 const alphabeta_sum = constants.SplatZ();
|
||||
Vec4 const factor = constants.SplatW();
|
||||
i++;
|
||||
|
||||
Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird);
|
||||
|
@ -35,6 +35,12 @@
|
||||
// Replicates the value `a` at bit offsets 0, 2, 4 and 6 of a single integer.
// Presumably `a` is a 2-bit lane index and the result is an SSE shuffle
// immediate selecting that lane four times -- confirm against the call sites.
#define SQUISH_SSE_SPLAT( a ) \
|
||||
( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) )
|
||||
|
||||
// SQUISH_ALIGN_16 expands to the compiler-specific syntax for requesting
// 16-byte alignment on a declaration: the GCC attribute form when __GNUC__
// is defined, and __declspec(align(16)) otherwise.
#ifdef __GNUC__
|
||||
# define SQUISH_ALIGN_16 __attribute__ ((__aligned__ (16)))
|
||||
#else
|
||||
# define SQUISH_ALIGN_16 __declspec(align(16))
|
||||
#endif
|
||||
|
||||
namespace squish {
|
||||
|
||||
// Builds a Vec4 from the scalar X; _mm_set1_ps sets all four lanes to X.
#define VEC4_CONST( X ) Vec4( _mm_set1_ps( X ) )
|
||||
@ -55,7 +61,12 @@ public:
|
||||
m_v = arg.m_v;
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
// Constructs the vector from four consecutive floats starting at v.
// _mm_load_ps is the aligned load, so v must point to 16-byte-aligned
// storage; passing an unaligned pointer is invalid.
Vec4( const float * v )
|
||||
{
|
||||
m_v = _mm_load_ps( v );
|
||||
}
|
||||
|
||||
Vec4( float x, float y, float z, float w )
|
||||
{
|
||||
m_v = _mm_setr_ps( x, y, z, w );
|
||||
@ -63,11 +74,7 @@ public:
|
||||
|
||||
// Returns the first three lanes of the vector as a Vec3. The value is first
// written to a local 4-float array declared with 16-byte alignment, because
// _mm_store_ps is the aligned store; the fourth lane is discarded.
Vec3 GetVec3() const
|
||||
{
|
||||
#ifdef __GNUC__
|
||||
__attribute__ ((__aligned__ (16))) float c[4];
|
||||
#else
|
||||
__declspec(align(16)) float c[4];
|
||||
#endif
|
||||
SQUISH_ALIGN_16 float c[4];
|
||||
_mm_store_ps( c, m_v );
|
||||
return Vec3( c[0], c[1], c[2] );
|
||||
}
|
||||
|
@ -29,6 +29,8 @@
|
||||
#include <altivec.h>
|
||||
#undef bool
|
||||
|
||||
// 16-byte alignment attribute in GCC syntax; this header includes <altivec.h>
// and defines the macro unconditionally, with no non-GCC alternative.
#define SQUISH_ALIGN_16 __attribute__ ((__aligned__ (16)))
|
||||
|
||||
namespace squish {
|
||||
|
||||
// Builds a Vec4 from X by converting it to the AltiVec `vector float` type.
// NOTE(review): presumably this replicates X into all four lanes, matching
// the SSE definition of the same macro -- confirm for the target compiler.
#define VEC4_CONST( X ) Vec4( ( vector float )( X ) )
|
||||
|
@ -231,8 +231,11 @@ void WeightedClusterFit::Compress3( void* block )
|
||||
for( int i = 0; i < 16; ++i )
|
||||
ordered[m_order[i]] = bestindices[i];
|
||||
|
||||
m_colours->RemapIndices( ordered, bestindices ); // Set alpha indices.
|
||||
|
||||
|
||||
// save the block
|
||||
WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), ordered, block );
|
||||
WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
|
||||
|
||||
// save the error
|
||||
m_besterror = besterror;
|
||||
|
Loading…
Reference in New Issue
Block a user