Minor optimizations.
Fix DXT1a support.
This commit is contained in:
parent
42da2d63ce
commit
7b433f08f3
@ -43,10 +43,11 @@ void ColourFit::Compress( void* block )
|
|||||||
bool isDxt1 = ( ( m_flags & kDxt1 ) != 0 );
|
bool isDxt1 = ( ( m_flags & kDxt1 ) != 0 );
|
||||||
if( isDxt1 )
|
if( isDxt1 )
|
||||||
{
|
{
|
||||||
Compress4( block );
|
Compress3( block );
|
||||||
|
|
||||||
if( !m_colours->IsTransparent() )
|
if( !m_colours->IsTransparent() )
|
||||||
{
|
{
|
||||||
Compress3( block );
|
Compress4( block );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -42,17 +42,17 @@ ColourSet::ColourSet( u8 const* rgba, int flags, bool createMinimalSet/*=false*/
|
|||||||
|
|
||||||
// create the minimal set
|
// create the minimal set
|
||||||
for( int i = 0; i < 16; ++i )
|
for( int i = 0; i < 16; ++i )
|
||||||
|
{
|
||||||
|
if (createMinimalSet)
|
||||||
{
|
{
|
||||||
// check for transparent pixels when using dxt1
|
// check for transparent pixels when using dxt1
|
||||||
if( isDxt1 && rgba[4*i + 3] == 0 )
|
if( isDxt1 && rgba[4*i + 3] == 0 )
|
||||||
{
|
{
|
||||||
m_remap[i] = -1;
|
m_remap[i] = -1;
|
||||||
m_transparent = true;
|
m_transparent = true;
|
||||||
if (createMinimalSet) continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (createMinimalSet)
|
|
||||||
{
|
|
||||||
// loop over previous points for a match
|
// loop over previous points for a match
|
||||||
for( int j = 0;; ++j )
|
for( int j = 0;; ++j )
|
||||||
{
|
{
|
||||||
@ -100,6 +100,17 @@ ColourSet::ColourSet( u8 const* rgba, int flags, bool createMinimalSet/*=false*/
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
// check for transparent pixels when using dxt1
|
||||||
|
if( isDxt1 && rgba[4*i + 3] == 0 )
|
||||||
|
{
|
||||||
|
m_remap[i] = -1;
|
||||||
|
m_transparent = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
m_remap[i] = m_count;
|
||||||
|
}
|
||||||
|
|
||||||
// normalise coordinates to [0,1]
|
// normalise coordinates to [0,1]
|
||||||
float x = ( float )rgba[4*i + 2] / 255.0f;
|
float x = ( float )rgba[4*i + 2] / 255.0f;
|
||||||
float y = ( float )rgba[4*i + 1] / 255.0f;
|
float y = ( float )rgba[4*i + 1] / 255.0f;
|
||||||
@ -111,7 +122,6 @@ ColourSet::ColourSet( u8 const* rgba, int flags, bool createMinimalSet/*=false*/
|
|||||||
// add the point
|
// add the point
|
||||||
m_points[m_count] = Vec3( x, y, z );
|
m_points[m_count] = Vec3( x, y, z );
|
||||||
m_weights[m_count] = ( weightByAlpha ? w : 1.0f );
|
m_weights[m_count] = ( weightByAlpha ? w : 1.0f );
|
||||||
m_remap[i] = m_count;
|
|
||||||
|
|
||||||
// advance
|
// advance
|
||||||
++m_count;
|
++m_count;
|
||||||
|
@ -104,8 +104,8 @@ struct Precomp {
|
|||||||
float factor;
|
float factor;
|
||||||
};
|
};
|
||||||
|
|
||||||
static Precomp s_threeElement[153];
|
static SQUISH_ALIGN_16 Precomp s_threeElement[153];
|
||||||
static Precomp s_fourElement[969];
|
static SQUISH_ALIGN_16 Precomp s_fourElement[969];
|
||||||
|
|
||||||
void FastClusterFit::DoPrecomputation()
|
void FastClusterFit::DoPrecomputation()
|
||||||
{
|
{
|
||||||
@ -230,10 +230,11 @@ void FastClusterFit::Compress3( void* block )
|
|||||||
|
|
||||||
for( int c1 = 0; c1 <= 16-c0; c1++)
|
for( int c1 = 0; c1 <= 16-c0; c1++)
|
||||||
{
|
{
|
||||||
Vec4 const alpha2_sum = VEC4_CONST(s_threeElement[i].alpha2_sum);
|
Vec4 const constants = Vec4((const float *)&s_threeElement[i]);
|
||||||
Vec4 const beta2_sum = VEC4_CONST(s_threeElement[i].beta2_sum);
|
Vec4 const alpha2_sum = constants.SplatX();
|
||||||
Vec4 const alphabeta_sum = VEC4_CONST(s_threeElement[i].alphabeta_sum);
|
Vec4 const beta2_sum = constants.SplatY();
|
||||||
Vec4 const factor = VEC4_CONST(s_threeElement[i].factor);
|
Vec4 const alphabeta_sum = constants.SplatZ();
|
||||||
|
Vec4 const factor = constants.SplatW();
|
||||||
i++;
|
i++;
|
||||||
|
|
||||||
Vec4 const alphax_sum = MultiplyAdd(half, x1, x0);
|
Vec4 const alphax_sum = MultiplyAdd(half, x1, x0);
|
||||||
@ -310,6 +311,8 @@ void FastClusterFit::Compress3( void* block )
|
|||||||
for( int i = 0; i < 16; ++i )
|
for( int i = 0; i < 16; ++i )
|
||||||
ordered[m_order[i]] = bestindices[i];
|
ordered[m_order[i]] = bestindices[i];
|
||||||
|
|
||||||
|
m_colours->RemapIndices( ordered, bestindices ); // Set alpha indices.
|
||||||
|
|
||||||
// save the block
|
// save the block
|
||||||
WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), ordered, block );
|
WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), ordered, block );
|
||||||
|
|
||||||
@ -347,10 +350,11 @@ void FastClusterFit::Compress4( void* block )
|
|||||||
|
|
||||||
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
|
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
|
||||||
{
|
{
|
||||||
Vec4 const alpha2_sum = VEC4_CONST(s_fourElement[i].alpha2_sum);
|
Vec4 const constants = Vec4((const float *)&s_fourElement[i]);
|
||||||
Vec4 const beta2_sum = VEC4_CONST(s_fourElement[i].beta2_sum);
|
Vec4 const alpha2_sum = constants.SplatX();
|
||||||
Vec4 const alphabeta_sum = VEC4_CONST(s_fourElement[i].alphabeta_sum);
|
Vec4 const beta2_sum = constants.SplatY();
|
||||||
Vec4 const factor = VEC4_CONST(s_fourElement[i].factor);
|
Vec4 const alphabeta_sum = constants.SplatZ();
|
||||||
|
Vec4 const factor = constants.SplatW();
|
||||||
i++;
|
i++;
|
||||||
|
|
||||||
Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird);
|
Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird);
|
||||||
|
@ -35,6 +35,12 @@
|
|||||||
#define SQUISH_SSE_SPLAT( a ) \
|
#define SQUISH_SSE_SPLAT( a ) \
|
||||||
( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) )
|
( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) )
|
||||||
|
|
||||||
|
#ifdef __GNUC__
|
||||||
|
# define SQUISH_ALIGN_16 __attribute__ ((__aligned__ (16)))
|
||||||
|
#else
|
||||||
|
# define SQUISH_ALIGN_16 __declspec(align(16))
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace squish {
|
namespace squish {
|
||||||
|
|
||||||
#define VEC4_CONST( X ) Vec4( _mm_set1_ps( X ) )
|
#define VEC4_CONST( X ) Vec4( _mm_set1_ps( X ) )
|
||||||
@ -56,6 +62,11 @@ public:
|
|||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Vec4( const float * v )
|
||||||
|
{
|
||||||
|
m_v = _mm_load_ps( v );
|
||||||
|
}
|
||||||
|
|
||||||
Vec4( float x, float y, float z, float w )
|
Vec4( float x, float y, float z, float w )
|
||||||
{
|
{
|
||||||
m_v = _mm_setr_ps( x, y, z, w );
|
m_v = _mm_setr_ps( x, y, z, w );
|
||||||
@ -63,11 +74,7 @@ public:
|
|||||||
|
|
||||||
Vec3 GetVec3() const
|
Vec3 GetVec3() const
|
||||||
{
|
{
|
||||||
#ifdef __GNUC__
|
SQUISH_ALIGN_16 float c[4];
|
||||||
__attribute__ ((__aligned__ (16))) float c[4];
|
|
||||||
#else
|
|
||||||
__declspec(align(16)) float c[4];
|
|
||||||
#endif
|
|
||||||
_mm_store_ps( c, m_v );
|
_mm_store_ps( c, m_v );
|
||||||
return Vec3( c[0], c[1], c[2] );
|
return Vec3( c[0], c[1], c[2] );
|
||||||
}
|
}
|
||||||
|
@ -29,6 +29,8 @@
|
|||||||
#include <altivec.h>
|
#include <altivec.h>
|
||||||
#undef bool
|
#undef bool
|
||||||
|
|
||||||
|
#define SQUISH_ALIGN_16 __attribute__ ((__aligned__ (16)))
|
||||||
|
|
||||||
namespace squish {
|
namespace squish {
|
||||||
|
|
||||||
#define VEC4_CONST( X ) Vec4( ( vector float )( X ) )
|
#define VEC4_CONST( X ) Vec4( ( vector float )( X ) )
|
||||||
|
@ -231,8 +231,11 @@ void WeightedClusterFit::Compress3( void* block )
|
|||||||
for( int i = 0; i < 16; ++i )
|
for( int i = 0; i < 16; ++i )
|
||||||
ordered[m_order[i]] = bestindices[i];
|
ordered[m_order[i]] = bestindices[i];
|
||||||
|
|
||||||
|
m_colours->RemapIndices( ordered, bestindices ); // Set alpha indices.
|
||||||
|
|
||||||
|
|
||||||
// save the block
|
// save the block
|
||||||
WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), ordered, block );
|
WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
|
||||||
|
|
||||||
// save the error
|
// save the error
|
||||||
m_besterror = besterror;
|
m_besterror = besterror;
|
||||||
|
Loading…
Reference in New Issue
Block a user