diff --git a/src/nvtt/squish/colourfit.cpp b/src/nvtt/squish/colourfit.cpp index 55e2816..15d8a74 100644 --- a/src/nvtt/squish/colourfit.cpp +++ b/src/nvtt/squish/colourfit.cpp @@ -43,10 +43,11 @@ void ColourFit::Compress( void* block ) bool isDxt1 = ( ( m_flags & kDxt1 ) != 0 ); if( isDxt1 ) { - Compress4( block ); + Compress3( block ); + if( !m_colours->IsTransparent() ) { - Compress3( block ); + Compress4( block ); } } else diff --git a/src/nvtt/squish/colourset.cpp b/src/nvtt/squish/colourset.cpp index ba5213f..82a7571 100644 --- a/src/nvtt/squish/colourset.cpp +++ b/src/nvtt/squish/colourset.cpp @@ -43,16 +43,16 @@ ColourSet::ColourSet( u8 const* rgba, int flags, bool createMinimalSet/*=false*/ // create the minimal set for( int i = 0; i < 16; ++i ) { - // check for transparent pixels when using dxt1 - if( isDxt1 && rgba[4*i + 3] == 0 ) - { - m_remap[i] = -1; - m_transparent = true; - if (createMinimalSet) continue; - } - if (createMinimalSet) { + // check for transparent pixels when using dxt1 + if( isDxt1 && rgba[4*i + 3] == 0 ) + { + m_remap[i] = -1; + m_transparent = true; + continue; + } + // loop over previous points for a match for( int j = 0;; ++j ) { @@ -100,6 +100,17 @@ ColourSet::ColourSet( u8 const* rgba, int flags, bool createMinimalSet/*=false*/ } else { + // check for transparent pixels when using dxt1 + if( isDxt1 && rgba[4*i + 3] == 0 ) + { + m_remap[i] = -1; + m_transparent = true; + } + else + { + m_remap[i] = m_count; + } + // normalise coordinates to [0,1] float x = ( float )rgba[4*i + 2] / 255.0f; float y = ( float )rgba[4*i + 1] / 255.0f; @@ -111,7 +122,6 @@ ColourSet::ColourSet( u8 const* rgba, int flags, bool createMinimalSet/*=false*/ // add the point m_points[m_count] = Vec3( x, y, z ); m_weights[m_count] = ( weightByAlpha ? 
w : 1.0f ); - m_remap[i] = m_count; // advance ++m_count; diff --git a/src/nvtt/squish/fastclusterfit.cpp b/src/nvtt/squish/fastclusterfit.cpp index 6ef2969..c65cfb2 100644 --- a/src/nvtt/squish/fastclusterfit.cpp +++ b/src/nvtt/squish/fastclusterfit.cpp @@ -104,8 +104,8 @@ struct Precomp { float factor; }; -static Precomp s_threeElement[153]; -static Precomp s_fourElement[969]; +static SQUISH_ALIGN_16 Precomp s_threeElement[153]; +static SQUISH_ALIGN_16 Precomp s_fourElement[969]; void FastClusterFit::DoPrecomputation() { @@ -229,11 +229,12 @@ void FastClusterFit::Compress3( void* block ) x1 = zero; for( int c1 = 0; c1 <= 16-c0; c1++) - { - Vec4 const alpha2_sum = VEC4_CONST(s_threeElement[i].alpha2_sum); - Vec4 const beta2_sum = VEC4_CONST(s_threeElement[i].beta2_sum); - Vec4 const alphabeta_sum = VEC4_CONST(s_threeElement[i].alphabeta_sum); - Vec4 const factor = VEC4_CONST(s_threeElement[i].factor); + { + Vec4 const constants = Vec4((const float *)&s_threeElement[i]); + Vec4 const alpha2_sum = constants.SplatX(); + Vec4 const beta2_sum = constants.SplatY(); + Vec4 const alphabeta_sum = constants.SplatZ(); + Vec4 const factor = constants.SplatW(); i++; Vec4 const alphax_sum = MultiplyAdd(half, x1, x0); @@ -310,6 +311,8 @@ void FastClusterFit::Compress3( void* block ) for( int i = 0; i < 16; ++i ) ordered[m_order[i]] = bestindices[i]; + m_colours->RemapIndices( ordered, bestindices ); // Set alpha indices. 
+ // save the block - WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), ordered, block ); + WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block ); @@ -347,10 +350,11 @@ void FastClusterFit::Compress4( void* block ) for( int c2 = 0; c2 <= 16-c0-c1; c2++) { - Vec4 const alpha2_sum = VEC4_CONST(s_fourElement[i].alpha2_sum); - Vec4 const beta2_sum = VEC4_CONST(s_fourElement[i].beta2_sum); - Vec4 const alphabeta_sum = VEC4_CONST(s_fourElement[i].alphabeta_sum); - Vec4 const factor = VEC4_CONST(s_fourElement[i].factor); + Vec4 const constants = Vec4((const float *)&s_fourElement[i]); + Vec4 const alpha2_sum = constants.SplatX(); + Vec4 const beta2_sum = constants.SplatY(); + Vec4 const alphabeta_sum = constants.SplatZ(); + Vec4 const factor = constants.SplatW(); i++; Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird); diff --git a/src/nvtt/squish/simd_sse.h b/src/nvtt/squish/simd_sse.h index d241d69..7624f40 100644 --- a/src/nvtt/squish/simd_sse.h +++ b/src/nvtt/squish/simd_sse.h @@ -35,6 +35,12 @@ #define SQUISH_SSE_SPLAT( a ) \ ( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) ) +#ifdef __GNUC__ +# define SQUISH_ALIGN_16 __attribute__ ((__aligned__ (16))) +#else +# define SQUISH_ALIGN_16 __declspec(align(16)) +#endif + namespace squish { #define VEC4_CONST( X ) Vec4( _mm_set1_ps( X ) ) @@ -55,7 +61,12 @@ public: m_v = arg.m_v; return *this; } - + + Vec4( const float * v ) + { + m_v = _mm_load_ps( v ); + } + Vec4( float x, float y, float z, float w ) { m_v = _mm_setr_ps( x, y, z, w ); @@ -63,11 +74,7 @@ public: Vec3 GetVec3() const { -#ifdef __GNUC__ - __attribute__ ((__aligned__ (16))) float c[4]; -#else - __declspec(align(16)) float c[4]; -#endif + SQUISH_ALIGN_16 float c[4]; _mm_store_ps( c, m_v ); return Vec3( c[0], c[1], c[2] ); } diff --git a/src/nvtt/squish/simd_ve.h b/src/nvtt/squish/simd_ve.h index d22b370..cfed6a9 100644 --- a/src/nvtt/squish/simd_ve.h +++ b/src/nvtt/squish/simd_ve.h @@ -29,6 +29,8 @@ #include #undef bool +#define SQUISH_ALIGN_16 __attribute__ ((__aligned__ (16))) 
+ namespace squish { #define VEC4_CONST( X ) Vec4( ( vector float )( X ) ) diff --git a/src/nvtt/squish/weightedclusterfit.cpp b/src/nvtt/squish/weightedclusterfit.cpp index 2ebe078..36186b8 100644 --- a/src/nvtt/squish/weightedclusterfit.cpp +++ b/src/nvtt/squish/weightedclusterfit.cpp @@ -231,8 +231,11 @@ void WeightedClusterFit::Compress3( void* block ) for( int i = 0; i < 16; ++i ) ordered[m_order[i]] = bestindices[i]; + m_colours->RemapIndices( ordered, bestindices ); // Set alpha indices. + + // save the block - WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), ordered, block ); + WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block ); // save the error m_besterror = besterror;