Add a scalar GetX() accessor to Vec4 in simd_sse.h and simd_ve.h, and use it
in ClusterFit::GetBestError() and WeightedClusterFit::GetBestError() in place
of GetVec3().X(), so only the first component is read back from the vector.

diff --git a/src/nvtt/squish/clusterfit.cpp b/src/nvtt/squish/clusterfit.cpp
index 8411478..772dfdb 100644
--- a/src/nvtt/squish/clusterfit.cpp
+++ b/src/nvtt/squish/clusterfit.cpp
@@ -109,7 +109,7 @@ void ClusterFit::SetMetric(float r, float g, float b)
 float ClusterFit::GetBestError() const
 {
 #if SQUISH_USE_SIMD
-	return m_besterror.GetVec3().X();
+	return m_besterror.GetX();
 #else
 	return m_besterror;
 #endif
diff --git a/src/nvtt/squish/simd_sse.h b/src/nvtt/squish/simd_sse.h
index aa0e7b7..e94846c 100644
--- a/src/nvtt/squish/simd_sse.h
+++ b/src/nvtt/squish/simd_sse.h
@@ -72,6 +72,13 @@ public:
 		_mm_store_ps( c, m_v );
 		return Vec3( c[0], c[1], c[2] );
 	}
+
+	float GetX() const
+	{
+		SQUISH_ALIGN_16 float f;
+		_mm_store_ss(&f, m_v);
+		return f;
+	}
 
 	Vec4 SplatX() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 0 ) ) ); }
 	Vec4 SplatY() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 1 ) ) ); }
diff --git a/src/nvtt/squish/simd_ve.h b/src/nvtt/squish/simd_ve.h
index 2be08fa..2a9ab13 100644
--- a/src/nvtt/squish/simd_ve.h
+++ b/src/nvtt/squish/simd_ve.h
@@ -78,7 +78,14 @@ public:
 		u.v = m_v;
 		return Vec3( u.c[0], u.c[1], u.c[2] );
 	}
-	
+
+	float GetX() const
+	{
+		union { vector float v; float c[4]; } u;
+		u.v = m_v;
+		return u.c[0];
+	}
+
 	Vec4 SplatX() const { return Vec4( vec_splat( m_v, 0 ) ); }
 	Vec4 SplatY() const { return Vec4( vec_splat( m_v, 1 ) ); }
 	Vec4 SplatZ() const { return Vec4( vec_splat( m_v, 2 ) ); }
diff --git a/src/nvtt/squish/weightedclusterfit.cpp b/src/nvtt/squish/weightedclusterfit.cpp
index f3781b1..9181249 100644
--- a/src/nvtt/squish/weightedclusterfit.cpp
+++ b/src/nvtt/squish/weightedclusterfit.cpp
@@ -120,7 +120,7 @@ float WeightedClusterFit::GetBestError() const
 #if SQUISH_USE_SIMD
 	Vec4 x = m_xxsum * m_metricSqr;
 	Vec4 error = m_besterror + x.SplatX() + x.SplatY() + x.SplatZ();
-	return error.GetVec3().X();
+	return error.GetX();
 #else
 	return m_besterror + Dot(m_xxsum, m_metricSqr);
 #endif