From 8922ab86d6f9d9b02b82ef13aebefdeb25c923fb Mon Sep 17 00:00:00 2001 From: castano Date: Thu, 12 Jan 2012 16:54:10 +0000 Subject: [PATCH] Fix msvc build. Fixes issue 171. Improve sse perf. Fix rmse computation in nvimgdiff. --- project/vc9/nvtt/nvtt.vcproj | 2 +- src/nvmath/SimdVector.h | 22 ++++++------- src/nvmath/SimdVector_SSE.h | 64 ++++++++++++++++-------------------- src/nvmath/nvmath.h | 5 +++ src/nvtt/ClusterFit.cpp | 4 +-- src/nvtt/ClusterFit.h | 6 ++-- src/nvtt/CompressorDX9.cpp | 2 +- src/nvtt/CompressorDX9.h | 2 +- src/nvtt/tools/imgdiff.cpp | 22 +++++-------- 9 files changed, 60 insertions(+), 69 deletions(-) diff --git a/project/vc9/nvtt/nvtt.vcproj b/project/vc9/nvtt/nvtt.vcproj index eab12ce..b4db5c4 100644 --- a/project/vc9/nvtt/nvtt.vcproj +++ b/project/vc9/nvtt/nvtt.vcproj @@ -481,7 +481,7 @@ Name="VCLinkerTool" OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).dll" LinkIncremental="2" - AdditionalLibraryDirectories=""$(CUDA_LIB_PATH)\..\lib64"" + AdditionalLibraryDirectories="" GenerateDebugInformation="true" SubSystem="2" RandomizedBaseAddress="1" diff --git a/src/nvmath/SimdVector.h b/src/nvmath/SimdVector.h index 42274c0..fa61391 100644 --- a/src/nvmath/SimdVector.h +++ b/src/nvmath/SimdVector.h @@ -12,19 +12,19 @@ # if NV_CPU_X86 || NV_CPU_X86_64 # define NV_USE_SSE 2 # endif -//# if defined(__SSE2__) -//# define NV_USE_SSE 2 -//# elif defined(__SSE__) -//# define NV_USE_SSE 1 -//# else -//# define NV_USE_SSE 0 -//# endif +//# if defined(__SSE2__) +//# define NV_USE_SSE 2 +//# elif defined(__SSE__) +//# define NV_USE_SSE 1 +//# else +//# define NV_USE_SSE 0 +//# endif #endif -// Internally set NV_USE_SIMD when either altivec or sse is available. -#if NV_USE_ALTIVEC && NV_USE_SSE -# error "Cannot enable both altivec and sse!" -#endif +// Internally set NV_USE_SIMD when either altivec or sse is available. +#if NV_USE_ALTIVEC && NV_USE_SSE +# error "Cannot enable both altivec and sse!" 
+#endif #if NV_USE_ALTIVEC # include "SimdVector_VE.h" diff --git a/src/nvmath/SimdVector_SSE.h b/src/nvmath/SimdVector_SSE.h index 495d4ae..a8d52d9 100644 --- a/src/nvmath/SimdVector_SSE.h +++ b/src/nvmath/SimdVector_SSE.h @@ -42,35 +42,30 @@ namespace nv { typedef SimdVector const& Arg; - SimdVector() {} - explicit SimdVector(float f) : vec(_mm_set1_ps(f)) {} - explicit SimdVector(__m128 v) : vec(v) {} + NV_FORCEINLINE SimdVector() {} + NV_FORCEINLINE explicit SimdVector(float f) : vec(_mm_set1_ps(f)) {} + NV_FORCEINLINE explicit SimdVector(__m128 v) : vec(v) {} - /*explicit SimdVector(const Vector4 & v) - { - vec = _mm_load_ps( v.components ); - }*/ + NV_FORCEINLINE explicit SimdVector(NV_ALIGN_16 Vector4 v) { + vec = _mm_load_ps( v.component ); + } - explicit SimdVector(const float * v) - { + NV_FORCEINLINE explicit SimdVector(const float * v) { vec = _mm_load_ps( v ); } - SimdVector(float x, float y, float z, float w) - { + NV_FORCEINLINE SimdVector(float x, float y, float z, float w) { vec = _mm_setr_ps( x, y, z, w ); } - SimdVector(const SimdVector & arg) : vec(arg.vec) {} + NV_FORCEINLINE SimdVector(const SimdVector & arg) : vec(arg.vec) {} - SimdVector & operator=(const SimdVector & arg) - { + NV_FORCEINLINE SimdVector & operator=(const SimdVector & arg) { vec = arg.vec; return *this; } - float toFloat() const { NV_ALIGN_16 float f; @@ -93,55 +88,52 @@ namespace nv { } #define SSE_SPLAT( a ) ((a) | ((a) << 2) | ((a) << 4) | ((a) << 6)) - SimdVector splatX() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 0 ) ) ); } - SimdVector splatY() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 1 ) ) ); } - SimdVector splatZ() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 2 ) ) ); } - SimdVector splatW() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 3 ) ) ); } + NV_FORCEINLINE SimdVector splatX() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 0 ) ) ); } + NV_FORCEINLINE SimdVector splatY() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 1 ) ) ); } + NV_FORCEINLINE SimdVector splatZ() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 2 ) ) ); } + NV_FORCEINLINE SimdVector splatW() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 3 ) ) ); } #undef SSE_SPLAT - SimdVector& operator+=( Arg v ) - { + NV_FORCEINLINE SimdVector & operator+=( Arg v ) { vec = _mm_add_ps( vec, v.vec ); return *this; } - SimdVector& operator-=( Arg v ) - { + NV_FORCEINLINE SimdVector & operator-=( Arg v ) { vec = _mm_sub_ps( vec, v.vec ); return *this; } - SimdVector& operator*=( Arg v ) - { + NV_FORCEINLINE SimdVector & operator*=( Arg v ) { vec = _mm_mul_ps( vec, v.vec ); return *this; } }; - inline SimdVector operator+( SimdVector::Arg left, SimdVector::Arg right ) + NV_FORCEINLINE SimdVector operator+(SimdVector::Arg left, SimdVector::Arg right) { return SimdVector( _mm_add_ps( left.vec, right.vec ) ); } - inline SimdVector operator-( SimdVector::Arg left, SimdVector::Arg right ) + NV_FORCEINLINE SimdVector operator-(SimdVector::Arg left, SimdVector::Arg right) { return SimdVector( _mm_sub_ps( left.vec, right.vec ) ); } - inline SimdVector operator*( SimdVector::Arg left, SimdVector::Arg right ) + NV_FORCEINLINE SimdVector operator*(SimdVector::Arg left, SimdVector::Arg right) { return SimdVector( _mm_mul_ps( left.vec, right.vec ) ); } // Returns a*b + c - inline SimdVector multiplyAdd( SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c ) + NV_FORCEINLINE SimdVector 
multiplyAdd(SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c) { return SimdVector( _mm_add_ps( _mm_mul_ps( a.vec, b.vec ), c.vec ) ); } - // Returns -( a*b - c ) - inline SimdVector negativeMultiplySubtract( SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c ) + // Returns -( a*b - c ) = c - a*b + NV_FORCEINLINE SimdVector negativeMultiplySubtract(SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c) { return SimdVector( _mm_sub_ps( c.vec, _mm_mul_ps( a.vec, b.vec ) ) ); } @@ -156,12 +148,12 @@ namespace nv { return SimdVector( _mm_add_ps( _mm_mul_ps( diff, estimate ), estimate ) ); } - inline SimdVector min( SimdVector::Arg left, SimdVector::Arg right ) + NV_FORCEINLINE SimdVector min(SimdVector::Arg left, SimdVector::Arg right) { return SimdVector( _mm_min_ps( left.vec, right.vec ) ); } - inline SimdVector max( SimdVector::Arg left, SimdVector::Arg right ) + NV_FORCEINLINE SimdVector max(SimdVector::Arg left, SimdVector::Arg right) { return SimdVector( _mm_max_ps( left.vec, right.vec ) ); } @@ -187,12 +179,12 @@ namespace nv { #endif } - inline SimdVector compareEqual( SimdVector::Arg left, SimdVector::Arg right ) + NV_FORCEINLINE SimdVector compareEqual(SimdVector::Arg left, SimdVector::Arg right) { return SimdVector( _mm_cmpeq_ps( left.vec, right.vec ) ); } - inline SimdVector select( SimdVector::Arg off, SimdVector::Arg on, SimdVector::Arg bits ) + inline SimdVector select(SimdVector::Arg off, SimdVector::Arg on, SimdVector::Arg bits) { __m128 a = _mm_andnot_ps( bits.vec, off.vec ); __m128 b = _mm_and_ps( bits.vec, on.vec ); @@ -200,7 +192,7 @@ namespace nv { return SimdVector( _mm_or_ps( a, b ) ); } - inline bool compareAnyLessThan( SimdVector::Arg left, SimdVector::Arg right ) + inline bool compareAnyLessThan(SimdVector::Arg left, SimdVector::Arg right) { __m128 bits = _mm_cmplt_ps( left.vec, right.vec ); int value = _mm_movemask_ps( bits ); diff --git a/src/nvmath/nvmath.h b/src/nvmath/nvmath.h index f486743..20c64d9 100644 --- a/src/nvmath/nvmath.h +++ b/src/nvmath/nvmath.h @@ -160,6 +160,11 @@ namespace nv nvCheck(x >= 0); return logf(x) / logf(2.0f); } + + inline float exp2f(float x) + { + return powf(2, x); + } #endif inline float lerp(float f0, float f1, float t) diff --git a/src/nvtt/ClusterFit.cpp b/src/nvtt/ClusterFit.cpp index 6717a84..7a072c5 100644 --- a/src/nvtt/ClusterFit.cpp +++ b/src/nvtt/ClusterFit.cpp @@ -93,7 +93,7 @@ void ClusterFit::setColourSet(const ColorSet * set) int p = order[i]; #if NVTT_USE_SIMD Vector4 tmp(values[p] * set->weights[p], set->weights[p]); - m_weighted[i] = SimdVector(tmp.component); + m_weighted[i] = SimdVector(tmp); m_xxsum += m_weighted[i] * m_weighted[i]; m_xsum += m_weighted[i]; #else @@ -111,7 +111,7 @@ void ClusterFit::setMetric(Vector4::Arg w) { #if NVTT_USE_SIMD Vector4 tmp(w.xyz(), 1); - m_metric = SimdVector(tmp.component); + m_metric = SimdVector(tmp); #else m_metric = w.xyz(); #endif diff --git a/src/nvtt/ClusterFit.h b/src/nvtt/ClusterFit.h index e8c011c..49a3ec4 100644 --- a/src/nvtt/ClusterFit.h +++ b/src/nvtt/ClusterFit.h @@ -31,8 +31,8 @@ #include "nvmath/Vector.h" // Use SIMD version if altivec or SSE are available. 
-//#define NVTT_USE_SIMD (NV_USE_ALTIVEC || NV_USE_SSE) -#define NVTT_USE_SIMD 0 +#define NVTT_USE_SIMD (NV_USE_ALTIVEC || NV_USE_SSE) +//#define NVTT_USE_SIMD 0 namespace nv { @@ -56,7 +56,7 @@ namespace nv { uint m_count; #if NVTT_USE_SIMD - SimdVector m_weighted[16]; // color | weight + NV_ALIGN_16 SimdVector m_weighted[16]; // color | weight SimdVector m_metric; // vec3 SimdVector m_metricSqr; // vec3 SimdVector m_xxsum; // color | weight diff --git a/src/nvtt/CompressorDX9.cpp b/src/nvtt/CompressorDX9.cpp index 479d2af..e9bf91c 100644 --- a/src/nvtt/CompressorDX9.cpp +++ b/src/nvtt/CompressorDX9.cpp @@ -111,7 +111,7 @@ void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alpha QuickCompress::compressDXT5(rgba, block); } -#if 1 +#if 0 void NormalCompressorDXT1::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) { set.setUniformWeights(); diff --git a/src/nvtt/CompressorDX9.h b/src/nvtt/CompressorDX9.h index 4ff8c10..9f81e14 100644 --- a/src/nvtt/CompressorDX9.h +++ b/src/nvtt/CompressorDX9.h @@ -64,7 +64,7 @@ namespace nv // Normal CPU compressors. -#if 1 +#if 0 struct NormalCompressorDXT1 : public ColorSetCompressor { virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); diff --git a/src/nvtt/tools/imgdiff.cpp b/src/nvtt/tools/imgdiff.cpp index 3e8ea1e..3f3aeb3 100644 --- a/src/nvtt/tools/imgdiff.cpp +++ b/src/nvtt/tools/imgdiff.cpp @@ -61,7 +61,6 @@ static bool loadImage(nv::Image & image, const char * fileName) return true; } -// @@ Compute per-tile errors. struct Error { Error() @@ -72,6 +71,7 @@ struct Error mse = 0.0f; } + // @@ This has poor precision... void addSample(double e) { samples++; @@ -240,23 +240,17 @@ int main(int argc, char *argv[]) error_b.addSample(b); error_a.addSample(a); - if (compareNormal) - { + if (compareNormal) { error_normal.addSample(c0, c1); } - if (compareAlpha) - { - error_total.addSample(r * c0.a / 255.0); - error_total.addSample(g * c0.a / 255.0); - error_total.addSample(b * c0.a / 255.0); - } - else - { - error_total.addSample(r); - error_total.addSample(g); - error_total.addSample(b); + double d = sqrt(r*r + g*g + b*b); + + if (compareAlpha) { + d *= c0.a / 255.0; } + + error_total.addSample(d); } }
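The select() helper in the SimdVector_SSE.h hunk above is a branch-free blend: SSE comparison intrinsics (_mm_cmpeq_ps, _mm_cmplt_ps) produce all-ones or all-zeros lane masks, so ANDNOT/AND/OR can merge two vectors with no branches. Below is a minimal standalone sketch of that technique using raw SSE intrinsics rather than the SimdVector wrapper; the select name mirrors the patch, everything else is illustrative.

    #include <xmmintrin.h>
    #include <cstdio>

    // Blend: take 'on' where the mask lane is all-ones, 'off' where it is zero.
    static __m128 select(__m128 off, __m128 on, __m128 bits)
    {
        __m128 a = _mm_andnot_ps(bits, off);  // lanes with mask == 0 keep 'off'
        __m128 b = _mm_and_ps(bits, on);      // lanes with mask == 1 keep 'on'
        return _mm_or_ps(a, b);               // the two are disjoint, so OR merges them
    }

    int main()
    {
        __m128 off  = _mm_setr_ps(1, 2, 3, 4);
        __m128 on   = _mm_setr_ps(10, 20, 30, 40);
        // Mask lanes 0 and 2 (0 < 1 is true, 5 < 1 is false):
        __m128 bits = _mm_cmplt_ps(_mm_setr_ps(0, 5, 0, 5), _mm_set1_ps(1));

        float out[4];
        _mm_storeu_ps(out, select(off, on, bits));
        std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 10 2 30 4
        return 0;
    }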
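The exp2f added to nvmath.h above is part of the MSVC build fix: MSVC's CRT at the time lacked the C99 math functions, so the patch supplies a fallback via the identity 2^x = pow(2, x). A sketch of the same fallback under an assumed name (my_exp2f, renamed here so it cannot collide with a <cmath> that already declares exp2f):

    #include <cmath>
    #include <cstdio>

    // Mirrors the patch's powf(2, x) fallback; the name is hypothetical.
    static inline float my_exp2f(float x) { return std::pow(2.0f, x); }

    int main()
    {
        const float xs[] = { -1.0f, 0.0f, 0.5f, 10.0f };
        for (float x : xs)
            std::printf("2^%g = %g\n", x, my_exp2f(x));  // 0.5, 1, ~1.41421, 1024
        return 0;
    }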
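The imgdiff hunk replaces three per-channel samples with one per-pixel Euclidean distance. Assuming Error::addSample(e) accumulates samples++ and mse += e*e (consistent with the "@@ This has poor precision" comment above), both versions add the same sum of squared channel errors, and the alpha weight c0.a / 255.0 factors out identically since (r*a)^2 + (g*a)^2 + (b*a)^2 == (a * sqrt(r*r + g*g + b*b))^2. What changes is the sample count: the MSE divisor drops from 3*N channel samples to N pixels, which is the RMSE fix named in the subject line. A small sketch checking that equivalence; the Error struct here is a simplified stand-in for the one in the patch:

    #include <cmath>
    #include <cstdio>

    struct Error {                       // simplified stand-in for imgdiff's Error
        int samples = 0;
        double mse = 0.0;
        void addSample(double e) { samples++; mse += e * e; }
        double rmse() const { return std::sqrt(mse / samples); }
    };

    int main()
    {
        double r = 3, g = 4, b = 12, a = 0.5;    // made-up channel diffs and alpha

        Error oldTotal;                          // pre-patch: 3 samples per pixel
        oldTotal.addSample(r * a);
        oldTotal.addSample(g * a);
        oldTotal.addSample(b * a);

        Error newTotal;                          // post-patch: 1 sample per pixel
        newTotal.addSample(std::sqrt(r*r + g*g + b*b) * a);

        // Same accumulated squared error (42.25), different divisor:
        std::printf("sum of squares: %g vs %g\n", oldTotal.mse, newTotal.mse);
        std::printf("rmse:           %g vs %g\n", oldTotal.rmse(), newTotal.rmse());
        return 0;
    }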