Fix msvc build. Fixes issue 171.

Improve sse perf.
Fix rmse computation in nvimgdiff.
pull/216/head
castano 13 years ago
parent 6d843c78cf
commit 8922ab86d6

@@ -481,7 +481,7 @@
 Name="VCLinkerTool"
 OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).dll"
 LinkIncremental="2"
-AdditionalLibraryDirectories=""$(CUDA_LIB_PATH)\..\lib64""
+AdditionalLibraryDirectories=""
 GenerateDebugInformation="true"
 SubSystem="2"
 RandomizedBaseAddress="1"

@ -12,19 +12,19 @@
# if NV_CPU_X86 || NV_CPU_X86_64 # if NV_CPU_X86 || NV_CPU_X86_64
# define NV_USE_SSE 2 # define NV_USE_SSE 2
# endif # endif
//# if defined(__SSE2__) //# if defined(__SSE2__)
//# define NV_USE_SSE 2 //# define NV_USE_SSE 2
//# elif defined(__SSE__) //# elif defined(__SSE__)
//# define NV_USE_SSE 1 //# define NV_USE_SSE 1
//# else //# else
//# define NV_USE_SSE 0 //# define NV_USE_SSE 0
//# endif //# endif
#endif #endif
// Internally set NV_USE_SIMD when either altivec or sse is available. // Internally set NV_USE_SIMD when either altivec or sse is available.
#if NV_USE_ALTIVEC && NV_USE_SSE #if NV_USE_ALTIVEC && NV_USE_SSE
# error "Cannot enable both altivec and sse!" # error "Cannot enable both altivec and sse!"
#endif #endif
#if NV_USE_ALTIVEC #if NV_USE_ALTIVEC
# include "SimdVector_VE.h" # include "SimdVector_VE.h"

@@ -42,35 +42,30 @@ namespace nv {
     typedef SimdVector const& Arg;

-    SimdVector() {}
-    explicit SimdVector(float f) : vec(_mm_set1_ps(f)) {}
-    explicit SimdVector(__m128 v) : vec(v) {}
+    NV_FORCEINLINE SimdVector() {}
+    NV_FORCEINLINE explicit SimdVector(float f) : vec(_mm_set1_ps(f)) {}
+    NV_FORCEINLINE explicit SimdVector(__m128 v) : vec(v) {}

-    /*explicit SimdVector(const Vector4 & v)
-    {
-        vec = _mm_load_ps( v.components );
-    }*/
+    NV_FORCEINLINE explicit SimdVector(NV_ALIGN_16 Vector4 v) {
+        vec = _mm_load_ps( v.component );
+    }

-    explicit SimdVector(const float * v)
-    {
-        vec = _mm_load_ps( v );
-    }
+    NV_FORCEINLINE explicit SimdVector(const float * v) {
+        vec = _mm_load_ps( v );
+    }

-    SimdVector(float x, float y, float z, float w)
-    {
-        vec = _mm_setr_ps( x, y, z, w );
-    }
+    NV_FORCEINLINE SimdVector(float x, float y, float z, float w) {
+        vec = _mm_setr_ps( x, y, z, w );
+    }

-    SimdVector(const SimdVector & arg) : vec(arg.vec) {}
+    NV_FORCEINLINE SimdVector(const SimdVector & arg) : vec(arg.vec) {}

-    SimdVector & operator=(const SimdVector & arg)
-    {
-        vec = arg.vec;
-        return *this;
-    }
+    NV_FORCEINLINE SimdVector & operator=(const SimdVector & arg) {
+        vec = arg.vec;
+        return *this;
+    }

     float toFloat() const
     {
         NV_ALIGN_16 float f;
@@ -93,55 +88,52 @@ namespace nv {
     }

 #define SSE_SPLAT( a ) ((a) | ((a) << 2) | ((a) << 4) | ((a) << 6))

-    SimdVector splatX() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 0 ) ) ); }
-    SimdVector splatY() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 1 ) ) ); }
-    SimdVector splatZ() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 2 ) ) ); }
-    SimdVector splatW() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 3 ) ) ); }
+    NV_FORCEINLINE SimdVector splatX() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 0 ) ) ); }
+    NV_FORCEINLINE SimdVector splatY() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 1 ) ) ); }
+    NV_FORCEINLINE SimdVector splatZ() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 2 ) ) ); }
+    NV_FORCEINLINE SimdVector splatW() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 3 ) ) ); }

 #undef SSE_SPLAT

-    SimdVector& operator+=( Arg v )
-    {
-        vec = _mm_add_ps( vec, v.vec );
-        return *this;
-    }
+    NV_FORCEINLINE SimdVector & operator+=( Arg v ) {
+        vec = _mm_add_ps( vec, v.vec );
+        return *this;
+    }

-    SimdVector& operator-=( Arg v )
-    {
-        vec = _mm_sub_ps( vec, v.vec );
-        return *this;
-    }
+    NV_FORCEINLINE SimdVector & operator-=( Arg v ) {
+        vec = _mm_sub_ps( vec, v.vec );
+        return *this;
+    }

-    SimdVector& operator*=( Arg v )
-    {
-        vec = _mm_mul_ps( vec, v.vec );
-        return *this;
-    }
+    NV_FORCEINLINE SimdVector & operator*=( Arg v ) {
+        vec = _mm_mul_ps( vec, v.vec );
+        return *this;
+    }
 };

-inline SimdVector operator+( SimdVector::Arg left, SimdVector::Arg right )
+NV_FORCEINLINE SimdVector operator+(SimdVector::Arg left, SimdVector::Arg right)
 {
     return SimdVector( _mm_add_ps( left.vec, right.vec ) );
 }

-inline SimdVector operator-( SimdVector::Arg left, SimdVector::Arg right )
+NV_FORCEINLINE SimdVector operator-(SimdVector::Arg left, SimdVector::Arg right)
 {
     return SimdVector( _mm_sub_ps( left.vec, right.vec ) );
 }

-inline SimdVector operator*( SimdVector::Arg left, SimdVector::Arg right )
+NV_FORCEINLINE SimdVector operator*(SimdVector::Arg left, SimdVector::Arg right)
 {
     return SimdVector( _mm_mul_ps( left.vec, right.vec ) );
 }

 // Returns a*b + c
-inline SimdVector multiplyAdd( SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c )
+NV_FORCEINLINE SimdVector multiplyAdd(SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c)
 {
     return SimdVector( _mm_add_ps( _mm_mul_ps( a.vec, b.vec ), c.vec ) );
 }

-// Returns -( a*b - c )
-inline SimdVector negativeMultiplySubtract( SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c )
+// Returns -( a*b - c ) = c - a*b
+NV_FORCEINLINE SimdVector negativeMultiplySubtract(SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c)
 {
     return SimdVector( _mm_sub_ps( c.vec, _mm_mul_ps( a.vec, b.vec ) ) );
 }
@@ -156,12 +148,12 @@ namespace nv {
     return SimdVector( _mm_add_ps( _mm_mul_ps( diff, estimate ), estimate ) );
 }

-inline SimdVector min( SimdVector::Arg left, SimdVector::Arg right )
+NV_FORCEINLINE SimdVector min(SimdVector::Arg left, SimdVector::Arg right)
 {
     return SimdVector( _mm_min_ps( left.vec, right.vec ) );
 }

-inline SimdVector max( SimdVector::Arg left, SimdVector::Arg right )
+NV_FORCEINLINE SimdVector max(SimdVector::Arg left, SimdVector::Arg right)
 {
     return SimdVector( _mm_max_ps( left.vec, right.vec ) );
 }
@@ -187,12 +179,12 @@ namespace nv {
 #endif
 }

-inline SimdVector compareEqual( SimdVector::Arg left, SimdVector::Arg right )
+NV_FORCEINLINE SimdVector compareEqual(SimdVector::Arg left, SimdVector::Arg right)
 {
     return SimdVector( _mm_cmpeq_ps( left.vec, right.vec ) );
 }

-inline SimdVector select( SimdVector::Arg off, SimdVector::Arg on, SimdVector::Arg bits )
+inline SimdVector select(SimdVector::Arg off, SimdVector::Arg on, SimdVector::Arg bits)
 {
     __m128 a = _mm_andnot_ps( bits.vec, off.vec );
     __m128 b = _mm_and_ps( bits.vec, on.vec );
@@ -200,7 +192,7 @@ namespace nv {
     return SimdVector( _mm_or_ps( a, b ) );
 }

-inline bool compareAnyLessThan( SimdVector::Arg left, SimdVector::Arg right )
+inline bool compareAnyLessThan(SimdVector::Arg left, SimdVector::Arg right)
 {
     __m128 bits = _mm_cmplt_ps( left.vec, right.vec );
     int value = _mm_movemask_ps( bits );
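
Note: the hunks above mark the hot SimdVector members and free functions NV_FORCEINLINE so MSVC reliably inlines them in the cluster-fit inner loops, which is the "sse perf" part of the commit message. A minimal usage sketch of the API as shown above; the helper name, the loop, and the umbrella header path nvmath/SimdVector.h are assumptions for illustration, not part of the commit:

    #include "nvmath/SimdVector.h"   // assumed dispatch header; pulls in SimdVector_SSE.h when NV_USE_SSE is set

    // Weighted accumulation built from the operations shown above.
    // multiplyAdd(a, b, c) returns a*b + c, so each step computes sum += w[i] * v[i].
    nv::SimdVector weightedSum(const nv::SimdVector * v, const float * w, int count)
    {
        nv::SimdVector sum(0.0f);
        for (int i = 0; i < count; i++) {
            sum = nv::multiplyAdd(nv::SimdVector(w[i]), v[i], sum);
        }
        return sum;
    }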

@@ -160,6 +160,11 @@ namespace nv
     nvCheck(x >= 0);
     return logf(x) / logf(2.0f);
 }
+
+inline float exp2f(float x)
+{
+    return powf(2, x);
+}
 #endif

 inline float lerp(float f0, float f1, float t)
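
Note: log2f and exp2f are C99 math functions that older MSVC runtimes do not ship, so the header supplies fallbacks inside namespace nv (presumably part of the MSVC build fix). A tiny usage sketch, assuming the enclosing header is nvmath/nvmath.h and that the fallback block is compiled on the target toolchain:

    #include "nvmath/nvmath.h"   // assumed location of the fallbacks shown above

    float roundTrip(float x)
    {
        // exp2f(log2f(x)) recovers x up to floating-point rounding.
        return nv::exp2f(nv::log2f(x));
    }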

@@ -93,7 +93,7 @@ void ClusterFit::setColourSet(const ColorSet * set)
         int p = order[i];
 #if NVTT_USE_SIMD
         Vector4 tmp(values[p] * set->weights[p], set->weights[p]);
-        m_weighted[i] = SimdVector(tmp.component);
+        m_weighted[i] = SimdVector(tmp);
         m_xxsum += m_weighted[i] * m_weighted[i];
         m_xsum += m_weighted[i];
 #else
@@ -111,7 +111,7 @@ void ClusterFit::setMetric(Vector4::Arg w)
 {
 #if NVTT_USE_SIMD
     Vector4 tmp(w.xyz(), 1);
-    m_metric = SimdVector(tmp.component);
+    m_metric = SimdVector(tmp);
 #else
     m_metric = w.xyz();
 #endif

@@ -31,8 +31,8 @@
 #include "nvmath/Vector.h"

 // Use SIMD version if altivec or SSE are available.
-//#define NVTT_USE_SIMD (NV_USE_ALTIVEC || NV_USE_SSE)
-#define NVTT_USE_SIMD 0
+#define NVTT_USE_SIMD (NV_USE_ALTIVEC || NV_USE_SSE)
+//#define NVTT_USE_SIMD 0

 namespace nv {

@@ -56,7 +56,7 @@ namespace nv {
         uint m_count;

 #if NVTT_USE_SIMD
-        SimdVector m_weighted[16]; // color | weight
+        NV_ALIGN_16 SimdVector m_weighted[16]; // color | weight
         SimdVector m_metric; // vec3
         SimdVector m_metricSqr; // vec3
         SimdVector m_xxsum; // color | weight
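
Note: the new SimdVector(NV_ALIGN_16 Vector4 v) constructor introduced earlier in this commit loads with _mm_load_ps, which requires a 16-byte-aligned address, hence the NV_ALIGN_16 added to m_weighted here. A minimal standalone sketch of that constraint (hypothetical example, not repository code):

    #include <xmmintrin.h>

    // NV_ALIGN_16 is the repository's alignment macro; a plain float array is not guaranteed 16-byte alignment.
    NV_ALIGN_16 float weights[4] = { 0.25f, 0.25f, 0.25f, 0.25f };

    __m128 loadWeights()
    {
        // _mm_load_ps assumes a 16-byte-aligned pointer; unaligned data would need _mm_loadu_ps.
        return _mm_load_ps(weights);
    }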

@@ -111,7 +111,7 @@ void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alpha
     QuickCompress::compressDXT5(rgba, block);
 }

-#if 1
+#if 0

 void NormalCompressorDXT1::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
 {
     set.setUniformWeights();

@@ -64,7 +64,7 @@ namespace nv
 // Normal CPU compressors.

-#if 1
+#if 0

 struct NormalCompressorDXT1 : public ColorSetCompressor
 {
     virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);

@@ -61,7 +61,6 @@ static bool loadImage(nv::Image & image, const char * fileName)
     return true;
 }

-// @@ Compute per-tile errors.
 struct Error
 {
     Error()

@@ -72,6 +71,7 @@ struct Error
         mse = 0.0f;
     }

+    // @@ This has poor precision...
     void addSample(double e)
     {
         samples++;

@@ -240,23 +240,17 @@ int main(int argc, char *argv[])
             error_b.addSample(b);
             error_a.addSample(a);

-            if (compareNormal)
-            {
+            if (compareNormal) {
                 error_normal.addSample(c0, c1);
             }

-            if (compareAlpha)
-            {
-                error_total.addSample(r * c0.a / 255.0);
-                error_total.addSample(g * c0.a / 255.0);
-                error_total.addSample(b * c0.a / 255.0);
-            }
-            else
-            {
-                error_total.addSample(r);
-                error_total.addSample(g);
-                error_total.addSample(b);
-            }
+            double d = sqrt(r*r + g*g + b*b);
+
+            if (compareAlpha) {
+                d *= c0.a / 255.0;
+            }
+
+            error_total.addSample(d);
         }
     }
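
Note: the last hunk is the "Fix rmse computation in nvimgdiff" part of the commit message. Instead of feeding three separate per-channel samples (each optionally scaled by alpha), the loop now feeds one sample per pixel: the Euclidean distance over R, G, B, scaled by alpha when alpha comparison is enabled. A condensed standalone sketch of the corrected accumulation; the Error internals are assumptions inferred from the surrounding code (addSample appears to accumulate squared error), and the helper name is illustrative:

    #include <cmath>

    struct Error {
        int samples = 0;
        double mse = 0.0;

        void addSample(double e) { samples++; mse += e * e; }   // assumed: accumulate squared error
        double rmse() const { return samples ? std::sqrt(mse / samples) : 0.0; }
    };

    // One sample per pixel: combined RGB distance, optionally weighted by the reference alpha.
    void accumulatePixelError(Error & total, double r, double g, double b, double alpha, bool compareAlpha)
    {
        double d = std::sqrt(r * r + g * g + b * b);
        if (compareAlpha) {
            d *= alpha / 255.0;
        }
        total.addSample(d);
    }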
