From 23bfc1b514dce8bad56cb3eac8fc965abba5c61f Mon Sep 17 00:00:00 2001 From: "castano@gmail.com" Date: Sun, 1 Jan 2012 21:29:27 +0000 Subject: [PATCH] Fix errors and enable new cluster fit compressor. --- project/vc9/nvconfig.h | 1 + project/vc9/nvimage/nvimage.vcproj | 8 ++-- project/vc9/squish/squish.vcproj | 4 ++ src/nvmath/Fitting.cpp | 2 +- src/nvmath/SimdVector.h | 13 +++++- src/nvmath/SimdVector_SSE.h | 6 +-- src/nvtt/ClusterFit.cpp | 71 ++++++++++++++---------------- src/nvtt/ClusterFit.h | 6 ++- src/nvtt/CompressorDX9.cpp | 2 +- src/nvtt/CompressorDX9.h | 2 +- src/nvtt/QuickCompressDXT.cpp | 19 ++++---- 11 files changed, 76 insertions(+), 58 deletions(-) diff --git a/project/vc9/nvconfig.h b/project/vc9/nvconfig.h index 0702786..b18fcc8 100644 --- a/project/vc9/nvconfig.h +++ b/project/vc9/nvconfig.h @@ -17,5 +17,6 @@ #define HAVE_JPEG #define HAVE_TIFF #endif*/ +#define HAVE_STBIMAGE #endif // NV_CONFIG diff --git a/project/vc9/nvimage/nvimage.vcproj b/project/vc9/nvimage/nvimage.vcproj index 314783f..1676b05 100644 --- a/project/vc9/nvimage/nvimage.vcproj +++ b/project/vc9/nvimage/nvimage.vcproj @@ -45,7 +45,7 @@ + + diff --git a/src/nvmath/Fitting.cpp b/src/nvmath/Fitting.cpp index 98172b4..57c755a 100644 --- a/src/nvmath/Fitting.cpp +++ b/src/nvmath/Fitting.cpp @@ -50,7 +50,7 @@ static inline Vector3 firstEigenVector_PowerMethod(const float *__restrict matri v = Vector3(x, y, z) / norm; } - return v; + return v; } diff --git a/src/nvmath/SimdVector.h b/src/nvmath/SimdVector.h index fa164f3..42274c0 100644 --- a/src/nvmath/SimdVector.h +++ b/src/nvmath/SimdVector.h @@ -2,18 +2,29 @@ #include "Vector.h" // Vector3, Vector4 - // Set some reasonable defaults. 
#ifndef NV_USE_ALTIVEC # define NV_USE_ALTIVEC NV_CPU_PPC +//# define NV_USE_ALTIVEC defined(__VEC__) #endif #ifndef NV_USE_SSE # if NV_CPU_X86 || NV_CPU_X86_64 # define NV_USE_SSE 2 # endif +//# if defined(__SSE2__) +//# define NV_USE_SSE 2 +//# elif defined(__SSE__) +//# define NV_USE_SSE 1 +//# else +//# define NV_USE_SSE 0 +//# endif #endif +// Internally set NV_USE_SIMD when either altivec or sse is available. +#if NV_USE_ALTIVEC && NV_USE_SSE +# error "Cannot enable both altivec and sse!" +#endif #if NV_USE_ALTIVEC # include "SimdVector_VE.h" diff --git a/src/nvmath/SimdVector_SSE.h b/src/nvmath/SimdVector_SSE.h index 2b8271b..495d4ae 100644 --- a/src/nvmath/SimdVector_SSE.h +++ b/src/nvmath/SimdVector_SSE.h @@ -46,10 +46,10 @@ namespace nv { explicit SimdVector(float f) : vec(_mm_set1_ps(f)) {} explicit SimdVector(__m128 v) : vec(v) {} - explicit SimdVector(Vector4::Arg v) + /*explicit SimdVector(const Vector4 & v) { - vec = _mm_load_ps( v.component ); - } + vec = _mm_load_ps( v.components ); + }*/ explicit SimdVector(const float * v) { diff --git a/src/nvtt/ClusterFit.cpp b/src/nvtt/ClusterFit.cpp index d148fb1..c191c35 100644 --- a/src/nvtt/ClusterFit.cpp +++ b/src/nvtt/ClusterFit.cpp @@ -83,6 +83,7 @@ void ClusterFit::setColourSet(const ColorSet * set) m_xxsum = SimdVector( 0.0f ); m_xsum = SimdVector( 0.0f ); #else + m_xxsum = Vector3(0.0f); m_xsum = Vector3(0.0f); m_wsum = 0.0f; #endif @@ -91,11 +92,12 @@ void ClusterFit::setColourSet(const ColorSet * set) { int p = order[i]; #if NVTT_USE_SIMD - m_weighted[i] = SimdVector(Vector4(set->weights[p] * values[p], set->weights[p])); + Vector4 tmp(values[p] * set->weights[p], set->weights[p]); + m_weighted[i] = SimdVector(tmp.component); m_xxsum += m_weighted[i] * m_weighted[i]; m_xsum += m_weighted[i]; #else - m_weighted[i] = values[p]; + m_weighted[i] = values[p] * set->weights[p]; m_xxsum += m_weighted[i] * m_weighted[i]; m_xsum += m_weighted[i]; m_weights[i] = set->weights[p]; @@ -108,7 +110,8 @@ void 
ClusterFit::setColourSet(const ColorSet * set) void ClusterFit::setMetric(Vector4::Arg w) { #if NVTT_USE_SIMD - m_metric = SimdVector(Vector4(w.xyz(), 1)); + Vector4 tmp(w.xyz(), 1); + m_metric = SimdVector(tmp.component); #else m_metric = w.xyz(); #endif @@ -289,22 +292,22 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end ) SimdVector e3 = negativeMultiplySubtract( b, betax_sum, e2 ); SimdVector e4 = multiplyAdd( two, e3, e1 ); - // apply the metric to the error term - SimdVector e5 = e4 * m_metricSqr; - SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ(); - - // keep the solution if it wins - if( compareAnyLessThan( error, besterror ) ) - { - besterror = error; - beststart = a; - bestend = b; - b0 = c0; - b1 = c1; - b2 = c2; - } - - x2 += m_weighted[c0+c1+c2]; + // apply the metric to the error term + SimdVector e5 = e4 * m_metricSqr; + SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ(); + + // keep the solution if it wins + if( compareAnyLessThan( error, besterror ) ) + { + besterror = error; + beststart = a; + bestend = b; + b0 = c0; + b1 = c1; + b2 = c2; + } + + x2 += m_weighted[c0+c1+c2]; } x1 += m_weighted[c0+c1]; @@ -333,9 +336,6 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end ) bool ClusterFit::compress3(Vector3 * start, Vector3 * end) { const uint count = m_count; - const Vector3 one( 1.0f ); - const Vector3 zero( 0.0f ); - const Vector3 half( 0.5f ); const Vector3 grid( 31.0f, 63.0f, 31.0f ); const Vector3 gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); @@ -372,10 +372,10 @@ bool ClusterFit::compress3(Vector3 * start, Vector3 * end) Vector3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor; // clamp to the grid - a = min(one, max(zero, a)); - b = min(one, max(zero, b)); - a = floor(grid * a + half) * gridrcp; - b = floor(grid * b + half) * gridrcp; + a = clamp(a, 0, 1); + b = clamp(b, 0, 1); + a = floor(grid * a + 0.5f) * gridrcp; + b = floor(grid * b + 0.5f) * gridrcp; // compute the error Vector3 e1 
= a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum ); @@ -420,9 +420,6 @@ bool ClusterFit::compress3(Vector3 * start, Vector3 * end) bool ClusterFit::compress4(Vector3 * start, Vector3 * end) { const uint count = m_count; - Vector3 const one( 1.0f ); - Vector3 const zero( 0.0f ); - Vector3 const half( 0.5f ); Vector3 const grid( 31.0f, 63.0f, 31.0f ); Vector3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); @@ -462,10 +459,10 @@ bool ClusterFit::compress4(Vector3 * start, Vector3 * end) Vector3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor; // clamp to the grid - a = min( one, max( zero, a ) ); - b = min( one, max( zero, b ) ); - a = floor( grid*a + half )*gridrcp; - b = floor( grid*b + half )*gridrcp; + a = clamp(a, 0, 1); + b = clamp(b, 0, 1); + a = floor(a * grid + 0.5f) * gridrcp; + b = floor(b * grid + 0.5f) * gridrcp; // compute the error Vector3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum ); @@ -474,7 +471,7 @@ bool ClusterFit::compress4(Vector3 * start, Vector3 * end) float error = dot( e1, m_metricSqr ); // keep the solution if it wins - if( error < besterror ) + if (error < besterror) { besterror = error; beststart = a; @@ -497,13 +494,13 @@ bool ClusterFit::compress4(Vector3 * start, Vector3 * end) } // save the block if necessary - if( besterror < m_besterror ) + if (besterror < m_besterror) { *start = beststart; *end = bestend; - // save the error - m_besterror = besterror; + // save the error + m_besterror = besterror; return true; } diff --git a/src/nvtt/ClusterFit.h b/src/nvtt/ClusterFit.h index e023c66..6e85217 100644 --- a/src/nvtt/ClusterFit.h +++ b/src/nvtt/ClusterFit.h @@ -27,11 +27,13 @@ #ifndef NVTT_CLUSTERFIT_H #define NVTT_CLUSTERFIT_H -#define NVTT_USE_SIMD 0 - #include "nvmath/SimdVector.h" #include "nvmath/Vector.h" +// Use SIMD version if altivec or SSE are available. 
+//#define NVTT_USE_SIMD (NV_USE_ALTIVEC || NV_USE_SSE) +#define NVTT_USE_SIMD 0 + namespace nv { struct ColorSet; diff --git a/src/nvtt/CompressorDX9.cpp b/src/nvtt/CompressorDX9.cpp index 10c74d9..60e8611 100644 --- a/src/nvtt/CompressorDX9.cpp +++ b/src/nvtt/CompressorDX9.cpp @@ -109,7 +109,7 @@ void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alpha QuickCompress::compressDXT5(rgba, block); } -#if 0 +#if 1 void NormalCompressorDXT1::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) { set.setUniformWeights(); diff --git a/src/nvtt/CompressorDX9.h b/src/nvtt/CompressorDX9.h index 9f81e14..4ff8c10 100644 --- a/src/nvtt/CompressorDX9.h +++ b/src/nvtt/CompressorDX9.h @@ -64,7 +64,7 @@ namespace nv // Normal CPU compressors. -#if 0 +#if 1 struct NormalCompressorDXT1 : public ColorSetCompressor { virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); diff --git a/src/nvtt/QuickCompressDXT.cpp b/src/nvtt/QuickCompressDXT.cpp index 5659cef..b6e788a 100644 --- a/src/nvtt/QuickCompressDXT.cpp +++ b/src/nvtt/QuickCompressDXT.cpp @@ -115,6 +115,7 @@ inline static void insetBBox(Vector3 * restrict maxColor, Vector3 * restrict min *minColor = clamp(*minColor + inset, 0.0f, 255.0f); } +// Takes a normalized color in [0, 255] range and returns it rounded to a packed 16-bit color; *v presumably receives the re-expanded color (TODO confirm). inline static uint16 roundAndExpand(Vector3 * restrict v) { uint r = uint(clamp(v->x * (31.0f / 255.0f), 0.0f, 31.0f) + 0.5f); @@ -168,6 +169,7 @@ inline static uint computeIndices4(const Vector3 block[16], Vector3::Arg maxColo return indices; } +// maxColor and minColor are expected to be in the same range as the color set. 
inline static uint computeIndices4(const ColorSet & set, Vector3::Arg maxColor, Vector3::Arg minColor) { Vector3 palette[4]; @@ -224,6 +226,7 @@ inline static float evaluatePaletteError4(const Vector3 block[16], Vector3::Arg return total; } +// maxColor and minColor are expected to be in the same range as the color set. inline static uint computeIndices3(const ColorSet & set, Vector3::Arg maxColor, Vector3::Arg minColor) { Vector3 palette[4]; @@ -702,8 +705,8 @@ void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, void QuickCompress::outputBlock4(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block) { - Vector3 maxColor = start * 255; - Vector3 minColor = end * 255; + Vector3 minColor = start * 255; + Vector3 maxColor = end * 255; uint16 color0 = roundAndExpand(&maxColor); uint16 color1 = roundAndExpand(&minColor); @@ -715,17 +718,17 @@ void QuickCompress::outputBlock4(const ColorSet & set, const Vector3 & start, co block->col0 = Color16(color0); block->col1 = Color16(color1); - block->indices = computeIndices4(set, maxColor, minColor); + block->indices = computeIndices4(set, maxColor / 255, minColor / 255); //optimizeEndPoints4(set, block); } void QuickCompress::outputBlock3(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block) { - Vector3 maxColor = start * 255; - Vector3 minColor = end * 255; - uint16 color0 = roundAndExpand(&maxColor); - uint16 color1 = roundAndExpand(&minColor); + Vector3 minColor = start * 255; + Vector3 maxColor = end * 255; + uint16 color0 = roundAndExpand(&minColor); + uint16 color1 = roundAndExpand(&maxColor); if (color0 > color1) { @@ -735,7 +738,7 @@ void QuickCompress::outputBlock3(const ColorSet & set, const Vector3 & start, co block->col0 = Color16(color0); block->col1 = Color16(color1); - block->indices = computeIndices3(set, maxColor, minColor); + block->indices = computeIndices3(set, maxColor / 255, minColor / 255); //optimizeEndPoints3(set, 
block); } \ No newline at end of file