From 49482d1441d34b1607908ef0f4d3e0b8e4155e42 Mon Sep 17 00:00:00 2001 From: castano Date: Tue, 9 Nov 2010 03:38:03 +0000 Subject: [PATCH] Work in progress. Merging squish into nvtt. Using squish only to find endpoints, do discrete refinement afterwards. --- project/vc9/nvconfig.h | 8 +- project/vc9/nvcore/nvcore.vcproj | 8 - project/vc9/nvimage/nvimage.vcproj | 204 ++--- project/vc9/nvmath/nvmath.vcproj | 108 +-- project/vc9/nvtt.sln | 3 +- project/vc9/nvtt/nvtt.vcproj | 310 ++++--- src/nvcore/Memory.h | 8 + src/nvimage/ColorBlock.cpp | 9 +- src/nvimage/ColorBlock.h | 23 +- src/nvimage/FloatImage.h | 21 + src/nvimage/ImageIO.cpp | 46 + src/nvimage/PixelFormat.h | 2 +- src/nvmath/SimdVector.h | 24 + src/nvmath/SimdVector_SSE.h | 201 +++++ src/nvmath/SimdVector_VE.h | 187 ++++ src/nvtt/CompressorDX9.cpp | 1126 ++++++++++++------------ src/nvtt/QuickCompressDXT.cpp | 83 +- src/nvtt/QuickCompressDXT.h | 4 + src/nvtt/TexImage.cpp | 124 ++- src/nvtt/nvtt.h | 12 +- src/nvtt/squish/colourfit.cpp | 8 +- src/nvtt/squish/colourfit.h | 6 +- src/nvtt/squish/weightedclusterfit.cpp | 29 +- src/nvtt/squish/weightedclusterfit.h | 4 +- src/nvtt/tests/testsuite.cpp | 172 +++- 25 files changed, 1724 insertions(+), 1006 deletions(-) create mode 100755 src/nvmath/SimdVector.h create mode 100755 src/nvmath/SimdVector_SSE.h create mode 100755 src/nvmath/SimdVector_VE.h diff --git a/project/vc9/nvconfig.h b/project/vc9/nvconfig.h index 486d087..2997693 100644 --- a/project/vc9/nvconfig.h +++ b/project/vc9/nvconfig.h @@ -12,10 +12,10 @@ #endif #if !defined(_M_X64) -#define HAVE_FREEIMAGE -//#define HAVE_PNG -//#define HAVE_JPEG -//#define HAVE_TIFF +//#define HAVE_FREEIMAGE +#define HAVE_PNG +#define HAVE_JPEG +#define HAVE_TIFF #endif #endif // NV_CONFIG diff --git a/project/vc9/nvcore/nvcore.vcproj b/project/vc9/nvcore/nvcore.vcproj index ca5ae81..2f6b760 100644 --- a/project/vc9/nvcore/nvcore.vcproj +++ b/project/vc9/nvcore/nvcore.vcproj @@ -288,18 +288,10 @@ - - - - diff --git 
a/project/vc9/nvimage/nvimage.vcproj b/project/vc9/nvimage/nvimage.vcproj index 11d6be8..93b3dae 100644 --- a/project/vc9/nvimage/nvimage.vcproj +++ b/project/vc9/nvimage/nvimage.vcproj @@ -287,118 +287,102 @@ - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/project/vc9/nvmath/nvmath.vcproj b/project/vc9/nvmath/nvmath.vcproj index 2c95ba6..fe8a049 100644 --- a/project/vc9/nvmath/nvmath.vcproj +++ b/project/vc9/nvmath/nvmath.vcproj @@ -288,62 +288,62 @@ - - - - - - - - - + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/project/vc9/nvtt.sln b/project/vc9/nvtt.sln index c6fc6cd..6d2206b 100644 --- a/project/vc9/nvtt.sln +++ b/project/vc9/nvtt.sln @@ -393,7 +393,8 @@ Global {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Mixed Platforms.Build.0 = Debug|x64 {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.ActiveCfg = Debug|Win32 {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.Build.0 = Debug|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|x64.ActiveCfg = Debug|Win32 + {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|x64.ActiveCfg = Debug|x64 + {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|x64.Build.0 = Debug|x64 {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64 {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64 {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.ActiveCfg = Release|Win32 diff --git a/project/vc9/nvtt/nvtt.vcproj b/project/vc9/nvtt/nvtt.vcproj index 802d4ab..cab8305 100644 --- a/project/vc9/nvtt/nvtt.vcproj +++ b/project/vc9/nvtt/nvtt.vcproj @@ -45,7 +45,7 @@ + + + + @@ -767,34 +775,6 @@ /> - - - - - - - - - - - - - - @@ -867,84 +847,6 @@ RelativePath="..\..\..\src\nvtt\cuda\CudaCompressorDXT.cpp" > - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - @@ -954,52 +856,142 @@ > - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/nvcore/Memory.h b/src/nvcore/Memory.h index 67d37c7..27d7e3d 100644 --- a/src/nvcore/Memory.h +++ b/src/nvcore/Memory.h @@ -11,6 +11,14 @@ #include // new and delete + +#if NV_CC_GNUC +# define NV_ALIGN_16 __attribute__ ((__aligned__ (16))) +#else +# define NV_ALIGN_16 __declspec(align(16)) +#endif + + #define NV_OVERRIDE_ALLOC 0 #if NV_OVERRIDE_ALLOC diff --git a/src/nvimage/ColorBlock.cpp b/src/nvimage/ColorBlock.cpp index 511a6b9..db77083 100644 --- a/src/nvimage/ColorBlock.cpp +++ b/src/nvimage/ColorBlock.cpp @@ -458,7 +458,8 @@ float ColorBlock::volume() const }*/ -void FloatColorBlock::init(const Image * img, uint x, uint y) + +void ColorSet::init(const Image * img, uint x, uint y) { w = min(4U, img->width() - x); h = min(4U, img->height() - y); @@ -485,15 +486,15 @@ void FloatColorBlock::init(const Image * img, uint x, uint y) } } -void FloatColorBlock::init(const FloatImage * img, uint x, uint y) +void ColorSet::init(const FloatImage * img, uint x, uint y) { } -void FloatColorBlock::init(const uint * data, uint w, uint h, uint x, uint y) +void ColorSet::init(const uint * data, uint w, uint h, uint x, uint y) { } -void FloatColorBlock::init(const float * data, uint w, uint h, uint x, uint y) +void ColorSet::init(const float * data, uint w, uint h, uint x, uint y) { } diff --git a/src/nvimage/ColorBlock.h b/src/nvimage/ColorBlock.h index 09ce254..e87cc9f 100644 --- a/src/nvimage/ColorBlock.h +++ b/src/nvimage/ColorBlock.h @@ -26,21 +26,8 @@ namespace nv void swizzle(uint x, uint y, uint z, uint w); // 0=r, 1=g, 2=b, 3=a, 4=0xFF, 5=0 bool isSingleColor(Color32 mask = Color32(0xFF, 0xFF, 0xFF, 0x00)) const; - //uint countUniqueColors() const; - //Color32 averageColor() const; bool hasAlpha() const; - //void diameterRange(Color32 
* start, Color32 * end) const; - //void luminanceRange(Color32 * start, Color32 * end) const; - //void boundsRange(Color32 * start, Color32 * end) const; - //void boundsRangeAlpha(Color32 * start, Color32 * end) const; - - //void sortColorsByAbsoluteValue(); - - //void computeRange(const Vector3 & axis, Color32 * start, Color32 * end) const; - //void sortColors(const Vector3 & axis); - - //float volume() const; // Accessors const Color32 * colors() const; @@ -93,19 +80,21 @@ namespace nv } - struct FloatColorBlock + struct ColorSet { - FloatColorBlock() : w(4), h(4) {} - FloatColorBlock(uint w, uint h) : w(w), h(h) {} + ColorSet() : w(4), h(4) {} + ColorSet(uint w, uint h) : w(w), h(h) {} void init(const Image * img, uint x, uint y); void init(const FloatImage * img, uint x, uint y); void init(const uint * data, uint w, uint h, uint x, uint y); void init(const float * data, uint w, uint h, uint x, uint y); - Vector4 color(uint x, uint y) const { nvDebugCheck(x < w && y < h); return colors[y * 4 + x]; } + Vector4 color(uint x, uint y) const { nvDebugCheck(x < w && y < h); return colors[y * 4 + x]; } Vector4 & color(uint x, uint y) { nvDebugCheck(x < w && y < h); return colors[y * 4 + x]; } + Vector4 color(uint i) const { nvDebugCheck(i < 16); return colors[i]; } + Vector4 & color(uint i) { nvDebugCheck(i < 16); return colors[i]; } Vector4 colors[16]; uint w, h; diff --git a/src/nvimage/FloatImage.h b/src/nvimage/FloatImage.h index e8f273d..9e8d7b6 100644 --- a/src/nvimage/FloatImage.h +++ b/src/nvimage/FloatImage.h @@ -111,6 +111,9 @@ namespace nv float pixel(uint x, uint y, uint c) const; float & pixel(uint x, uint y, uint c); + float pixel(uint idx, uint c) const; + float & pixel(uint idx, uint c); + float pixel(uint idx) const; float & pixel(uint idx); @@ -197,6 +200,24 @@ namespace nv return m_mem[(c * m_height + y) * m_width + x]; } + /// Get pixel component. 
+ inline float FloatImage::pixel(uint idx, uint c) const + { + nvDebugCheck(m_mem != NULL); + nvDebugCheck(idx < uint(m_width*m_height)); + nvDebugCheck(c < m_componentNum); + return m_mem[c * m_height * m_width + idx]; + } + + /// Get pixel component. + inline float & FloatImage::pixel(uint idx, uint c) + { + nvDebugCheck(m_mem != NULL); + nvDebugCheck(idx < uint(m_width*m_height)); + nvDebugCheck(c < m_componentNum); + return m_mem[c * m_height * m_width + idx]; + } + /// Get pixel component. inline float FloatImage::pixel(uint idx) const { diff --git a/src/nvimage/ImageIO.cpp b/src/nvimage/ImageIO.cpp index d28b270..8b5ecae 100644 --- a/src/nvimage/ImageIO.cpp +++ b/src/nvimage/ImageIO.cpp @@ -97,6 +97,7 @@ namespace nv #endif // defined(HAVE_FREEIMAGE) static FloatImage * loadFloatDDS(Stream & s); + static bool saveFloatDDS(const char * fileName, Stream & s, const FloatImage * img, uint base_component, uint num_components); } // ImageIO namespace } // nv namespace @@ -264,6 +265,12 @@ bool nv::ImageIO::saveFloat(const char * fileName, Stream & s, const FloatImage return false; } + const char * extension = Path::extension(fileName); + + if (strCaseCmp(extension, ".dds") == 0) { + return saveFloatDDS(fileName, s, fimage, baseComponent, componentCount); + } + #if defined(HAVE_FREEIMAGE) FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) { @@ -1792,3 +1799,42 @@ FloatImage * nv::ImageIO::loadFloatDDS(Stream & s) return NULL; } + +bool nv::ImageIO::saveFloatDDS(const char * fileName, Stream & s, const FloatImage * img, uint base_component, uint num_components) +{ + nvCheck(s.isSaving()); + nvCheck(!s.isError()); + + if (num_components != 4) return false; + + static const uint D3DFMT_A16B16G16R16F = 113; + + DDSHeader header; + header.setTexture2D(); + header.setWidth(img->width()); + header.setHeight(img->height()); + header.setFormatCode(D3DFMT_A16B16G16R16F); + // ... 
+ + s << header; + + uint32 * r = (uint32 *)img->channel(base_component + 0); + uint32 * g = (uint32 *)img->channel(base_component + 1); + uint32 * b = (uint32 *)img->channel(base_component + 2); + uint32 * a = (uint32 *)img->channel(base_component + 3); + + const uint size = img->width() * img->height(); + for (uint i = 0; i < size; i++) { + uint16 R = half_from_float( *r++ ); + uint16 G = half_from_float( *g++ ); + uint16 B = half_from_float( *b++ ); + uint16 A = half_from_float( *a++ ); + + s.serialize(&R, sizeof(uint16)); + s.serialize(&G, sizeof(uint16)); + s.serialize(&B, sizeof(uint16)); + s.serialize(&A, sizeof(uint16)); + } + + return true; +} diff --git a/src/nvimage/PixelFormat.h b/src/nvimage/PixelFormat.h index 9a702e7..8ccf2c1 100644 --- a/src/nvimage/PixelFormat.h +++ b/src/nvimage/PixelFormat.h @@ -85,7 +85,7 @@ namespace nv float result; int offset = 0; do { - uint i = offset + f * (float(1 << inbits) - 1); + uint i = offset + uint(f * (float(1 << inbits) - 1)); i = convert(i, inbits, outbits); result = float(i) / (float(1 << outbits) - 1); offset++; diff --git a/src/nvmath/SimdVector.h b/src/nvmath/SimdVector.h new file mode 100755 index 0000000..b84ea6f --- /dev/null +++ b/src/nvmath/SimdVector.h @@ -0,0 +1,24 @@ +// This code is in the public domain -- Ignacio Castaņo + +#include "Vector.h" // Vector3, Vector4 + + +// Set some reasonable defaults. 
+#ifndef NV_USE_ALTIVEC +# define NV_USE_ALTIVEC POSH_CPU_PPC +#endif + +#ifndef NV_USE_SSE +# if NV_CPU_X86 || NV_CPU_X86_64 +# define NV_USE_SSE 2 +# endif +#endif + + +#if NV_USE_ALTIVEC +# include "SimdVector_VE.h" +#endif + +#if NV_USE_SSE +# include "SimdVector_SSE.h" +#endif diff --git a/src/nvmath/SimdVector_SSE.h b/src/nvmath/SimdVector_SSE.h new file mode 100755 index 0000000..8677322 --- /dev/null +++ b/src/nvmath/SimdVector_SSE.h @@ -0,0 +1,201 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+	-------------------------------------------------------------------------- */
+
+#ifndef NV_SIMD_VECTOR_SSE_H
+#define NV_SIMD_VECTOR_SSE_H
+
+#include <xmmintrin.h> // SSE: __m128 and the _mm_*_ps intrinsics
+#if (NV_USE_SSE > 1)
+#include <emmintrin.h> // SSE2: _mm_cvttps_epi32 / _mm_cvtepi32_ps used by truncate()
+#endif
+
+namespace nv {
+    // Four packed floats held in one SSE register. NV_ALIGN_16, Vector3 and Vector4 must already be declared by the includer.
+    class SimdVector
+    {
+    public: // everything is public: the free functions below read 'vec' directly
+        __m128 vec;
+        typedef SimdVector const& Arg;
+
+        SimdVector() {} // 'vec' is left uninitialized
+        explicit SimdVector(float f) : vec(_mm_set1_ps(f)) {}
+        explicit SimdVector(__m128 v) : vec(v) {}
+        SimdVector(const SimdVector & arg) : vec(arg.vec) {}
+
+        SimdVector & operator=(const SimdVector & arg)
+        {
+            vec = arg.vec;
+            return *this;
+        }
+        // Aligned load of four floats: 'v' must be 16-byte aligned (_mm_load_ps).
+        SimdVector(const float * v)
+        {
+            vec = _mm_load_ps( v );
+        }
+        // _mm_setr_ps keeps argument order, so x lands in the lowest lane.
+        SimdVector(float x, float y, float z, float w)
+        {
+            vec = _mm_setr_ps( x, y, z, w );
+        }
+        // Returns the lowest lane (x).
+        float toFloat() const
+        {
+            NV_ALIGN_16 float f;
+            _mm_store_ss(&f, vec);
+            return f;
+        }
+        // Returns lanes x, y, z; the fourth lane is dropped.
+        Vector3 toVector3() const
+        {
+            NV_ALIGN_16 float c[4];
+            _mm_store_ps( c, vec );
+            return Vector3( c[0], c[1], c[2] );
+        }
+        // Returns all four lanes.
+        Vector4 toVector4() const
+        {
+            NV_ALIGN_16 float c[4];
+            _mm_store_ps( c, vec ); // fixed: used to store into 'v.components', but no 'v' exists here
+            return Vector4( c[0], c[1], c[2], c[3] );
+        }
+        // splatN(): broadcast lane N to all four lanes.
+#define SSE_SPLAT( a ) ((a) | ((a) << 2) | ((a) << 4) | ((a) << 6))
+        SimdVector splatX() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 0 ) ) ); }
+        SimdVector splatY() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 1 ) ) ); }
+        SimdVector splatZ() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 2 ) ) ); }
+        SimdVector splatW() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 3 ) ) ); }
+#undef SSE_SPLAT
+
+        SimdVector& operator+=( Arg v )
+        {
+            vec = _mm_add_ps( vec, v.vec );
+            return *this;
+        }
+
+        SimdVector& operator-=( Arg v )
+        {
+            vec = _mm_sub_ps( vec, v.vec );
+            return *this;
+        }
+
+        SimdVector& operator*=( Arg v )
+        {
+            vec = _mm_mul_ps( vec, v.vec );
+            return *this;
+        }
+    };
+
+    // The free functions are 'inline': a non-inline definition in a header is a duplicate symbol as soon as two translation units include it.
+    inline SimdVector operator+( SimdVector::Arg left, SimdVector::Arg right )
+    {
+        return SimdVector( _mm_add_ps( left.vec, right.vec ) );
+    }
+
+    inline SimdVector operator-( SimdVector::Arg left, SimdVector::Arg right )
+    {
+        return SimdVector( _mm_sub_ps( left.vec, right.vec ) );
+    }
+
+    inline SimdVector operator*( SimdVector::Arg left, SimdVector::Arg right )
+    {
+        return SimdVector( _mm_mul_ps( left.vec, right.vec ) );
+    }
+
+    // Returns a*b + c
+    inline SimdVector multiplyAdd( SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c )
+    {
+        return SimdVector( _mm_add_ps( _mm_mul_ps( a.vec, b.vec ), c.vec ) );
+    }
+
+    // Returns -( a*b - c )
+    inline SimdVector negativeMultiplySubtract( SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c )
+    {
+        return SimdVector( _mm_sub_ps( c.vec, _mm_mul_ps( a.vec, b.vec ) ) );
+    }
+    // Per-lane 1/v: hardware estimate refined once.
+    inline SimdVector reciprocal( SimdVector::Arg v )
+    {
+        // get the reciprocal estimate
+        __m128 estimate = _mm_rcp_ps( v.vec );
+
+        // one round of Newton-Raphson refinement: e' = e + e*(1 - e*v)
+        __m128 diff = _mm_sub_ps( _mm_set1_ps( 1.0f ), _mm_mul_ps( estimate, v.vec ) );
+        return SimdVector( _mm_add_ps( _mm_mul_ps( diff, estimate ), estimate ) );
+    }
+
+    inline SimdVector min( SimdVector::Arg left, SimdVector::Arg right )
+    {
+        return SimdVector( _mm_min_ps( left.vec, right.vec ) );
+    }
+
+    inline SimdVector max( SimdVector::Arg left, SimdVector::Arg right )
+    {
+        return SimdVector( _mm_max_ps( left.vec, right.vec ) );
+    }
+    // Rounds every lane toward zero (float -> int32 -> float round trip).
+    inline SimdVector truncate( SimdVector::Arg v )
+    {
+#if (NV_USE_SSE == 1)
+        // convert to ints
+        __m128 input = v.vec;
+        __m64 lo = _mm_cvttps_pi32( input );
+        __m64 hi = _mm_cvttps_pi32( _mm_movehl_ps( input, input ) );
+
+        // convert to floats
+        __m128 part = _mm_movelh_ps( input, _mm_cvtpi32_ps( input, hi ) );
+        __m128 truncated = _mm_cvtpi32_ps( part, lo );
+
+        // clear out the MMX multimedia state to allow FP calls later
+        _mm_empty();
+        return SimdVector( truncated );
+#else
+        // use SSE2 instructions
+        return SimdVector( _mm_cvtepi32_ps( _mm_cvttps_epi32( v.vec ) ) );
+#endif
+    }
+    // Per-lane mask: all bits set where left == right, zero elsewhere.
+    inline SimdVector compareEqual( SimdVector::Arg left, SimdVector::Arg right )
+    {
+        return SimdVector( _mm_cmpeq_ps( left.vec, right.vec ) );
+    }
+    // Bitwise blend: takes 'on' where 'bits' is set and 'off' where it is clear.
+    inline SimdVector select( SimdVector::Arg off, SimdVector::Arg on, SimdVector::Arg bits )
+    {
+        __m128 a = _mm_andnot_ps( bits.vec, off.vec );
+        __m128 b = _mm_and_ps( bits.vec, on.vec );
+
+        return SimdVector( _mm_or_ps( a, b ) );
+    }
+    // True when left < right in at least one lane.
+    inline bool compareAnyLessThan( SimdVector::Arg left, SimdVector::Arg right )
+    {
+        __m128 bits = _mm_cmplt_ps( left.vec, right.vec );
+        int value = _mm_movemask_ps( bits );
+        return value != 0;
+    }
+
+} // namespace nv
+
+#endif // NV_SIMD_VECTOR_SSE_H
diff --git a/src/nvmath/SimdVector_VE.h b/src/nvmath/SimdVector_VE.h
new file mode 100755
index 0000000..dabb525
--- /dev/null
+++ b/src/nvmath/SimdVector_VE.h
@@ -0,0 +1,187 @@
+/* -----------------------------------------------------------------------------
+
+	Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
+
+	Permission is hereby granted, free of charge, to any person obtaining
+	a copy of this software and associated documentation files (the
+	"Software"), to deal in the Software without restriction, including
+	without limitation the rights to use, copy, modify, merge, publish,
+	distribute, sublicense, and/or sell copies of the Software, and to
+	permit persons to whom the Software is furnished to do so, subject to
+	the following conditions:
+
+	The above copyright notice and this permission notice shall be included
+	in all copies or substantial portions of the Software.
+
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+	-------------------------------------------------------------------------- */
+
+#ifndef NV_SIMD_VECTOR_VE_H
+#define NV_SIMD_VECTOR_VE_H
+
+#ifndef __APPLE_ALTIVEC__
+#include <altivec.h>
+#undef bool
+#endif
+
+namespace nv {
+    // Four packed floats held in one AltiVec register. Vector3 and Vector4 must already be declared by the includer.
+    class SimdVector
+    {
+    public: // everything is public: the free functions below read 'vec' directly
+        vector float vec;
+        typedef SimdVector Arg;
+
+        SimdVector() {} // 'vec' is left uninitialized
+        explicit SimdVector(float v) { union { vector float q; float c[4]; } u; u.c[0] = u.c[1] = u.c[2] = u.c[3] = v; vec = u.q; } // fixed: used to initialise from an undeclared 'X'; now splats through the same union as the other constructors
+        explicit SimdVector(vector float v) : vec(v) {}
+        SimdVector(const SimdVector & arg) : vec(arg.vec) {}
+
+        SimdVector& operator=(const SimdVector & arg)
+        {
+            vec = arg.vec;
+            return *this;
+        }
+        // Copies v[0..3] lane by lane through a union.
+        SimdVector(const float * v)
+        {
+            union { vector float v; float c[4]; } u;
+            u.c[0] = v[0];
+            u.c[1] = v[1];
+            u.c[2] = v[2];
+            u.c[3] = v[3];
+            vec = u.v;
+        }
+
+        SimdVector(float x, float y, float z, float w)
+        {
+            union { vector float v; float c[4]; } u;
+            u.c[0] = x;
+            u.c[1] = y;
+            u.c[2] = z;
+            u.c[3] = w;
+            vec = u.v;
+        }
+        // Returns lane 0 (x).
+        float toFloat() const
+        {
+            union { vector float v; float c[4]; } u;
+            u.v = vec;
+            return u.c[0];
+        }
+        // Returns lanes x, y, z; the fourth lane is dropped.
+        Vector3 toVector3() const
+        {
+            union { vector float v; float c[4]; } u;
+            u.v = vec;
+            return Vector3( u.c[0], u.c[1], u.c[2] );
+        }
+        // Returns all four lanes.
+        Vector4 toVector4() const
+        {
+            union { vector float v; float c[4]; } u;
+            u.v = vec;
+            return Vector4( u.c[0], u.c[1], u.c[2], u.c[3] );
+        }
+        // splatN(): broadcast lane N to all four lanes.
+        SimdVector splatX() const { return SimdVector( vec_splat( vec, 0 ) ); }
+        SimdVector splatY() const { return SimdVector( vec_splat( vec, 1 ) ); }
+        SimdVector splatZ() const { return SimdVector( vec_splat( vec, 2 ) ); }
+        SimdVector splatW() const { return SimdVector( vec_splat( vec, 3 ) ); }
+
+        SimdVector& operator+=( Arg v )
+        {
+            vec = vec_add( vec, v.vec );
+            return *this;
+        }
+
+        SimdVector& operator-=( Arg v )
+        {
+            vec = vec_sub( vec, v.vec );
+            return *this;
+        }
+        // Multiply expressed as a fused multiply-add with a -0.0f addend.
+        SimdVector& operator*=( Arg v )
+        {
+            vec = vec_madd( vec, v.vec, ( vector float )( -0.0f ) );
+            return *this;
+        }
+    };
+    // The free functions are 'inline': a non-inline definition in a header is a duplicate symbol as soon as two translation units include it.
+    inline SimdVector operator+( SimdVector::Arg left, SimdVector::Arg right )
+    {
+        return SimdVector( vec_add( left.vec, right.vec ) );
+    }
+
+    inline SimdVector operator-( SimdVector::Arg left, SimdVector::Arg right )
+    {
+        return SimdVector( vec_sub( left.vec, right.vec ) );
+    }
+
+    inline SimdVector operator*( SimdVector::Arg left, SimdVector::Arg right )
+    {
+        return SimdVector( vec_madd( left.vec, right.vec, ( vector float )( -0.0f ) ) );
+    }
+
+    // Returns a*b + c
+    inline SimdVector multiplyAdd( SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c )
+    {
+        return SimdVector( vec_madd( a.vec, b.vec, c.vec ) );
+    }
+
+    // Returns -( a*b - c )
+    inline SimdVector negativeMultiplySubtract( SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c )
+    {
+        return SimdVector( vec_nmsub( a.vec, b.vec, c.vec ) );
+    }
+    // Per-lane 1/v: hardware estimate refined once.
+    inline SimdVector reciprocal( SimdVector::Arg v )
+    {
+        // get the reciprocal estimate
+        vector float estimate = vec_re( v.vec );
+
+        // one round of Newton-Raphson refinement: e' = e + e*(1 - e*v)
+        vector float diff = vec_nmsub( estimate, v.vec, ( vector float )( 1.0f ) );
+        return SimdVector( vec_madd( diff, estimate, estimate ) );
+    }
+
+    inline SimdVector min( SimdVector::Arg left, SimdVector::Arg right )
+    {
+        return SimdVector( vec_min( left.vec, right.vec ) );
+    }
+
+    inline SimdVector max( SimdVector::Arg left, SimdVector::Arg right )
+    {
+        return SimdVector( vec_max( left.vec, right.vec ) );
+    }
+
+    inline SimdVector truncate( SimdVector::Arg v )
+    {
+        return SimdVector( vec_trunc( v.vec ) );
+    }
+    // Result of vec_cmpeq reinterpreted as floats, for use as the 'bits' argument of select().
+    inline SimdVector compareEqual( SimdVector::Arg left, SimdVector::Arg right )
+    {
+        return SimdVector( ( vector float )vec_cmpeq( left.vec, right.vec ) );
+    }
+    // Bitwise blend via vec_sel, with 'bits' reinterpreted as an integer mask.
+    inline SimdVector select( SimdVector::Arg off, SimdVector::Arg on, SimdVector::Arg bits )
+    {
+        return SimdVector( vec_sel( off.vec, on.vec, ( vector unsigned int )bits.vec ) );
+    }
+    // True when left < right in at least one lane.
+    inline bool compareAnyLessThan( SimdVector::Arg left, SimdVector::Arg right )
+    {
+        return vec_any_lt( left.vec, right.vec ) != 0;
+    }
+
+} // namespace nv
+
+#endif // NV_SIMD_VECTOR_VE_H
diff --git a/src/nvtt/CompressorDX9.cpp b/src/nvtt/CompressorDX9.cpp
index 5a4eae4..cd6ba84 100644
--- 
a/src/nvtt/CompressorDX9.cpp +++ b/src/nvtt/CompressorDX9.cpp @@ -1,554 +1,576 @@ // Copyright (c) 2009-2011 Ignacio Castano // Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. 
- -#include "CompressorDX9.h" -#include "QuickCompressDXT.h" -#include "OptimalCompressDXT.h" -#include "CompressionOptions.h" -#include "OutputOptions.h" - -// squish -#include "squish/colourset.h" -#include "squish/weightedclusterfit.h" - -#include "nvtt.h" - -#include "nvcore/Memory.h" - -#include "nvimage/Image.h" -#include "nvimage/ColorBlock.h" -#include "nvimage/BlockDXT.h" - -#include // placement new - -// s3_quant -#if defined(HAVE_S3QUANT) -#include "s3tc/s3_quant.h" -#endif - -// ati tc -#if defined(HAVE_ATITC) -typedef int BOOL; -typedef _W64 unsigned long ULONG_PTR; -typedef ULONG_PTR DWORD_PTR; -#include "atitc/ATI_Compress.h" -#endif - -// squish -#if defined(HAVE_SQUISH) -//#include "squish/squish.h" -#include "squish-1.10/squish.h" -#endif - -// d3dx -#if defined(HAVE_D3DX) -#include -#endif - -// stb -#if defined(HAVE_STB) -#define STB_DEFINE -#include "stb/stb_dxt.h" -#endif - -using namespace nv; -using namespace nvtt; - - -void FastCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - BlockDXT1 * block = new(output) BlockDXT1; - QuickCompress::compressDXT1(rgba, block); -} - -void FastCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - BlockDXT1 * block = new(output) BlockDXT1; - QuickCompress::compressDXT1a(rgba, block); -} - -void FastCompressorDXT3::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - BlockDXT3 * block = new(output) BlockDXT3; - QuickCompress::compressDXT3(rgba, block); -} - -void FastCompressorDXT5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - BlockDXT5 * block = new(output) BlockDXT5; - QuickCompress::compressDXT5(rgba, block); -} - 
-void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - rgba.swizzle(4, 1, 5, 0); // 0xFF, G, 0, R - - BlockDXT5 * block = new(output) BlockDXT5; - QuickCompress::compressDXT5(rgba, block); -} - - -void NormalCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - nvsquish::WeightedClusterFit fit; - fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); - - if (rgba.isSingleColor()) - { - BlockDXT1 * block = new(output) BlockDXT1; - OptimalCompress::compressDXT1(rgba.color(0), block); - } - else - { - nvsquish::ColourSet colours((uint8 *)rgba.colors(), 0); - fit.SetColourSet(&colours, nvsquish::kDxt1); - fit.Compress(output); - } -} - - -void NormalCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - uint alphaMask = 0; - for (uint i = 0; i < 16; i++) - { - if (rgba.color(i).a == 0) alphaMask |= (3 << (i * 2)); // Set two bits for each color. 
- } - - const bool isSingleColor = rgba.isSingleColor(); - - if (isSingleColor) - { - BlockDXT1 * block = new(output) BlockDXT1; - OptimalCompress::compressDXT1a(rgba.color(0), alphaMask, block); - } - else - { - nvsquish::WeightedClusterFit fit; - fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); - - int flags = nvsquish::kDxt1; - if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; - - nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags); - fit.SetColourSet(&colours, nvsquish::kDxt1); - - fit.Compress(output); - } -} - - -void NormalCompressorDXT3::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - BlockDXT3 * block = new(output) BlockDXT3; - - // Compress explicit alpha. - OptimalCompress::compressDXT3A(rgba, &block->alpha); - - // Compress color. - if (rgba.isSingleColor()) - { - OptimalCompress::compressDXT1(rgba.color(0), &block->color); - } - else - { - nvsquish::WeightedClusterFit fit; - fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); - - int flags = 0; - if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; - - nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags); - fit.SetColourSet(&colours, 0); - fit.Compress(&block->color); - } -} - - -void NormalCompressorDXT5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - BlockDXT5 * block = new(output) BlockDXT5; - - // Compress alpha. - if (compressionOptions.quality == Quality_Highest) - { - OptimalCompress::compressDXT5A(rgba, &block->alpha); - } - else - { - QuickCompress::compressDXT5A(rgba, &block->alpha); - } - - // Compress color. 
- if (rgba.isSingleColor()) - { - OptimalCompress::compressDXT1(rgba.color(0), &block->color); - } - else - { - nvsquish::WeightedClusterFit fit; - fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); - - int flags = 0; - if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; - - nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags); - fit.SetColourSet(&colours, 0); - fit.Compress(&block->color); - } -} - - -void NormalCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - BlockDXT5 * block = new(output) BlockDXT5; - - // Compress Y. - if (compressionOptions.quality == Quality_Highest) - { - OptimalCompress::compressDXT1G(rgba, &block->color); - } - else - { - if (rgba.isSingleColor(Color32(0, 0xFF, 0, 0))) // Mask all but green channel. - { - OptimalCompress::compressDXT1G(rgba.color(0).g, &block->color); - } - else - { - ColorBlock tile = rgba; - tile.swizzle(4, 1, 5, 3); // leave alpha in alpha channel. - - nvsquish::WeightedClusterFit fit; - fit.SetMetric(0, 1, 0); - - int flags = 0; - if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; - - nvsquish::ColourSet colours((uint8 *)tile.colors(), flags); - fit.SetColourSet(&colours, 0); - fit.Compress(&block->color); - } - } - - rgba.swizzle(4, 1, 5, 0); // 1, G, 0, R - - // Compress X. 
- if (compressionOptions.quality == Quality_Highest) - { - OptimalCompress::compressDXT5A(rgba, &block->alpha); - } - else - { - QuickCompress::compressDXT5A(rgba, &block->alpha); - } -} - - -#if defined(HAVE_S3QUANT) - -void S3CompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - float error = 0.0f; - - BlockDXT1 dxtBlock3; - BlockDXT1 dxtBlock4; - ColorBlock block; - - for (uint y = 0; y < h; y += 4) { - for (uint x = 0; x < w; x += 4) { - block.init(inputFormat, w, h, data, x, y); - - // Init rgb block. - RGBBlock rgbBlock; - rgbBlock.n = 16; - for (uint i = 0; i < 16; i++) { - rgbBlock.colorChannel[i][0] = clamp(float(block.color(i).r) / 255.0f, 0.0f, 1.0f); - rgbBlock.colorChannel[i][1] = clamp(float(block.color(i).g) / 255.0f, 0.0f, 1.0f); - rgbBlock.colorChannel[i][2] = clamp(float(block.color(i).b) / 255.0f, 0.0f, 1.0f); - } - rgbBlock.weight[0] = 1.0f; - rgbBlock.weight[1] = 1.0f; - rgbBlock.weight[2] = 1.0f; - - rgbBlock.inLevel = 4; - CodeRGBBlock(&rgbBlock); - - // Copy results to DXT block. - dxtBlock4.col0.r = rgbBlock.endPoint[0][0]; - dxtBlock4.col0.g = rgbBlock.endPoint[0][1]; - dxtBlock4.col0.b = rgbBlock.endPoint[0][2]; - - dxtBlock4.col1.r = rgbBlock.endPoint[1][0]; - dxtBlock4.col1.g = rgbBlock.endPoint[1][1]; - dxtBlock4.col1.b = rgbBlock.endPoint[1][2]; - - dxtBlock4.setIndices(rgbBlock.index); - - if (dxtBlock4.col0.u < dxtBlock4.col1.u) { - swap(dxtBlock4.col0.u, dxtBlock4.col1.u); - dxtBlock4.indices ^= 0x55555555; - } - - uint error4 = blockError(block, dxtBlock4); - - rgbBlock.inLevel = 3; - - CodeRGBBlock(&rgbBlock); - - // Copy results to DXT block. 
- dxtBlock3.col0.r = rgbBlock.endPoint[0][0]; - dxtBlock3.col0.g = rgbBlock.endPoint[0][1]; - dxtBlock3.col0.b = rgbBlock.endPoint[0][2]; - - dxtBlock3.col1.r = rgbBlock.endPoint[1][0]; - dxtBlock3.col1.g = rgbBlock.endPoint[1][1]; - dxtBlock3.col1.b = rgbBlock.endPoint[1][2]; - - dxtBlock3.setIndices(rgbBlock.index); - - if (dxtBlock3.col0.u > dxtBlock3.col1.u) { - swap(dxtBlock3.col0.u, dxtBlock3.col1.u); - dxtBlock3.indices ^= (~dxtBlock3.indices >> 1) & 0x55555555; - } - - uint error3 = blockError(block, dxtBlock3); - - if (error3 < error4) { - error += error3; - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(&dxtBlock3, sizeof(dxtBlock3)); - } - } - else { - error += error4; - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(&dxtBlock4, sizeof(dxtBlock4)); - } - } - } - } -} - -#endif // defined(HAVE_S3QUANT) - - -#if defined(HAVE_ATITC) - -void AtiCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - // Init source texture - ATI_TC_Texture srcTexture; - srcTexture.dwSize = sizeof(srcTexture); - srcTexture.dwWidth = w; - srcTexture.dwHeight = h; - if (inputFormat == nvtt::InputFormat_BGRA_8UB) - { - srcTexture.dwPitch = w * 4; - srcTexture.format = ATI_TC_FORMAT_ARGB_8888; - } - else - { - srcTexture.dwPitch = w * 16; - srcTexture.format = ATI_TC_FORMAT_ARGB_32F; - } - srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture); - srcTexture.pData = (ATI_TC_BYTE*) data; - - // Init dest texture - ATI_TC_Texture destTexture; - destTexture.dwSize = sizeof(destTexture); - destTexture.dwWidth = w; - destTexture.dwHeight = h; - destTexture.dwPitch = 0; - destTexture.format = ATI_TC_FORMAT_DXT1; - destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture); - destTexture.pData = (ATI_TC_BYTE*) 
mem::malloc(destTexture.dwDataSize); - - ATI_TC_CompressOptions options; - options.dwSize = sizeof(options); - options.bUseChannelWeighting = false; - options.bUseAdaptiveWeighting = false; - options.bDXT1UseAlpha = false; - options.nCompressionSpeed = ATI_TC_Speed_Normal; - options.bDisableMultiThreading = false; - //options.bDisableMultiThreading = true; - - // Compress - ATI_TC_ConvertTexture(&srcTexture, &destTexture, &options, NULL, NULL, NULL); - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize); - } - - mem::free(destTexture.pData); -} - -void AtiCompressorDXT5::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - // Init source texture - ATI_TC_Texture srcTexture; - srcTexture.dwSize = sizeof(srcTexture); - srcTexture.dwWidth = w; - srcTexture.dwHeight = h; - if (inputFormat == nvtt::InputFormat_BGRA_8UB) - { - srcTexture.dwPitch = w * 4; - srcTexture.format = ATI_TC_FORMAT_ARGB_8888; - } - else - { - srcTexture.dwPitch = w * 16; - srcTexture.format = ATI_TC_FORMAT_ARGB_32F; - } - srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture); - srcTexture.pData = (ATI_TC_BYTE*) data; - - // Init dest texture - ATI_TC_Texture destTexture; - destTexture.dwSize = sizeof(destTexture); - destTexture.dwWidth = w; - destTexture.dwHeight = h; - destTexture.dwPitch = 0; - destTexture.format = ATI_TC_FORMAT_DXT5; - destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture); - destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize); - - // Compress - ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL); - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize); - } - - mem::free(destTexture.pData); -} - -#endif // 
defined(HAVE_ATITC) - -#if defined(HAVE_SQUISH) - -void SquishCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ -#pragma message(NV_FILE_LINE "TODO: Convert input to fixed point ABGR format instead of ARGB") - /* - Image img(*image); - int count = img.width() * img.height(); - for (int i = 0; i < count; i++) - { - Color32 c = img.pixel(i); - img.pixel(i) = Color32(c.b, c.g, c.r, c.a); - } - - int size = squish::GetStorageRequirements(img.width(), img.height(), squish::kDxt1); - void * blocks = mem::malloc(size); - - squish::CompressImage((const squish::u8 *)img.pixels(), img.width(), img.height(), blocks, squish::kDxt1 | squish::kColourClusterFit); - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(blocks, size); - } - - mem::free(blocks); - */ -} - -#endif // defined(HAVE_SQUISH) - - -#if defined(HAVE_D3DX) - -void D3DXCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - IDirect3D9 * d3d = Direct3DCreate9(D3D_SDK_VERSION); - - D3DPRESENT_PARAMETERS presentParams; - ZeroMemory(&presentParams, sizeof(presentParams)); - presentParams.Windowed = TRUE; - presentParams.SwapEffect = D3DSWAPEFFECT_COPY; - presentParams.BackBufferWidth = 8; - presentParams.BackBufferHeight = 8; - presentParams.BackBufferFormat = D3DFMT_UNKNOWN; - - HRESULT err; - - IDirect3DDevice9 * device = NULL; - err = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GetDesktopWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING, &presentParams, &device); - - IDirect3DTexture9 * texture = NULL; - err = D3DXCreateTexture(device, w, h, 1, 0, D3DFMT_DXT1, D3DPOOL_SYSTEMMEM, &texture); - - IDirect3DSurface9 * surface = 
NULL; - err = texture->GetSurfaceLevel(0, &surface); - - RECT rect; - rect.left = 0; - rect.top = 0; - rect.bottom = h; - rect.right = w; - - if (inputFormat == nvtt::InputFormat_BGRA_8UB) - { - err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A8R8G8B8, w * 4, NULL, &rect, D3DX_DEFAULT, 0); - } - else - { - err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A32B32G32R32F, w * 16, NULL, &rect, D3DX_DEFAULT, 0); - } - - if (err != D3DERR_INVALIDCALL && err != D3DXERR_INVALIDDATA) - { - D3DLOCKED_RECT rect; - ZeroMemory(&rect, sizeof(rect)); - - err = surface->LockRect(&rect, NULL, D3DLOCK_READONLY); - - if (outputOptions.outputHandler != NULL) { - int size = rect.Pitch * ((h + 3) / 4); - outputOptions.outputHandler->writeData(rect.pBits, size); - } - - err = surface->UnlockRect(); - } - - surface->Release(); - device->Release(); - d3d->Release(); -} - -#endif // defined(HAVE_D3DX) - - -#if defined(HAVE_STB) - -void StbCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - rgba.swizzle(2, 1, 0, 3); // Swap R and B - stb_compress_dxt_block((unsigned char *)output, (unsigned char *)rgba.colors(), 0, 0); -} - - -#endif // defined(HAVE_STB) +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include "CompressorDX9.h" +#include "QuickCompressDXT.h" +#include "OptimalCompressDXT.h" +#include "CompressionOptions.h" +#include "OutputOptions.h" + +// squish +#include "squish/colourset.h" +#include "squish/weightedclusterfit.h" + +#include "nvtt.h" + +#include "nvcore/Memory.h" + +#include "nvimage/Image.h" +#include "nvimage/ColorBlock.h" +#include "nvimage/BlockDXT.h" + +#include // placement new + +// s3_quant +#if defined(HAVE_S3QUANT) +#include "s3tc/s3_quant.h" +#endif + +// ati tc +#if defined(HAVE_ATITC) +typedef int BOOL; +typedef _W64 unsigned long ULONG_PTR; +typedef ULONG_PTR DWORD_PTR; +#include "atitc/ATI_Compress.h" +#endif + +// squish +#if defined(HAVE_SQUISH) +//#include "squish/squish.h" +#include "squish-1.10/squish.h" +#endif + +// d3dx +#if defined(HAVE_D3DX) +#include +#endif + +// stb +#if defined(HAVE_STB) +#define STB_DEFINE +#include "stb/stb_dxt.h" +#endif + +using namespace nv; +using namespace nvtt; + + +void FastCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + BlockDXT1 * block = new(output) BlockDXT1; + QuickCompress::compressDXT1(rgba, block); +} + +void FastCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + BlockDXT1 * block = new(output) BlockDXT1; + QuickCompress::compressDXT1a(rgba, block); +} + +void 
FastCompressorDXT3::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + BlockDXT3 * block = new(output) BlockDXT3; + QuickCompress::compressDXT3(rgba, block); +} + +void FastCompressorDXT5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + BlockDXT5 * block = new(output) BlockDXT5; + QuickCompress::compressDXT5(rgba, block); +} + +void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + rgba.swizzle(4, 1, 5, 0); // 0xFF, G, 0, R + + BlockDXT5 * block = new(output) BlockDXT5; + QuickCompress::compressDXT5(rgba, block); +} + + +inline static Vector3 vec(nvsquish::Vec3 v) { return Vector3(v.X(), v.Y(), v.Z()); } + +void NormalCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + nvsquish::WeightedClusterFit fit; + fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); + + BlockDXT1 * block = new(output) BlockDXT1; + if (rgba.isSingleColor()) + { + OptimalCompress::compressDXT1(rgba.color(0), block); + } + else + { + nvsquish::ColourSet colours((uint8 *)rgba.colors(), 0); + fit.SetColourSet(&colours, nvsquish::kDxt1); + + nvsquish::Vec3 start, end; + + fit.Compress4(&start, &end); + QuickCompress::outputBlock4(rgba, vec(start), vec(end), block); + + if (fit.Compress3(&start, &end)) { + QuickCompress::outputBlock3(rgba, vec(start), vec(end), block); + } + } +} + + +void NormalCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ +#pragma NV_MESSAGE("NormalCompressorDXT1a - Not implemented!") + /* + uint alphaMask = 0; 
+ for (uint i = 0; i < 16; i++) + { + if (rgba.color(i).a == 0) alphaMask |= (3 << (i * 2)); // Set two bits for each color. + } + + const bool isSingleColor = rgba.isSingleColor(); + + if (isSingleColor) + { + BlockDXT1 * block = new(output) BlockDXT1; + OptimalCompress::compressDXT1a(rgba.color(0), alphaMask, block); + } + else + { + nvsquish::WeightedClusterFit fit; + fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); + + int flags = nvsquish::kDxt1; + if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; + + nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags); + fit.SetColourSet(&colours, nvsquish::kDxt1); + + fit.Compress(output); + } + */ +} + + +void NormalCompressorDXT3::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ +#pragma NV_MESSAGE("NormalCompressorDXT1a - Not implemented!") + /* + BlockDXT3 * block = new(output) BlockDXT3; + + // Compress explicit alpha. + OptimalCompress::compressDXT3A(rgba, &block->alpha); + + // Compress color. + if (rgba.isSingleColor()) + { + OptimalCompress::compressDXT1(rgba.color(0), &block->color); + } + else + { + nvsquish::WeightedClusterFit fit; + fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); + + int flags = 0; + if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; + + nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags); + fit.SetColourSet(&colours, 0); + fit.Compress(&block->color); + } + */ +} + + +void NormalCompressorDXT5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ +#pragma NV_MESSAGE("NormalCompressorDXT1a - Not implemented!") + /* + BlockDXT5 * block = new(output) BlockDXT5; + + // Compress alpha. 
+ if (compressionOptions.quality == Quality_Highest) + { + OptimalCompress::compressDXT5A(rgba, &block->alpha); + } + else + { + QuickCompress::compressDXT5A(rgba, &block->alpha); + } + + // Compress color. + if (rgba.isSingleColor()) + { + OptimalCompress::compressDXT1(rgba.color(0), &block->color); + } + else + { + nvsquish::WeightedClusterFit fit; + fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); + + int flags = 0; + if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; + + nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags); + fit.SetColourSet(&colours, 0); + fit.Compress(&block->color); + } + */ +} + + +void NormalCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ +#pragma NV_MESSAGE("NormalCompressorDXT1a - Not implemented!") + /* + BlockDXT5 * block = new(output) BlockDXT5; + + // Compress Y. + if (compressionOptions.quality == Quality_Highest) + { + OptimalCompress::compressDXT1G(rgba, &block->color); + } + else + { + if (rgba.isSingleColor(Color32(0, 0xFF, 0, 0))) // Mask all but green channel. + { + OptimalCompress::compressDXT1G(rgba.color(0).g, &block->color); + } + else + { + ColorBlock tile = rgba; + tile.swizzle(4, 1, 5, 3); // leave alpha in alpha channel. + + nvsquish::WeightedClusterFit fit; + fit.SetMetric(0, 1, 0); + + int flags = 0; + if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; + + nvsquish::ColourSet colours((uint8 *)tile.colors(), flags); + fit.SetColourSet(&colours, 0); + fit.Compress(&block->color); + } + } + + rgba.swizzle(4, 1, 5, 0); // 1, G, 0, R + + // Compress X. 
+ if (compressionOptions.quality == Quality_Highest) + { + OptimalCompress::compressDXT5A(rgba, &block->alpha); + } + else + { + QuickCompress::compressDXT5A(rgba, &block->alpha); + } + */ +} + + +#if defined(HAVE_S3QUANT) + +void S3CompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +{ + float error = 0.0f; + + BlockDXT1 dxtBlock3; + BlockDXT1 dxtBlock4; + ColorBlock block; + + for (uint y = 0; y < h; y += 4) { + for (uint x = 0; x < w; x += 4) { + block.init(inputFormat, w, h, data, x, y); + + // Init rgb block. + RGBBlock rgbBlock; + rgbBlock.n = 16; + for (uint i = 0; i < 16; i++) { + rgbBlock.colorChannel[i][0] = clamp(float(block.color(i).r) / 255.0f, 0.0f, 1.0f); + rgbBlock.colorChannel[i][1] = clamp(float(block.color(i).g) / 255.0f, 0.0f, 1.0f); + rgbBlock.colorChannel[i][2] = clamp(float(block.color(i).b) / 255.0f, 0.0f, 1.0f); + } + rgbBlock.weight[0] = 1.0f; + rgbBlock.weight[1] = 1.0f; + rgbBlock.weight[2] = 1.0f; + + rgbBlock.inLevel = 4; + CodeRGBBlock(&rgbBlock); + + // Copy results to DXT block. + dxtBlock4.col0.r = rgbBlock.endPoint[0][0]; + dxtBlock4.col0.g = rgbBlock.endPoint[0][1]; + dxtBlock4.col0.b = rgbBlock.endPoint[0][2]; + + dxtBlock4.col1.r = rgbBlock.endPoint[1][0]; + dxtBlock4.col1.g = rgbBlock.endPoint[1][1]; + dxtBlock4.col1.b = rgbBlock.endPoint[1][2]; + + dxtBlock4.setIndices(rgbBlock.index); + + if (dxtBlock4.col0.u < dxtBlock4.col1.u) { + swap(dxtBlock4.col0.u, dxtBlock4.col1.u); + dxtBlock4.indices ^= 0x55555555; + } + + uint error4 = blockError(block, dxtBlock4); + + rgbBlock.inLevel = 3; + + CodeRGBBlock(&rgbBlock); + + // Copy results to DXT block. 
+ dxtBlock3.col0.r = rgbBlock.endPoint[0][0]; + dxtBlock3.col0.g = rgbBlock.endPoint[0][1]; + dxtBlock3.col0.b = rgbBlock.endPoint[0][2]; + + dxtBlock3.col1.r = rgbBlock.endPoint[1][0]; + dxtBlock3.col1.g = rgbBlock.endPoint[1][1]; + dxtBlock3.col1.b = rgbBlock.endPoint[1][2]; + + dxtBlock3.setIndices(rgbBlock.index); + + if (dxtBlock3.col0.u > dxtBlock3.col1.u) { + swap(dxtBlock3.col0.u, dxtBlock3.col1.u); + dxtBlock3.indices ^= (~dxtBlock3.indices >> 1) & 0x55555555; + } + + uint error3 = blockError(block, dxtBlock3); + + if (error3 < error4) { + error += error3; + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(&dxtBlock3, sizeof(dxtBlock3)); + } + } + else { + error += error4; + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(&dxtBlock4, sizeof(dxtBlock4)); + } + } + } + } +} + +#endif // defined(HAVE_S3QUANT) + + +#if defined(HAVE_ATITC) + +void AtiCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +{ + // Init source texture + ATI_TC_Texture srcTexture; + srcTexture.dwSize = sizeof(srcTexture); + srcTexture.dwWidth = w; + srcTexture.dwHeight = h; + if (inputFormat == nvtt::InputFormat_BGRA_8UB) + { + srcTexture.dwPitch = w * 4; + srcTexture.format = ATI_TC_FORMAT_ARGB_8888; + } + else + { + srcTexture.dwPitch = w * 16; + srcTexture.format = ATI_TC_FORMAT_ARGB_32F; + } + srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture); + srcTexture.pData = (ATI_TC_BYTE*) data; + + // Init dest texture + ATI_TC_Texture destTexture; + destTexture.dwSize = sizeof(destTexture); + destTexture.dwWidth = w; + destTexture.dwHeight = h; + destTexture.dwPitch = 0; + destTexture.format = ATI_TC_FORMAT_DXT1; + destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture); + destTexture.pData = (ATI_TC_BYTE*) 
mem::malloc(destTexture.dwDataSize); + + ATI_TC_CompressOptions options; + options.dwSize = sizeof(options); + options.bUseChannelWeighting = false; + options.bUseAdaptiveWeighting = false; + options.bDXT1UseAlpha = false; + options.nCompressionSpeed = ATI_TC_Speed_Normal; + options.bDisableMultiThreading = false; + //options.bDisableMultiThreading = true; + + // Compress + ATI_TC_ConvertTexture(&srcTexture, &destTexture, &options, NULL, NULL, NULL); + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize); + } + + mem::free(destTexture.pData); +} + +void AtiCompressorDXT5::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +{ + // Init source texture + ATI_TC_Texture srcTexture; + srcTexture.dwSize = sizeof(srcTexture); + srcTexture.dwWidth = w; + srcTexture.dwHeight = h; + if (inputFormat == nvtt::InputFormat_BGRA_8UB) + { + srcTexture.dwPitch = w * 4; + srcTexture.format = ATI_TC_FORMAT_ARGB_8888; + } + else + { + srcTexture.dwPitch = w * 16; + srcTexture.format = ATI_TC_FORMAT_ARGB_32F; + } + srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture); + srcTexture.pData = (ATI_TC_BYTE*) data; + + // Init dest texture + ATI_TC_Texture destTexture; + destTexture.dwSize = sizeof(destTexture); + destTexture.dwWidth = w; + destTexture.dwHeight = h; + destTexture.dwPitch = 0; + destTexture.format = ATI_TC_FORMAT_DXT5; + destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture); + destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize); + + // Compress + ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL); + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize); + } + + mem::free(destTexture.pData); +} + +#endif // 
defined(HAVE_ATITC) + +#if defined(HAVE_SQUISH) + +void SquishCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +{ +#pragma message(NV_FILE_LINE "TODO: Convert input to fixed point ABGR format instead of ARGB") + /* + Image img(*image); + int count = img.width() * img.height(); + for (int i = 0; i < count; i++) + { + Color32 c = img.pixel(i); + img.pixel(i) = Color32(c.b, c.g, c.r, c.a); + } + + int size = squish::GetStorageRequirements(img.width(), img.height(), squish::kDxt1); + void * blocks = mem::malloc(size); + + squish::CompressImage((const squish::u8 *)img.pixels(), img.width(), img.height(), blocks, squish::kDxt1 | squish::kColourClusterFit); + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(blocks, size); + } + + mem::free(blocks); + */ +} + +#endif // defined(HAVE_SQUISH) + + +#if defined(HAVE_D3DX) + +void D3DXCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +{ + IDirect3D9 * d3d = Direct3DCreate9(D3D_SDK_VERSION); + + D3DPRESENT_PARAMETERS presentParams; + ZeroMemory(&presentParams, sizeof(presentParams)); + presentParams.Windowed = TRUE; + presentParams.SwapEffect = D3DSWAPEFFECT_COPY; + presentParams.BackBufferWidth = 8; + presentParams.BackBufferHeight = 8; + presentParams.BackBufferFormat = D3DFMT_UNKNOWN; + + HRESULT err; + + IDirect3DDevice9 * device = NULL; + err = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GetDesktopWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING, &presentParams, &device); + + IDirect3DTexture9 * texture = NULL; + err = D3DXCreateTexture(device, w, h, 1, 0, D3DFMT_DXT1, D3DPOOL_SYSTEMMEM, &texture); + + IDirect3DSurface9 * surface = 
NULL; + err = texture->GetSurfaceLevel(0, &surface); + + RECT rect; + rect.left = 0; + rect.top = 0; + rect.bottom = h; + rect.right = w; + + if (inputFormat == nvtt::InputFormat_BGRA_8UB) + { + err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A8R8G8B8, w * 4, NULL, &rect, D3DX_DEFAULT, 0); + } + else + { + err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A32B32G32R32F, w * 16, NULL, &rect, D3DX_DEFAULT, 0); + } + + if (err != D3DERR_INVALIDCALL && err != D3DXERR_INVALIDDATA) + { + D3DLOCKED_RECT rect; + ZeroMemory(&rect, sizeof(rect)); + + err = surface->LockRect(&rect, NULL, D3DLOCK_READONLY); + + if (outputOptions.outputHandler != NULL) { + int size = rect.Pitch * ((h + 3) / 4); + outputOptions.outputHandler->writeData(rect.pBits, size); + } + + err = surface->UnlockRect(); + } + + surface->Release(); + device->Release(); + d3d->Release(); +} + +#endif // defined(HAVE_D3DX) + + +#if defined(HAVE_STB) + +void StbCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + rgba.swizzle(2, 1, 0, 3); // Swap R and B + stb_compress_dxt_block((unsigned char *)output, (unsigned char *)rgba.colors(), 0, 0); +} + + +#endif // defined(HAVE_STB) diff --git a/src/nvtt/QuickCompressDXT.cpp b/src/nvtt/QuickCompressDXT.cpp index 5f8e4b8..369b0d5 100644 --- a/src/nvtt/QuickCompressDXT.cpp +++ b/src/nvtt/QuickCompressDXT.cpp @@ -217,6 +217,33 @@ inline static uint computeIndices3(const ColorBlock & rgba, Vector3::Arg maxColo return indices; } +inline static uint computeIndices3(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor) +{ + Vector3 palette[4]; + palette[0] = minColor; + palette[1] = maxColor; + palette[2] = (palette[0] + palette[1]) * 0.5f; + + uint indices = 0; + for(int i = 0; i < 16; i++) + { + float d0 = colorDistance(palette[0], block[i]); + float d1 = colorDistance(palette[1], block[i]); + float d2 = 
colorDistance(palette[2], block[i]); + + uint index; + if (d0 < d1 && d0 < d2) index = 0; + else if (d1 < d2) index = 1; + else index = 2; + + indices |= index << (2 * i); + } + + return indices; +} + + + static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock) { @@ -266,7 +293,7 @@ static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock) dxtBlock->indices = computeIndices4(block, a, b); } -/*static void optimizeEndPoints3(Vector3 block[16], BlockDXT1 * dxtBlock) +static void optimizeEndPoints3(Vector3 block[16], BlockDXT1 * dxtBlock) { float alpha2_sum = 0.0f; float beta2_sum = 0.0f; @@ -278,7 +305,7 @@ static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock) { const uint bits = dxtBlock->indices >> (2 * i); - float beta = (bits & 1); + float beta = float(bits & 1); if (bits & 2) beta = 0.5f; float alpha = 1.0f - beta; @@ -312,7 +339,7 @@ static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock) dxtBlock->col0 = Color16(color1); dxtBlock->col1 = Color16(color0); dxtBlock->indices = computeIndices3(block, a, b); -}*/ +} namespace { @@ -571,7 +598,7 @@ void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock) dxtBlock->col0 = Color16(color1); dxtBlock->col1 = Color16(color0); - dxtBlock->indices = computeIndices3(rgba, maxColor, minColor); + dxtBlock->indices = computeIndices3(block, maxColor, minColor); // optimizeEndPoints(block, dxtBlock); } @@ -634,3 +661,51 @@ void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, compressDXT1(rgba, &dxtBlock->color); compressDXT5A(rgba, &dxtBlock->alpha, iterationCount); } + + + +void QuickCompress::outputBlock4(const ColorBlock & rgba, const Vector3 & start, const Vector3 & end, BlockDXT1 * dxtBlock) +{ + Vector3 block[16]; + extractColorBlockRGB(rgba, block); + + Vector3 maxColor = start * 255; + Vector3 minColor = end * 255; + uint16 color0 = roundAndExpand(&maxColor); + uint16 color1 = roundAndExpand(&minColor); + + 
if (color0 < color1) + { + swap(maxColor, minColor); + swap(color0, color1); + } + + dxtBlock->col0 = Color16(color0); + dxtBlock->col1 = Color16(color1); + dxtBlock->indices = computeIndices4(block, maxColor, minColor); + + optimizeEndPoints4(block, dxtBlock); +} + +void QuickCompress::outputBlock3(const ColorBlock & rgba, const Vector3 & start, const Vector3 & end, BlockDXT1 * dxtBlock) +{ + Vector3 block[16]; + extractColorBlockRGB(rgba, block); + + Vector3 maxColor = start * 255; + Vector3 minColor = end * 255; + uint16 color0 = roundAndExpand(&maxColor); + uint16 color1 = roundAndExpand(&minColor); + + if (color0 > color1) + { + swap(maxColor, minColor); + swap(color0, color1); + } + + dxtBlock->col0 = Color16(color0); + dxtBlock->col1 = Color16(color1); + dxtBlock->indices = computeIndices3(block, maxColor, minColor); + + optimizeEndPoints3(block, dxtBlock); +} \ No newline at end of file diff --git a/src/nvtt/QuickCompressDXT.h b/src/nvtt/QuickCompressDXT.h index 43d48cb..f7140c0 100644 --- a/src/nvtt/QuickCompressDXT.h +++ b/src/nvtt/QuickCompressDXT.h @@ -35,6 +35,7 @@ namespace nv struct BlockDXT5; struct AlphaBlockDXT3; struct AlphaBlockDXT5; + class Vector3; namespace QuickCompress { @@ -45,6 +46,9 @@ namespace nv void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount=8); void compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount=8); + + void outputBlock4(const ColorBlock & rgba, const Vector3 & start, const Vector3 & end, BlockDXT1 * block); + void outputBlock3(const ColorBlock & rgba, const Vector3 & start, const Vector3 & end, BlockDXT1 * block); } } // nv namespace diff --git a/src/nvtt/TexImage.cpp b/src/nvtt/TexImage.cpp index ea719f3..d608c98 100644 --- a/src/nvtt/TexImage.cpp +++ b/src/nvtt/TexImage.cpp @@ -299,6 +299,46 @@ const float * TexImage::data() const return m->image->channel(0); } +void TexImage::histogram(int channel, float rangeMin, float rangeMax, int binCount, int * 
binPtr) const +{ + // We assume it's clear in case we want to accumulate multiple histograms. + //memset(bins, 0, sizeof(int)*count); + + if (m->image == NULL) return; + + const float * c = m->image->channel(channel); + + float scale = float(binCount) / rangeMax; + float bias = - scale * rangeMin; + + const uint count = m->image->width() * m->image->height(); + for (uint i = 0; i < count; i++) { + float f = c[i] * scale + bias; + int idx = ifloor(f); + if (idx < 0) idx = 0; + if (idx > binCount-1) idx = binCount-1; + binPtr[idx]++; + } +} + +void TexImage::range(int channel, float * rangeMin, float * rangeMax) +{ + Vector2 range(FLT_MAX, -FLT_MAX); + + FloatImage * img = m->image; + float * c = img->channel(channel); + + const uint count = img->width() * img->height(); + for (uint p = 0; p < count; p++) { + float f = c[p]; + if (f < range.x) range.x = f; + if (f > range.y) range.y = f; + } + + *rangeMin = range.x; + *rangeMax = range.y; +} + bool TexImage::load(const char * fileName) { @@ -320,8 +360,6 @@ bool TexImage::load(const char * fileName) bool TexImage::save(const char * fileName) const { -#pragma NV_MESSAGE("TODO: Add support for DDS textures in TexImage::save") - if (m->image != NULL) { return ImageIO::saveFloat(fileName, m->image, 0, 4); @@ -989,33 +1027,19 @@ void TexImage::scaleAlphaToCoverage(float coverage, float alphaRef/*= 0.5f*/) m->image->scaleAlphaToCoverage(coverage, alphaRef, 3); } -bool TexImage::normalizeRange(float * rangeMin, float * rangeMax) +/*bool TexImage::normalizeRange(float * rangeMin, float * rangeMax) { if (m->image == NULL) return false; - Vector2 range(FLT_MAX, -FLT_MAX); - - // Compute range. - FloatImage * img = m->image; + range(0, rangeMin, rangeMax); - const uint count = img->count(); - for (uint p = 0; p < count; p++) { - float c = img->pixel(p); - - if (c < range.x) range.x = c; - if (c > range.y) range.y = c; - } - - if (range.x == range.y) { + if (*rangeMin == *rangeMax) { // Single color image. 
return false; } - *rangeMin = range.x; - *rangeMax = range.y; - - const float scale = 1.0f / (range.y - range.x); - const float bias = range.x * scale; + const float scale = 1.0f / (*rangeMax - *rangeMin); + const float bias = *rangeMin * scale; if (range.x == 0.0f && range.y == 1.0f) { // Already normalized. @@ -1029,7 +1053,7 @@ bool TexImage::normalizeRange(float * rangeMin, float * rangeMax) //img->clamp(0, 4, 0.0f, 1.0f); return true; -} +}*/ // Ideally you should compress/quantize the RGB and M portions independently. // Once you have M quantized, you would compute the corresponding RGB and quantize that. @@ -1054,7 +1078,6 @@ void TexImage::toRGBM(float range/*= 1*/, float threshold/*= 0.25*/) float B = nv::clamp(b[i] * irange, 0.0f, 1.0f); float M = max(max(R, G), max(B, 1e-6f)); // Avoid division by zero. - //m = quantizeCeil(m, 8); r[i] = R / M; g[i] = G / M; @@ -1233,20 +1256,19 @@ void TexImage::toLUVW(float range/*= 1.0f*/) float G = nv::clamp(g[i] * irange, 0.0f, 1.0f); float B = nv::clamp(b[i] * irange, 0.0f, 1.0f); - float L = max(sqrtf(R*R + G*G + B*B), 1e-6f)); // Avoid division by zero. - //m = quantizeCeil(m, 8); + float L = max(sqrtf(R*R + G*G + B*B), 1e-6f); // Avoid division by zero. r[i] = R / L; g[i] = G / L; b[i] = B / L; - a[i] = L; + a[i] = L / sqrtf(3); } } void TexImage::fromLUVW(float range/*= 1.0f*/) { // Decompression is the same as in RGBM. - fromRGBM(range); + fromRGBM(range * sqrtf(3)); } @@ -1435,10 +1457,52 @@ float nvtt::rmsAlphaError(const TexImage & reference, const TexImage & image) return float(sqrt(mse / count)); } -TexImage nvtt::diff(const TexImage & reference, const TexImage & image) +TexImage nvtt::diff(const TexImage & reference, const TexImage & image, float scale) { - // @@ TODO. 
- return TexImage(); + const FloatImage * ref = reference.m->image; + const FloatImage * img = image.m->image; + + if (img == NULL || ref == NULL || img->width() != ref->width() || img->height() != ref->height()) { + return TexImage(); + } + nvDebugCheck(img->componentNum() == 4); + nvDebugCheck(ref->componentNum() == 4); + + nvtt::TexImage diffImage; + FloatImage * diff = diffImage.m->image = new FloatImage; + diff->allocate(4, img->width(), img->height()); + + const uint count = img->width() * img->height(); + for (uint i = 0; i < count; i++) + { + float r0 = img->pixel(i, 0); + float g0 = img->pixel(i, 1); + float b0 = img->pixel(i, 2); + //float a0 = img->pixel(i, 3); + float r1 = ref->pixel(i, 0); + float g1 = ref->pixel(i, 1); + float b1 = ref->pixel(i, 2); + float a1 = ref->pixel(i, 3); + + float dr = r0 - r1; + float dg = g0 - g1; + float db = b0 - b1; + //float da = a0 - a1; + + if (reference.alphaMode() == nvtt::AlphaMode_Transparency) + { + dr *= a1; + dg *= a1; + db *= a1; + } + + diff->pixel(i, 0) = dr * scale; + diff->pixel(i, 1) = dg * scale; + diff->pixel(i, 2) = db * scale; + diff->pixel(i, 3) = a1; + } + + return diffImage; } diff --git a/src/nvtt/nvtt.h b/src/nvtt/nvtt.h index 43e9a98..b82becb 100644 --- a/src/nvtt/nvtt.h +++ b/src/nvtt/nvtt.h @@ -395,6 +395,8 @@ namespace nvtt NVTT_API float alphaTestCoverage(float alphaRef = 0.5) const; NVTT_API float average(int channel) const; NVTT_API const float * data() const; + NVTT_API void histogram(int channel, float rangeMin, float rangeMax, int binCount, int * binPtr) const; + NVTT_API void range(int channel, float * rangeMin, float * rangeMax); // Texture data. 
NVTT_API bool load(const char * fileName); @@ -426,7 +428,7 @@ namespace nvtt NVTT_API void setBorder(float r, float g, float b, float a); NVTT_API void fill(float r, float g, float b, float a); NVTT_API void scaleAlphaToCoverage(float coverage, float alphaRef = 0.5f); - NVTT_API bool normalizeRange(float * rangeMin, float * rangeMax); + //NVTT_API bool normalizeRange(float * rangeMin, float * rangeMax); NVTT_API void toRGBM(float range = 1.0f, float threshold = 0.0f); NVTT_API void fromRGBM(float range = 1.0f); NVTT_API void toYCoCg(); @@ -451,9 +453,9 @@ namespace nvtt NVTT_API bool copyChannel(const TexImage & srcImage, int srcChannel, int dstChannel); // Error compare. - friend float rmsError(const TexImage & reference, const TexImage & img); - friend float rmsAlphaError(const TexImage & reference, const TexImage & img); - friend TexImage diff(const TexImage & reference, const TexImage & img); + NVTT_API friend float rmsError(const TexImage & reference, const TexImage & img); + NVTT_API friend float rmsAlphaError(const TexImage & reference, const TexImage & img); + NVTT_API friend TexImage diff(const TexImage & reference, const TexImage & img, float scale); private: void detach(); @@ -471,7 +473,7 @@ namespace nvtt NVTT_API float rmsError(const TexImage & reference, const TexImage & img); NVTT_API float rmsAlphaError(const TexImage & reference, const TexImage & img); - NVTT_API TexImage diff(const TexImage & reference, const TexImage & img); + NVTT_API TexImage diff(const TexImage & reference, const TexImage & img, float scale); } // nvtt namespace diff --git a/src/nvtt/squish/colourfit.cpp b/src/nvtt/squish/colourfit.cpp index 7df7047..f67a67f 100644 --- a/src/nvtt/squish/colourfit.cpp +++ b/src/nvtt/squish/colourfit.cpp @@ -38,21 +38,21 @@ void ColourFit::SetColourSet( ColourSet const* colours, int flags ) m_flags = flags; } -void ColourFit::Compress( void* block ) +void ColourFit::Compress( Vec3 * start, Vec3 * end ) { bool isDxt1 = ( ( m_flags & kDxt1 ) != 
0 ); if( isDxt1 ) { - Compress3( block ); + Compress3( start, end ); if( !m_colours->IsTransparent() ) { - Compress4( block ); + Compress4( start, end ); } } else { - Compress4( block ); + Compress4( start, end ); } } diff --git a/src/nvtt/squish/colourfit.h b/src/nvtt/squish/colourfit.h index 4c4dc6b..9e6281b 100644 --- a/src/nvtt/squish/colourfit.h +++ b/src/nvtt/squish/colourfit.h @@ -40,11 +40,11 @@ public: void SetColourSet( ColourSet const* colours, int flags ); - void Compress( void* block ); + void Compress( Vec3 * start, Vec3 * end ); protected: - virtual void Compress3( void* block ) = 0; - virtual void Compress4( void* block ) = 0; + virtual bool Compress3( Vec3 * start, Vec3 * end ) = 0; + virtual bool Compress4( Vec3 * start, Vec3 * end ) = 0; ColourSet const* m_colours; int m_flags; diff --git a/src/nvtt/squish/weightedclusterfit.cpp b/src/nvtt/squish/weightedclusterfit.cpp index 9181249..23e4fa6 100644 --- a/src/nvtt/squish/weightedclusterfit.cpp +++ b/src/nvtt/squish/weightedclusterfit.cpp @@ -129,7 +129,7 @@ float WeightedClusterFit::GetBestError() const #if SQUISH_USE_SIMD -void WeightedClusterFit::Compress3( void* block ) +bool WeightedClusterFit::Compress3( Vec3 * start, Vec3 * end ) { int const count = m_colours->GetCount(); Vec4 const one = VEC4_CONST(1.0f); @@ -212,7 +212,7 @@ void WeightedClusterFit::Compress3( void* block ) if( CompareAnyLessThan( besterror, m_besterror ) ) { // compute indices from cluster sizes. 
- u8 bestindices[16]; + /*u8 bestindices[16]; { int i = 0; for(; i < b0; i++) { @@ -233,16 +233,22 @@ void WeightedClusterFit::Compress3( void* block ) m_colours->RemapIndices( ordered, bestindices ); - // save the block - WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block ); - + WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );*/ + + *start = beststart.GetVec3(); + *end = bestend.GetVec3(); + // save the error m_besterror = besterror; + + return true; } + + return false; } -void WeightedClusterFit::Compress4( void* block ) +bool WeightedClusterFit::Compress4( Vec3 * start, Vec3 * end ) { int const count = m_colours->GetCount(); Vec4 const one = VEC4_CONST(1.0f); @@ -334,7 +340,7 @@ void WeightedClusterFit::Compress4( void* block ) // save the block if necessary if( CompareAnyLessThan( besterror, m_besterror ) ) { - // compute indices from cluster sizes. + /*// compute indices from cluster sizes. u8 bestindices[16]; { int i = 0; @@ -360,11 +366,18 @@ void WeightedClusterFit::Compress4( void* block ) m_colours->RemapIndices( ordered, bestindices ); // save the block - WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block ); + WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );*/ + + *start = beststart.GetVec3(); + *end = bestend.GetVec3(); // save the error m_besterror = besterror; + + return true; } + + return false; } #else diff --git a/src/nvtt/squish/weightedclusterfit.h b/src/nvtt/squish/weightedclusterfit.h index a0a45fb..66983ba 100644 --- a/src/nvtt/squish/weightedclusterfit.h +++ b/src/nvtt/squish/weightedclusterfit.h @@ -45,8 +45,8 @@ public: float GetBestError() const; // Make them public - virtual void Compress3( void* block ); - virtual void Compress4( void* block ); + bool Compress3( Vec3 * start, Vec3 * end ); + bool Compress4( Vec3 * start, Vec3 * end ); private: diff --git a/src/nvtt/tests/testsuite.cpp b/src/nvtt/tests/testsuite.cpp index 
48ec61a..7b73ee8 100644 --- a/src/nvtt/tests/testsuite.cpp +++ b/src/nvtt/tests/testsuite.cpp @@ -145,6 +145,11 @@ static const char * s_witnessImageSet[] = { "specRuin-puzzle.tga" }; +static const char * s_witnessLmapImageSet[] = { + "specruin.dds", +}; + + enum Mode { Mode_BC1, Mode_BC1_Alpha, @@ -152,9 +157,12 @@ enum Mode { Mode_BC3_Alpha, Mode_BC3_YCoCg, Mode_BC3_RGBM, + Mode_BC3_LUVW, Mode_BC1_Normal, Mode_BC3_Normal, Mode_BC5_Normal, + Mode_BC3_Lightmap_1, + Mode_BC3_Lightmap_2, }; static const char * s_modeNames[] = { "BC1", @@ -167,6 +175,8 @@ static const char * s_modeNames[] = { "BC1-Normal", "BC3-Normal", "BC5-Normal", + "BC3-RGBM", + "BC3-LUVW", }; struct Test { @@ -175,26 +185,29 @@ struct Test { Mode modes[4]; }; static Test s_imageTests[] = { - {"DXT Color", 3, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM}}, + {"DXT Color", 1, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, Mode_BC3_LUVW}}, {"DXT Alpha", 3, {Mode_BC1_Alpha, Mode_BC2_Alpha, Mode_BC3_Alpha}}, {"DXT Normal", 3, {Mode_BC1_Normal, Mode_BC3_Normal, Mode_BC5_Normal}}, + {"DXT Lightmap", 2, {Mode_BC3_Lightmap_1, Mode_BC3_Lightmap_2}}, }; -const int s_testCount = ARRAY_SIZE(s_imageTests); +const int s_imageTestCount = ARRAY_SIZE(s_imageTests); struct ImageSet { const char * name; + const char * basePath; const char ** fileNames; int fileCount; }; static ImageSet s_imageSets[] = { - {"Kodak", s_kodakImageSet, ARRAY_SIZE(s_kodakImageSet)}, // 0 - {"Waterloo", s_waterlooImageSet, ARRAY_SIZE(s_waterlooImageSet)}, // 1 - {"Epic", s_epicImageSet, ARRAY_SIZE(s_epicImageSet)}, // 2 - {"Farbraush", s_farbrauschImageSet, ARRAY_SIZE(s_farbrauschImageSet)}, // 3 - {"Lugaru", s_lugaruImageSet, ARRAY_SIZE(s_lugaruImageSet)}, // 4 - {"Quake3", s_quake3ImageSet, ARRAY_SIZE(s_quake3ImageSet)}, // 5 - {"Witness", s_witnessImageSet, ARRAY_SIZE(s_witnessImageSet)} // 6 + {"Kodak", "kodak", s_kodakImageSet, ARRAY_SIZE(s_kodakImageSet)}, // 0 + {"Waterloo", "waterloo", s_waterlooImageSet, ARRAY_SIZE(s_waterlooImageSet)}, 
// 1 + {"Epic", "epic", s_epicImageSet, ARRAY_SIZE(s_epicImageSet)}, // 2 + {"Farbraush", "farbrausch", s_farbrauschImageSet, ARRAY_SIZE(s_farbrauschImageSet)}, // 3 + {"Lugaru", "lugaru", s_lugaruImageSet, ARRAY_SIZE(s_lugaruImageSet)}, // 4 + {"Quake3", "quake3", s_quake3ImageSet, ARRAY_SIZE(s_quake3ImageSet)}, // 5 + {"Witness", "witness", s_witnessImageSet, ARRAY_SIZE(s_witnessImageSet)}, // 6 + {"Lightmap", "lightmap", s_witnessLmapImageSet, ARRAY_SIZE(s_witnessLmapImageSet)}, // 7 }; const int s_imageSetCount = sizeof(s_imageSets)/sizeof(s_imageSets[0]); @@ -227,9 +240,10 @@ struct MyOutputHandler : public nvtt::OutputHandler nvtt::TexImage decompress(Mode mode, nvtt::Decoder decoder) { nvtt::Format format; - if (mode == Mode_BC1) format = nvtt::Format_BC1; + if (mode == Mode_BC1 || mode == Mode_BC1_Alpha || mode == Mode_BC1_Normal) format = nvtt::Format_BC1; + else if (mode == Mode_BC2_Alpha) format = nvtt::Format_BC2; else if (mode == Mode_BC5_Normal) format = nvtt::Format_BC5; - else format = nvtt::Format_BC3; + else format = nvtt::Format_BC3; nvtt::TexImage img; img.setImage2D(format, decoder, m_width, m_height, m_data); @@ -263,7 +277,7 @@ int main(int argc, char *argv[]) bool nocuda = false; bool showHelp = false; nvtt::Decoder decoder = nvtt::Decoder_Reference; - const char * basePath = ""; + Path basePath = ""; const char * outPath = "output"; const char * regressPath = NULL; @@ -274,6 +288,14 @@ int main(int argc, char *argv[]) { if (i+1 < argc && argv[i+1][0] != '-') { setIndex = atoi(argv[i+1]); + + for (int j = 0; j < s_imageSetCount; j++) { + if (strCaseCmp(s_imageSets[j].name, argv[i+1]) == 0) { + setIndex = j; + break; + } + } + i++; } } @@ -327,7 +349,7 @@ int main(int argc, char *argv[]) } // Validate inputs. 
- if (testIndex >= s_testCount) { + if (testIndex >= s_imageTestCount) { printf("Invalid test %d\n", testIndex); return 0; } @@ -343,17 +365,14 @@ int main(int argc, char *argv[]) printf("Input options:\n"); printf(" -path \tInput image path.\n"); printf(" -regress \tRegression directory.\n"); - printf(" -set [0:5] \tImage set.\n"); - printf(" 0: \tKodak.\n"); - printf(" 1: \tWaterloo.\n"); - printf(" 2: \tEpic.\n"); - printf(" 3: \tFarbrausch.\n"); - printf(" 4: \tLugaru.\n"); - printf(" 5: \tQuake 3.\n"); - printf(" -test [0:2] \tCompression tests to run."); - printf(" 0: \tDXT Color.\n"); - printf(" 1: \tDXT Alpha.\n"); - printf(" 2: \tDXT Normal.\n"); + printf(" -set [0:%d] \tImage set.\n", s_imageSetCount-1); + for (int i = 0; i < s_imageSetCount; i++) { + printf(" %i: \t%s.\n", i, s_imageSets[i].name); + } + printf(" -test [0:%d] \tCompression tests to run.", s_imageTestCount); + for (int i = 0; i < s_imageTestCount; i++) { + printf(" %i: \t%s.\n", i, s_imageTests[i].name); + } printf(" -dec x \tDecompressor.\n"); printf(" 0: \tReference.\n"); printf(" 1: \tNVIDIA.\n"); @@ -397,7 +416,9 @@ int main(int argc, char *argv[]) nvtt::Context context; context.enableCudaAcceleration(!nocuda); - FileSystem::changeDirectory(basePath); + basePath.append(set.basePath); + + FileSystem::changeDirectory(basePath.str()); FileSystem::createDirectory(outPath); //Path csvFileName; @@ -406,7 +427,7 @@ int main(int argc, char *argv[]) //TextWriter csvWriter(&csvStream); Path graphFileName; - graphFileName.format("%s/result-%d.txt", outPath, setIndex); + graphFileName.format("%s/chart.txt", outPath/*, test.name*/); StdOutputStream graphStream(graphFileName.str()); TextWriter graphWriter(&graphStream); @@ -434,7 +455,7 @@ int main(int argc, char *argv[]) { const char * colors[] = { "3D7930", "952826", "3D1FC1", - "3D7930", "952826", "3D1FC1", // pick other colors... + "FF9900", "999999", "999999", // pick other colors... 
}; graphWriter << colors[t]; if (t != test.count-1) graphWriter << ","; @@ -484,10 +505,10 @@ int main(int argc, char *argv[]) for (int t = 0; t < test.count; t++) { Mode mode = test.modes[t]; - if (mode == Mode_BC1) { + if (mode == Mode_BC1 || mode == Mode_BC1_Alpha || mode == Mode_BC1_Normal) { compressionOptions.setFormat(nvtt::Format_BC1); } - else if (mode == Mode_BC3_Alpha || mode == Mode_BC3_YCoCg || mode == Mode_BC3_RGBM) { + else if (mode == Mode_BC3_Alpha || mode == Mode_BC3_YCoCg || mode == Mode_BC3_RGBM || mode == Mode_BC3_LUVW || mode == Mode_BC3_Lightmap_1 || mode == Mode_BC3_Lightmap_2) { compressionOptions.setFormat(nvtt::Format_BC3); } else if (mode == Mode_BC3_Normal) { @@ -497,10 +518,10 @@ int main(int argc, char *argv[]) compressionOptions.setFormat(nvtt::Format_BC5); } - if (mode == Mode_BC3_Alpha) { + if (mode == Mode_BC3_Alpha || mode == Mode_BC3_Lightmap_1 || mode == Mode_BC3_Lightmap_2) { // Lightmap's alpha channel is coverage. img.setAlphaMode(nvtt::AlphaMode_Transparency); } - if (mode == Mode_BC3_Normal || mode == Mode_BC5_Normal) { + if (mode == Mode_BC1_Normal || mode == Mode_BC3_Normal || mode == Mode_BC5_Normal) { img.setNormalMap(true); } @@ -528,6 +549,56 @@ int main(int argc, char *argv[]) else if (mode == Mode_BC3_RGBM) { tmp.toRGBM(); } + else if (mode == Mode_BC3_LUVW) { + tmp.toLUVW(); + } + else if (mode == Mode_BC3_Lightmap_1) { + tmp.toRGBM(4); + + /*float rmin, rmax; + tmp.range(0, &rmin, &rmax); + + float gmin, gmax; + tmp.range(1, &gmin, &gmax); + + float bmin, bmax; + tmp.range(2, &bmin, &bmax); + + float lmin, lmax; + tmp.range(3, &lmin, &lmax); + + printf("rmin: %.3f rmax: %.3f\n", rmin, rmax); + printf("gmin: %.3f gmax: %.3f\n", gmin, gmax); + printf("bmin: %.3f bmax: %.3f\n", bmin, bmax); + printf("lmin: %.3f lmax: %.3f\n", lmin, lmax); + + const int N = 32; + int chistogram[N]; + int lhistogram[N]; + memset(chistogram, 0, sizeof(chistogram)); + memset(lhistogram, 0, sizeof(lhistogram)); + + tmp.histogram(0, 0, 1, 
N, chistogram); + tmp.histogram(1, 0, 1, N, chistogram); + tmp.histogram(2, 0, 1, N, chistogram); + tmp.histogram(3, 0, 1, N, lhistogram); + + printf("Color histogram:\n"); + for (int i = 0; i < N; i++) { + printf("%d, ", chistogram[i]); + } + printf("\n"); + + printf("Luminance histogram:\n"); + for (int i = 0; i < N; i++) { + printf("%d, ", lhistogram[i]); + } + printf("\n");*/ + } + else if (mode == Mode_BC3_Lightmap_2) { + tmp.toLUVW(4); + } + printf("Compressing: \t'%s'\n", set.fileNames[i]); @@ -540,12 +611,8 @@ int main(int argc, char *argv[]) totalTime += timer.elapsed(); nvtt::TexImage img_out = outputHandler.decompress(mode, decoder); - if (mode == Mode_BC3_Alpha) { - img_out.setAlphaMode(nvtt::AlphaMode_Transparency); - } - if (mode == Mode_BC3_Normal || mode == Mode_BC5_Normal) { - img_out.setNormalMap(true); - } + img_out.setAlphaMode(img.alphaMode()); + img_out.setNormalMap(img.isNormalMap()); if (mode == Mode_BC3_YCoCg) { img_out.scaleBias(0, 1.0, -0.5); @@ -555,11 +622,30 @@ int main(int argc, char *argv[]) else if (mode == Mode_BC3_RGBM) { img_out.fromRGBM(); } + else if (mode == Mode_BC3_LUVW) { + img_out.fromLUVW(); + } + else if (mode == Mode_BC3_Lightmap_1) { + img_out.fromRGBM(4); + } + else if (mode == Mode_BC3_Lightmap_2) { + img_out.fromLUVW(4); + } + + + Path outputFilePath; + outputFilePath.format("%s/%s", outPath, s_modeNames[test.modes[t]]); + FileSystem::createDirectory(outputFilePath.str()); Path outputFileName; - outputFileName.format("%s/%s", outPath, set.fileNames[i]); + outputFileName.format("%s/%s", outputFilePath.str(), set.fileNames[i]); outputFileName.stripExtension(); - outputFileName.append(".png"); + if (mode == Mode_BC3_Lightmap_1 || mode == Mode_BC3_Lightmap_2) { + outputFileName.append(".dds"); + } + else { + outputFileName.append(".png"); + } if (!img_out.save(outputFileName.str())) { printf("Error saving file '%s'.\n", outputFileName.str()); @@ -573,6 +659,12 @@ int main(int argc, char *argv[]) graphWriter << rmse; if 
(i != set.fileCount-1) graphWriter << ","; + + outputFileName.stripExtension(); + outputFileName.append("_diff.png"); + nvtt::diff(img, img_out, 4.0f).save(outputFileName.str()); + + // Output csv file //csvWriter << "\"" << fileNames[i] << "\"," << rmse << "\n"; @@ -615,7 +707,7 @@ int main(int argc, char *argv[]) printf(" Total Time: \t%.3f sec\n", totalTime); printf(" Average RMSE:\t%.4f\n", totalRMSE); - if (t != s_testCount-1) graphWriter << "|"; + if (t != test.count-1) graphWriter << "|"; } /*if (regressPath != NULL)