diff --git a/src/nvtt/CompressionOptions.cpp b/src/nvtt/CompressionOptions.cpp index 0cf2113..8fb251c 100644 --- a/src/nvtt/CompressionOptions.cpp +++ b/src/nvtt/CompressionOptions.cpp @@ -76,13 +76,11 @@ void CompressionOptions::setQuality(Quality quality, float errorThreshold /*= 0. /// The choice for these values is subjective. In many case uniform color weights /// (1.0, 1.0, 1.0) work very well. A popular choice is to use the NTSC luma encoding /// weights (0.2126, 0.7152, 0.0722), but I think that blue contributes to our -/// perception more than a 7%. A better choice in my opinion is (3, 4, 2). Ideally -/// the compressor should use a non linear colour metric as described here: -/// http://www.compuphase.com/cmetric.htm +/// perception more than a 7%. A better choice in my opinion is (3, 4, 2). void CompressionOptions::setColorWeights(float red, float green, float blue) { float total = red + green + blue; - float x = blue / total; + float x = red / total; float y = green / total; m.colorWeight.set(x, y, 1.0f - x - y); diff --git a/src/nvtt/cuda/CompressKernel.cu b/src/nvtt/cuda/CompressKernel.cu index 4db1141..9d805e6 100644 --- a/src/nvtt/cuda/CompressKernel.cu +++ b/src/nvtt/cuda/CompressKernel.cu @@ -49,6 +49,7 @@ __device__ inline void swap(T & a, T & b) } __constant__ float3 kColorMetric = { 1.0f, 1.0f, 1.0f }; +__constant__ float3 kColorMetricSqr = { 1.0f, 1.0f, 1.0f }; @@ -121,7 +122,7 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum // Sort colors along the best fit line. colorSums(colors, sums); - float3 axis = bestFitLine(colors, sums[0]); + float3 axis = bestFitLine(colors, sums[0], kColorMetric); dps[idx] = dot(colors[idx], axis); @@ -164,7 +165,7 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum // Sort colors along the best fit line. 
colorSums(colors, sums); - float3 axis = bestFitLine(colors, sums[0]); + float3 axis = bestFitLine(colors, sums[0], kColorMetric); dps[idx] = dot(rawColors[idx], axis); @@ -239,7 +240,7 @@ __device__ float evalPermutation4(const float3 * colors, uint permutation, ushor // compute the error float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); - return dot(e, kColorMetric); + return dot(e, kColorMetricSqr); } __device__ float evalPermutation3(const float3 * colors, uint permutation, ushort * start, ushort * end) @@ -279,7 +280,7 @@ __device__ float evalPermutation3(const float3 * colors, uint permutation, ushor // compute the error float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); - return dot(e, kColorMetric); + return dot(e, kColorMetricSqr); } __constant__ float alphaTable4[4] = { 9.0f, 0.0f, 6.0f, 3.0f }; @@ -320,7 +321,7 @@ __device__ float evalPermutation4(const float3 * colors, float3 color_sum, uint // compute the error float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); - return (1.0f / 9.0f) * dot(e, kColorMetric); + return (1.0f / 9.0f) * dot(e, kColorMetricSqr); } __device__ float evalPermutation3(const float3 * colors, float3 color_sum, uint permutation, ushort * start, ushort * end) @@ -356,7 +357,7 @@ __device__ float evalPermutation3(const float3 * colors, float3 color_sum, uint // compute the error float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); - return (1.0f / 4.0f) * dot(e, kColorMetric); + return (1.0f / 4.0f) * dot(e, kColorMetricSqr); } __device__ float evalPermutation4(const float3 * colors, const float * weights, float3 color_sum, uint permutation, ushort * start, ushort * end) @@ -396,7 +397,7 @@ __device__ float evalPermutation4(const float3 * colors, const float * weights, // 
compute the error float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); - return dot(e, kColorMetric); + return dot(e, kColorMetricSqr); } /* @@ -437,7 +438,7 @@ __device__ float evalPermutation3(const float3 * colors, const float * weights, // compute the error float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); - return dot(e, kColorMetric); + return dot(e, kColorMetricSqr); } */ @@ -963,6 +964,13 @@ extern "C" void setupCompressKernel(const float weights[3]) { // Set constants. cudaMemcpyToSymbol(kColorMetric, weights, sizeof(float) * 3, 0); + + float weightsSqr[3]; + weightsSqr[0] = weights[0] * weights[0]; + weightsSqr[1] = weights[1] * weights[1]; + weightsSqr[2] = weights[2] * weights[2]; + + cudaMemcpyToSymbol(kColorMetricSqr, weightsSqr, sizeof(float) * 3, 0); } diff --git a/src/nvtt/cuda/CudaMath.h b/src/nvtt/cuda/CudaMath.h index 363b7b5..ecb8f4c 100644 --- a/src/nvtt/cuda/CudaMath.h +++ b/src/nvtt/cuda/CudaMath.h @@ -166,14 +166,14 @@ inline __device__ void colorSums(const float3 * colors, float3 * sums) #endif } -inline __device__ float3 bestFitLine(const float3 * colors, float3 color_sum) +inline __device__ float3 bestFitLine(const float3 * colors, float3 color_sum, float3 colorMetric) { // Compute covariance matrix of the given colors. 
#if __DEVICE_EMULATION__ float covariance[6] = {0, 0, 0, 0, 0, 0}; for (int i = 0; i < 16; i++) { - float3 a = colors[i] - color_sum * (1.0f / 16.0f); + float3 a = (colors[i] - color_sum * (1.0f / 16.0f)) * colorMetric; covariance[0] += a.x * a.x; covariance[1] += a.x * a.y; covariance[2] += a.x * a.z; @@ -185,7 +185,7 @@ inline __device__ float3 bestFitLine(const float3 * colors, float3 color_sum) const int idx = threadIdx.x; - float3 diff = colors[idx] - color_sum * (1.0f / 16.0f); + float3 diff = (colors[idx] - color_sum * (1.0f / 16.0f)) * colorMetric; // @@ Eliminate two-way bank conflicts here. // @@ It seems that doing that and unrolling the reduction doesn't help... diff --git a/src/nvtt/squish/clusterfit.cpp b/src/nvtt/squish/clusterfit.cpp index 9f0b51d..3f4fad1 100644 --- a/src/nvtt/squish/clusterfit.cpp +++ b/src/nvtt/squish/clusterfit.cpp @@ -36,30 +36,18 @@ ClusterFit::ClusterFit( ColourSet const* colours, int flags ) // initialise the best error #if SQUISH_USE_SIMD m_besterror = VEC4_CONST( FLT_MAX ); + Vec3 metric = m_metric.GetVec3(); #else m_besterror = FLT_MAX; + Vec3 metric = m_metric; #endif -/* // initialise the metric - bool perceptual = ( ( m_flags & kColourMetricPerceptual ) != 0 ); -#if SQUISH_USE_SIMD - if( perceptual ) - m_metric = Vec4( 0.2126f, 0.7152f, 0.0722f, 0.0f ); - else - m_metric = VEC4_CONST( 1.0f ); -#else - if( perceptual ) - m_metric = Vec3( 0.2126f, 0.7152f, 0.0722f ); - else - m_metric = Vec3( 1.0f ); -#endif -*/ // cache some values int const count = m_colours->GetCount(); Vec3 const* values = m_colours->GetPoints(); // get the covariance matrix - Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() ); + Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights(), metric ); // compute the principle component Vec3 principle = ComputePrincipleComponent( covariance ); diff --git a/src/nvtt/squish/fastclusterfit.cpp b/src/nvtt/squish/fastclusterfit.cpp index 
60c5e7d..c2e3103 100644 --- a/src/nvtt/squish/fastclusterfit.cpp +++ b/src/nvtt/squish/fastclusterfit.cpp @@ -37,8 +37,10 @@ FastClusterFit::FastClusterFit( ColourSet const* colours, int flags ) : // initialise the best error #if SQUISH_USE_SIMD m_besterror = VEC4_CONST( FLT_MAX ); + Vec3 metric = m_metric.GetVec3(); #else m_besterror = FLT_MAX; + Vec3 metric = m_metric; #endif // cache some values @@ -46,7 +48,7 @@ FastClusterFit::FastClusterFit( ColourSet const* colours, int flags ) : Vec3 const* values = m_colours->GetPoints(); // get the covariance matrix - Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() ); + Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights(), metric ); // compute the principle component Vec3 principle = ComputePrincipleComponent( covariance ); diff --git a/src/nvtt/squish/maths.cpp b/src/nvtt/squish/maths.cpp index d1a0051..87b4cd9 100644 --- a/src/nvtt/squish/maths.cpp +++ b/src/nvtt/squish/maths.cpp @@ -23,18 +23,12 @@ -------------------------------------------------------------------------- */ -/*! 
@file - - The symmetric eigensystem solver algorithm is from - http://www.geometrictools.com/Documentation/EigenSymmetric3x3.pdf -*/ - #include "maths.h" #include namespace squish { -Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights ) +Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights, Vec3::Arg metric ) { // compute the centroid float total = 0.0f; @@ -50,7 +44,7 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weight Sym3x3 covariance( 0.0f ); for( int i = 0; i < n; ++i ) { - Vec3 a = points[i] - centroid; + Vec3 a = (points[i] - centroid) * metric; Vec3 b = weights[i]*a; covariance[0] += a.X()*b.X(); @@ -65,166 +59,6 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weight return covariance; } -/* -static Vec3 GetMultiplicity1Evector( Sym3x3 const& matrix, float evalue ) -{ - // compute M - Sym3x3 m; - m[0] = matrix[0] - evalue; - m[1] = matrix[1]; - m[2] = matrix[2]; - m[3] = matrix[3] - evalue; - m[4] = matrix[4]; - m[5] = matrix[5] - evalue; - - // compute U - Sym3x3 u; - u[0] = m[3]*m[5] - m[4]*m[4]; - u[1] = m[2]*m[4] - m[1]*m[5]; - u[2] = m[1]*m[4] - m[2]*m[3]; - u[3] = m[0]*m[5] - m[2]*m[2]; - u[4] = m[1]*m[2] - m[4]*m[0]; - u[5] = m[0]*m[3] - m[1]*m[1]; - - // find the largest component - float mc = std::fabs( u[0] ); - int mi = 0; - for( int i = 1; i < 6; ++i ) - { - float c = std::fabs( u[i] ); - if( c > mc ) - { - mc = c; - mi = i; - } - } - - // pick the column with this component - switch( mi ) - { - case 0: - return Vec3( u[0], u[1], u[2] ); - - case 1: - case 3: - return Vec3( u[1], u[3], u[4] ); - - default: - return Vec3( u[2], u[4], u[5] ); - } -} - -static Vec3 GetMultiplicity2Evector( Sym3x3 const& matrix, float evalue ) -{ - // compute M - Sym3x3 m; - m[0] = matrix[0] - evalue; - m[1] = matrix[1]; - m[2] = matrix[2]; - m[3] = matrix[3] - evalue; - m[4] = matrix[4]; - m[5] = matrix[5] - evalue; - - // find the largest 
component - float mc = std::fabs( m[0] ); - int mi = 0; - for( int i = 1; i < 6; ++i ) - { - float c = std::fabs( m[i] ); - if( c > mc ) - { - mc = c; - mi = i; - } - } - - // pick the first eigenvector based on this index - switch( mi ) - { - case 0: - case 1: - return Vec3( -m[1], m[0], 0.0f ); - - case 2: - return Vec3( m[2], 0.0f, -m[0] ); - - case 3: - case 4: - return Vec3( 0.0f, -m[4], m[3] ); - - default: - return Vec3( 0.0f, -m[5], m[4] ); - } -} - -Vec3 ComputePrincipleComponent( Sym3x3 const& matrix ) -{ - // compute the cubic coefficients - float c0 = matrix[0]*matrix[3]*matrix[5] - + 2.0f*matrix[1]*matrix[2]*matrix[4] - - matrix[0]*matrix[4]*matrix[4] - - matrix[3]*matrix[2]*matrix[2] - - matrix[5]*matrix[1]*matrix[1]; - float c1 = matrix[0]*matrix[3] + matrix[0]*matrix[5] + matrix[3]*matrix[5] - - matrix[1]*matrix[1] - matrix[2]*matrix[2] - matrix[4]*matrix[4]; - float c2 = matrix[0] + matrix[3] + matrix[5]; - - // compute the quadratic coefficients - float a = c1 - ( 1.0f/3.0f )*c2*c2; - float b = ( -2.0f/27.0f )*c2*c2*c2 + ( 1.0f/3.0f )*c1*c2 - c0; - - // compute the root count check - float Q = 0.25f*b*b + ( 1.0f/27.0f )*a*a*a; - - // test the multiplicity - if( FLT_EPSILON < Q ) - { - // only one root, which implies we have a multiple of the identity - return Vec3( 1.0f ); - } - else if( Q < -FLT_EPSILON ) - { - // three distinct roots - float theta = std::atan2( std::sqrt( -Q ), -0.5f*b ); - float rho = std::sqrt( 0.25f*b*b - Q ); - - float rt = std::pow( rho, 1.0f/3.0f ); - float ct = std::cos( theta/3.0f ); - float st = std::sin( theta/3.0f ); - - float l1 = ( 1.0f/3.0f )*c2 + 2.0f*rt*ct; - float l2 = ( 1.0f/3.0f )*c2 - rt*( ct + ( float )sqrt( 3.0f )*st ); - float l3 = ( 1.0f/3.0f )*c2 - rt*( ct - ( float )sqrt( 3.0f )*st ); - - // pick the larger - if( std::fabs( l2 ) > std::fabs( l1 ) ) - l1 = l2; - if( std::fabs( l3 ) > std::fabs( l1 ) ) - l1 = l3; - - // get the eigenvector - return GetMultiplicity1Evector( matrix, l1 ); - } - else // if( 
-FLT_EPSILON <= Q && Q <= FLT_EPSILON ) - { - // two roots - float rt; - if( b < 0.0f ) - rt = -std::pow( -0.5f*b, 1.0f/3.0f ); - else - rt = std::pow( 0.5f*b, 1.0f/3.0f ); - - float l1 = ( 1.0f/3.0f )*c2 + rt; // repeated - float l2 = ( 1.0f/3.0f )*c2 - 2.0f*rt; - - // get the eigenvector - if( std::fabs( l1 ) > std::fabs( l2 ) ) - return GetMultiplicity2Evector( matrix, l1 ); - else - return GetMultiplicity1Evector( matrix, l2 ); - } -} -*/ Vec3 ComputePrincipleComponent( Sym3x3 const& matrix ) { diff --git a/src/nvtt/squish/maths.h b/src/nvtt/squish/maths.h index 357d62f..087a889 100644 --- a/src/nvtt/squish/maths.h +++ b/src/nvtt/squish/maths.h @@ -231,7 +231,7 @@ private: float m_x[6]; }; -Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights ); +Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights, Vec3::Arg metric ); Vec3 ComputePrincipleComponent( Sym3x3 const& matrix ); } // namespace squish diff --git a/src/nvtt/squish/weightedclusterfit.cpp b/src/nvtt/squish/weightedclusterfit.cpp index 5b4a560..3ad504e 100644 --- a/src/nvtt/squish/weightedclusterfit.cpp +++ b/src/nvtt/squish/weightedclusterfit.cpp @@ -38,8 +38,10 @@ WeightedClusterFit::WeightedClusterFit( ColourSet const* colours, int flags ) : // initialise the best error #if SQUISH_USE_SIMD m_besterror = VEC4_CONST( FLT_MAX ); + Vec3 metric = m_metric.GetVec3(); #else m_besterror = FLT_MAX; + Vec3 metric = m_metric; #endif // cache some values @@ -47,7 +49,7 @@ WeightedClusterFit::WeightedClusterFit( ColourSet const* colours, int flags ) : Vec3 const* values = m_colours->GetPoints(); // get the covariance matrix - Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() ); + Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights(), metric ); // compute the principle component Vec3 principle = ComputePrincipleComponent( covariance ); diff --git a/src/nvtt/tools/compress.cpp 
b/src/nvtt/tools/compress.cpp index e5659c8..597049b 100644 --- a/src/nvtt/tools/compress.cpp +++ b/src/nvtt/tools/compress.cpp @@ -386,7 +386,15 @@ int main(int argc, char *argv[]) //compressionOptions.setQuality(nvtt::Quality_Highest); } compressionOptions.enableHardwareCompression(!nocuda); - compressionOptions.setColorWeights(1, 1, 1); + + if (normal) + { + compressionOptions.setColorWeights(4, 4, 2); + } + else + { + compressionOptions.setColorWeights(1, 1, 1); + } if (externalCompressor != NULL) {