diff --git a/ChangeLog b/ChangeLog index cb25b3d..4416609 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,7 @@ NVIDIA Texture Tools version 2.0.2 * Fix indexMirror error reported by Chris Lambert. * Fix vc8 post build command, reported by Richard Sim. * Fix RGBA modes with less than 32 bpp by Viktor Linder. + * Fix alpha decompression by amorilia. See issue 40. NVIDIA Texture Tools version 2.0.1 * Fix memory leaks. diff --git a/VERSION b/VERSION index 38f77a6..e9307ca 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.0.1 +2.0.2 diff --git a/src/nvimage/ColorBlock.cpp b/src/nvimage/ColorBlock.cpp index 16faf15..fac8859 100644 --- a/src/nvimage/ColorBlock.cpp +++ b/src/nvimage/ColorBlock.cpp @@ -307,15 +307,6 @@ void ColorBlock::boundsRangeAlpha(Color32 * start, Color32 * end) const } -void ColorBlock::bestFitRange(Color32 * start, Color32 * end) const -{ - nvDebugCheck(start != NULL); - nvDebugCheck(end != NULL); - - Vector3 axis = bestFitLine().direction(); - computeRange(axis, start, end); -} - /// Sort colors by abosolute value in their 16 bit representation. void ColorBlock::sortColorsByAbsoluteValue() { @@ -393,19 +384,6 @@ void ColorBlock::sortColors(const Vector3 & axis) } -/// Get least squares line that best approxiamtes the points of the color block. -Line3 ColorBlock::bestFitLine() const -{ - Array pointArray(16); - - for(int i = 0; i < 16; i++) { - pointArray.append(Vector3(m_color[i].r, m_color[i].g, m_color[i].b)); - } - - return Fit::bestLine(pointArray); -} - - /// Get the volume of the color block. float ColorBlock::volume() const { diff --git a/src/nvimage/ColorBlock.h b/src/nvimage/ColorBlock.h index cdda4d6..00f9c8e 100644 --- a/src/nvimage/ColorBlock.h +++ b/src/nvimage/ColorBlock.h @@ -4,7 +4,6 @@ #define NV_IMAGE_COLORBLOCK_H #include -#include // Line3 namespace nv { @@ -33,16 +32,13 @@ namespace nv void luminanceRange(Color32 * start, Color32 * end) const; void boundsRange(Color32 * start, Color32 * end) const; void boundsRangeAlpha(Color32 * start, Color32 * end) const; - void bestFitRange(Color32 * start, Color32 * end) const; void sortColorsByAbsoluteValue(); void computeRange(const Vector3 & axis, Color32 * start, Color32 * end) const; void sortColors(const Vector3 & axis); - Line3 bestFitLine() const; float volume() const; - Line3 diameterLine() const; // Accessors const Color32 * colors() const; diff --git a/src/nvimage/HoleFilling.cpp b/src/nvimage/HoleFilling.cpp index a825a24..863dc16 100644 --- a/src/nvimage/HoleFilling.cpp +++ b/src/nvimage/HoleFilling.cpp @@ -296,7 +296,7 @@ static bool downsample(const FloatImage * src, const BitMap * srcMask, const Flo return true; } -// This is the filter used in the Lumigraph paper. The Unreal engine uses something similar. +// This is the filter used in the Lumigraph paper. void nv::fillPullPush(FloatImage * img, const BitMap * bmap) { nvCheck(img != NULL); @@ -644,8 +644,8 @@ struct LocalPixels -// This is a cubic extrapolation filter from Charles Bloom (DoPixelSeamFix). -void nv::fillCubicExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex /*= -1*/) +// This is a quadratic extrapolation filter from Charles Bloom (DoPixelSeamFix). Used with his permission. +void nv::fillQuadraticExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex /*= -1*/) { nvCheck(passCount > 0); nvCheck(img != NULL); diff --git a/src/nvimage/HoleFilling.h b/src/nvimage/HoleFilling.h index af667a7..b437e87 100644 --- a/src/nvimage/HoleFilling.h +++ b/src/nvimage/HoleFilling.h @@ -89,7 +89,7 @@ namespace nv NVIMAGE_API void fillPullPush(FloatImage * img, const BitMap * bmap); NVIMAGE_API void fillExtrapolate(int passCount, FloatImage * img, BitMap * bmap); - NVIMAGE_API void fillCubicExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex = -1); + NVIMAGE_API void fillQuadraticExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex = -1); } // nv namespace diff --git a/src/nvmath/CMakeLists.txt b/src/nvmath/CMakeLists.txt index c9c3eae..7ea4a80 100644 --- a/src/nvmath/CMakeLists.txt +++ b/src/nvmath/CMakeLists.txt @@ -7,8 +7,6 @@ SET(MATH_SRCS Quaternion.h Box.h Color.h - Eigen.h Eigen.cpp - Fitting.h Fitting.cpp Montecarlo.h Montecarlo.cpp Random.h Random.cpp SphericalHarmonic.h SphericalHarmonic.cpp diff --git a/src/nvmath/Eigen.cpp b/src/nvmath/Eigen.cpp deleted file mode 100644 index 8247918..0000000 --- a/src/nvmath/Eigen.cpp +++ /dev/null @@ -1,533 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#include "Eigen.h" - -using namespace nv; - -static const float EPS = 0.00001f; -static const int MAX_ITER = 100; - -static void semi_definite_symmetric_eigen(const float *mat, int n, float *eigen_vec, float *eigen_val); - - -// Use power method to find the first eigenvector. -// http://www.miislita.com/information-retrieval-tutorial/matrix-tutorial-3-eigenvalues-eigenvectors.html -Vector3 nv::firstEigenVector(float matrix[6]) -{ - // Number of iterations. @@ Use a variable number of iterations. - const int NUM = 8; - - Vector3 v(1, 1, 1); - for(int i = 0; i < NUM; i++) { - float x = v.x() * matrix[0] + v.y() * matrix[1] + v.z() * matrix[2]; - float y = v.x() * matrix[1] + v.y() * matrix[3] + v.z() * matrix[4]; - float z = v.x() * matrix[2] + v.y() * matrix[4] + v.z() * matrix[5]; - - float norm = max(max(x, y), z); - float iv = 1.0f / norm; - if (norm == 0.0f) { - return Vector3(zero); - } - - v.set(x*iv, y*iv, z*iv); - } - - return v; -} - - -/// Solve eigen system. -void Eigen::solve() { - semi_definite_symmetric_eigen(matrix, N, eigen_vec, eigen_val); -} - -/// Solve eigen system. -void Eigen3::solve() { - // @@ Use lengyel code that seems to be more optimized. -#if 1 - float v[3*3]; - semi_definite_symmetric_eigen(matrix, 3, v, eigen_val); - - eigen_vec[0].set(v[0], v[1], v[2]); - eigen_vec[1].set(v[3], v[4], v[5]); - eigen_vec[2].set(v[6], v[7], v[8]); -#else - const int maxSweeps = 32; - const float epsilon = 1.0e-10f; - - float m11 = matrix[0]; // m(0,0); - float m12 = matrix[1]; // m(0,1); - float m13 = matrix[2]; // m(0,2); - float m22 = matrix[3]; // m(1,1); - float m23 = matrix[4]; // m(1,2); - float m33 = matrix[5]; // m(2,2); - - //r.SetIdentity(); - eigen_vec[0].set(1, 0, 0); - eigen_vec[1].set(0, 1, 0); - eigen_vec[2].set(0, 0, 1); - - for (int a = 0; a < maxSweeps; a++) - { - // Exit if off-diagonal entries small enough - if ((fabs(m12) < epsilon) && (fabs(m13) < epsilon) && (fabs(m23) < epsilon)) - { - break; - } - - // Annihilate (1,2) entry - if (m12 != 0.0f) - { - float u = (m22 - m11) * 0.5f / m12; - float u2 = u * u; - float u2p1 = u2 + 1.0f; - float t = (u2p1 != u2) ? ((u < 0.0f) ? -1.0f : 1.0f) * (sqrt(u2p1) - fabs(u)) : 0.5f / u; - float c = 1.0f / sqrt(t * t + 1.0f); - float s = c * t; - - m11 -= t * m12; - m22 += t * m12; - m12 = 0.0f; - - float temp = c * m13 - s * m23; - m23 = s * m13 + c * m23; - m13 = temp; - - for (int i = 0; i < 3; i++) - { - float temp = c * eigen_vec[i].x - s * eigen_vec[i].y; - eigen_vec[i].y = s * eigen_vec[i].x + c * eigen_vec[i].y; - eigen_vec[i].x = temp; - } - } - - // Annihilate (1,3) entry - if (m13 != 0.0f) - { - float u = (m33 - m11) * 0.5f / m13; - float u2 = u * u; - float u2p1 = u2 + 1.0f; - float t = (u2p1 != u2) ? ((u < 0.0f) ? -1.0f : 1.0f) * (sqrt(u2p1) - fabs(u)) : 0.5f / u; - float c = 1.0f / sqrt(t * t + 1.0f); - float s = c * t; - - m11 -= t * m13; - m33 += t * m13; - m13 = 0.0f; - - float temp = c * m12 - s * m23; - m23 = s * m12 + c * m23; - m12 = temp; - - for (int i = 0; i < 3; i++) - { - float temp = c * eigen_vec[i].x - s * eigen_vec[i].z; - eigen_vec[i].z = s * eigen_vec[i].x + c * eigen_vec[i].z; - eigen_vec[i].x = temp; - } - } - - // Annihilate (2,3) entry - if (m23 != 0.0f) - { - float u = (m33 - m22) * 0.5f / m23; - float u2 = u * u; - float u2p1 = u2 + 1.0f; - float t = (u2p1 != u2) ? ((u < 0.0f) ? -1.0f : 1.0f) * (sqrt(u2p1) - fabs(u)) : 0.5f / u; - float c = 1.0f / sqrt(t * t + 1.0f); - float s = c * t; - - m22 -= t * m23; - m33 += t * m23; - m23 = 0.0f; - - float temp = c * m12 - s * m13; - m13 = s * m12 + c * m13; - m12 = temp; - - for (int i = 0; i < 3; i++) - { - float temp = c * eigen_vec[i].y - s * eigen_vec[i].z; - eigen_vec[i].z = s * eigen_vec[i].y + c * eigen_vec[i].z; - eigen_vec[i].y = temp; - } - } - } - - eigen_val[0] = m11; - eigen_val[1] = m22; - eigen_val[2] = m33; -#endif -} - - -/*--------------------------------------------------------------------------- - Functions ----------------------------------------------------------------------------*/ - - -/** @@ I don't remember where did I get this function. - * computes the eigen values and eigen vectors - * of a semi definite symmetric matrix - * - * - matrix is stored in column symmetric storage, i.e. - * matrix = { m11, m12, m22, m13, m23, m33, m14, m24, m34, m44 ... } - * size = n(n+1)/2 - * - * - eigen_vectors (return) = { v1, v2, v3, ..., vn } where vk = vk0, vk1, ..., vkn - * size = n^2, must be allocated by caller - * - * - eigen_values (return) are in decreasing order - * size = n, must be allocated by caller - */ - -void semi_definite_symmetric_eigen( - const float *mat, int n, float *eigen_vec, float *eigen_val -) { - float *a,*v; - float a_norm,a_normEPS,thr,thr_nn; - int nb_iter = 0; - int jj; - int i,j,k,ij,ik,l,m,lm,mq,lq,ll,mm,imv,im,iq,ilv,il,nn; - int *index; - float a_ij,a_lm,a_ll,a_mm,a_im,a_il; - float a_lm_2; - float v_ilv,v_imv; - float x; - float sinx,sinx_2,cosx,cosx_2,sincos; - float delta; - - // Number of entries in mat - - nn = (n*(n+1))/2; - - // Step 1: Copy mat to a - - a = new float[nn]; - - for( ij=0; ij a_normEPS && nb_iter < MAX_ITER ) { - - nb_iter++; - thr_nn = thr / nn; - - for( l=1 ; l< n; l++ ) { - for( m=l+1; m<=n; m++ ) { - - // compute sinx and cosx - - lq = (l*l-l)/2; - mq = (m*m-m)/2; - - lm = l+mq; - a_lm = a[lm]; - a_lm_2 = a_lm*a_lm; - - if( a_lm_2 < thr_nn ) { - continue ; - } - - ll = l+lq; - mm = m+mq; - a_ll = a[ll]; - a_mm = a[mm]; - - delta = a_ll - a_mm; - - if( delta == 0.0f ) { - x = - PI/4 ; - } else { - x = - atanf( (a_lm+a_lm) / delta ) / 2.0f ; - } - - sinx = sinf(x); - cosx = cosf(x); - sinx_2 = sinx*sinx; - cosx_2 = cosx*cosx; - sincos = sinx*cosx; - - // rotate L and M columns - - ilv = n*(l-1); - imv = n*(m-1); - - for( i=1; i<=n;i++ ) { - if( (i!=l) && (i!=m) ) { - iq = (i*i-i)/2; - - if( i // swap -#include -#include - -namespace nv -{ - - // Compute first eigen vector using the power method. - Vector3 firstEigenVector(float matrix[6]); - - /// Generic eigen-solver. - class Eigen - { - public: - - /// Ctor. - Eigen(uint n) : N(n) - { - uint size = n * (n + 1) / 2; - matrix = new float[size]; - eigen_vec = new float[N*N]; - eigen_val = new float[N]; - } - - /// Dtor. - ~Eigen() - { - delete [] matrix; - delete [] eigen_vec; - delete [] eigen_val; - } - - NVMATH_API void solve(); - - /// Matrix accesor. - float & operator()(uint x, uint y) - { - if( x > y ) { - swap(x, y); - } - return matrix[y * (y + 1) / 2 + x]; - } - - /// Matrix const accessor. - float operator()(uint x, uint y) const - { - if( x > y ) { - swap(x, y); - } - return matrix[y * (y + 1) / 2 + x]; - } - - Vector3 eigenVector3(uint i) const - { - nvCheck(3 == N); - nvCheck(i < N); - return Vector3(eigen_vec[i*N+0], eigen_vec[i*N+1], eigen_vec[i*N+2]); - } - - Vector4 eigenVector4(uint i) const - { - nvCheck(4 == N); - nvCheck(i < N); - return Vector4(eigen_vec[i*N+0], eigen_vec[i*N+1], eigen_vec[i*N+2], eigen_vec[i*N+3]); - } - - float eigenValue(uint i) const - { - nvCheck(i < N); - return eigen_val[i]; - } - - private: - const uint N; - float * matrix; - float * eigen_vec; - float * eigen_val; - }; - - - /// 3x3 eigen-solver. - /// Based on Eric Lengyel's code: - /// http://www.terathon.com/code/linear.html - class Eigen3 - { - public: - - /** Ctor. */ - Eigen3() {} - - NVMATH_API void solve(); - - /// Matrix accesor. - float & operator()(uint x, uint y) - { - nvDebugCheck( x < 3 && y < 3 ); - if( x > y ) { - swap(x, y); - } - return matrix[y * (y + 1) / 2 + x]; - } - - /// Matrix const accessor. - float operator()(uint x, uint y) const - { - nvDebugCheck( x < 3 && y < 3 ); - if( x > y ) { - swap(x, y); - } - return matrix[y * (y + 1) / 2 + x]; - } - - /// Get ith eigen vector. - Vector3 eigenVector(uint i) const - { - nvCheck(i < 3); - return eigen_vec[i]; - } - - /** Get ith eigen value. */ - float eigenValue(uint i) const - { - nvCheck(i < 3); - return eigen_val[i]; - } - - private: - float matrix[3+2+1]; - Vector3 eigen_vec[3]; - float eigen_val[3]; - }; - -} // nv namespace - -#endif // NV_MATH_EIGEN_H diff --git a/src/nvmath/Fitting.cpp b/src/nvmath/Fitting.cpp deleted file mode 100644 index 7f83c09..0000000 --- a/src/nvmath/Fitting.cpp +++ /dev/null @@ -1,134 +0,0 @@ -// License: Wild Magic License Version 3 -// http://geometrictools.com/License/WildMagic3License.pdf - -#include "Fitting.h" -#include "Eigen.h" - -using namespace nv; - - -/** Fit a 3d line to the given set of points. - * - * Based on code from: - * http://geometrictools.com/ - */ -Line3 Fit::bestLine(const Array & pointArray) -{ - nvDebugCheck(pointArray.count() > 0); - - Line3 line; - - const uint pointCount = pointArray.count(); - const float inv_num = 1.0f / pointCount; - - // compute the mean of the points - Vector3 center(zero); - for(uint i = 0; i < pointCount; i++) { - center += pointArray[i]; - } - line.setOrigin(center * inv_num); - - // compute the covariance matrix of the points - float covariance[6] = {0, 0, 0, 0, 0, 0}; - for(uint i = 0; i < pointCount; i++) { - Vector3 diff = pointArray[i] - line.origin(); - covariance[0] += diff.x() * diff.x(); - covariance[1] += diff.x() * diff.y(); - covariance[2] += diff.x() * diff.z(); - covariance[3] += diff.y() * diff.y(); - covariance[4] += diff.y() * diff.z(); - covariance[5] += diff.z() * diff.z(); - } - - line.setDirection(normalizeSafe(firstEigenVector(covariance), Vector3(zero), 0.0f)); - - // @@ This variant is from David Eberly... I'm not sure how that works. - /*sum_xx *= inv_num; - sum_xy *= inv_num; - sum_xz *= inv_num; - sum_yy *= inv_num; - sum_yz *= inv_num; - sum_zz *= inv_num; - - // set up the eigensolver - Eigen3 ES; - ES(0,0) = sum_yy + sum_zz; - ES(0,1) = -sum_xy; - ES(0,2) = -sum_xz; - ES(1,1) = sum_xx + sum_zz; - ES(1,2) = -sum_yz; - ES(2,2) = sum_xx + sum_yy; - - // compute eigenstuff, smallest eigenvalue is in last position - ES.solve(); - - line.setDirection(ES.eigenVector(2)); - - nvCheck( isNormalized(line.direction()) ); - */ - return line; -} - - -/** Fit a 3d plane to the given set of points. - * - * Based on code from: - * http://geometrictools.com/ - */ -Vector4 Fit::bestPlane(const Array & pointArray) -{ - Vector3 center(zero); - - const uint pointCount = pointArray.count(); - const float inv_num = 1.0f / pointCount; - - // compute the mean of the points - for(uint i = 0; i < pointCount; i++) { - center += pointArray[i]; - } - center *= inv_num; - - // compute the covariance matrix of the points - float sum_xx = 0.0f; - float sum_xy = 0.0f; - float sum_xz = 0.0f; - float sum_yy = 0.0f; - float sum_yz = 0.0f; - float sum_zz = 0.0f; - - for(uint i = 0; i < pointCount; i++) { - Vector3 diff = pointArray[i] - center; - sum_xx += diff.x() * diff.x(); - sum_xy += diff.x() * diff.y(); - sum_xz += diff.x() * diff.z(); - sum_yy += diff.y() * diff.y(); - sum_yz += diff.y() * diff.z(); - sum_zz += diff.z() * diff.z(); - } - - sum_xx *= inv_num; - sum_xy *= inv_num; - sum_xz *= inv_num; - sum_yy *= inv_num; - sum_yz *= inv_num; - sum_zz *= inv_num; - - // set up the eigensolver - Eigen3 ES; - ES(0,0) = sum_yy + sum_zz; - ES(0,1) = -sum_xy; - ES(0,2) = -sum_xz; - ES(1,1) = sum_xx + sum_zz; - ES(1,2) = -sum_yz; - ES(2,2) = sum_xx + sum_yy; - - // compute eigenstuff, greatest eigenvalue is in first position - ES.solve(); - - Vector3 normal = ES.eigenVector(0); - nvCheck(isNormalized(normal)); - - float offset = dot(normal, center); - - return Vector4(normal, offset); -} diff --git a/src/nvmath/Fitting.h b/src/nvmath/Fitting.h deleted file mode 100644 index dfd6fde..0000000 --- a/src/nvmath/Fitting.h +++ /dev/null @@ -1,78 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#ifndef NV_MATH_FITTING_H -#define NV_MATH_FITTING_H - -#include - -namespace nv -{ - - /// 3D Line. - struct Line3 - { - /// Ctor. - Line3() : m_origin(zero), m_direction(zero) - { - } - - /// Copy ctor. - Line3(const Line3 & l) : m_origin(l.m_origin), m_direction(l.m_direction) - { - } - - /// Ctor. - Line3(Vector3::Arg o, Vector3::Arg d) : m_origin(o), m_direction(d) - { - } - - /// Normalize the line. - void normalize() - { - m_direction = nv::normalize(m_direction); - } - - /// Project a point onto the line. - Vector3 projectPoint(Vector3::Arg point) const - { - nvDebugCheck(isNormalized(m_direction)); - - Vector3 v = point - m_origin; - return m_origin + m_direction * dot(m_direction, v); - } - - /// Compute distance to line. - float distanceToPoint(Vector3::Arg point) const - { - nvDebugCheck(isNormalized(m_direction)); - - Vector3 v = point - m_origin; - Vector3 l = v - m_direction * dot(m_direction, v); - - return length(l); - } - - const Vector3 & origin() const { return m_origin; } - void setOrigin(Vector3::Arg value) { m_origin = value; } - - const Vector3 & direction() const { return m_direction; } - void setDirection(Vector3::Arg value) { m_direction = value; } - - - private: - Vector3 m_origin; - Vector3 m_direction; - }; - - - namespace Fit - { - - NVMATH_API Line3 bestLine(const Array & pointArray); - NVMATH_API Vector4 bestPlane(const Array & pointArray); - - } // Fit namespace - -} // nv namespace - -#endif // _PI_MATHLIB_FITTING_H_ diff --git a/src/nvtt/CompressDXT.cpp b/src/nvtt/CompressDXT.cpp index 06af8c2..355ac7e 100644 --- a/src/nvtt/CompressDXT.cpp +++ b/src/nvtt/CompressDXT.cpp @@ -97,7 +97,6 @@ void nv::fastCompressDXT1a(const Image * image, const OutputOptions::Private & o for (uint y = 0; y < h; y += 4) { for (uint x = 0; x < w; x += 4) { rgba.init(image, x, y); - QuickCompress::compressDXT1a(rgba, &block); if (outputOptions.outputHandler != NULL) { @@ -119,7 +118,7 @@ void nv::fastCompressDXT3(const Image * image, const nvtt::OutputOptions::Privat for (uint y = 0; y < h; y += 4) { for (uint x = 0; x < w; x += 4) { rgba.init(image, x, y); - compressBlock_BoundsRange(rgba, &block); + QuickCompress::compressDXT3(rgba, &block); if (outputOptions.outputHandler != NULL) { outputOptions.outputHandler->writeData(&block, sizeof(block)); @@ -140,7 +139,8 @@ void nv::fastCompressDXT5(const Image * image, const nvtt::OutputOptions::Privat for (uint y = 0; y < h; y += 4) { for (uint x = 0; x < w; x += 4) { rgba.init(image, x, y); - compressBlock_BoundsRange(rgba, &block); + //QuickCompress::compressDXT5(rgba, &block); // @@ Use fast version!! + nv::compressBlock_BoundsRange(rgba, &block); if (outputOptions.outputHandler != NULL) { outputOptions.outputHandler->writeData(&block, sizeof(block)); @@ -164,8 +164,9 @@ void nv::fastCompressDXT5n(const Image * image, const nvtt::OutputOptions::Priva // copy X coordinate to alpha channel and Y coordinate to green channel. rgba.swizzleDXT5n(); - - compressBlock_BoundsRange(rgba, &block); + + //QuickCompress::compressDXT5(rgba, &block); // @@ Use fast version!! + nv::compressBlock_BoundsRange(rgba, &block); if (outputOptions.outputHandler != NULL) { outputOptions.outputHandler->writeData(&block, sizeof(block)); @@ -286,7 +287,7 @@ void nv::compressDXT3(const Image * image, const OutputOptions::Private & output rgba.init(image, x, y); // Compress explicit alpha. - compressBlock(rgba, &block.alpha); + QuickCompress::compressDXT3A(rgba, &block.alpha); // Compress color. squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha); @@ -317,14 +318,13 @@ void nv::compressDXT5(const Image * image, const OutputOptions::Private & output rgba.init(image, x, y); // Compress alpha. - uint error; if (compressionOptions.quality == Quality_Highest) { - error = compressBlock_BruteForce(rgba, &block.alpha); + compressBlock_BruteForce(rgba, &block.alpha); } else { - error = compressBlock_Iterative(rgba, &block.alpha); + QuickCompress::compressDXT5A(rgba, &block.alpha); } // Compress color. @@ -359,10 +359,13 @@ void nv::compressDXT5n(const Image * image, const OutputOptions::Private & outpu rgba.swizzleDXT5n(); // Compress X. - uint error = compressBlock_Iterative(rgba, &block.alpha); if (compressionOptions.quality == Quality_Highest) { - error = compressBlock_BruteForce(rgba, &block.alpha); + compressBlock_BruteForce(rgba, &block.alpha); + } + else + { + QuickCompress::compressDXT5A(rgba, &block.alpha); } // Compress Y. @@ -384,23 +387,19 @@ void nv::compressBC4(const Image * image, const nvtt::OutputOptions::Private & o ColorBlock rgba; AlphaBlockDXT5 block; - uint totalError = 0; - for (uint y = 0; y < h; y += 4) { for (uint x = 0; x < w; x += 4) { rgba.init(image, x, y); - //error = compressBlock_BoundsRange(rgba, &block); - uint error = compressBlock_Iterative(rgba, &block); - if (compressionOptions.quality == Quality_Highest) { - // Try brute force algorithm. - error = compressBlock_BruteForce(rgba, &block); + compressBlock_BruteForce(rgba, &block); + } + else + { + QuickCompress::compressDXT5A(rgba, &block); } - - totalError += error; if (outputOptions.outputHandler != NULL) { outputOptions.outputHandler->writeData(&block, sizeof(block)); @@ -429,18 +428,15 @@ void nv::compressBC5(const Image * image, const nvtt::OutputOptions::Private & o ycolor.init(image, x, y); ycolor.splatY(); - // @@ Compute normal error, instead of separate xy errors. - uint xerror, yerror; - if (compressionOptions.quality == Quality_Highest) { - xerror = compressBlock_BruteForce(xcolor, &block.x); - yerror = compressBlock_BruteForce(ycolor, &block.y); + compressBlock_BruteForce(xcolor, &block.x); + compressBlock_BruteForce(ycolor, &block.y); } else { - xerror = compressBlock_Iterative(xcolor, &block.x); - yerror = compressBlock_Iterative(ycolor, &block.y); + QuickCompress::compressDXT5A(xcolor, &block.x); + QuickCompress::compressDXT5A(ycolor, &block.y); } if (outputOptions.outputHandler != NULL) { diff --git a/src/nvtt/FastCompressDXT.cpp b/src/nvtt/FastCompressDXT.cpp index b45165f..e27dd68 100644 --- a/src/nvtt/FastCompressDXT.cpp +++ b/src/nvtt/FastCompressDXT.cpp @@ -163,32 +163,6 @@ inline void vectorEnd() #endif -inline static uint paletteError(const ColorBlock & rgba, Color32 palette[4]) -{ - uint error = 0; - - const VectorColor vcolor0 = loadColor(palette[0]); - const VectorColor vcolor1 = loadColor(palette[1]); - const VectorColor vcolor2 = loadColor(palette[2]); - const VectorColor vcolor3 = loadColor(palette[3]); - - for(uint i = 0; i < 16; i++) { - const VectorColor vcolor = loadColor(rgba.color(i)); - - uint d0 = colorDistance(vcolor, vcolor0); - uint d1 = colorDistance(vcolor, vcolor1); - uint d2 = colorDistance(vcolor, vcolor2); - uint d3 = colorDistance(vcolor, vcolor3); - - error += min(min(d0, d1), min(d2, d3)); - } - - vectorEnd(); - return error; -} - - - inline static uint computeIndices(const ColorBlock & rgba, const Color32 palette[4]) { const VectorColor vcolor0 = loadColor(palette[0]); @@ -222,91 +196,6 @@ inline static uint computeIndices(const ColorBlock & rgba, const Color32 palette return indices; } -inline static uint computeIndicesAlpha(const ColorBlock & rgba, const Color32 palette[4]) -{ - const VectorColor vcolor0 = loadColor(palette[0]); - const VectorColor vcolor1 = loadColor(palette[1]); - const VectorColor vcolor2 = loadColor(palette[2]); - const VectorColor vcolor3 = loadColor(palette[3]); - - uint indices = 0; - for(int i = 0; i < 16; i++) { - const VectorColor vcolor = premultiplyAlpha(loadColor(rgba.color(i))); - - uint d0 = colorDistance(vcolor0, vcolor); - uint d1 = colorDistance(vcolor1, vcolor); - uint d2 = colorDistance(vcolor2, vcolor); - uint d3 = colorDistance(vcolor3, vcolor); - - uint b0 = d0 > d3; - uint b1 = d1 > d2; - uint b2 = d0 > d2; - uint b3 = d1 > d3; - uint b4 = d2 > d3; - - uint x0 = b1 & b2; - uint x1 = b0 & b3; - uint x2 = b0 & b4; - - indices |= (x2 | ((x0 | x1) << 1)) << (2 * i); - } - - vectorEnd(); - return indices; -} - - -inline static Color16 saturate16(int r, int g, int b) -{ - Color16 c; - c.r = clamp(0, 31, r); - c.g = clamp(0, 63, g); - c.b = clamp(0, 31, b); - return c; -} - - -// Compressor that uses the luminance axis. -void nv::compressBlock_LuminanceAxis(const ColorBlock & rgba, BlockDXT1 * block) -{ - Color32 c0, c1; - rgba.luminanceRange(&c0, &c1); - - block->col0 = toColor16(c0); - block->col1 = toColor16(c1); - - // Use 4 color mode only. - if (block->col0.u < block->col1.u) { - swap(block->col0.u, block->col1.u); - } - - Color32 palette[4]; - block->evaluatePalette4(palette); - - block->indices = computeIndices(rgba, palette); -} - - -// Compressor that uses diameter axis. -void nv::compressBlock_DiameterAxis(const ColorBlock & rgba, BlockDXT1 * block) -{ - Color32 c0, c1; - rgba.diameterRange(&c0, &c1); - - block->col0 = toColor16(c0); - block->col1 = toColor16(c1); - - // Use 4 color mode only. - if (block->col0.u < block->col1.u) { - swap(block->col0.u, block->col1.u); - } - - Color32 palette[4]; - block->evaluatePalette4(palette); - - block->indices = computeIndices(rgba, palette); -} - // Compressor that uses bounding box. void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block) @@ -330,734 +219,13 @@ void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block) block->indices = computeIndices(rgba, palette); } -// Compressor that uses bounding box and takes alpha into account. -void nv::compressBlock_BoundsRangeAlpha(const ColorBlock & rgba, BlockDXT1 * block) -{ - Color32 c0, c1; - rgba.boundsRange(&c1, &c0); - - if (rgba.hasAlpha()) - { - block->col0 = toColor16(c1); - block->col1 = toColor16(c0); - } - else - { - block->col0 = toColor16(c0); - block->col1 = toColor16(c1); - } - - Color32 palette[4]; - block->evaluatePalette(palette); - - block->indices = computeIndicesAlpha(rgba, palette); -} -// Compressor that uses the best fit axis. -void nv::compressBlock_BestFitAxis(const ColorBlock & rgba, BlockDXT1 * block) -{ - Color32 c0, c1; - rgba.bestFitRange(&c0, &c1); - - block->col0 = toColor16(c0); - block->col1 = toColor16(c1); - - // Use 4 color mode only. - if (block->col0.u < block->col1.u) { - swap(block->col0.u, block->col1.u); - } - else if (block->col0.u == block->col1.u) { - block->col0.u++; - } - - Color32 palette[4]; - block->evaluatePalette4(palette); - - block->indices = computeIndices(rgba, palette); -} - - -// Compressor that tests all input color pairs. -void nv::compressBlock_TestAllPairs(const ColorBlock & rgba, BlockDXT1 * block) -{ - uint best_error = uint(-1); - Color16 best_col0, best_col1; - - Color32 palette[4]; - - // Test all color pairs. - for(uint i = 0; i < 16; i++) { - block->col0 = toColor16(rgba.color(i)); - - for(uint ii = 0; ii < 16; ii++) { - if( i != ii ) { - block->col1 = toColor16(rgba.color(ii)); - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - } - } - } - } - - block->col0 = best_col0; - block->col1 = best_col1; - block->evaluatePalette(palette); - - block->indices = computeIndices(rgba, palette); -} - - -// Compressor that tests all pairs in the best fit axis. -void nv::compressBlock_AnalyzeBestFitAxis(const ColorBlock & rgba, BlockDXT1 * block) -{ - uint best_error = uint(-1); - Color16 best_col0, best_col1; - - Color32 palette[4]; - - - // Find bounds of the search space. - int r_min = 32; - int r_max = 0; - int g_min = 64; - int g_max = 0; - int b_min = 32; - int b_max = 0; - - for(uint i = 0; i < 16; i++) { - Color16 color = toColor16(rgba.color(i)); - - r_min = min(r_min, int(color.r)); - r_max = max(r_max, int(color.r)); - g_min = min(g_min, int(color.g)); - g_max = max(g_max, int(color.g)); - b_min = min(b_min, int(color.b)); - b_max = max(b_max, int(color.b)); - } - - const int r_pad = 4 * max(1, (r_max - r_min)); - const int g_pad = 4 * max(1, (g_max - g_min)); - const int b_pad = 4 * max(1, (b_max - b_min)); - - r_min = max(0, r_min - r_pad); - r_max = min(31, r_max + r_pad); - g_min = max(0, g_min - g_pad); - g_max = min(63, g_max + g_pad); - b_min = max(0, b_min - b_pad); - b_max = min(31, b_max + b_pad); - - const Line3 line = rgba.bestFitLine(); - - if( fabs(line.direction().x()) > fabs(line.direction().y()) && fabs(line.direction().x()) > fabs(line.direction().z()) ) { - for(int r0 = r_min; r0 <= r_max; r0++) { - const float x0 = float((r0 << 3) | (r0 >> 2)); - const float t0 = (x0 - line.origin().x()) / line.direction().x(); - const float y0 = line.origin().y() + t0 * line.direction().y(); - const float z0 = line.origin().z() + t0 * line.direction().z(); - - const int g0 = clamp(int(y0), 0, 255) >> 2; - const int b0 = clamp(int(z0), 0, 255) >> 3; - - for(int r1 = r_min; r1 <= r_max; r1++) { - const float x1 = float((r1 << 3) | (r1 >> 2)); - const float t1 = (x1 - line.origin().x()) / line.direction().x(); - const float y1 = line.origin().y() + t1 * line.direction().y(); - const float z1 = line.origin().z() + t1 * line.direction().z(); - - const int g1 = clamp(int(y1), 0, 255) >> 2; - const int b1 = clamp(int(z1), 0, 255) >> 3; - - // Test one pixel around. - for (int i0 = -1; i0 <= 1; i0++) { - for (int j0 = -1; j0 <= 1; j0++) { - for (int i1 = -1; i1 <= 1; i1++) { - for (int j1 = -1; j1 <= 1; j1++) { - if( g0+i0 >= 0 && g0+i0 < 64 && g1+i1 >= 0 && g1+i1 < 64 && - b0+j0 >= 0 && b0+j0 < 32 && b1+j1 >= 0 && b1+j1 < 32 ) - { - block->col0.r = r0; - block->col0.g = g0 + i0; - block->col0.b = b0 + j0; - block->col1.r = r1; - block->col1.g = g1 + i1; - block->col1.b = b1 + j1; - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - } - } - } - } - } - } - } - } - } - else if( fabs(line.direction().y()) > fabs(line.direction().z()) ) { - for(int g0 = g_min; g0 <= g_max; g0++) { - const float y0 = float((g0 << 2) | (g0 >> 4)); - const float t0 = (y0 - line.origin().y()) / line.direction().y(); - const float x0 = line.origin().x() + t0 * line.direction().x(); - const float z0 = line.origin().z() + t0 * line.direction().z(); - - const int r0 = clamp(int(x0), 0, 255) >> 3; - const int b0 = clamp(int(z0), 0, 255) >> 3; - - for(int g1 = g_min; g1 <= g_max; g1++) { - const float y1 = float((g1 << 2) | (g1 >> 4)); - const float t1 = (y1 - line.origin().y()) / line.direction().y(); - const float x1 = line.origin().x() + t1 * line.direction().x(); - const float z1 = line.origin().z() + t1 * line.direction().z(); - - const int r1 = clamp(int(x1), 0, 255) >> 2; - const int b1 = clamp(int(z1), 0, 255) >> 3; - - // Test one pixel around. - for (int i0 = -1; i0 <= 1; i0++) { - for (int j0 = -1; j0 <= 1; j0++) { - for (int i1 = -1; i1 <= 1; i1++) { - for (int j1 = -1; j1 <= 1; j1++) { - if( r0+i0 >= 0 && r0+i0 < 32 && r1+i1 >= 0 && r1+i1 < 32 && - b0+j0 >= 0 && b0+j0 < 32 && b1+j1 >= 0 && b1+j1 < 32 ) - { - block->col0.r = r0 + i0; - block->col0.g = g0; - block->col0.b = b0 + j0; - block->col1.r = r1 + i1; - block->col1.g = g1; - block->col1.b = b1 + j1; - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - } - } - } - } - } - } - } - } - } - else { - for(int b0 = b_min; b0 <= b_max; b0++) { - const float z0 = float((b0 << 3) | (b0 >> 2)); - const float t0 = (z0 - line.origin().z()) / line.direction().z(); - const float y0 = line.origin().y() + t0 * line.direction().y(); - const float x0 = line.origin().x() + t0 * line.direction().x(); - - const int g0 = clamp(int(y0), 0, 255) >> 2; - const int r0 = clamp(int(x0), 0, 255) >> 3; - - for(int b1 = b_min; b1 <= b_max; b1++) { - const float z1 = float((b1 << 3) | (b1 >> 2)); - const float t1 = (z1 - line.origin().z()) / line.direction().z(); - const float y1 = line.origin().y() + t1 * line.direction().y(); - const float x1 = line.origin().x() + t1 * line.direction().x(); - - const int g1 = clamp(int(y1), 0, 255) >> 2; - const int r1 = clamp(int(x1), 0, 255) >> 3; - - // Test one pixel around. - for (int i0 = -1; i0 <= 1; i0++) { - for (int j0 = -1; j0 <= 1; j0++) { - for (int i1 = -1; i1 <= 1; i1++) { - for (int j1 = -1; j1 <= 1; j1++) { - if( g0+i0 >= 0 && g0+i0 < 64 && g1+i1 >= 0 && g1+i1 < 64 && - r0+j0 >= 0 && r0+j0 < 32 && r1+j1 >= 0 && r1+j1 < 32 ) - { - block->col0.r = r0 + j0; - block->col0.g = g0 + i0; - block->col0.b = b0; - block->col1.r = r1 + j1; - block->col1.g = g1 + i1; - block->col1.b = b1; - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - } - } - } - } - } - } - } - } - } - block->col0 = best_col0; - block->col1 = best_col1; - block->evaluatePalette(palette); - - block->indices = computeIndices(rgba, palette); -} -// Improve palette iteratively using alternate 3d search as suggested by Dave Moore. -void nv::refineSolution_3dSearch(const ColorBlock & rgba, BlockDXT1 * block) -{ - Color32 palette[4]; - block->evaluatePalette(palette); - - uint best_error = paletteError(rgba, palette); - Color16 best_col0 = block->col0; - Color16 best_col1 = block->col1; - - const int W = 2; - - while(true) { - bool changed = false; - - const int r0 = best_col0.r; - const int g0 = best_col0.g; - const int b0 = best_col0.b; - - for(int z = -W; z <= W; z++) { - for(int y = -W; y <= W; y++) { - for(int x = -W; x <= W; x++) { - block->col0 = saturate16(r0 + x, g0 + y, b0 + z); - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - changed = true; - } - } - } - } - - const int r1 = best_col1.r; - const int g1 = best_col1.g; - const int b1 = best_col1.b; - - for(int z = -W; z <= W; z++) { - for(int y = -W; y <= W; y++) { - for(int x = -W; x <= W; x++) { - block->col1 = saturate16(r1 + x, g1 + y, b1 + z); - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - changed = true; - } - } - } - } - - if( !changed ) { - // Stop at local minima. - break; - } - } - - block->col0 = best_col0; - block->col1 = best_col1; - block->evaluatePalette(palette); - - block->indices = computeIndices(rgba, palette); -} - - -// Improve the palette iteratively using 6d search as suggested by Charles Bloom. -void nv::refineSolution_6dSearch(const ColorBlock & rgba, BlockDXT1 * block) -{ - Color32 palette[4]; - block->evaluatePalette(palette); - - uint best_error = paletteError(rgba, palette); - Color16 best_col0 = block->col0; - Color16 best_col1 = block->col1; - - const int W = 1; - - while(true) { - bool changed = false; - const int r0 = best_col0.r; - const int g0 = best_col0.g; - const int b0 = best_col0.b; - const int r1 = best_col1.r; - const int g1 = best_col1.g; - const int b1 = best_col1.b; - - for(int z0 = -W; z0 <= W; z0++) { - for(int y0 = -W; y0 <= W; y0++) { - for(int x0 = -W; x0 <= W; x0++) { - for(int z1 = -W; z1 <= W; z1++) { - for(int y1 = -W; y1 <= W; y1++) { - for(int x1 = -W; x1 <= W; x1++) { - - block->col0 = saturate16(r0 + x0, g0 + y0, b0 + z0); - block->col1 = saturate16(r1 + x1, g1 + y1, b1 + z1); - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - changed = true; - } - } - } - } - } - } - } - - if( !changed ) { - // Stop at local minima. - break; - } - } - - block->col0 = best_col0; - block->col1 = best_col1; - block->evaluatePalette(palette); - - block->indices = computeIndices(rgba, palette); -} - -// Improve the palette iteratively using alternate 1d search as suggested by Walt Donovan. -void nv::refineSolution_1dSearch(const ColorBlock & rgba, BlockDXT1 * block) -{ - Color32 palette[4]; - block->evaluatePalette(palette); - - uint best_error = paletteError(rgba, palette); - Color16 best_col0 = block->col0; - Color16 best_col1 = block->col1; - - const int W = 4; - - while(true) { - bool changed = false; - - const int r0 = best_col0.r; - const int g0 = best_col0.g; - const int b0 = best_col0.b; - - for(int z = -W; z <= W; z++) { - block->col0.b = clamp(b0 + z, 0, 31); - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - changed = true; - } - } - - for(int y = -W; y <= W; y++) { - block->col0.g = clamp(g0 + y, 0, 63); - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - changed = true; - } - } - - for(int x = -W; x <= W; x++) { - block->col0.r = clamp(r0 + x, 0, 31); - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - changed = true; - } - } - - - const int r1 = best_col1.r; - const int g1 = best_col1.g; - const int b1 = best_col1.b; - - for(int z = -W; z <= W; z++) { - block->col1.b = clamp(b1 + z, 0, 31); - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - changed = true; - } - } - - for(int y = -W; y <= W; y++) { - block->col1.g = clamp(g1 + y, 0, 63); - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - changed = true; - } - } - - for(int x = -W; x <= W; x++) { - block->col1.r = clamp(r1 + x, 0, 31); - block->evaluatePalette(palette); - - const uint error = paletteError(rgba, palette); - if(error < best_error) { - best_error = error; - best_col0 = block->col0; - best_col1 = block->col1; - changed = true; - } - } - - if( !changed ) { - // Stop at local minima. - break; - } - } - - block->col0 = best_col0; - block->col1 = best_col1; - block->evaluatePalette(palette); - - block->indices = computeIndices(rgba, palette); -} - -static uint computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block) -{ - Color32 colors[4]; - block->evaluatePalette4(colors); - - uint totalError = 0; - - for (uint i = 0; i < 16; i++) - { - uint8 green = rgba.color(i).g; - - uint besterror = 256*256; - uint best; - for(uint p = 0; p < 4; p++) - { - int d = colors[p].g - green; - uint error = d * d; - - if (error < besterror) - { - besterror = error; - best = p; - } - } - - totalError += besterror; - } - - return totalError; -} - -// Brute force compressor for DXT5n -void nv::compressGreenBlock_BruteForce(const ColorBlock & rgba, BlockDXT1 * block) -{ - nvDebugCheck(block != NULL); - - uint8 ming = 63; - uint8 maxg = 0; - - // Get min/max green. - for (uint i = 0; i < 16; i++) - { - uint8 green = rgba.color(i).g >> 2; - ming = min(ming, green); - maxg = max(maxg, green); - } - - block->col0.r = 31; - block->col1.r = 31; - block->col0.g = maxg; - block->col1.g = ming; - block->col0.b = 0; - block->col1.b = 0; - - if (maxg - ming > 4) - { - int besterror = computeGreenError(rgba, block); - int bestg0 = maxg; - int bestg1 = ming; - - for (int g0 = ming+5; g0 < maxg; g0++) - { - for (int g1 = ming; g1 < g0-4; g1++) - { - if ((maxg-g0) + (g1-ming) > besterror) - continue; - - block->col0.g = g0; - block->col1.g = g1; - int error = computeGreenError(rgba, block); - - if (error < besterror) - { - besterror = error; - bestg0 = g0; - bestg1 = g1; - } - } - } - - block->col0.g = bestg0; - block->col1.g = bestg1; - } - - Color32 palette[4]; - block->evaluatePalette(palette); - block->indices = computeIndices(rgba, palette); -} - - - -uint nv::blockError(const ColorBlock & rgba, const BlockDXT1 & block) -{ - Color32 palette[4]; - block.evaluatePalette(palette); - - VectorColor vcolors[4]; - vcolors[0] = loadColor(palette[0]); - vcolors[1] = loadColor(palette[1]); - vcolors[2] = loadColor(palette[2]); - vcolors[3] = loadColor(palette[3]); - - uint error = 0; - for(uint i = 0; i < 16; i++) { - const VectorColor vcolor = loadColor(rgba.color(i)); - - int idx = (block.indices >> (2 * i)) & 3; - - uint d = colorDistance(vcolor, vcolors[idx]); - error += d; - } - - //nvDebugCheck(error == paletteError(rgba, palette)); - - vectorEnd(); - return error; -} - - -uint nv::blockError(const ColorBlock & rgba, const AlphaBlockDXT5 & block) -{ - uint8 palette[8]; - block.evaluatePalette(palette); - - uint8 indices[16]; - block.indices(indices); - - uint error = 0; - for(uint i = 0; i < 16; i++) { - int d = palette[indices[i]] - rgba.color(i).a; - error += uint(d * d); - } - - return error; -} - - - -void nv::optimizeEndPoints(const ColorBlock & rgba, BlockDXT1 * block) -{ - float alpha2_sum = 0.0f; - float beta2_sum = 0.0f; - float alphabeta_sum = 0.0f; - Vector3 alphax_sum(zero); - Vector3 betax_sum(zero); - - for( int i = 0; i < 16; ++i ) - { - const uint bits = block->indices >> (2 * i); - - float beta = float(bits & 1); - if (bits & 2) beta = (1 + beta) / 3.0f; - float alpha = 1.0f - beta; - - const Vector3 x = toVector4(rgba.color(i)).xyz(); - - alpha2_sum += alpha * alpha; - beta2_sum += beta * beta; - alphabeta_sum += alpha * beta; - alphax_sum += alpha * x; - betax_sum += beta * x; - } - - float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - - Vector3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - Vector3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; - - Vector3 zero(0, 0, 0); - Vector3 one(1, 1, 1); - a = min(one, max(zero, a)); - b = min(one, max(zero, b)); - - BlockDXT1 B; - - // Round a,b to 565. - B.col0.r = uint16(a.x() * 31); - B.col0.g = uint16(a.y() * 63); - B.col0.b = uint16(a.z() * 31); - B.col1.r = uint16(b.x() * 31); - B.col1.g = uint16(b.y() * 63); - B.col1.b = uint16(b.z() * 31); - B.indices = block->indices; - - // Force 4 color mode. - if (B.col0.u < B.col1.u) - { - swap(B.col0.u, B.col1.u); - B.indices ^= 0x55555555; - } - else if (B.col0.u == B.col1.u) - { - block->indices = 0; - } - - if (blockError(rgba, B) < blockError(rgba, *block)) - { - *block = B; - } -} - // Encode DXT3 block. void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT3 * block) { @@ -1284,161 +452,5 @@ uint nv::compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * bloc return computeAlphaIndices(rgba, block); } -static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block) -{ - float alpha2_sum = 0; - float beta2_sum = 0; - float alphabeta_sum = 0; - float alphax_sum = 0; - float betax_sum = 0; - - for (int i = 0; i < 16; i++) - { - uint idx = block->index(i); - float alpha; - if (idx < 2) alpha = 1.0f - idx; - else alpha = (8.0f - idx) / 7.0f; - - float beta = 1 - alpha; - - alpha2_sum += alpha * alpha; - beta2_sum += beta * beta; - alphabeta_sum += alpha * beta; - alphax_sum += alpha * rgba.color(i).a; - betax_sum += beta * rgba.color(i).a; - } - - const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - - float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; - - uint alpha0 = uint(min(max(a, 0.0f), 255.0f)); - uint alpha1 = uint(min(max(b, 0.0f), 255.0f)); - - if (alpha0 < alpha1) - { - swap(alpha0, alpha1); - - // Flip indices: - for (int i = 0; i < 16; i++) - { - uint idx = block->index(i); - if (idx < 2) block->setIndex(i, 1 - idx); - else block->setIndex(i, 9 - idx); - } - } - else if (alpha0 == alpha1) - { - for (int i = 0; i < 16; i++) - { - block->setIndex(i, 0); - } - } - - block->alpha0 = alpha0; - block->alpha1 = alpha1; -} - - -static void optimizeAlpha6(const ColorBlock & rgba, AlphaBlockDXT5 * block) -{ - float alpha2_sum = 0; - float beta2_sum = 0; - float alphabeta_sum = 0; - float alphax_sum = 0; - float betax_sum = 0; - - for (int i = 0; i < 16; i++) - { - uint8 x = rgba.color(i).a; - if (x == 0 || x == 255) continue; - - uint bits = block->index(i); - if (bits == 6 || bits == 7) continue; - - float alpha; - if (bits == 0) alpha = 1.0f; - else if (bits == 1) alpha = 0.0f; - else alpha = (6.0f - block->index(i)) / 5.0f; - - float beta = 1 - alpha; - - alpha2_sum += alpha * alpha; - beta2_sum += beta * beta; - alphabeta_sum += alpha * beta; - alphax_sum += alpha * x; - betax_sum += beta * x; - } - - const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - - float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; - - uint alpha0 = uint(min(max(a, 0.0f), 255.0f)); - uint alpha1 = uint(min(max(b, 0.0f), 255.0f)); - - if (alpha0 > alpha1) - { - swap(alpha0, alpha1); - } - - block->alpha0 = alpha0; - block->alpha1 = alpha1; -} - - - -static bool sameIndices(const AlphaBlockDXT5 & block0, const AlphaBlockDXT5 & block1) -{ - const uint64 mask = ~uint64(0xFFFF); - return (block0.u | mask) == (block1.u | mask); -} -uint nv::compressBlock_Iterative(const ColorBlock & rgba, AlphaBlockDXT5 * resultblock) -{ - uint8 alpha0 = 0; - uint8 alpha1 = 255; - - // Get min/max alpha. - for (uint i = 0; i < 16; i++) - { - uint8 alpha = rgba.color(i).a; - alpha0 = max(alpha0, alpha); - alpha1 = min(alpha1, alpha); - } - - AlphaBlockDXT5 block; - block.alpha0 = alpha0 - (alpha0 - alpha1) / 34; - block.alpha1 = alpha1 + (alpha0 - alpha1) / 34; - uint besterror = computeAlphaIndices(rgba, &block); - - AlphaBlockDXT5 bestblock = block; - - while(true) - { - optimizeAlpha8(rgba, &block); - uint error = computeAlphaIndices(rgba, &block); - - if (error >= besterror) - { - // No improvement, stop. - break; - } - if (sameIndices(block, bestblock)) - { - bestblock = block; - break; - } - - besterror = error; - bestblock = block; - }; - - // Copy best block to result; - *resultblock = bestblock; - - return besterror; -} diff --git a/src/nvtt/FastCompressDXT.h b/src/nvtt/FastCompressDXT.h index be9f619..4eca83d 100644 --- a/src/nvtt/FastCompressDXT.h +++ b/src/nvtt/FastCompressDXT.h @@ -38,40 +38,37 @@ namespace nv // Color compression: // Compressor that uses the extremes of the luminance axis. - void compressBlock_DiameterAxis(const ColorBlock & rgba, BlockDXT1 * block); +// void compressBlock_DiameterAxis(const ColorBlock & rgba, BlockDXT1 * block); // Compressor that uses the extremes of the luminance axis. - void compressBlock_LuminanceAxis(const ColorBlock & rgba, BlockDXT1 * block); +// void compressBlock_LuminanceAxis(const ColorBlock & rgba, BlockDXT1 * block); // Compressor that uses bounding box. void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block); // Compressor that uses bounding box and takes alpha into account. - void compressBlock_BoundsRangeAlpha(const ColorBlock & rgba, BlockDXT1 * block); - - // Compressor that uses the best fit axis. - void compressBlock_BestFitAxis(const ColorBlock & rgba, BlockDXT1 * block); +// void compressBlock_BoundsRangeAlpha(const ColorBlock & rgba, BlockDXT1 * block); // Simple, but slow compressor that tests all color pairs. - void compressBlock_TestAllPairs(const ColorBlock & rgba, BlockDXT1 * block); +// void compressBlock_TestAllPairs(const ColorBlock & rgba, BlockDXT1 * block); // Brute force 6d search along the best fit axis. - void compressBlock_AnalyzeBestFitAxis(const ColorBlock & rgba, BlockDXT1 * block); +// void compressBlock_AnalyzeBestFitAxis(const ColorBlock & rgba, BlockDXT1 * block); // Spatial greedy search. - void refineSolution_1dSearch(const ColorBlock & rgba, BlockDXT1 * block); - void refineSolution_3dSearch(const ColorBlock & rgba, BlockDXT1 * block); - void refineSolution_6dSearch(const ColorBlock & rgba, BlockDXT1 * block); +// void refineSolution_1dSearch(const ColorBlock & rgba, BlockDXT1 * block); +// void refineSolution_3dSearch(const ColorBlock & rgba, BlockDXT1 * block); +// void refineSolution_6dSearch(const ColorBlock & rgba, BlockDXT1 * block); // Brute force compressor for DXT5n - void compressGreenBlock_BruteForce(const ColorBlock & rgba, BlockDXT1 * block); +// void compressGreenBlock_BruteForce(const ColorBlock & rgba, BlockDXT1 * block); // Minimize error of the endpoints. - void optimizeEndPoints(const ColorBlock & rgba, BlockDXT1 * block); +// void optimizeEndPoints(const ColorBlock & rgba, BlockDXT1 * block); - uint blockError(const ColorBlock & rgba, const BlockDXT1 & block); - uint blockError(const ColorBlock & rgba, const AlphaBlockDXT5 & block); +// uint blockError(const ColorBlock & rgba, const BlockDXT1 & block); +// uint blockError(const ColorBlock & rgba, const AlphaBlockDXT5 & block); // Alpha compression: void compressBlock(const ColorBlock & rgba, AlphaBlockDXT3 * block); @@ -80,7 +77,7 @@ namespace nv uint compressBlock_BoundsRange(const ColorBlock & rgba, AlphaBlockDXT5 * block); uint compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * block); - uint compressBlock_Iterative(const ColorBlock & rgba, AlphaBlockDXT5 * block); +// uint compressBlock_Iterative(const ColorBlock & rgba, AlphaBlockDXT5 * block); } // nv namespace diff --git a/src/nvtt/QuickCompressDXT.cpp b/src/nvtt/QuickCompressDXT.cpp index fda8165..5fe51ac 100644 --- a/src/nvtt/QuickCompressDXT.cpp +++ b/src/nvtt/QuickCompressDXT.cpp @@ -288,62 +288,219 @@ static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock) dxtBlock->indices = computeIndices3(block, a, b); }*/ - -static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block) +namespace { - float alpha2_sum = 0; - float beta2_sum = 0; - float alphabeta_sum = 0; - float alphax_sum = 0; - float betax_sum = 0; + static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block) + { + nvDebugCheck(block != NULL); - for (int i = 0; i < 16; i++) + int palette[4]; + palette[0] = (block->col0.g << 2) | (block->col0.g >> 4); + palette[1] = (block->col1.g << 2) | (block->col1.g >> 4); + palette[2] = (2 * palette[0] + palette[1]) / 3; + palette[3] = (2 * palette[1] + palette[0]) / 3; + + int totalError = 0; + + for (int i = 0; i < 16; i++) + { + const int green = rgba.color(i).g; + + int error = abs(green - palette[0]); + error = min(error, abs(green - palette[1])); + error = min(error, abs(green - palette[2])); + error = min(error, abs(green - palette[3])); + + totalError += error; + } + + return totalError; + } + + static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4]) { - uint idx = block->index(i); - float alpha; - if (idx < 2) alpha = 1.0f - idx; - else alpha = (8.0f - idx) / 7.0f; - - float beta = 1 - alpha; + const int color0 = palette[0].g; + const int color1 = palette[1].g; + const int color2 = palette[2].g; + const int color3 = palette[3].g; - alpha2_sum += alpha * alpha; - beta2_sum += beta * beta; - alphabeta_sum += alpha * beta; - alphax_sum += alpha * rgba.color(i).a; - betax_sum += beta * rgba.color(i).a; + uint indices = 0; + for (int i = 0; i < 16; i++) + { + const int color = rgba.color(i).g; + + uint d0 = abs(color0 - color); + uint d1 = abs(color1 - color); + uint d2 = abs(color2 - color); + uint d3 = abs(color3 - color); + + uint b0 = d0 > d3; + uint b1 = d1 > d2; + uint b2 = d0 > d2; + uint b3 = d1 > d3; + uint b4 = d2 > d3; + + uint x0 = b1 & b2; + uint x1 = b0 & b3; + uint x2 = b0 & b4; + + indices |= (x2 | ((x0 | x1) << 1)) << (2 * i); + } + + return indices; } - const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); +} // namespace - float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; +namespace +{ + + static uint computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block) + { + uint8 alphas[8]; + block->evaluatePalette(alphas); - uint alpha0 = uint(min(max(a, 0.0f), 255.0f)); - uint alpha1 = uint(min(max(b, 0.0f), 255.0f)); + uint totalError = 0; - if (alpha0 < alpha1) + for (uint i = 0; i < 16; i++) + { + uint8 alpha = rgba.color(i).a; + + uint besterror = 256*256; + uint best = 8; + for(uint p = 0; p < 8; p++) + { + int d = alphas[p] - alpha; + uint error = d * d; + + if (error < besterror) + { + besterror = error; + best = p; + } + } + nvDebugCheck(best < 8); + + totalError += besterror; + block->setIndex(i, best); + } + + return totalError; + } + + static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block) { - swap(alpha0, alpha1); - - // Flip indices: + float alpha2_sum = 0; + float beta2_sum = 0; + float alphabeta_sum = 0; + float alphax_sum = 0; + float betax_sum = 0; + for (int i = 0; i < 16; i++) { uint idx = block->index(i); - if (idx < 2) block->setIndex(i, 1 - idx); - else block->setIndex(i, 9 - idx); + float alpha; + if (idx < 2) alpha = 1.0f - idx; + else alpha = (8.0f - idx) / 7.0f; + + float beta = 1 - alpha; + + alpha2_sum += alpha * alpha; + beta2_sum += beta * beta; + alphabeta_sum += alpha * beta; + alphax_sum += alpha * rgba.color(i).a; + betax_sum += beta * rgba.color(i).a; } + + const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + + float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; + float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; + + uint alpha0 = uint(min(max(a, 0.0f), 255.0f)); + uint alpha1 = uint(min(max(b, 0.0f), 255.0f)); + + if (alpha0 < alpha1) + { + swap(alpha0, alpha1); + + // Flip indices: + for (int i = 0; i < 16; i++) + { + uint idx = block->index(i); + if (idx < 2) block->setIndex(i, 1 - idx); + else block->setIndex(i, 9 - idx); + } + } + else if (alpha0 == alpha1) + { + for (int i = 0; i < 16; i++) + { + block->setIndex(i, 0); + } + } + + block->alpha0 = alpha0; + block->alpha1 = alpha1; } - else if (alpha0 == alpha1) + + /* + static void optimizeAlpha6(const ColorBlock & rgba, AlphaBlockDXT5 * block) { + float alpha2_sum = 0; + float beta2_sum = 0; + float alphabeta_sum = 0; + float alphax_sum = 0; + float betax_sum = 0; + for (int i = 0; i < 16; i++) { - block->setIndex(i, 0); + uint8 x = rgba.color(i).a; + if (x == 0 || x == 255) continue; + + uint bits = block->index(i); + if (bits == 6 || bits == 7) continue; + + float alpha; + if (bits == 0) alpha = 1.0f; + else if (bits == 1) alpha = 0.0f; + else alpha = (6.0f - block->index(i)) / 5.0f; + + float beta = 1 - alpha; + + alpha2_sum += alpha * alpha; + beta2_sum += beta * beta; + alphabeta_sum += alpha * beta; + alphax_sum += alpha * x; + betax_sum += beta * x; + } + + const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + + float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; + float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; + + uint alpha0 = uint(min(max(a, 0.0f), 255.0f)); + uint alpha1 = uint(min(max(b, 0.0f), 255.0f)); + + if (alpha0 > alpha1) + { + swap(alpha0, alpha1); } + + block->alpha0 = alpha0; + block->alpha1 = alpha1; } + */ + + static bool sameIndices(const AlphaBlockDXT5 & block0, const AlphaBlockDXT5 & block1) + { + const uint64 mask = ~uint64(0xFFFF); + return (block0.u | mask) == (block1.u | mask); + } + +} // namespace - block->alpha0 = alpha0; - block->alpha1 = alpha1; -} @@ -436,66 +593,6 @@ void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock) } -static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block) -{ - nvDebugCheck(block != NULL); - - int palette[4]; - palette[0] = (block->col0.g << 2) | (block->col0.g >> 4); - palette[1] = (block->col1.g << 2) | (block->col1.g >> 4); - palette[2] = (2 * palette[0] + palette[1]) / 3; - palette[3] = (2 * palette[1] + palette[0]) / 3; - - int totalError = 0; - - for (int i = 0; i < 16; i++) - { - const int green = rgba.color(i).g; - - int error = abs(green - palette[0]); - error = min(error, abs(green - palette[1])); - error = min(error, abs(green - palette[2])); - error = min(error, abs(green - palette[3])); - - totalError += error; - } - - return totalError; -} - -static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4]) -{ - const int color0 = palette[0].g; - const int color1 = palette[1].g; - const int color2 = palette[2].g; - const int color3 = palette[3].g; - - uint indices = 0; - for (int i = 0; i < 16; i++) - { - const int color = rgba.color(i).g; - - uint d0 = abs(color0 - color); - uint d1 = abs(color1 - color); - uint d2 = abs(color2 - color); - uint d3 = abs(color3 - color); - - uint b0 = d0 > d3; - uint b1 = d1 > d2; - uint b2 = d0 > d2; - uint b3 = d1 > d3; - uint b4 = d2 > d3; - - uint x0 = b1 & b2; - uint x1 = b0 & b3; - uint x2 = b0 & b4; - - indices |= (x2 | ((x0 | x1) << 1)) << (2 * i); - } - - return indices; -} - // Brute force green channel compressor void QuickCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block) { @@ -558,6 +655,7 @@ void QuickCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block) void QuickCompress::compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock) { + // @@ Round instead of truncate. When rounding take into account bit expansion. dxtBlock->alpha0 = rgba.color(0).a >> 4; dxtBlock->alpha1 = rgba.color(1).a >> 4; dxtBlock->alpha2 = rgba.color(2).a >> 4; @@ -582,9 +680,49 @@ void QuickCompress::compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock) compressDXT3A(rgba, &dxtBlock->alpha); } + void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock) { - // @@ TODO + uint8 alpha0 = 0; + uint8 alpha1 = 255; + + // Get min/max alpha. + for (uint i = 0; i < 16; i++) + { + uint8 alpha = rgba.color(i).a; + alpha0 = max(alpha0, alpha); + alpha1 = min(alpha1, alpha); + } + + AlphaBlockDXT5 block; + block.alpha0 = alpha0 - (alpha0 - alpha1) / 34; + block.alpha1 = alpha1 + (alpha0 - alpha1) / 34; + uint besterror = computeAlphaIndices(rgba, &block); + + AlphaBlockDXT5 bestblock = block; + + while(true) + { + optimizeAlpha8(rgba, &block); + uint error = computeAlphaIndices(rgba, &block); + + if (error >= besterror) + { + // No improvement, stop. + break; + } + if (sameIndices(block, bestblock)) + { + bestblock = block; + break; + } + + besterror = error; + bestblock = block; + }; + + // Copy best block to result; + *dxtBlock = bestblock; } void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock) diff --git a/src/nvtt/cuda/CudaCompressDXT.cpp b/src/nvtt/cuda/CudaCompressDXT.cpp index 5fc7f8c..193cf03 100644 --- a/src/nvtt/cuda/CudaCompressDXT.cpp +++ b/src/nvtt/cuda/CudaCompressDXT.cpp @@ -24,13 +24,12 @@ #include #include #include -#include #include #include #include #include #include -#include +#include #include "CudaCompressDXT.h" #include "CudaUtils.h" @@ -228,7 +227,7 @@ void CudaCompressor::compressDXT3(const Image * image, const OutputOptions::Priv for (uint i = 0; i < count; i++) { ColorBlock rgba(blockLinearImage + (bn + i) * 16); - compressBlock(rgba, alphaBlocks + i); + QuickCompress::compressDXT5A(rgba, alphaBlocks + i); } // Check for errors. @@ -312,7 +311,7 @@ void CudaCompressor::compressDXT5(const Image * image, const OutputOptions::Priv for (uint i = 0; i < count; i++) { ColorBlock rgba(blockLinearImage + (bn + i) * 16); - compressBlock_Iterative(rgba, alphaBlocks + i); + QuickCompress::compressDXT5A(rgba, alphaBlocks + i); } // Check for errors.