// Copyright NVIDIA Corporation 2007 -- Ignacio Castano // // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation // files (the "Software"), to deal in the Software without // restriction, including without limitation the rights to use, // copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following // conditions: // // The above copyright notice and this permission notice shall be // included in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. #include #include #include #include "FastCompressDXT.h" #if defined(__SSE2__) #include #endif #if defined(__SSE__) #include #endif #if defined(__MMX__) #include #endif #undef __VEC__ #if defined(__VEC__) #include #undef bool #endif // Online Resources: // - http://www.jasondorie.com/ImageLib.zip // - http://homepage.hispeed.ch/rscheidegger/dri_experimental/s3tc_index.html // - http://www.sjbrown.co.uk/?article=dxt using namespace nv; #if defined(__SSE2__) && 0 // @@ TODO typedef __m128i VectorColor; inline static __m128i loadColor(Color32 c) { return ...; } inline static __m128i absoluteDifference(__m128i a, __m128i b) { return ...; } inline uint colorDistance(__m128i a, __m128i b) { return 0; } #elif defined(__MMX__) && 0 typedef __m64 VectorColor; inline static __m64 loadColor(Color32 c) { return _mm_unpacklo_pi8(_mm_cvtsi32_si64(c), _mm_setzero_si64()); } inline static __m64 absoluteDifference(__m64 a, __m64 b) { // = |a-b| or |b-a| return _mm_or_si64(_mm_subs_pu16(a, b), _mm_subs_pu16(b, a)); } inline uint colorDistance(__m64 a, __m64 b) { union { __m64 v; uint16 part[4]; } s; s.v = absoluteDifference(a, b); // @@ This is very slow! return s.part[0] + s.part[1] + s.part[2] + s.part[3]; } #define vectorEnd _mm_empty #elif defined(__VEC__) typedef vector signed int VectorColor; inline static vector signed int loadColor(Color32 c) { return (vector signed int) (c.r, c.g, c.b, c.a); } // Get the absolute distance between the given colors. inline static uint colorDistance(vector signed int c0, vector signed int c1) { int result; vector signed int v = vec_sums(vec_abs(vec_sub(c0, c1)), (vector signed int)0); vec_ste(vec_splat(v, 3), 0, &result); return result; } inline void vectorEnd() { } #else typedef Color32 VectorColor; inline static Color32 loadColor(Color32 c) { return c; } inline static Color32 premultiplyAlpha(Color32 c) { Color32 pm; pm.r = (c.r * c.a) >> 8; pm.g = (c.g * c.a) >> 8; pm.b = (c.b * c.a) >> 8; pm.a = c.a; return pm; } inline static uint sqr(uint s) { return s*s; } // Get the absolute distance between the given colors. inline static uint colorDistance(Color32 c0, Color32 c1) { return sqr(c0.r - c1.r) + sqr(c0.g - c1.g) + sqr(c0.b - c1.b); //return abs(c0.r - c1.r) + abs(c0.g - c1.g) + abs(c0.b - c1.b); } inline void vectorEnd() { } #endif inline static uint computeIndices(const ColorBlock & rgba, const Color32 palette[4]) { const VectorColor vcolor0 = loadColor(palette[0]); const VectorColor vcolor1 = loadColor(palette[1]); const VectorColor vcolor2 = loadColor(palette[2]); const VectorColor vcolor3 = loadColor(palette[3]); uint indices = 0; for(int i = 0; i < 16; i++) { const VectorColor vcolor = loadColor(rgba.color(i)); uint d0 = colorDistance(vcolor0, vcolor); uint d1 = colorDistance(vcolor1, vcolor); uint d2 = colorDistance(vcolor2, vcolor); uint d3 = colorDistance(vcolor3, vcolor); uint b0 = d0 > d3; uint b1 = d1 > d2; uint b2 = d0 > d2; uint b3 = d1 > d3; uint b4 = d2 > d3; uint x0 = b1 & b2; uint x1 = b0 & b3; uint x2 = b0 & b4; indices |= (x2 | ((x0 | x1) << 1)) << (2 * i); } vectorEnd(); return indices; } // Compressor that uses bounding box. void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block) { Color32 c0, c1; rgba.boundsRange(&c1, &c0); block->col0 = toColor16(c0); block->col1 = toColor16(c1); nvDebugCheck(block->col0.u > block->col1.u); // Use 4 color mode only. //if (block->col0.u < block->col1.u) { // swap(block->col0.u, block->col1.u); //} Color32 palette[4]; block->evaluatePalette4(palette); block->indices = computeIndices(rgba, palette); } // Encode DXT3 block. void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT3 * block) { compressBlock_BoundsRange(rgba, &block->color); compressBlock(rgba, &block->alpha); } // Encode DXT3 alpha block. void nv::compressBlock(const ColorBlock & rgba, AlphaBlockDXT3 * block) { block->alpha0 = rgba.color(0).a >> 4; block->alpha1 = rgba.color(1).a >> 4; block->alpha2 = rgba.color(2).a >> 4; block->alpha3 = rgba.color(3).a >> 4; block->alpha4 = rgba.color(4).a >> 4; block->alpha5 = rgba.color(5).a >> 4; block->alpha6 = rgba.color(6).a >> 4; block->alpha7 = rgba.color(7).a >> 4; block->alpha8 = rgba.color(8).a >> 4; block->alpha9 = rgba.color(9).a >> 4; block->alphaA = rgba.color(10).a >> 4; block->alphaB = rgba.color(11).a >> 4; block->alphaC = rgba.color(12).a >> 4; block->alphaD = rgba.color(13).a >> 4; block->alphaE = rgba.color(14).a >> 4; block->alphaF = rgba.color(15).a >> 4; } static uint computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block) { uint8 alphas[8]; block->evaluatePalette(alphas); uint totalError = 0; for (uint i = 0; i < 16; i++) { uint8 alpha = rgba.color(i).a; uint besterror = 256*256; uint best = 8; for(uint p = 0; p < 8; p++) { int d = alphas[p] - alpha; uint error = d * d; if (error < besterror) { besterror = error; best = p; } } nvDebugCheck(best < 8); totalError += besterror; block->setIndex(i, best); } return totalError; } static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block) { uint8 alphas[8]; block->evaluatePalette(alphas); uint totalError = 0; for (uint i = 0; i < 16; i++) { uint8 alpha = rgba.color(i).a; uint besterror = 256*256; uint best; for(uint p = 0; p < 8; p++) { int d = alphas[p] - alpha; uint error = d * d; if (error < besterror) { besterror = error; best = p; } } totalError += besterror; } return totalError; } void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT5 * block) { Color32 c0, c1; rgba.boundsRangeAlpha(&c1, &c0); block->color.col0 = toColor16(c0); block->color.col1 = toColor16(c1); nvDebugCheck(block->color.col0.u > block->color.col1.u); Color32 palette[4]; block->color.evaluatePalette4(palette); block->color.indices = computeIndices(rgba, palette); nvDebugCheck(c0.a <= c1.a); block->alpha.alpha0 = c0.a; block->alpha.alpha1 = c1.a; computeAlphaIndices(rgba, &block->alpha); } uint nv::compressBlock_BoundsRange(const ColorBlock & rgba, AlphaBlockDXT5 * block) { uint8 alpha0 = 0; uint8 alpha1 = 255; // Get min/max alpha. for (uint i = 0; i < 16; i++) { uint8 alpha = rgba.color(i).a; alpha0 = max(alpha0, alpha); alpha1 = min(alpha1, alpha); } alpha0 = alpha0 - (alpha0 - alpha1) / 32; alpha1 = alpha1 + (alpha0 - alpha1) / 32; AlphaBlockDXT5 block0; block0.alpha0 = alpha0; block0.alpha1 = alpha1; uint error0 = computeAlphaIndices(rgba, &block0); AlphaBlockDXT5 block1; block1.alpha0 = alpha1; block1.alpha1 = alpha0; uint error1 = computeAlphaIndices(rgba, &block1); if (error0 < error1) { *block = block0; return error0; } else { *block = block1; return error1; } } uint nv::compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * block) { uint8 mina = 255; uint8 maxa = 0; // Get min/max alpha. for (uint i = 0; i < 16; i++) { uint8 alpha = rgba.color(i).a; mina = min(mina, alpha); maxa = max(maxa, alpha); } block->alpha0 = maxa; block->alpha1 = mina; /*int centroidDist = 256; int centroid; // Get the closest to the centroid. for (uint i = 0; i < 16; i++) { uint8 alpha = rgba.color(i).a; int dist = abs(alpha - (maxa + mina) / 2); if (dist < centroidDist) { centroidDist = dist; centroid = alpha; } }*/ if (maxa - mina > 8) { int besterror = computeAlphaError(rgba, block); int besta0 = maxa; int besta1 = mina; for (int a0 = mina+9; a0 < maxa; a0++) { for (int a1 = mina; a1 < a0-8; a1++) //for (int a1 = mina; a1 < maxa; a1++) { //nvCheck(abs(a1-a0) > 8); //if (abs(a0 - a1) < 8) continue; //if ((maxa-a0) + (a1-mina) + min(abs(centroid-a0), abs(centroid-a1)) > besterror) if ((maxa-a0) + (a1-mina) > besterror) continue; block->alpha0 = a0; block->alpha1 = a1; int error = computeAlphaError(rgba, block); if (error < besterror) { besterror = error; besta0 = a0; besta1 = a1; } } } block->alpha0 = besta0; block->alpha1 = besta1; } return computeAlphaIndices(rgba, block); }