457 lines
9.8 KiB
C++
457 lines
9.8 KiB
C++
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person
|
|
// obtaining a copy of this software and associated documentation
|
|
// files (the "Software"), to deal in the Software without
|
|
// restriction, including without limitation the rights to use,
|
|
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
// copies of the Software, and to permit persons to whom the
|
|
// Software is furnished to do so, subject to the following
|
|
// conditions:
|
|
//
|
|
// The above copyright notice and this permission notice shall be
|
|
// included in all copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
// OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
#include <nvmath/Color.h>
|
|
#include <nvimage/ColorBlock.h>
|
|
#include <nvimage/BlockDXT.h>
|
|
|
|
#include "FastCompressDXT.h"
|
|
|
|
#if defined(__SSE2__)
|
|
#include <emmintrin.h>
|
|
#endif
|
|
|
|
#if defined(__SSE__)
|
|
#include <xmmintrin.h>
|
|
#endif
|
|
|
|
#if defined(__MMX__)
|
|
#include <mmintrin.h>
|
|
#endif
|
|
|
|
#undef __VEC__
|
|
#if defined(__VEC__)
|
|
#include <altivec.h>
|
|
#undef bool
|
|
#endif
|
|
// Online Resources:
|
|
// - http://www.jasondorie.com/ImageLib.zip
|
|
// - http://homepage.hispeed.ch/rscheidegger/dri_experimental/s3tc_index.html
|
|
// - http://www.sjbrown.co.uk/?article=dxt
|
|
|
|
using namespace nv;
|
|
|
|
|
|
#if defined(__SSE2__) && 0
|
|
|
|
// @@ TODO
|
|
|
|
typedef __m128i VectorColor;
|
|
|
|
// SSE2 variant of loadColor: NOT IMPLEMENTED. The body is a placeholder
// (`...` is not valid C++); it is never compiled because the enclosing
// conditional is `#if defined(__SSE2__) && 0`.
inline static __m128i loadColor(Color32 c)
|
|
{
|
|
return ...;
|
|
}
|
|
|
|
// SSE2 variant of absoluteDifference: NOT IMPLEMENTED. The body is a
// placeholder (`...` is not valid C++); it is never compiled because the
// enclosing conditional is `#if defined(__SSE2__) && 0`.
inline static __m128i absoluteDifference(__m128i a, __m128i b)
|
|
{
|
|
return ...;
|
|
}
|
|
|
|
// SSE2 variant of colorDistance: stub that ignores both arguments and always
// returns 0. It is never compiled because the enclosing conditional is
// `#if defined(__SSE2__) && 0`.
inline uint colorDistance(__m128i a, __m128i b)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
#elif defined(__MMX__) && 0
|
|
|
|
typedef __m64 VectorColor;
|
|
|
|
// MMX variant: spread the low four bytes of the color across four 16-bit
// lanes by interleaving them with zero bytes.
inline static __m64 loadColor(Color32 c)
{
	const __m64 packed = _mm_cvtsi32_si64(c);
	const __m64 zero = _mm_setzero_si64();
	return _mm_unpacklo_pi8(packed, zero);
}
|
|
|
|
// MMX variant: per-lane |a - b| for four unsigned 16-bit lanes.
// Unsigned saturating subtraction clamps the "wrong direction" result to 0,
// so OR-ing (a - b) with (b - a) leaves the non-zero difference in each lane.
inline static __m64 absoluteDifference(__m64 a, __m64 b)
{
	const __m64 aMinusB = _mm_subs_pu16(a, b);
	const __m64 bMinusA = _mm_subs_pu16(b, a);
	return _mm_or_si64(aMinusB, bMinusA);
}
|
|
|
|
// MMX variant: sum of the per-lane absolute differences of a and b.
inline uint colorDistance(__m64 a, __m64 b)
{
	// Reinterpret the MMX register as four 16-bit lanes so they can be read
	// back with scalar code.
	union {
		__m64 v;
		uint16 part[4];
	} lanes;

	lanes.v = absoluteDifference(a, b);

	// @@ This is very slow!
	uint total = 0;
	for (int k = 0; k < 4; k++) {
		total += lanes.part[k];
	}
	return total;
}
|
|
|
|
#define vectorEnd _mm_empty
|
|
|
|
#elif defined(__VEC__)
|
|
|
|
typedef vector signed int VectorColor;
|
|
|
|
// AltiVec variant: place the r, g, b, a channels in the four 32-bit lanes of
// a vector, in that order.
inline static vector signed int loadColor(Color32 c)
{
	const vector signed int channels = (vector signed int) (c.r, c.g, c.b, c.a);
	return channels;
}
|
|
|
|
// Get the absolute distance between the given colors.
|
|
inline static uint colorDistance(vector signed int c0, vector signed int c1)
|
|
{
|
|
int result;
|
|
vector signed int v = vec_sums(vec_abs(vec_sub(c0, c1)), (vector signed int)0);
|
|
vec_ste(vec_splat(v, 3), 0, &result);
|
|
return result;
|
|
}
|
|
|
|
// AltiVec variant: nothing to clean up after the vector code; provided so
// callers can invoke vectorEnd() unconditionally.
inline void vectorEnd()
{
	// Intentionally empty.
}
|
|
|
|
#else
|
|
|
|
typedef Color32 VectorColor;
|
|
|
|
// Scalar fallback: VectorColor is Color32 itself, so loading is the identity.
inline static Color32 loadColor(Color32 c)
{
	return c;
}
|
|
|
|
// Return a copy of c whose r, g and b channels are scaled by its alpha.
// Alpha itself is copied unchanged.
// NOTE: the scale is (channel * alpha) >> 8, i.e. a divide by 256 rather than
// 255, so (assuming 8-bit channels) a full-intensity channel at full alpha
// comes out as 254.
inline static Color32 premultiplyAlpha(Color32 c)
{
	const int alpha = c.a;

	Color32 result;
	result.a = c.a;
	result.r = (c.r * alpha) >> 8;
	result.g = (c.g * alpha) >> 8;
	result.b = (c.b * alpha) >> 8;
	return result;
}
|
|
|
|
// Square of s, computed in unsigned arithmetic (wraps modulo 2^32).
// Callers pass signed differences converted to uint; the wrap-around makes
// the square of such a "negative" value equal to the square of its magnitude.
inline static uint sqr(uint s)
{
	const uint squared = s * s;
	return squared;
}
|
|
|
|
// Get the absolute distance between the given colors.
|
|
inline static uint colorDistance(Color32 c0, Color32 c1)
|
|
{
|
|
return sqr(c0.r - c1.r) + sqr(c0.g - c1.g) + sqr(c0.b - c1.b);
|
|
//return abs(c0.r - c1.r) + abs(c0.g - c1.g) + abs(c0.b - c1.b);
|
|
}
|
|
|
|
// Scalar fallback: there is no vector state to reset; provided so callers can
// invoke vectorEnd() unconditionally.
inline void vectorEnd()
{
	// Intentionally empty.
}
|
|
|
|
#endif
|
|
|
|
|
|
// Build the 32-bit index word for a block: two bits per texel, texel i in
// bits [2*i, 2*i+1].
//   rgba    - the 16 texels of the block.
//   palette - the four candidate colors.
// The selector for each texel is derived from pairwise comparisons of its
// distance to each palette entry, without branching; this is intended to
// select the closest of the four entries.
inline static uint computeIndices(const ColorBlock & rgba, const Color32 palette[4])
{
	// Convert the palette once, outside the per-texel loop.
	const VectorColor pal0 = loadColor(palette[0]);
	const VectorColor pal1 = loadColor(palette[1]);
	const VectorColor pal2 = loadColor(palette[2]);
	const VectorColor pal3 = loadColor(palette[3]);

	uint packed = 0;
	for (int texel = 0; texel < 16; texel++) {
		const VectorColor sample = loadColor(rgba.color(texel));

		const uint dist0 = colorDistance(pal0, sample);
		const uint dist1 = colorDistance(pal1, sample);
		const uint dist2 = colorDistance(pal2, sample);
		const uint dist3 = colorDistance(pal3, sample);

		// Pairwise "is farther than" flags (each is 0 or 1).
		const uint gt03 = dist0 > dist3;
		const uint gt12 = dist1 > dist2;
		const uint gt02 = dist0 > dist2;
		const uint gt13 = dist1 > dist3;
		const uint gt23 = dist2 > dist3;

		// Combine the flags into the two selector bits.
		const uint lowBit = gt03 & gt23;
		const uint highBit = (gt12 & gt02) | (gt03 & gt13);

		packed |= (lowBit | (highBit << 1)) << (2 * texel);
	}

	// Let the active vector backend release any state it holds.
	vectorEnd();
	return packed;
}
|
|
|
|
|
|
// Compressor that uses bounding box.
|
|
void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block)
|
|
{
|
|
Color32 c0, c1;
|
|
rgba.boundsRange(&c1, &c0);
|
|
|
|
block->col0 = toColor16(c0);
|
|
block->col1 = toColor16(c1);
|
|
|
|
nvDebugCheck(block->col0.u > block->col1.u);
|
|
|
|
// Use 4 color mode only.
|
|
//if (block->col0.u < block->col1.u) {
|
|
// swap(block->col0.u, block->col1.u);
|
|
//}
|
|
|
|
Color32 palette[4];
|
|
block->evaluatePalette4(palette);
|
|
|
|
block->indices = computeIndices(rgba, palette);
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Encode DXT3 block.
|
|
void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT3 * block)
|
|
{
|
|
compressBlock_BoundsRange(rgba, &block->color);
|
|
compressBlock(rgba, &block->alpha);
|
|
}
|
|
|
|
// Encode DXT3 alpha block.
|
|
void nv::compressBlock(const ColorBlock & rgba, AlphaBlockDXT3 * block)
|
|
{
|
|
block->alpha0 = rgba.color(0).a >> 4;
|
|
block->alpha1 = rgba.color(1).a >> 4;
|
|
block->alpha2 = rgba.color(2).a >> 4;
|
|
block->alpha3 = rgba.color(3).a >> 4;
|
|
block->alpha4 = rgba.color(4).a >> 4;
|
|
block->alpha5 = rgba.color(5).a >> 4;
|
|
block->alpha6 = rgba.color(6).a >> 4;
|
|
block->alpha7 = rgba.color(7).a >> 4;
|
|
block->alpha8 = rgba.color(8).a >> 4;
|
|
block->alpha9 = rgba.color(9).a >> 4;
|
|
block->alphaA = rgba.color(10).a >> 4;
|
|
block->alphaB = rgba.color(11).a >> 4;
|
|
block->alphaC = rgba.color(12).a >> 4;
|
|
block->alphaD = rgba.color(13).a >> 4;
|
|
block->alphaE = rgba.color(14).a >> 4;
|
|
block->alphaF = rgba.color(15).a >> 4;
|
|
}
|
|
|
|
|
|
|
|
// Assign every texel the entry of the block's 8-value alpha palette with the
// smallest squared difference to the texel's alpha, and store that index in
// the block.
//   rgba  - the 16 input texels.
//   block - alpha block whose palette is evaluated; its indices are overwritten.
// Returns the sum of the squared differences over all 16 texels.
static uint computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
	uint8 palette[8];
	block->evaluatePalette(palette);

	uint errorSum = 0;

	for (uint texel = 0; texel < 16; texel++)
	{
		const uint8 target = rgba.color(texel).a;

		// Start above any reachable squared difference so the first entry wins.
		uint lowest = 256*256;
		uint chosen = 8;
		for (uint k = 0; k < 8; k++)
		{
			const int delta = palette[k] - target;
			const uint squared = delta * delta;

			// Strict comparison: ties keep the earliest entry.
			if (squared < lowest)
			{
				lowest = squared;
				chosen = k;
			}
		}
		nvDebugCheck(chosen < 8);

		errorSum += lowest;
		block->setIndex(texel, chosen);
	}

	return errorSum;
}
|
|
|
|
// Measure how well the block's 8-value alpha palette fits the texels, without
// modifying the block.
//   rgba  - the 16 input texels.
//   block - alpha block whose palette is evaluated (read only).
// Returns the sum, over all 16 texels, of the smallest squared difference
// between the texel's alpha and any palette entry.
static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block)
{
	uint8 alphas[8];
	block->evaluatePalette(alphas);

	uint totalError = 0;

	for (uint i = 0; i < 16; i++)
	{
		uint8 alpha = rgba.color(i).a;

		// Start above any reachable squared difference so the first entry wins.
		uint besterror = 256*256;
		for (uint p = 0; p < 8; p++)
		{
			int d = alphas[p] - alpha;
			uint error = d * d;

			// Only the error is needed here; the winning index is not tracked.
			if (error < besterror)
			{
				besterror = error;
			}
		}

		totalError += besterror;
	}

	return totalError;
}
|
|
|
|
|
|
void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT5 * block)
|
|
{
|
|
Color32 c0, c1;
|
|
rgba.boundsRangeAlpha(&c1, &c0);
|
|
|
|
block->color.col0 = toColor16(c0);
|
|
block->color.col1 = toColor16(c1);
|
|
|
|
nvDebugCheck(block->color.col0.u > block->color.col1.u);
|
|
|
|
Color32 palette[4];
|
|
block->color.evaluatePalette4(palette);
|
|
|
|
block->color.indices = computeIndices(rgba, palette);
|
|
|
|
nvDebugCheck(c0.a <= c1.a);
|
|
|
|
block->alpha.alpha0 = c0.a;
|
|
block->alpha.alpha1 = c1.a;
|
|
|
|
computeAlphaIndices(rgba, &block->alpha);
|
|
}
|
|
|
|
|
|
// Encode a DXT5 alpha block from the (slightly inset) min/max alpha of the
// texels.
//   rgba  - the 16 input texels.
//   block - output; receives whichever of the two endpoint orderings gives
//           the lower error.
// Returns the summed squared alpha error of the chosen encoding.
uint nv::compressBlock_BoundsRange(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
	uint8 alpha0 = 0;
	uint8 alpha1 = 255;

	// Get min/max alpha.
	for (uint i = 0; i < 16; i++)
	{
		uint8 alpha = rgba.color(i).a;
		alpha0 = max(alpha0, alpha);
		alpha1 = min(alpha1, alpha);
	}

	// Pull both endpoints inwards by 1/32 of the range. The inset is computed
	// once from the original range so both ends move by the same amount;
	// previously the second endpoint used the already-shrunk maximum.
	const uint8 inset = (alpha0 - alpha1) / 32;
	alpha0 = alpha0 - inset;
	alpha1 = alpha1 + inset;

	// Candidate 1: alpha0 holds the larger endpoint.
	AlphaBlockDXT5 block0;
	block0.alpha0 = alpha0;
	block0.alpha1 = alpha1;
	uint error0 = computeAlphaIndices(rgba, &block0);

	// Candidate 2: endpoints swapped.
	AlphaBlockDXT5 block1;
	block1.alpha0 = alpha1;
	block1.alpha1 = alpha0;
	uint error1 = computeAlphaIndices(rgba, &block1);

	// Keep the better candidate; on a tie the swapped ordering wins.
	if (error0 < error1)
	{
		*block = block0;
		return error0;
	}
	else
	{
		*block = block1;
		return error1;
	}
}
|
|
|
|
// Encode a DXT5 alpha block by searching endpoint pairs inside the texels'
// [min, max] alpha range and keeping the pair with the lowest error.
//   rgba  - the 16 input texels.
//   block - output; endpoints and indices are overwritten (the endpoints are
//           also used as scratch during the search).
// Returns the summed squared alpha error of the final encoding.
uint nv::compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
	uint8 lowest = 255;
	uint8 highest = 0;

	// Get min/max alpha.
	for (uint i = 0; i < 16; i++)
	{
		uint8 alpha = rgba.color(i).a;
		lowest = min(lowest, alpha);
		highest = max(highest, alpha);
	}

	// Baseline: the full range.
	block->alpha0 = highest;
	block->alpha1 = lowest;

	// (An earlier, disabled variant also located the texel alpha closest to
	// the middle of the range and factored its distance into the pruning test.)

	// The search is only attempted when the range is wider than 8.
	if (highest - lowest > 8)
	{
		int bestErr = computeAlphaError(rgba, block);
		int bestHi = highest;
		int bestLo = lowest;

		// Candidates always keep the upper endpoint more than 8 above the lower.
		for (int hi = lowest + 9; hi < highest; hi++)
		{
			for (int lo = lowest; lo < hi - 8; lo++)
			{
				// Prune: skip candidates whose combined endpoint shrinkage
				// already exceeds the best error found so far.
				const int shrink = (highest - hi) + (lo - lowest);
				if (shrink > bestErr)
					continue;

				block->alpha0 = hi;
				block->alpha1 = lo;
				const int candidateErr = computeAlphaError(rgba, block);

				if (candidateErr < bestErr)
				{
					bestErr = candidateErr;
					bestHi = hi;
					bestLo = lo;
				}
			}
		}

		// Restore the winning endpoints after using the block as scratch.
		block->alpha0 = bestHi;
		block->alpha1 = bestLo;
	}

	return computeAlphaIndices(rgba, block);
}
|
|
|
|
|
|
|