Clean up simple compressors. Move code from FastCompress to QuickCompress.

This commit is contained in:
castano 2008-03-20 01:39:02 +00:00
parent cc8656f12b
commit 065c5f0689
5 changed files with 280 additions and 914 deletions

View File

@ -97,7 +97,6 @@ void nv::fastCompressDXT1a(const Image * image, const OutputOptions::Private & o
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
QuickCompress::compressDXT1a(rgba, &block);
if (outputOptions.outputHandler != NULL) {
@ -119,7 +118,7 @@ void nv::fastCompressDXT3(const Image * image, const nvtt::OutputOptions::Privat
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
compressBlock_BoundsRange(rgba, &block);
QuickCompress::compressDXT3(rgba, &block);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -140,7 +139,8 @@ void nv::fastCompressDXT5(const Image * image, const nvtt::OutputOptions::Privat
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
compressBlock_BoundsRange(rgba, &block);
//QuickCompress::compressDXT5(rgba, &block); // @@ Use fast version!!
nv::compressBlock_BoundsRange(rgba, &block);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -164,8 +164,9 @@ void nv::fastCompressDXT5n(const Image * image, const nvtt::OutputOptions::Priva
// copy X coordinate to alpha channel and Y coordinate to green channel.
rgba.swizzleDXT5n();
compressBlock_BoundsRange(rgba, &block);
//QuickCompress::compressDXT5(rgba, &block); // @@ Use fast version!!
nv::compressBlock_BoundsRange(rgba, &block);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -286,7 +287,7 @@ void nv::compressDXT3(const Image * image, const OutputOptions::Private & output
rgba.init(image, x, y);
// Compress explicit alpha.
compressBlock(rgba, &block.alpha);
QuickCompress::compressDXT3A(rgba, &block.alpha);
// Compress color.
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
@ -317,14 +318,13 @@ void nv::compressDXT5(const Image * image, const OutputOptions::Private & output
rgba.init(image, x, y);
// Compress alpha.
uint error;
if (compressionOptions.quality == Quality_Highest)
{
error = compressBlock_BruteForce(rgba, &block.alpha);
compressBlock_BruteForce(rgba, &block.alpha);
}
else
{
error = compressBlock_Iterative(rgba, &block.alpha);
QuickCompress::compressDXT5A(rgba, &block.alpha);
}
// Compress color.
@ -359,10 +359,13 @@ void nv::compressDXT5n(const Image * image, const OutputOptions::Private & outpu
rgba.swizzleDXT5n();
// Compress X.
uint error = compressBlock_Iterative(rgba, &block.alpha);
if (compressionOptions.quality == Quality_Highest)
{
error = compressBlock_BruteForce(rgba, &block.alpha);
compressBlock_BruteForce(rgba, &block.alpha);
}
else
{
QuickCompress::compressDXT5A(rgba, &block.alpha);
}
// Compress Y.
@ -384,23 +387,19 @@ void nv::compressBC4(const Image * image, const nvtt::OutputOptions::Private & o
ColorBlock rgba;
AlphaBlockDXT5 block;
uint totalError = 0;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
//error = compressBlock_BoundsRange(rgba, &block);
uint error = compressBlock_Iterative(rgba, &block);
if (compressionOptions.quality == Quality_Highest)
{
// Try brute force algorithm.
error = compressBlock_BruteForce(rgba, &block);
compressBlock_BruteForce(rgba, &block);
}
else
{
QuickCompress::compressDXT5A(rgba, &block);
}
totalError += error;
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -429,18 +428,15 @@ void nv::compressBC5(const Image * image, const nvtt::OutputOptions::Private & o
ycolor.init(image, x, y);
ycolor.splatY();
// @@ Compute normal error, instead of separate xy errors.
uint xerror, yerror;
if (compressionOptions.quality == Quality_Highest)
{
xerror = compressBlock_BruteForce(xcolor, &block.x);
yerror = compressBlock_BruteForce(ycolor, &block.y);
compressBlock_BruteForce(xcolor, &block.x);
compressBlock_BruteForce(ycolor, &block.y);
}
else
{
xerror = compressBlock_Iterative(xcolor, &block.x);
yerror = compressBlock_Iterative(ycolor, &block.y);
QuickCompress::compressDXT5A(xcolor, &block.x);
QuickCompress::compressDXT5A(ycolor, &block.y);
}
if (outputOptions.outputHandler != NULL) {

View File

@ -163,32 +163,6 @@ inline void vectorEnd()
#endif
// Accumulates, over the 16 texels of the block, the smallest colorDistance
// between the texel and any of the four palette entries.
inline static uint paletteError(const ColorBlock & rgba, Color32 palette[4])
{
    const VectorColor entry0 = loadColor(palette[0]);
    const VectorColor entry1 = loadColor(palette[1]);
    const VectorColor entry2 = loadColor(palette[2]);
    const VectorColor entry3 = loadColor(palette[3]);

    uint total = 0;

    for (uint texel = 0; texel < 16; texel++) {
        const VectorColor pixel = loadColor(rgba.color(texel));

        const uint dist0 = colorDistance(pixel, entry0);
        const uint dist1 = colorDistance(pixel, entry1);
        const uint dist2 = colorDistance(pixel, entry2);
        const uint dist3 = colorDistance(pixel, entry3);

        // Only the closest palette entry contributes to the error.
        const uint nearestLow = min(dist0, dist1);
        const uint nearestHigh = min(dist2, dist3);
        total += min(nearestLow, nearestHigh);
    }

    vectorEnd();
    return total;
}
inline static uint computeIndices(const ColorBlock & rgba, const Color32 palette[4])
{
const VectorColor vcolor0 = loadColor(palette[0]);
@ -222,91 +196,6 @@ inline static uint computeIndices(const ColorBlock & rgba, const Color32 palette
return indices;
}
// Packs one 2-bit palette selector per texel into a 32-bit word (texel i uses
// bits 2*i and 2*i+1). Source colors are alpha-premultiplied before being
// compared against the four palette entries.
inline static uint computeIndicesAlpha(const ColorBlock & rgba, const Color32 palette[4])
{
    const VectorColor entry0 = loadColor(palette[0]);
    const VectorColor entry1 = loadColor(palette[1]);
    const VectorColor entry2 = loadColor(palette[2]);
    const VectorColor entry3 = loadColor(palette[3]);

    uint packed = 0;

    for (int texel = 0; texel < 16; texel++) {
        const VectorColor pixel = premultiplyAlpha(loadColor(rgba.color(texel)));

        const uint dist0 = colorDistance(entry0, pixel);
        const uint dist1 = colorDistance(entry1, pixel);
        const uint dist2 = colorDistance(entry2, pixel);
        const uint dist3 = colorDistance(entry3, pixel);

        // Pairwise comparisons, later combined into the selector without branching.
        const uint gt03 = dist0 > dist3;
        const uint gt12 = dist1 > dist2;
        const uint gt02 = dist0 > dist2;
        const uint gt13 = dist1 > dist3;
        const uint gt23 = dist2 > dist3;

        const uint lowBit = gt03 & gt23;
        const uint highBit = (gt12 & gt02) | (gt03 & gt13);

        packed |= (lowBit | (highBit << 1)) << (2 * texel);
    }

    vectorEnd();
    return packed;
}
// Builds a Color16 from integer components, limiting r and b to [0, 31] and
// g to [0, 63] before they are stored.
inline static Color16 saturate16(int r, int g, int b)
{
    Color16 c;
    // clamp takes the value first and the bounds after it, matching the
    // clamp(b0 + z, 0, 31) style calls used by the 1d search in this file.
    // With the bounds first, negative inputs were passed through unclamped.
    c.r = clamp(r, 0, 31);
    c.g = clamp(g, 0, 63);
    c.b = clamp(b, 0, 31);
    return c;
}
// Compressor that uses the luminance axis.
void nv::compressBlock_LuminanceAxis(const ColorBlock & rgba, BlockDXT1 * block)
{
Color32 c0, c1;
rgba.luminanceRange(&c0, &c1);
block->col0 = toColor16(c0);
block->col1 = toColor16(c1);
// Use 4 color mode only.
if (block->col0.u < block->col1.u) {
swap(block->col0.u, block->col1.u);
}
Color32 palette[4];
block->evaluatePalette4(palette);
block->indices = computeIndices(rgba, palette);
}
// Compressor that uses diameter axis.
void nv::compressBlock_DiameterAxis(const ColorBlock & rgba, BlockDXT1 * block)
{
Color32 c0, c1;
rgba.diameterRange(&c0, &c1);
block->col0 = toColor16(c0);
block->col1 = toColor16(c1);
// Use 4 color mode only.
if (block->col0.u < block->col1.u) {
swap(block->col0.u, block->col1.u);
}
Color32 palette[4];
block->evaluatePalette4(palette);
block->indices = computeIndices(rgba, palette);
}
// Compressor that uses bounding box.
void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block)
@ -330,513 +219,12 @@ void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block)
block->indices = computeIndices(rgba, palette);
}
// Compressor that uses bounding box and takes alpha into account.
void nv::compressBlock_BoundsRangeAlpha(const ColorBlock & rgba, BlockDXT1 * block)
{
Color32 c0, c1;
rgba.boundsRange(&c1, &c0);
if (rgba.hasAlpha())
{
block->col0 = toColor16(c1);
block->col1 = toColor16(c0);
}
else
{
block->col0 = toColor16(c0);
block->col1 = toColor16(c1);
}
Color32 palette[4];
block->evaluatePalette(palette);
block->indices = computeIndicesAlpha(rgba, palette);
}
// Compressor that tests all input color pairs.
void nv::compressBlock_TestAllPairs(const ColorBlock & rgba, BlockDXT1 * block)
{
uint best_error = uint(-1);
Color16 best_col0, best_col1;
Color32 palette[4];
// Test all color pairs.
for(uint i = 0; i < 16; i++) {
block->col0 = toColor16(rgba.color(i));
for(uint ii = 0; ii < 16; ii++) {
if( i != ii ) {
block->col1 = toColor16(rgba.color(ii));
block->evaluatePalette(palette);
const uint error = paletteError(rgba, palette);
if(error < best_error) {
best_error = error;
best_col0 = block->col0;
best_col1 = block->col1;
}
}
}
}
block->col0 = best_col0;
block->col1 = best_col1;
block->evaluatePalette(palette);
block->indices = computeIndices(rgba, palette);
}
// Improve palette iteratively using alternate 3d search as suggested by Dave Moore.
//
// Starting from the endpoints already stored in `block`, each pass searches the
// (2W+1)^3 neighborhood of col0 and then of col1 (W = 2 steps per component),
// remembering the endpoint pair with the lowest paletteError. Passes repeat
// until one finds no improvement; the best endpoints are then written back and
// the indices are recomputed with computeIndices.
void nv::refineSolution_3dSearch(const ColorBlock & rgba, BlockDXT1 * block)
{
Color32 palette[4];
block->evaluatePalette(palette);
// Error of the incoming solution; candidates must beat it strictly.
uint best_error = paletteError(rgba, palette);
Color16 best_col0 = block->col0;
Color16 best_col1 = block->col1;
// Search radius, in 5:6:5 component steps, around the current best endpoint.
const int W = 2;
while(true) {
bool changed = false;
// Center the col0 search on the best col0 found so far.
const int r0 = best_col0.r;
const int g0 = best_col0.g;
const int b0 = best_col0.b;
for(int z = -W; z <= W; z++) {
for(int y = -W; y <= W; y++) {
for(int x = -W; x <= W; x++) {
block->col0 = saturate16(r0 + x, g0 + y, b0 + z);
block->evaluatePalette(palette);
const uint error = paletteError(rgba, palette);
if(error < best_error) {
best_error = error;
best_col0 = block->col0;
best_col1 = block->col1;
changed = true;
}
}
}
}
// NOTE(review): block->col0 is not reset to best_col0 here, so the col1 search
// below runs against the last col0 candidate tried above. Likewise block->col1
// keeps its last candidate when the next pass searches col0 — confirm intent.
const int r1 = best_col1.r;
const int g1 = best_col1.g;
const int b1 = best_col1.b;
for(int z = -W; z <= W; z++) {
for(int y = -W; y <= W; y++) {
for(int x = -W; x <= W; x++) {
block->col1 = saturate16(r1 + x, g1 + y, b1 + z);
block->evaluatePalette(palette);
const uint error = paletteError(rgba, palette);
if(error < best_error) {
best_error = error;
best_col0 = block->col0;
best_col1 = block->col1;
changed = true;
}
}
}
}
if( !changed ) {
// Stop at local minima.
break;
}
}
// Commit the best endpoints and rebuild the indices for them.
block->col0 = best_col0;
block->col1 = best_col1;
block->evaluatePalette(palette);
block->indices = computeIndices(rgba, palette);
}
// Improve the palette iteratively using 6d search as suggested by Charles Bloom.
//
// Starting from the endpoints already stored in `block`, each pass tries every
// combination of col0 and col1 offsets within W = 1 step per component
// ((2W+1)^6 candidates) around the best pair found so far, keeping the pair with
// the lowest paletteError. Passes repeat until one finds no improvement; the
// best endpoints are then written back and the indices are recomputed.
void nv::refineSolution_6dSearch(const ColorBlock & rgba, BlockDXT1 * block)
{
Color32 palette[4];
block->evaluatePalette(palette);
// Error of the incoming solution; candidates must beat it strictly.
uint best_error = paletteError(rgba, palette);
Color16 best_col0 = block->col0;
Color16 best_col1 = block->col1;
// Search radius, in component steps, applied to both endpoints at once.
const int W = 1;
while(true) {
bool changed = false;
// Both endpoints are re-centered on the best pair at the start of each pass.
const int r0 = best_col0.r;
const int g0 = best_col0.g;
const int b0 = best_col0.b;
const int r1 = best_col1.r;
const int g1 = best_col1.g;
const int b1 = best_col1.b;
for(int z0 = -W; z0 <= W; z0++) {
for(int y0 = -W; y0 <= W; y0++) {
for(int x0 = -W; x0 <= W; x0++) {
for(int z1 = -W; z1 <= W; z1++) {
for(int y1 = -W; y1 <= W; y1++) {
for(int x1 = -W; x1 <= W; x1++) {
// Every candidate sets both endpoints, so no state leaks between iterations.
block->col0 = saturate16(r0 + x0, g0 + y0, b0 + z0);
block->col1 = saturate16(r1 + x1, g1 + y1, b1 + z1);
block->evaluatePalette(palette);
const uint error = paletteError(rgba, palette);
if(error < best_error) {
best_error = error;
best_col0 = block->col0;
best_col1 = block->col1;
changed = true;
}
}
}
}
}
}
}
if( !changed ) {
// Stop at local minima.
break;
}
}
// Commit the best endpoints and rebuild the indices for them.
block->col0 = best_col0;
block->col1 = best_col1;
block->evaluatePalette(palette);
block->indices = computeIndices(rgba, palette);
}
// Improve the palette iteratively using alternate 1d search as suggested by Walt Donovan.
//
// Starting from the endpoints already stored in `block`, each pass sweeps one
// component at a time (b, g, r of col0, then b, g, r of col1) over W = 4 steps
// on either side of the best value, keeping the endpoint pair with the lowest
// paletteError. Passes repeat until one finds no improvement; the best endpoints
// are then written back and the indices are recomputed with computeIndices.
void nv::refineSolution_1dSearch(const ColorBlock & rgba, BlockDXT1 * block)
{
Color32 palette[4];
block->evaluatePalette(palette);
// Error of the incoming solution; candidates must beat it strictly.
uint best_error = paletteError(rgba, palette);
Color16 best_col0 = block->col0;
Color16 best_col1 = block->col1;
// Sweep radius, in component steps.
const int W = 4;
while(true) {
bool changed = false;
// Center the col0 sweeps on the best col0 found so far.
const int r0 = best_col0.r;
const int g0 = best_col0.g;
const int b0 = best_col0.b;
// NOTE(review): a swept component is not restored to its best value before the
// next sweep starts, so each later sweep runs with the previous components left
// at their last tried (clamped) value — confirm this is intended.
// Sweep col0 blue.
for(int z = -W; z <= W; z++) {
block->col0.b = clamp(b0 + z, 0, 31);
block->evaluatePalette(palette);
const uint error = paletteError(rgba, palette);
if(error < best_error) {
best_error = error;
best_col0 = block->col0;
best_col1 = block->col1;
changed = true;
}
}
// Sweep col0 green (wider range than red and blue).
for(int y = -W; y <= W; y++) {
block->col0.g = clamp(g0 + y, 0, 63);
block->evaluatePalette(palette);
const uint error = paletteError(rgba, palette);
if(error < best_error) {
best_error = error;
best_col0 = block->col0;
best_col1 = block->col1;
changed = true;
}
}
// Sweep col0 red.
for(int x = -W; x <= W; x++) {
block->col0.r = clamp(r0 + x, 0, 31);
block->evaluatePalette(palette);
const uint error = paletteError(rgba, palette);
if(error < best_error) {
best_error = error;
best_col0 = block->col0;
best_col1 = block->col1;
changed = true;
}
}
// Center the col1 sweeps on the best col1, which the col0 sweeps may have updated.
const int r1 = best_col1.r;
const int g1 = best_col1.g;
const int b1 = best_col1.b;
// Sweep col1 blue.
for(int z = -W; z <= W; z++) {
block->col1.b = clamp(b1 + z, 0, 31);
block->evaluatePalette(palette);
const uint error = paletteError(rgba, palette);
if(error < best_error) {
best_error = error;
best_col0 = block->col0;
best_col1 = block->col1;
changed = true;
}
}
// Sweep col1 green.
for(int y = -W; y <= W; y++) {
block->col1.g = clamp(g1 + y, 0, 63);
block->evaluatePalette(palette);
const uint error = paletteError(rgba, palette);
if(error < best_error) {
best_error = error;
best_col0 = block->col0;
best_col1 = block->col1;
changed = true;
}
}
// Sweep col1 red.
for(int x = -W; x <= W; x++) {
block->col1.r = clamp(r1 + x, 0, 31);
block->evaluatePalette(palette);
const uint error = paletteError(rgba, palette);
if(error < best_error) {
best_error = error;
best_col0 = block->col0;
best_col1 = block->col1;
changed = true;
}
}
if( !changed ) {
// Stop at local minima.
break;
}
}
// Commit the best endpoints and rebuild the indices for them.
block->col0 = best_col0;
block->col1 = best_col1;
block->evaluatePalette(palette);
block->indices = computeIndices(rgba, palette);
}
// Sums, over the 16 texels of the block, the squared difference between the
// texel's green value and the closest green among the four entries produced by
// block->evaluatePalette4.
static uint computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
{
    Color32 colors[4];
    block->evaluatePalette4(colors);

    uint totalError = 0;

    for (uint i = 0; i < 16; i++)
    {
        const uint8 green = rgba.color(i).g;

        // Initial sentinel; green is a uint8, so presumably no squared
        // difference computed below can reach it.
        uint besterror = 256*256;

        // Only the smallest error is needed; the winning palette slot is not
        // used by any caller, so it is no longer tracked.
        for (uint p = 0; p < 4; p++)
        {
            const int d = colors[p].g - green;
            const uint error = d * d;

            if (error < besterror)
            {
                besterror = error;
            }
        }

        totalError += besterror;
    }

    return totalError;
}
// Brute force compressor for DXT5n
void nv::compressGreenBlock_BruteForce(const ColorBlock & rgba, BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
uint8 ming = 63;
uint8 maxg = 0;
// Get min/max green.
for (uint i = 0; i < 16; i++)
{
uint8 green = rgba.color(i).g >> 2;
ming = min(ming, green);
maxg = max(maxg, green);
}
block->col0.r = 31;
block->col1.r = 31;
block->col0.g = maxg;
block->col1.g = ming;
block->col0.b = 0;
block->col1.b = 0;
if (maxg - ming > 4)
{
int besterror = computeGreenError(rgba, block);
int bestg0 = maxg;
int bestg1 = ming;
for (int g0 = ming+5; g0 < maxg; g0++)
{
for (int g1 = ming; g1 < g0-4; g1++)
{
if ((maxg-g0) + (g1-ming) > besterror)
continue;
block->col0.g = g0;
block->col1.g = g1;
int error = computeGreenError(rgba, block);
if (error < besterror)
{
besterror = error;
bestg0 = g0;
bestg1 = g1;
}
}
}
block->col0.g = bestg0;
block->col1.g = bestg1;
}
Color32 palette[4];
block->evaluatePalette(palette);
block->indices = computeIndices(rgba, palette);
}
// Error of an encoded DXT1 block: the sum, over the 16 texels, of the
// colorDistance between the source color and the palette entry that the
// block's index selects for that texel.
uint nv::blockError(const ColorBlock & rgba, const BlockDXT1 & block)
{
    Color32 colors[4];
    block.evaluatePalette(colors);

    VectorColor entries[4];
    for (int e = 0; e < 4; e++) {
        entries[e] = loadColor(colors[e]);
    }

    uint total = 0;

    for (uint texel = 0; texel < 16; texel++) {
        const VectorColor pixel = loadColor(rgba.color(texel));

        // Two index bits per texel.
        const int selector = (block.indices >> (2 * texel)) & 3;

        const uint distance = colorDistance(pixel, entries[selector]);
        total += distance;
    }

    vectorEnd();
    return total;
}
// Error of an encoded DXT5 alpha block: the sum, over the 16 texels, of the
// squared difference between the selected palette alpha and the source alpha.
uint nv::blockError(const ColorBlock & rgba, const AlphaBlockDXT5 & block)
{
    uint8 alphas[8];
    block.evaluatePalette(alphas);

    uint8 selectors[16];
    block.indices(selectors);

    uint total = 0;

    for (uint texel = 0; texel < 16; texel++) {
        const int delta = alphas[selectors[texel]] - rgba.color(texel).a;
        total += uint(delta * delta);
    }

    return total;
}
void nv::optimizeEndPoints(const ColorBlock & rgba, BlockDXT1 * block)
{
float alpha2_sum = 0.0f;
float beta2_sum = 0.0f;
float alphabeta_sum = 0.0f;
Vector3 alphax_sum(zero);
Vector3 betax_sum(zero);
for( int i = 0; i < 16; ++i )
{
const uint bits = block->indices >> (2 * i);
float beta = float(bits & 1);
if (bits & 2) beta = (1 + beta) / 3.0f;
float alpha = 1.0f - beta;
const Vector3 x = toVector4(rgba.color(i)).xyz();
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * x;
betax_sum += beta * x;
}
float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
Vector3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
Vector3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
Vector3 zero(0, 0, 0);
Vector3 one(1, 1, 1);
a = min(one, max(zero, a));
b = min(one, max(zero, b));
BlockDXT1 B;
// Round a,b to 565.
B.col0.r = uint16(a.x() * 31);
B.col0.g = uint16(a.y() * 63);
B.col0.b = uint16(a.z() * 31);
B.col1.r = uint16(b.x() * 31);
B.col1.g = uint16(b.y() * 63);
B.col1.b = uint16(b.z() * 31);
B.indices = block->indices;
// Force 4 color mode.
if (B.col0.u < B.col1.u)
{
swap(B.col0.u, B.col1.u);
B.indices ^= 0x55555555;
}
else if (B.col0.u == B.col1.u)
{
block->indices = 0;
}
if (blockError(rgba, B) < blockError(rgba, *block))
{
*block = B;
}
}
// Encode DXT3 block.
void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT3 * block)
@ -1064,161 +452,5 @@ uint nv::compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * bloc
return computeAlphaIndices(rgba, block);
}
static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
float alpha2_sum = 0;
float beta2_sum = 0;
float alphabeta_sum = 0;
float alphax_sum = 0;
float betax_sum = 0;
for (int i = 0; i < 16; i++)
{
uint idx = block->index(i);
float alpha;
if (idx < 2) alpha = 1.0f - idx;
else alpha = (8.0f - idx) / 7.0f;
float beta = 1 - alpha;
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * rgba.color(i).a;
betax_sum += beta * rgba.color(i).a;
}
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
uint alpha0 = uint(min(max(a, 0.0f), 255.0f));
uint alpha1 = uint(min(max(b, 0.0f), 255.0f));
if (alpha0 < alpha1)
{
swap(alpha0, alpha1);
// Flip indices:
for (int i = 0; i < 16; i++)
{
uint idx = block->index(i);
if (idx < 2) block->setIndex(i, 1 - idx);
else block->setIndex(i, 9 - idx);
}
}
else if (alpha0 == alpha1)
{
for (int i = 0; i < 16; i++)
{
block->setIndex(i, 0);
}
}
block->alpha0 = alpha0;
block->alpha1 = alpha1;
}
static void optimizeAlpha6(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
float alpha2_sum = 0;
float beta2_sum = 0;
float alphabeta_sum = 0;
float alphax_sum = 0;
float betax_sum = 0;
for (int i = 0; i < 16; i++)
{
uint8 x = rgba.color(i).a;
if (x == 0 || x == 255) continue;
uint bits = block->index(i);
if (bits == 6 || bits == 7) continue;
float alpha;
if (bits == 0) alpha = 1.0f;
else if (bits == 1) alpha = 0.0f;
else alpha = (6.0f - block->index(i)) / 5.0f;
float beta = 1 - alpha;
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * x;
betax_sum += beta * x;
}
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
uint alpha0 = uint(min(max(a, 0.0f), 255.0f));
uint alpha1 = uint(min(max(b, 0.0f), 255.0f));
if (alpha0 > alpha1)
{
swap(alpha0, alpha1);
}
block->alpha0 = alpha0;
block->alpha1 = alpha1;
}
// Returns true when the two blocks agree on every bit of `u` above the low 16.
// NOTE(review): presumably the low 16 bits of `u` hold alpha0/alpha1 and the
// upper 48 bits hold the sixteen 3-bit indices — confirm against AlphaBlockDXT5.
static bool sameIndices(const AlphaBlockDXT5 & block0, const AlphaBlockDXT5 & block1)
{
    const uint64 mask = ~uint64(0xFFFF);

    // AND keeps the masked (upper) bits and discards the low 16. An OR with this
    // mask would force the upper bits to one on both sides and compare only the
    // low 16 bits, which is the opposite of what the name promises.
    return (block0.u & mask) == (block1.u & mask);
}
// Iterative DXT5 alpha compressor.
//
// Starts from the block's alpha range, pulled in slightly on both sides, then
// alternates optimizeAlpha8 (refit endpoints) and computeAlphaIndices (reassign
// indices) until the error stops improving or the indices stop changing.
// Writes the best block to *resultblock and returns its error.
uint nv::compressBlock_Iterative(const ColorBlock & rgba, AlphaBlockDXT5 * resultblock)
{
    uint8 alpha0 = 0;
    uint8 alpha1 = 255;

    // Get min/max alpha.
    for (uint i = 0; i < 16; i++)
    {
        const uint8 alpha = rgba.color(i).a;
        alpha0 = max(alpha0, alpha);
        alpha1 = min(alpha1, alpha);
    }

    // Initial endpoints: the range inset by 1/34 of its width on each side.
    AlphaBlockDXT5 block;
    block.alpha0 = alpha0 - (alpha0 - alpha1) / 34;
    block.alpha1 = alpha1 + (alpha0 - alpha1) / 34;
    uint besterror = computeAlphaIndices(rgba, &block);

    AlphaBlockDXT5 bestblock = block;

    while (true)
    {
        optimizeAlpha8(rgba, &block);
        const uint error = computeAlphaIndices(rgba, &block);

        if (error >= besterror)
        {
            // No improvement, stop.
            break;
        }

        // Compare against the previous best before it is overwritten.
        const bool converged = sameIndices(block, bestblock);

        // Record the improvement. The error is updated together with the block
        // so that the value returned always belongs to the block returned, also
        // when the loop stops because the indices did not change.
        besterror = error;
        bestblock = block;

        if (converged)
        {
            break;
        }
    }

    // Copy best block to result;
    *resultblock = bestblock;

    return besterror;
}

View File

@ -38,37 +38,37 @@ namespace nv
// Color compression:
// Compressor that uses the extremes of the diameter axis.
void compressBlock_DiameterAxis(const ColorBlock & rgba, BlockDXT1 * block);
// void compressBlock_DiameterAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses the extremes of the luminance axis.
void compressBlock_LuminanceAxis(const ColorBlock & rgba, BlockDXT1 * block);
// void compressBlock_LuminanceAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses bounding box.
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses bounding box and takes alpha into account.
void compressBlock_BoundsRangeAlpha(const ColorBlock & rgba, BlockDXT1 * block);
// void compressBlock_BoundsRangeAlpha(const ColorBlock & rgba, BlockDXT1 * block);
// Simple, but slow compressor that tests all color pairs.
void compressBlock_TestAllPairs(const ColorBlock & rgba, BlockDXT1 * block);
// void compressBlock_TestAllPairs(const ColorBlock & rgba, BlockDXT1 * block);
// Brute force 6d search along the best fit axis.
void compressBlock_AnalyzeBestFitAxis(const ColorBlock & rgba, BlockDXT1 * block);
// void compressBlock_AnalyzeBestFitAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Spatial greedy search.
void refineSolution_1dSearch(const ColorBlock & rgba, BlockDXT1 * block);
void refineSolution_3dSearch(const ColorBlock & rgba, BlockDXT1 * block);
void refineSolution_6dSearch(const ColorBlock & rgba, BlockDXT1 * block);
// void refineSolution_1dSearch(const ColorBlock & rgba, BlockDXT1 * block);
// void refineSolution_3dSearch(const ColorBlock & rgba, BlockDXT1 * block);
// void refineSolution_6dSearch(const ColorBlock & rgba, BlockDXT1 * block);
// Brute force compressor for DXT5n
void compressGreenBlock_BruteForce(const ColorBlock & rgba, BlockDXT1 * block);
// void compressGreenBlock_BruteForce(const ColorBlock & rgba, BlockDXT1 * block);
// Minimize error of the endpoints.
void optimizeEndPoints(const ColorBlock & rgba, BlockDXT1 * block);
// void optimizeEndPoints(const ColorBlock & rgba, BlockDXT1 * block);
uint blockError(const ColorBlock & rgba, const BlockDXT1 & block);
uint blockError(const ColorBlock & rgba, const AlphaBlockDXT5 & block);
// uint blockError(const ColorBlock & rgba, const BlockDXT1 & block);
// uint blockError(const ColorBlock & rgba, const AlphaBlockDXT5 & block);
// Alpha compression:
void compressBlock(const ColorBlock & rgba, AlphaBlockDXT3 * block);
@ -77,7 +77,7 @@ namespace nv
uint compressBlock_BoundsRange(const ColorBlock & rgba, AlphaBlockDXT5 * block);
uint compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * block);
uint compressBlock_Iterative(const ColorBlock & rgba, AlphaBlockDXT5 * block);
// uint compressBlock_Iterative(const ColorBlock & rgba, AlphaBlockDXT5 * block);
} // nv namespace

View File

@ -288,62 +288,219 @@ static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock)
dxtBlock->indices = computeIndices3(block, a, b);
}*/
static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block)
namespace
{
float alpha2_sum = 0;
float beta2_sum = 0;
float alphabeta_sum = 0;
float alphax_sum = 0;
float betax_sum = 0;
for (int i = 0; i < 16; i++)
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
{
uint idx = block->index(i);
float alpha;
if (idx < 2) alpha = 1.0f - idx;
else alpha = (8.0f - idx) / 7.0f;
float beta = 1 - alpha;
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * rgba.color(i).a;
betax_sum += beta * rgba.color(i).a;
nvDebugCheck(block != NULL);
int palette[4];
palette[0] = (block->col0.g << 2) | (block->col0.g >> 4);
palette[1] = (block->col1.g << 2) | (block->col1.g >> 4);
palette[2] = (2 * palette[0] + palette[1]) / 3;
palette[3] = (2 * palette[1] + palette[0]) / 3;
int totalError = 0;
for (int i = 0; i < 16; i++)
{
const int green = rgba.color(i).g;
int error = abs(green - palette[0]);
error = min(error, abs(green - palette[1]));
error = min(error, abs(green - palette[2]));
error = min(error, abs(green - palette[3]));
totalError += error;
}
return totalError;
}
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
uint alpha0 = uint(min(max(a, 0.0f), 255.0f));
uint alpha1 = uint(min(max(b, 0.0f), 255.0f));
if (alpha0 < alpha1)
static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4])
{
swap(alpha0, alpha1);
const int color0 = palette[0].g;
const int color1 = palette[1].g;
const int color2 = palette[2].g;
const int color3 = palette[3].g;
// Flip indices:
uint indices = 0;
for (int i = 0; i < 16; i++)
{
const int color = rgba.color(i).g;
uint d0 = abs(color0 - color);
uint d1 = abs(color1 - color);
uint d2 = abs(color2 - color);
uint d3 = abs(color3 - color);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b0 & b4;
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
}
return indices;
}
} // namespace
namespace
{
static uint computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
uint8 alphas[8];
block->evaluatePalette(alphas);
uint totalError = 0;
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint besterror = 256*256;
uint best = 8;
for(uint p = 0; p < 8; p++)
{
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
nvDebugCheck(best < 8);
totalError += besterror;
block->setIndex(i, best);
}
return totalError;
}
static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
float alpha2_sum = 0;
float beta2_sum = 0;
float alphabeta_sum = 0;
float alphax_sum = 0;
float betax_sum = 0;
for (int i = 0; i < 16; i++)
{
uint idx = block->index(i);
if (idx < 2) block->setIndex(i, 1 - idx);
else block->setIndex(i, 9 - idx);
float alpha;
if (idx < 2) alpha = 1.0f - idx;
else alpha = (8.0f - idx) / 7.0f;
float beta = 1 - alpha;
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * rgba.color(i).a;
betax_sum += beta * rgba.color(i).a;
}
}
else if (alpha0 == alpha1)
{
for (int i = 0; i < 16; i++)
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
uint alpha0 = uint(min(max(a, 0.0f), 255.0f));
uint alpha1 = uint(min(max(b, 0.0f), 255.0f));
if (alpha0 < alpha1)
{
block->setIndex(i, 0);
swap(alpha0, alpha1);
// Flip indices:
for (int i = 0; i < 16; i++)
{
uint idx = block->index(i);
if (idx < 2) block->setIndex(i, 1 - idx);
else block->setIndex(i, 9 - idx);
}
}
else if (alpha0 == alpha1)
{
for (int i = 0; i < 16; i++)
{
block->setIndex(i, 0);
}
}
block->alpha0 = alpha0;
block->alpha1 = alpha1;
}
block->alpha0 = alpha0;
block->alpha1 = alpha1;
}
/*
static void optimizeAlpha6(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
float alpha2_sum = 0;
float beta2_sum = 0;
float alphabeta_sum = 0;
float alphax_sum = 0;
float betax_sum = 0;
for (int i = 0; i < 16; i++)
{
uint8 x = rgba.color(i).a;
if (x == 0 || x == 255) continue;
uint bits = block->index(i);
if (bits == 6 || bits == 7) continue;
float alpha;
if (bits == 0) alpha = 1.0f;
else if (bits == 1) alpha = 0.0f;
else alpha = (6.0f - block->index(i)) / 5.0f;
float beta = 1 - alpha;
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * x;
betax_sum += beta * x;
}
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
uint alpha0 = uint(min(max(a, 0.0f), 255.0f));
uint alpha1 = uint(min(max(b, 0.0f), 255.0f));
if (alpha0 > alpha1)
{
swap(alpha0, alpha1);
}
block->alpha0 = alpha0;
block->alpha1 = alpha1;
}
*/
// Returns true when the two blocks agree on every bit of `u` above the low 16.
// NOTE(review): presumably the low 16 bits of `u` hold alpha0/alpha1 and the
// upper 48 bits hold the sixteen 3-bit indices — confirm against AlphaBlockDXT5.
static bool sameIndices(const AlphaBlockDXT5 & block0, const AlphaBlockDXT5 & block1)
{
    const uint64 mask = ~uint64(0xFFFF);

    // AND keeps the masked (upper) bits and discards the low 16. An OR with this
    // mask would force the upper bits to one on both sides and compare only the
    // low 16 bits, which is the opposite of what the name promises.
    return (block0.u & mask) == (block1.u & mask);
}
} // namespace
@ -436,66 +593,6 @@ void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
}
// Sums, over the 16 texels, the absolute difference between the texel's green
// value and the closest of four green levels derived from the block's endpoints.
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
{
    nvDebugCheck(block != NULL);

    // Widen each endpoint green by two bits, filling the new low bits from the
    // value's high bits, then derive the two interpolated levels.
    const int green0 = (block->col0.g << 2) | (block->col0.g >> 4);
    const int green1 = (block->col1.g << 2) | (block->col1.g >> 4);

    int levels[4];
    levels[0] = green0;
    levels[1] = green1;
    levels[2] = (2 * levels[0] + levels[1]) / 3;
    levels[3] = (2 * levels[1] + levels[0]) / 3;

    int sum = 0;

    for (int texel = 0; texel < 16; texel++)
    {
        const int green = rgba.color(texel).g;

        int nearest = abs(green - levels[0]);
        for (int e = 1; e < 4; e++)
        {
            nearest = min(nearest, abs(green - levels[e]));
        }

        sum += nearest;
    }

    return sum;
}
// Packs one 2-bit palette selector per texel into a 32-bit word (texel i uses
// bits 2*i and 2*i+1), comparing only the green channel of the source colors
// against the green channel of the four palette entries.
static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4])
{
    const int entry0 = palette[0].g;
    const int entry1 = palette[1].g;
    const int entry2 = palette[2].g;
    const int entry3 = palette[3].g;

    uint packed = 0;

    for (int texel = 0; texel < 16; texel++)
    {
        const int green = rgba.color(texel).g;

        const uint dist0 = abs(entry0 - green);
        const uint dist1 = abs(entry1 - green);
        const uint dist2 = abs(entry2 - green);
        const uint dist3 = abs(entry3 - green);

        // Pairwise comparisons, later combined into the selector without branching.
        const uint gt03 = dist0 > dist3;
        const uint gt12 = dist1 > dist2;
        const uint gt02 = dist0 > dist2;
        const uint gt13 = dist1 > dist3;
        const uint gt23 = dist2 > dist3;

        const uint lowBit = gt03 & gt23;
        const uint highBit = (gt12 & gt02) | (gt03 & gt13);

        packed |= (lowBit | (highBit << 1)) << (2 * texel);
    }

    return packed;
}
// Brute force green channel compressor
void QuickCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
{
@ -558,6 +655,7 @@ void QuickCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
void QuickCompress::compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock)
{
// @@ Round instead of truncate. When rounding take into account bit expansion.
dxtBlock->alpha0 = rgba.color(0).a >> 4;
dxtBlock->alpha1 = rgba.color(1).a >> 4;
dxtBlock->alpha2 = rgba.color(2).a >> 4;
@ -582,9 +680,49 @@ void QuickCompress::compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock)
compressDXT3A(rgba, &dxtBlock->alpha);
}
void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock)
{
// @@ TODO
uint8 alpha0 = 0;
uint8 alpha1 = 255;
// Get min/max alpha.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
alpha0 = max(alpha0, alpha);
alpha1 = min(alpha1, alpha);
}
AlphaBlockDXT5 block;
block.alpha0 = alpha0 - (alpha0 - alpha1) / 34;
block.alpha1 = alpha1 + (alpha0 - alpha1) / 34;
uint besterror = computeAlphaIndices(rgba, &block);
AlphaBlockDXT5 bestblock = block;
while(true)
{
optimizeAlpha8(rgba, &block);
uint error = computeAlphaIndices(rgba, &block);
if (error >= besterror)
{
// No improvement, stop.
break;
}
if (sameIndices(block, bestblock))
{
bestblock = block;
break;
}
besterror = error;
bestblock = block;
};
// Copy best block to result;
*dxtBlock = bestblock;
}
void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock)

View File

@ -29,7 +29,7 @@
#include <nvimage/BlockDXT.h>
#include <nvtt/CompressionOptions.h>
#include <nvtt/OutputOptions.h>
#include <nvtt/FastCompressDXT.h>
#include <nvtt/QuickCompressDXT.h>
#include "CudaCompressDXT.h"
#include "CudaUtils.h"
@ -230,7 +230,7 @@ void CudaCompressor::compressDXT3(const Image * image, const OutputOptions::Priv
for (uint i = 0; i < count; i++)
{
ColorBlock rgba(blockLinearImage + (bn + i) * 16);
compressBlock(rgba, alphaBlocks + i);
QuickCompress::compressDXT3A(rgba, alphaBlocks + i);
}
// Check for errors.
@ -314,7 +314,7 @@ void CudaCompressor::compressDXT5(const Image * image, const OutputOptions::Priv
for (uint i = 0; i < count; i++)
{
ColorBlock rgba(blockLinearImage + (bn + i) * 16);
compressBlock_Iterative(rgba, alphaBlocks + i);
QuickCompress::compressDXT5A(rgba, alphaBlocks + i);
}
// Check for errors.