Move fast DXT1a compressor to QuickCompress.

This commit is contained in:
castano 2007-09-05 09:18:12 +00:00
parent 793611fe7f
commit 13335f3d97
4 changed files with 162 additions and 27 deletions

View File

@ -89,10 +89,7 @@ void nv::fastCompressDXT1a(const Image * image, const OutputOptions & outputOpti
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
compressBlock_BoundsRangeAlpha(rgba, &block);
// @@ Use iterative optimization.
//optimizeEndPoints(rgba, &block);
QuickCompress::compressDXT1a(rgba, &block);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));

View File

@ -43,25 +43,43 @@ inline static void extractColorBlockRGB(const ColorBlock & rgba, Vector3 block[1
}
}
// Gather the RGB values of the texels whose alpha is above 127.
// The selected colors are written contiguously to the front of `block`,
// in texel order; entries past the returned count are left untouched.
// Returns the number of colors written (0..16).
inline static uint extractColorBlockRGBA(const ColorBlock & rgba, Vector3 block[16])
{
    int count = 0;
    for (int texel = 0; texel < 16; ++texel)
    {
        const Color32 pixel = rgba.color(texel);

        // Texels at or below the 1-bit alpha threshold contribute no color.
        if (pixel.a <= 127) continue;

        block[count] = Vector3(pixel.r, pixel.g, pixel.b);
        ++count;
    }
    return count;
}
// find minimum and maximum colors based on bounding box in color space
inline static void findMinMaxColorsBox(Vector3 block[16], Vector3 * __restrict maxColor, Vector3 * __restrict minColor)
inline static void findMinMaxColorsBox(const Vector3 * block, uint num, Vector3 * __restrict maxColor, Vector3 * __restrict minColor)
{
*maxColor = Vector3(0, 0, 0);
*minColor = Vector3(255, 255, 255);
for (int i = 0; i < 16; i++)
for (int i = 0; i < num; i++)
{
*maxColor = max(*maxColor, block[i]);
*minColor = min(*minColor, block[i]);
}
}
inline static void selectDiagonal(Vector3 block[16], Vector3 * __restrict maxColor, Vector3 * __restrict minColor)
inline static void selectDiagonal(const Vector3 * block, uint num, Vector3 * __restrict maxColor, Vector3 * __restrict minColor)
{
Vector3 center = (*maxColor + *minColor) * 0.5;
Vector2 covariance = Vector2(zero);
for (int i = 0; i < 16; i++)
for (int i = 0; i < num; i++)
{
Vector3 t = block[i] - center;
covariance += t.xy() * t.z();
@ -111,21 +129,21 @@ inline static float colorDistance(Vector3::Arg c0, Vector3::Arg c1)
return dot(c0-c1, c0-c1);
}
inline static uint computeIndices(Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
inline static uint computeIndices4(Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
{
Vector3 c[4];
c[0] = maxColor;
c[1] = minColor;
c[2] = lerp(c[0], c[1], 1.0/3.0);
c[3] = lerp(c[0], c[1], 2.0/3.0);
Vector3 palette[4];
palette[0] = maxColor;
palette[1] = minColor;
palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f);
palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f);
uint indices = 0;
for(int i = 0; i < 16; i++)
{
float d0 = colorDistance(c[0], block[i]);
float d1 = colorDistance(c[1], block[i]);
float d2 = colorDistance(c[2], block[i]);
float d3 = colorDistance(c[3], block[i]);
float d0 = colorDistance(palette[0], block[i]);
float d1 = colorDistance(palette[1], block[i]);
float d2 = colorDistance(palette[2], block[i]);
float d3 = colorDistance(palette[3], block[i]);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
@ -143,7 +161,37 @@ inline static uint computeIndices(Vector3 block[16], Vector3::Arg maxColor, Vect
return indices;
}
static void optimizeEndPoints(Vector3 block[16], BlockDXT1 * dxtBlock)
// Build the 32-bit selector word (2 bits per texel, texel i at bits 2*i)
// for a three-color block with a transparent entry.
//   selector 0 -> minColor
//   selector 1 -> maxColor
//   selector 2 -> midpoint of minColor and maxColor
//   selector 3 -> texel alpha is below 128
// Ties between distances resolve exactly as the strict '<' comparisons
// below dictate: selector 0 only wins when strictly closest, otherwise
// selector 1 wins only when strictly closer than the midpoint.
inline static uint computeIndices3(const ColorBlock & rgba, Vector3::Arg maxColor, Vector3::Arg minColor)
{
    const Vector3 low = minColor;
    const Vector3 high = maxColor;
    const Vector3 mid = (low + high) * 0.5f;

    uint packed = 0;
    for (int texel = 0; texel < 16; ++texel)
    {
        const Color32 pixel = rgba.color(texel);

        uint selector = 3;
        if (pixel.a >= 128)
        {
            const Vector3 rgb = Vector3(pixel.r, pixel.g, pixel.b);

            const float toLow = colorDistance(low, rgb);
            const float toHigh = colorDistance(high, rgb);
            const float toMid = colorDistance(mid, rgb);

            if (toLow < toHigh && toLow < toMid)
            {
                selector = 0;
            }
            else
            {
                selector = (toHigh < toMid) ? 1 : 2;
            }
        }

        packed |= selector << (2 * texel);
    }
    return packed;
}
static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock)
{
float alpha2_sum = 0.0f;
float beta2_sum = 0.0f;
@ -166,7 +214,10 @@ static void optimizeEndPoints(Vector3 block[16], BlockDXT1 * dxtBlock)
betax_sum += beta * block[i];
}
float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float denom = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum;
if (equal(denom, 0.0f)) return;
float factor = 1.0f / denom;
Vector3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
Vector3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
@ -185,9 +236,57 @@ static void optimizeEndPoints(Vector3 block[16], BlockDXT1 * dxtBlock)
dxtBlock->col0 = Color16(color0);
dxtBlock->col1 = Color16(color1);
dxtBlock->indices = computeIndices(block, a, b);
dxtBlock->indices = computeIndices4(block, a, b);
}
/* NOTE(review): disabled endpoint refinement for the three-color (DXT1a) path; it
   appears to mirror optimizeEndPoints4 with weights 0, 1 and 0.5. It cannot simply
   be uncommented:
     - the final call passes a Vector3 array to computeIndices3, which takes a
       const ColorBlock &;
     - the weight decoding below gives selector 3 (written by computeIndices3 for
       texels with alpha below 128) beta = 0.5, so such texels would take part in
       the fit -- presumably they should be skipped; confirm before enabling.
static void optimizeEndPoints3(Vector3 block[16], BlockDXT1 * dxtBlock)
{
float alpha2_sum = 0.0f;
float beta2_sum = 0.0f;
float alphabeta_sum = 0.0f;
Vector3 alphax_sum(zero);
Vector3 betax_sum(zero);
for( int i = 0; i < 16; ++i )
{
const uint bits = dxtBlock->indices >> (2 * i);
float beta = (bits & 1);
if (bits & 2) beta = 0.5f;
float alpha = 1.0f - beta;
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * block[i];
betax_sum += beta * block[i];
}
float denom = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum;
if (equal(denom, 0.0f)) return;
float factor = 1.0f / denom;
Vector3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
Vector3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
a = clamp(a, 0, 255);
b = clamp(b, 0, 255);
uint16 color0 = roundAndExpand(&a);
uint16 color1 = roundAndExpand(&b);
if (color0 < color1)
{
swap(a, b);
swap(color0, color1);
}
dxtBlock->col0 = Color16(color1);
dxtBlock->col1 = Color16(color0);
dxtBlock->indices = computeIndices3(block, a, b);
}*/
void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
{
@ -197,9 +296,9 @@ void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
// find min and max colors
Vector3 maxColor, minColor;
findMinMaxColorsBox(block, &maxColor, &minColor);
findMinMaxColorsBox(block, 16, &maxColor, &minColor);
selectDiagonal(block, &maxColor, &minColor);
selectDiagonal(block, 16, &maxColor, &minColor);
insetBBox(&maxColor, &minColor);
@ -214,9 +313,48 @@ void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
dxtBlock->col0 = Color16(color0);
dxtBlock->col1 = Color16(color1);
dxtBlock->indices = computeIndices(block, maxColor, minColor);
dxtBlock->indices = computeIndices4(block, maxColor, minColor);
optimizeEndPoints(block, dxtBlock);
optimizeEndPoints4(block, dxtBlock);
}
void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
{
if (!rgba.hasAlpha())
{
compressDXT1(rgba, dxtBlock);
}
else
{
// read block
Vector3 block[16];
uint num = extractColorBlockRGBA(rgba, block);
// find min and max colors
Vector3 maxColor, minColor;
findMinMaxColorsBox(block, num, &maxColor, &minColor);
selectDiagonal(block, num, &maxColor, &minColor);
insetBBox(&maxColor, &minColor);
uint16 color0 = roundAndExpand(&maxColor);
uint16 color1 = roundAndExpand(&minColor);
if (color0 < color1)
{
swap(maxColor, minColor);
swap(color0, color1);
}
dxtBlock->col0 = Color16(color1);
dxtBlock->col1 = Color16(color0);
dxtBlock->indices = computeIndices3(rgba, maxColor, minColor);
// optimizeEndPoints(block, dxtBlock);
}
}

View File

@ -33,7 +33,8 @@ namespace nv
namespace QuickCompress
{
void compressDXT1(const ColorBlock & rgba, BlockDXT1 * block);
void compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
}
} // nv namespace

View File

@ -74,7 +74,6 @@ struct Error
void addSample(float e)
{
if (fabsf(e) >= 255) nvDebugBreak();
samples++;
mabse += fabsf(e);
maxabse = nv::max(maxabse, fabsf(e));