From 13335f3d975d7253e9d7f89b05bef75868127de4 Mon Sep 17 00:00:00 2001 From: castano Date: Wed, 5 Sep 2007 09:18:12 +0000 Subject: [PATCH] Move fast DXT1a compressor to QuickCompress. --- /* NOTE(review): the line breaks of this git format-patch appear to have been collapsed into spaces, so it presumably will not apply as-is. Summary of what the hunks below show. (1) CompressDXT.cpp: nv::fastCompressDXT1a calls QuickCompress::compressDXT1a instead of compressBlock_BoundsRangeAlpha. (2) QuickCompressDXT.cpp: adds extractColorBlockRGBA, which copies the RGB of every texel with alpha > 127 into the output array and returns how many it copied; findMinMaxColorsBox and selectDiagonal now take a pointer plus a count instead of a fixed 16-entry array; computeIndices and optimizeEndPoints are renamed computeIndices4 and optimizeEndPoints4, and optimizeEndPoints4 now returns early when equal(denom, 0.0f) holds instead of computing 1.0f / denom; adds computeIndices3, which assigns index 3 to texels with alpha < 128 and otherwise picks index 0, 1 or 2 by comparing squared distances to minColor, maxColor and their midpoint; adds a commented-out optimizeEndPoints3; adds QuickCompress::compressDXT1a, which falls back to compressDXT1 when rgba.hasAlpha() is false and otherwise stores the smaller 16-bit endpoint in col0 and the larger one in col1 before calling computeIndices3. (3) QuickCompressDXT.h: declares compressDXT1a and renames the compressDXT1 parameter from block to dxtBlock. (4) tools/imgdiff.cpp: Error::addSample no longer calls nvDebugBreak when fabsf(e) >= 255. Review observations: the new loops in findMinMaxColorsBox and selectDiagonal compare int i against uint num (signed/unsigned comparison), and extractColorBlockRGBA counts in an int but returns uint; if extractColorBlockRGBA returns 0, findMinMaxColorsBox leaves maxColor at (0,0,0) and minColor at (255,255,255), i.e. an inverted box, before selectDiagonal and insetBBox run -- TODO confirm this is acceptable when every texel ends up with index 3; computeIndices3 declares palette[4] but only fills and reads entries 0 to 2. */ src/nvimage/nvtt/CompressDXT.cpp | 5 +- src/nvimage/nvtt/QuickCompressDXT.cpp | 180 +++++++++++++++++++++++--- src/nvimage/nvtt/QuickCompressDXT.h | 3 +- src/nvimage/nvtt/tools/imgdiff.cpp | 1 - 4 files changed, 162 insertions(+), 27 deletions(-) diff --git a/src/nvimage/nvtt/CompressDXT.cpp b/src/nvimage/nvtt/CompressDXT.cpp index 75514ef..365ee88 100644 --- a/src/nvimage/nvtt/CompressDXT.cpp +++ b/src/nvimage/nvtt/CompressDXT.cpp @@ -89,10 +89,7 @@ void nv::fastCompressDXT1a(const Image * image, const OutputOptions & outputOpti for (uint x = 0; x < w; x += 4) { rgba.init(image, x, y); - compressBlock_BoundsRangeAlpha(rgba, &block); - - // @@ Use iterative optimization. - //optimizeEndPoints(rgba, &block); + QuickCompress::compressDXT1a(rgba, &block); if (outputOptions.outputHandler != NULL) { outputOptions.outputHandler->writeData(&block, sizeof(block)); diff --git a/src/nvimage/nvtt/QuickCompressDXT.cpp b/src/nvimage/nvtt/QuickCompressDXT.cpp index 96cd612..89b72c0 100644 --- a/src/nvimage/nvtt/QuickCompressDXT.cpp +++ b/src/nvimage/nvtt/QuickCompressDXT.cpp @@ -43,25 +43,43 @@ inline static void extractColorBlockRGB(const ColorBlock & rgba, Vector3 block[1 } } +inline static uint extractColorBlockRGBA(const ColorBlock & rgba, Vector3 block[16]) +{ + int num = 0; + + for (int i = 0; i < 16; i++) + { + const Color32 c = rgba.color(i); + if (c.a > 127) + { + block[num++] = Vector3(c.r, c.g, c.b); + } + } + + return num; +} + + // find minimum and maximum colors based on bounding box in color space -inline static void findMinMaxColorsBox(Vector3 block[16], Vector3 * __restrict maxColor, Vector3 * __restrict minColor) +inline static void findMinMaxColorsBox(const Vector3 * block, uint num, Vector3 * __restrict maxColor, Vector3 * __restrict minColor) { *maxColor = Vector3(0, 0, 0); 
*minColor = Vector3(255, 255, 255); - for (int i = 0; i < 16; i++) + for (int i = 0; i < num; i++) { *maxColor = max(*maxColor, block[i]); *minColor = min(*minColor, block[i]); } } -inline static void selectDiagonal(Vector3 block[16], Vector3 * __restrict maxColor, Vector3 * __restrict minColor) + +inline static void selectDiagonal(const Vector3 * block, uint num, Vector3 * __restrict maxColor, Vector3 * __restrict minColor) { Vector3 center = (*maxColor + *minColor) * 0.5; Vector2 covariance = Vector2(zero); - for (int i = 0; i < 16; i++) + for (int i = 0; i < num; i++) { Vector3 t = block[i] - center; covariance += t.xy() * t.z(); @@ -111,21 +129,21 @@ inline static float colorDistance(Vector3::Arg c0, Vector3::Arg c1) return dot(c0-c1, c0-c1); } -inline static uint computeIndices(Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor) +inline static uint computeIndices4(Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor) { - Vector3 c[4]; - c[0] = maxColor; - c[1] = minColor; - c[2] = lerp(c[0], c[1], 1.0/3.0); - c[3] = lerp(c[0], c[1], 2.0/3.0); + Vector3 palette[4]; + palette[0] = maxColor; + palette[1] = minColor; + palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f); + palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f); uint indices = 0; for(int i = 0; i < 16; i++) { - float d0 = colorDistance(c[0], block[i]); - float d1 = colorDistance(c[1], block[i]); - float d2 = colorDistance(c[2], block[i]); - float d3 = colorDistance(c[3], block[i]); + float d0 = colorDistance(palette[0], block[i]); + float d1 = colorDistance(palette[1], block[i]); + float d2 = colorDistance(palette[2], block[i]); + float d3 = colorDistance(palette[3], block[i]); uint b0 = d0 > d3; uint b1 = d1 > d2; @@ -143,7 +161,37 @@ inline static uint computeIndices(Vector3 block[16], Vector3::Arg maxColor, Vect return indices; } -static void optimizeEndPoints(Vector3 block[16], BlockDXT1 * dxtBlock) +inline static uint computeIndices3(const ColorBlock & rgba, 
Vector3::Arg maxColor, Vector3::Arg minColor) +{ + Vector3 palette[4]; + palette[0] = minColor; + palette[1] = maxColor; + palette[2] = (palette[0] + palette[1]) * 0.5f; + + uint indices = 0; + for(int i = 0; i < 16; i++) + { + Color32 c = rgba.color(i); + Vector3 color = Vector3(c.r, c.g, c.b); + + float d0 = colorDistance(palette[0], color); + float d1 = colorDistance(palette[1], color); + float d2 = colorDistance(palette[2], color); + + uint index; + if (c.a < 128) index = 3; + else if (d0 < d1 && d0 < d2) index = 0; + else if (d1 < d2) index = 1; + else index = 2; + + indices |= index << (2 * i); + } + + return indices; +} + + +static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock) { float alpha2_sum = 0.0f; float beta2_sum = 0.0f; @@ -166,7 +214,10 @@ static void optimizeEndPoints(Vector3 block[16], BlockDXT1 * dxtBlock) betax_sum += beta * block[i]; } - float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + float denom = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum; + if (equal(denom, 0.0f)) return; + + float factor = 1.0f / denom; Vector3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; Vector3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; @@ -185,9 +236,57 @@ static void optimizeEndPoints(Vector3 block[16], BlockDXT1 * dxtBlock) dxtBlock->col0 = Color16(color0); dxtBlock->col1 = Color16(color1); - dxtBlock->indices = computeIndices(block, a, b); + dxtBlock->indices = computeIndices4(block, a, b); } +/*static void optimizeEndPoints3(Vector3 block[16], BlockDXT1 * dxtBlock) +{ + float alpha2_sum = 0.0f; + float beta2_sum = 0.0f; + float alphabeta_sum = 0.0f; + Vector3 alphax_sum(zero); + Vector3 betax_sum(zero); + + for( int i = 0; i < 16; ++i ) + { + const uint bits = dxtBlock->indices >> (2 * i); + + float beta = (bits & 1); + if (bits & 2) beta = 0.5f; + float alpha = 1.0f - beta; + + alpha2_sum += alpha * alpha; + beta2_sum += beta * beta; + alphabeta_sum += 
alpha * beta; + alphax_sum += alpha * block[i]; + betax_sum += beta * block[i]; + } + + float denom = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum; + if (equal(denom, 0.0f)) return; + + float factor = 1.0f / denom; + + Vector3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; + Vector3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; + + a = clamp(a, 0, 255); + b = clamp(b, 0, 255); + + uint16 color0 = roundAndExpand(&a); + uint16 color1 = roundAndExpand(&b); + + if (color0 < color1) + { + swap(a, b); + swap(color0, color1); + } + + dxtBlock->col0 = Color16(color1); + dxtBlock->col1 = Color16(color0); + dxtBlock->indices = computeIndices3(block, a, b); +}*/ + void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock) { @@ -197,9 +296,9 @@ void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock) // find min and max colors Vector3 maxColor, minColor; - findMinMaxColorsBox(block, &maxColor, &minColor); + findMinMaxColorsBox(block, 16, &maxColor, &minColor); - selectDiagonal(block, &maxColor, &minColor); + selectDiagonal(block, 16, &maxColor, &minColor); insetBBox(&maxColor, &minColor); @@ -214,9 +313,48 @@ void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock) dxtBlock->col0 = Color16(color0); dxtBlock->col1 = Color16(color1); - dxtBlock->indices = computeIndices(block, maxColor, minColor); + dxtBlock->indices = computeIndices4(block, maxColor, minColor); - optimizeEndPoints(block, dxtBlock); + optimizeEndPoints4(block, dxtBlock); } +void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock) +{ + if (!rgba.hasAlpha()) + { + compressDXT1(rgba, dxtBlock); + } + else + { + // read block + Vector3 block[16]; + uint num = extractColorBlockRGBA(rgba, block); + + // find min and max colors + Vector3 maxColor, minColor; + findMinMaxColorsBox(block, num, &maxColor, &minColor); + + selectDiagonal(block, num, &maxColor, &minColor); + + 
insetBBox(&maxColor, &minColor); + + uint16 color0 = roundAndExpand(&maxColor); + uint16 color1 = roundAndExpand(&minColor); + + if (color0 < color1) + { + swap(maxColor, minColor); + swap(color0, color1); + } + + dxtBlock->col0 = Color16(color1); + dxtBlock->col1 = Color16(color0); + dxtBlock->indices = computeIndices3(rgba, maxColor, minColor); + + // optimizeEndPoints(block, dxtBlock); + } +} + + + diff --git a/src/nvimage/nvtt/QuickCompressDXT.h b/src/nvimage/nvtt/QuickCompressDXT.h index 7a19b0d..fcbaf71 100644 --- a/src/nvimage/nvtt/QuickCompressDXT.h +++ b/src/nvimage/nvtt/QuickCompressDXT.h @@ -33,7 +33,8 @@ namespace nv namespace QuickCompress { - void compressDXT1(const ColorBlock & rgba, BlockDXT1 * block); + void compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock); + void compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock); } } // nv namespace diff --git a/src/nvimage/nvtt/tools/imgdiff.cpp b/src/nvimage/nvtt/tools/imgdiff.cpp index 57631e4..ab6e4ad 100644 --- a/src/nvimage/nvtt/tools/imgdiff.cpp +++ b/src/nvimage/nvtt/tools/imgdiff.cpp @@ -74,7 +74,6 @@ struct Error void addSample(float e) { - if (fabsf(e) >= 255) nvDebugBreak(); samples++; mabse += fabsf(e); maxabse = nv::max(maxabse, fabsf(e));