From 921ee354c0ef4331bc1e46d7fa8c6504efb92c60 Mon Sep 17 00:00:00 2001
From: castano <castano@95f4ed2b-212e-0410-8b90-d31948207fce>
Date: Sat, 26 Apr 2008 08:02:30 +0000
Subject: [PATCH] Remove legacy compressors. Add iteration count parameter to
 iterative alpha compressor. Add optimal compressors.

---
 src/nvimage/ColorBlock.cpp    |  21 ++
 src/nvimage/ColorBlock.h      |   1 +
 src/nvtt/CMakeLists.txt       |   4 +-
 src/nvtt/CompressDXT.cpp      |  47 ++--
 src/nvtt/Compressor.cpp       |   1 -
 src/nvtt/FastCompressDXT.cpp  | 456 ----------------------------------
 src/nvtt/FastCompressDXT.h    |  84 -------
 src/nvtt/QuickCompressDXT.cpp | 250 +++----------------
 src/nvtt/QuickCompressDXT.h   |   8 +-
 9 files changed, 81 insertions(+), 791 deletions(-)
 delete mode 100644 src/nvtt/FastCompressDXT.cpp
 delete mode 100644 src/nvtt/FastCompressDXT.h

diff --git a/src/nvimage/ColorBlock.cpp b/src/nvimage/ColorBlock.cpp
index c664fb2..bbf6857 100644
--- a/src/nvimage/ColorBlock.cpp
+++ b/src/nvimage/ColorBlock.cpp
@@ -125,6 +125,27 @@ bool ColorBlock::isSingleColor() const
 	return true;
 }
 
+/// Returns true if every pixel with non-zero alpha has the same color.
+/// A block with no such pixel is also reported as single-colored.
+bool ColorBlock::isSingleColorNoAlpha() const
+{
+	// Find the first non-transparent pixel; it becomes the reference color.
+	int i = 0;
+	while (i < 16 && m_color[i].a == 0) i++;
+	
+	// Fully transparent block: there is no color left to disagree with.
+	if (i == 16) return true;
+	
+	Color32 c = m_color[i];
+	for (i++; i < 16; i++)
+	{
+		// Transparent pixels are skipped; others must equal the reference (Color32 operator!=).
+		if (m_color[i].a != 0 && c != m_color[i]) return false;
+	}
+	
+	return true;
+}
+
 /// Count number of unique colors in this color block.
 uint ColorBlock::countUniqueColors() const
 {
diff --git a/src/nvimage/ColorBlock.h b/src/nvimage/ColorBlock.h
index 00f9c8e..53ba76b 100644
--- a/src/nvimage/ColorBlock.h
+++ b/src/nvimage/ColorBlock.h
@@ -24,6 +24,7 @@ namespace nv
 		void splatY();
 		
 		bool isSingleColor() const;
+		bool isSingleColorNoAlpha() const;
 		uint countUniqueColors() const;
 		Color32 averageColor() const;
 		bool hasAlpha() const;
diff --git a/src/nvtt/CMakeLists.txt b/src/nvtt/CMakeLists.txt
index 36d9cdd..e9b951a 100644
--- a/src/nvtt/CMakeLists.txt
+++ b/src/nvtt/CMakeLists.txt
@@ -13,10 +13,10 @@ SET(NVTT_SRCS
 	CompressDXT.cpp
 	CompressRGB.h
 	CompressRGB.cpp
-	FastCompressDXT.h
-	FastCompressDXT.cpp
 	QuickCompressDXT.h
 	QuickCompressDXT.cpp
+	OptimalCompressDXT.h
+	OptimalCompressDXT.cpp
 	SingleColorLookup.h
 	CompressionOptions.h
 	CompressionOptions.cpp
diff --git a/src/nvtt/CompressDXT.cpp b/src/nvtt/CompressDXT.cpp
index 8b3c74c..a50e2e0 100644
--- a/src/nvtt/CompressDXT.cpp
+++ b/src/nvtt/CompressDXT.cpp
@@ -29,8 +29,8 @@
 
 #include "nvtt.h"
 #include "CompressDXT.h"
-#include "FastCompressDXT.h"
 #include "QuickCompressDXT.h"
+#include "OptimalCompressDXT.h"
 #include "CompressionOptions.h"
 #include "OutputOptions.h"
 
@@ -69,14 +69,7 @@ void nv::fastCompressDXT1(const Image * image, const OutputOptions::Private & ou
 		for (uint x = 0; x < w; x += 4) {
 			rgba.init(image, x, y);
 			
-			if (rgba.isSingleColor())
-			{
-				QuickCompress::compressDXT1(rgba.color(0), &block);
-			}
-			else
-			{
-				QuickCompress::compressDXT1(rgba, &block);
-			}
+			QuickCompress::compressDXT1(rgba, &block);
 			
 			if (outputOptions.outputHandler != NULL) {
 				outputOptions.outputHandler->writeData(&block, sizeof(block));
@@ -98,15 +91,7 @@ void nv::fastCompressDXT1a(const Image * image, const OutputOptions::Private & o
 		for (uint x = 0; x < w; x += 4) {
 			rgba.init(image, x, y);
 			
-			// @@ We could do better here: check for single RGB, but varying alpha.
-			if (rgba.isSingleColor())
-			{
-				QuickCompress::compressDXT1a(rgba.color(0), &block);
-			}
-			else
-			{
-				QuickCompress::compressDXT1a(rgba, &block);
-			}
+			QuickCompress::compressDXT1a(rgba, &block);
 			
 			if (outputOptions.outputHandler != NULL) {
 				outputOptions.outputHandler->writeData(&block, sizeof(block));
@@ -127,6 +112,7 @@ void nv::fastCompressDXT3(const Image * image, const nvtt::OutputOptions::Privat
 	for (uint y = 0; y < h; y += 4) {
 		for (uint x = 0; x < w; x += 4) {
 			rgba.init(image, x, y);
+
 			QuickCompress::compressDXT3(rgba, &block);
 			
 			if (outputOptions.outputHandler != NULL) {
@@ -148,8 +134,8 @@ void nv::fastCompressDXT5(const Image * image, const nvtt::OutputOptions::Privat
 	for (uint y = 0; y < h; y += 4) {
 		for (uint x = 0; x < w; x += 4) {
 			rgba.init(image, x, y);
-			//QuickCompress::compressDXT5(rgba, &block);	// @@ Use fast version!!
-			nv::compressBlock_BoundsRange(rgba, &block);
+			
+			QuickCompress::compressDXT5(rgba, &block, 0);
 			
 			if (outputOptions.outputHandler != NULL) {
 				outputOptions.outputHandler->writeData(&block, sizeof(block));
@@ -174,8 +160,7 @@ void nv::fastCompressDXT5n(const Image * image, const nvtt::OutputOptions::Priva
 			// copy X coordinate to alpha channel and Y coordinate to green channel.
 			rgba.swizzleDXT5n();
 
-			//QuickCompress::compressDXT5(rgba, &block);	// @@ Use fast version!!
-			nv::compressBlock_BoundsRange(rgba, &block);
+			QuickCompress::compressDXT5(rgba, &block, 0);
 			
 			if (outputOptions.outputHandler != NULL) {
 				outputOptions.outputHandler->writeData(&block, sizeof(block));
@@ -219,7 +204,7 @@ void nv::compressDXT1(const Image * image, const OutputOptions::Private & output
 			
 			if (rgba.isSingleColor())
 			{
-				QuickCompress::compressDXT1(rgba.color(0), &block);
+				OptimalCompress::compressDXT1(rgba.color(0), &block);
 			}
 			else
 			{
@@ -254,7 +239,7 @@ void nv::compressDXT1a(const Image * image, const OutputOptions::Private & outpu
 			
 			if (rgba.isSingleColor())
 			{
-				QuickCompress::compressDXT1a(rgba.color(0), &block);
+				OptimalCompress::compressDXT1a(rgba.color(0), &block);
 			}
 			else
 			{
@@ -288,7 +273,7 @@ void nv::compressDXT3(const Image * image, const OutputOptions::Private & output
 			rgba.init(image, x, y);
 			
 			// Compress explicit alpha.
-			QuickCompress::compressDXT3A(rgba, &block.alpha);
+			OptimalCompress::compressDXT3A(rgba, &block.alpha);
 			
 			// Compress color.
 			squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
@@ -321,7 +306,7 @@ void nv::compressDXT5(const Image * image, const OutputOptions::Private & output
 			// Compress alpha.
 			if (compressionOptions.quality == Quality_Highest)
 			{
-				compressBlock_BruteForce(rgba, &block.alpha);
+				OptimalCompress::compressDXT5A(rgba, &block.alpha);
 			}
 			else
 			{
@@ -360,7 +345,7 @@ void nv::compressDXT5n(const Image * image, const OutputOptions::Private & outpu
 			// Compress X.
 			if (compressionOptions.quality == Quality_Highest)
 			{
-				compressBlock_BruteForce(rgba, &block.alpha);
+				OptimalCompress::compressDXT5A(rgba, &block.alpha);
 			}
 			else
 			{
@@ -368,7 +353,7 @@ void nv::compressDXT5n(const Image * image, const OutputOptions::Private & outpu
 			}
 			
 			// Compress Y.
-			QuickCompress::compressDXT1G(rgba, &block.color);
+			OptimalCompress::compressDXT1G(rgba, &block.color);
 			
 			if (outputOptions.outputHandler != NULL) {
 				outputOptions.outputHandler->writeData(&block, sizeof(block));
@@ -393,7 +378,7 @@ void nv::compressBC4(const Image * image, const nvtt::OutputOptions::Private & o
 
 			if (compressionOptions.quality == Quality_Highest)
 			{
-				compressBlock_BruteForce(rgba, &block);
+				OptimalCompress::compressDXT5A(rgba, &block);
 			}
 			else
 			{
@@ -429,8 +414,8 @@ void nv::compressBC5(const Image * image, const nvtt::OutputOptions::Private & o
 
 			if (compressionOptions.quality == Quality_Highest)
 			{
-				compressBlock_BruteForce(xcolor, &block.x);
-				compressBlock_BruteForce(ycolor, &block.y);
+				OptimalCompress::compressDXT5A(xcolor, &block.x);
+				OptimalCompress::compressDXT5A(ycolor, &block.y);
 			}
 			else
 			{
diff --git a/src/nvtt/Compressor.cpp b/src/nvtt/Compressor.cpp
index c203700..788724f 100644
--- a/src/nvtt/Compressor.cpp
+++ b/src/nvtt/Compressor.cpp
@@ -41,7 +41,6 @@
 #include "OutputOptions.h"
 
 #include "CompressDXT.h"
-#include "FastCompressDXT.h"
 #include "CompressRGB.h"
 #include "cuda/CudaUtils.h"
 #include "cuda/CudaCompressDXT.h"
diff --git a/src/nvtt/FastCompressDXT.cpp b/src/nvtt/FastCompressDXT.cpp
deleted file mode 100644
index e27dd68..0000000
--- a/src/nvtt/FastCompressDXT.cpp
+++ /dev/null
@@ -1,456 +0,0 @@
-// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#include <nvmath/Color.h>
-#include <nvimage/ColorBlock.h>
-#include <nvimage/BlockDXT.h>
-
-#include "FastCompressDXT.h"
-
-#if defined(__SSE2__)
-#include <emmintrin.h>
-#endif
-
-#if defined(__SSE__)
-#include <xmmintrin.h>
-#endif
-
-#if defined(__MMX__)
-#include <mmintrin.h>
-#endif
-
-#undef __VEC__
-#if defined(__VEC__)
-#include <altivec.h>
-#undef bool
-#endif
-// Online Resources:
-// - http://www.jasondorie.com/ImageLib.zip
-// - http://homepage.hispeed.ch/rscheidegger/dri_experimental/s3tc_index.html
-// - http://www.sjbrown.co.uk/?article=dxt
-
-using namespace nv;
-
-
-#if defined(__SSE2__) && 0
-
-// @@ TODO
-
-typedef __m128i VectorColor;
-
-inline static __m128i loadColor(Color32 c)
-{
-	return ...;
-}
-
-inline static __m128i absoluteDifference(__m128i a, __m128i b)
-{
-	return ...;
-}
-	
-inline uint colorDistance(__m128i a, __m128i b)
-{
-	return 0;
-}
-
-#elif defined(__MMX__) && 0
-
-typedef __m64 VectorColor;
-
-inline static __m64 loadColor(Color32 c)
-{
-	return _mm_unpacklo_pi8(_mm_cvtsi32_si64(c), _mm_setzero_si64());
-}
-
-inline static __m64 absoluteDifference(__m64 a, __m64 b)
-{
-	// = |a-b| or |b-a|
-	return _mm_or_si64(_mm_subs_pu16(a, b), _mm_subs_pu16(b, a));
-}
-	
-inline uint colorDistance(__m64 a, __m64 b)
-{
-	union {
-		__m64 v;
-		uint16 part[4];
-	} s;
-	
-	s.v = absoluteDifference(a, b);
-		
-	// @@ This is very slow!
-	return s.part[0] + s.part[1] + s.part[2] + s.part[3];
-}
-
-#define vectorEnd	_mm_empty
-
-#elif defined(__VEC__)
-
-typedef vector signed int VectorColor;
-
-inline static vector signed int loadColor(Color32 c)
-{
-	return (vector signed int) (c.r, c.g, c.b, c.a);
-}
-
-// Get the absolute distance between the given colors.
-inline static uint colorDistance(vector signed int c0, vector signed int c1)
-{
-	int result;
-	vector signed int v = vec_sums(vec_abs(vec_sub(c0, c1)), (vector signed int)0);
-	vec_ste(vec_splat(v, 3), 0, &result);
-	return result;
-}
-
-inline void vectorEnd()
-{
-}
-
-#else
-
-typedef Color32 VectorColor;
-
-inline static Color32 loadColor(Color32 c)
-{
-	return c;
-}
-
-inline static Color32 premultiplyAlpha(Color32 c)
-{
-	Color32 pm;
-	pm.r = (c.r * c.a) >> 8;
-	pm.g = (c.g * c.a) >> 8;
-	pm.b = (c.b * c.a) >> 8;
-	pm.a = c.a;
-	return pm;
-}
-
-inline static uint sqr(uint s)
-{
-	return s*s;
-}
-
-// Get the absolute distance between the given colors.
-inline static uint colorDistance(Color32 c0, Color32 c1)
-{
-	return sqr(c0.r - c1.r) + sqr(c0.g - c1.g) + sqr(c0.b - c1.b);
-	//return abs(c0.r - c1.r) + abs(c0.g - c1.g) + abs(c0.b - c1.b);
-}
-
-inline void vectorEnd()
-{
-}
-
-#endif
-
-
-inline static uint computeIndices(const ColorBlock & rgba, const Color32 palette[4])
-{
-	const VectorColor vcolor0 = loadColor(palette[0]);
-	const VectorColor vcolor1 = loadColor(palette[1]);
-	const VectorColor vcolor2 = loadColor(palette[2]);
-	const VectorColor vcolor3 = loadColor(palette[3]);
-	
-	uint indices = 0;
-	for(int i = 0; i < 16; i++) {
-		const VectorColor vcolor = loadColor(rgba.color(i));
-		
-		uint d0 = colorDistance(vcolor0, vcolor);
-		uint d1 = colorDistance(vcolor1, vcolor);
-		uint d2 = colorDistance(vcolor2, vcolor);
-		uint d3 = colorDistance(vcolor3, vcolor);
-		
-		uint b0 = d0 > d3;
-		uint b1 = d1 > d2;
-		uint b2 = d0 > d2;
-		uint b3 = d1 > d3;
-		uint b4 = d2 > d3;
-		
-		uint x0 = b1 & b2;
-		uint x1 = b0 & b3;
-		uint x2 = b0 & b4;
-		
-		indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
-	}
-
-	vectorEnd();
-	return indices;
-}
-
-
-// Compressor that uses bounding box.
-void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block)
-{
-	Color32 c0, c1;
-	rgba.boundsRange(&c1, &c0);
-	
-	block->col0 = toColor16(c0);
-	block->col1 = toColor16(c1);
-	
-	nvDebugCheck(block->col0.u > block->col1.u);
-	
-	// Use 4 color mode only.
-	//if (block->col0.u < block->col1.u) {
-	//	swap(block->col0.u, block->col1.u);
-	//}
-	
-	Color32 palette[4];
-	block->evaluatePalette4(palette);
-	
-	block->indices = computeIndices(rgba, palette);
-}
-
-
-
-
-
-
-
-
-// Encode DXT3 block.
-void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT3 * block)
-{
-	compressBlock_BoundsRange(rgba, &block->color);
-	compressBlock(rgba, &block->alpha);
-}
-
-// Encode DXT3 alpha block.
-void nv::compressBlock(const ColorBlock & rgba, AlphaBlockDXT3 * block)
-{
-	block->alpha0 = rgba.color(0).a >> 4;
-	block->alpha1 = rgba.color(1).a >> 4;
-	block->alpha2 = rgba.color(2).a >> 4;
-	block->alpha3 = rgba.color(3).a >> 4;
-	block->alpha4 = rgba.color(4).a >> 4;
-	block->alpha5 = rgba.color(5).a >> 4;
-	block->alpha6 = rgba.color(6).a >> 4;
-	block->alpha7 = rgba.color(7).a >> 4;
-	block->alpha8 = rgba.color(8).a >> 4;
-	block->alpha9 = rgba.color(9).a >> 4;
-	block->alphaA = rgba.color(10).a >> 4;
-	block->alphaB = rgba.color(11).a >> 4;
-	block->alphaC = rgba.color(12).a >> 4;
-	block->alphaD = rgba.color(13).a >> 4;
-	block->alphaE = rgba.color(14).a >> 4;
-	block->alphaF = rgba.color(15).a >> 4;
-}
-
-
-
-static uint computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block)
-{
-	uint8 alphas[8];
-	block->evaluatePalette(alphas);
-
-	uint totalError = 0;
-
-	for (uint i = 0; i < 16; i++)
-	{
-		uint8 alpha = rgba.color(i).a;
-
-		uint besterror = 256*256;
-		uint best = 8;
-		for(uint p = 0; p < 8; p++)
-		{
-			int d = alphas[p] - alpha;
-			uint error = d * d;
-
-			if (error < besterror)
-			{
-				besterror = error;
-				best = p;
-			}
-		}
-		nvDebugCheck(best < 8);
-
-		totalError += besterror;
-		block->setIndex(i, best);
-	}
-
-	return totalError;
-}
-
-static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block)
-{
-	uint8 alphas[8];
-	block->evaluatePalette(alphas);
-
-	uint totalError = 0;
-
-	for (uint i = 0; i < 16; i++)
-	{
-		uint8 alpha = rgba.color(i).a;
-
-		uint besterror = 256*256;
-		uint best;
-		for(uint p = 0; p < 8; p++)
-		{
-			int d = alphas[p] - alpha;
-			uint error = d * d;
-
-			if (error < besterror)
-			{
-				besterror = error;
-				best = p;
-			}
-		}
-
-		totalError += besterror;
-	}
-
-	return totalError;
-}
-
-
-void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT5 * block)
-{
-	Color32 c0, c1;
-	rgba.boundsRangeAlpha(&c1, &c0);
-	
-	block->color.col0 = toColor16(c0);
-	block->color.col1 = toColor16(c1);
-	
-	nvDebugCheck(block->color.col0.u > block->color.col1.u);
-	
-	Color32 palette[4];
-	block->color.evaluatePalette4(palette);
-	
-	block->color.indices = computeIndices(rgba, palette);
-	
-	nvDebugCheck(c0.a <= c1.a);
-	
-	block->alpha.alpha0 = c0.a;
-	block->alpha.alpha1 = c1.a;
-	
-	computeAlphaIndices(rgba, &block->alpha);
-}
-
-
-uint nv::compressBlock_BoundsRange(const ColorBlock & rgba, AlphaBlockDXT5 * block)
-{
-	uint8 alpha0 = 0;
-	uint8 alpha1 = 255;
-
-	// Get min/max alpha.
-	for (uint i = 0; i < 16; i++)
-	{
-		uint8 alpha = rgba.color(i).a;
-		alpha0 = max(alpha0, alpha);
-		alpha1 = min(alpha1, alpha);
-	}
-
-	alpha0 = alpha0 - (alpha0 - alpha1) / 32;
-	alpha1 = alpha1 + (alpha0 - alpha1) / 32;
-
-	AlphaBlockDXT5 block0;
-	block0.alpha0 = alpha0;
-	block0.alpha1 = alpha1;
-	uint error0 = computeAlphaIndices(rgba, &block0);
-
-	AlphaBlockDXT5 block1;
-	block1.alpha0 = alpha1;
-	block1.alpha1 = alpha0;
-	uint error1 = computeAlphaIndices(rgba, &block1);
-
-	if (error0 < error1)
-	{
-		*block = block0;
-		return error0;
-	}
-	else
-	{
-		*block = block1;
-		return error1;
-	}
-}
-
-uint nv::compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * block)
-{
-	uint8 mina = 255;
-	uint8 maxa = 0;
-
-	// Get min/max alpha.
-	for (uint i = 0; i < 16; i++)
-	{
-		uint8 alpha = rgba.color(i).a;
-		mina = min(mina, alpha);
-		maxa = max(maxa, alpha);
-	}
-
-	block->alpha0 = maxa;
-	block->alpha1 = mina;
-
-	/*int centroidDist = 256;
-	int centroid;
-
-	// Get the closest to the centroid.
-	for (uint i = 0; i < 16; i++)
-	{
-		uint8 alpha = rgba.color(i).a;
-		int dist = abs(alpha - (maxa + mina) / 2);
-		if (dist < centroidDist)
-		{
-			centroidDist = dist;
-			centroid = alpha;
-		}
-	}*/
-
-	if (maxa - mina > 8)
-	{
-		int besterror = computeAlphaError(rgba, block);
-		int besta0 = maxa;
-		int besta1 = mina;
-
-		for (int a0 = mina+9; a0 < maxa; a0++)
-		{
-			for (int a1 = mina; a1 < a0-8; a1++)
-			//for (int a1 = mina; a1 < maxa; a1++)
-			{
-				//nvCheck(abs(a1-a0) > 8);
-
-				//if (abs(a0 - a1) < 8) continue;
-				//if ((maxa-a0) + (a1-mina) + min(abs(centroid-a0), abs(centroid-a1)) > besterror)
-				if ((maxa-a0) + (a1-mina) > besterror)
-					continue;
-
-				block->alpha0 = a0;
-				block->alpha1 = a1;
-				int error = computeAlphaError(rgba, block);
-
-				if (error < besterror)
-				{
-					besterror = error;
-					besta0 = a0;
-					besta1 = a1;
-				}
-			}
-		}
-
-		block->alpha0 = besta0;
-		block->alpha1 = besta1;
-	}
-
-	return computeAlphaIndices(rgba, block);
-}
-
-
-
diff --git a/src/nvtt/FastCompressDXT.h b/src/nvtt/FastCompressDXT.h
deleted file mode 100644
index 4eca83d..0000000
--- a/src/nvtt/FastCompressDXT.h
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#ifndef NV_TT_FASTCOMPRESSDXT_H
-#define NV_TT_FASTCOMPRESSDXT_H
-
-#include <nvimage/nvimage.h>
-
-namespace nv
-{
-	struct ColorBlock;
-	struct BlockDXT1;
-	struct BlockDXT3;
-	struct BlockDXT5;
-	struct AlphaBlockDXT3;
-	struct AlphaBlockDXT5;
-
-	// Color compression:
-
-	// Compressor that uses the extremes of the luminance axis.
-//	void compressBlock_DiameterAxis(const ColorBlock & rgba, BlockDXT1 * block);
-
-	// Compressor that uses the extremes of the luminance axis.
-//	void compressBlock_LuminanceAxis(const ColorBlock & rgba, BlockDXT1 * block);
-
-	// Compressor that uses bounding box.
-	void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block);
-
-	// Compressor that uses bounding box and takes alpha into account.
-//	void compressBlock_BoundsRangeAlpha(const ColorBlock & rgba, BlockDXT1 * block);
-
-
-	// Simple, but slow compressor that tests all color pairs.
-//	void compressBlock_TestAllPairs(const ColorBlock & rgba, BlockDXT1 * block);
-	
-	// Brute force 6d search along the best fit axis.
-//	void compressBlock_AnalyzeBestFitAxis(const ColorBlock & rgba, BlockDXT1 * block);
-
-	// Spatial greedy search.
-//	void refineSolution_1dSearch(const ColorBlock & rgba, BlockDXT1 * block);
-//	void refineSolution_3dSearch(const ColorBlock & rgba, BlockDXT1 * block);
-//	void refineSolution_6dSearch(const ColorBlock & rgba, BlockDXT1 * block);
-	
-	// Brute force compressor for DXT5n
-//	void compressGreenBlock_BruteForce(const ColorBlock & rgba, BlockDXT1 * block);
-
-	// Minimize error of the endpoints.
-//	void optimizeEndPoints(const ColorBlock & rgba, BlockDXT1 * block);
-	
-//	uint blockError(const ColorBlock & rgba, const BlockDXT1 & block);
-//	uint blockError(const ColorBlock & rgba, const AlphaBlockDXT5 & block);
-
-	// Alpha compression:
-	void compressBlock(const ColorBlock & rgba, AlphaBlockDXT3 * block);
-	void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT3 * block);
-	void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT5 * block);
-
-	uint compressBlock_BoundsRange(const ColorBlock & rgba, AlphaBlockDXT5 * block);
-	uint compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * block);
-//	uint compressBlock_Iterative(const ColorBlock & rgba, AlphaBlockDXT5 * block);
-
-} // nv namespace
-
-#endif // NV_TT_FASTCOMPRESSDXT_H
diff --git a/src/nvtt/QuickCompressDXT.cpp b/src/nvtt/QuickCompressDXT.cpp
index 23cff55..bfd7ee4 100644
--- a/src/nvtt/QuickCompressDXT.cpp
+++ b/src/nvtt/QuickCompressDXT.cpp
@@ -29,7 +29,7 @@
 #include <nvimage/BlockDXT.h>
 
 #include "QuickCompressDXT.h"
-#include "SingleColorLookup.h"
+#include "OptimalCompressDXT.h"
 
 
 using namespace nv;
@@ -290,70 +290,6 @@ static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock)
 	dxtBlock->indices = computeIndices3(block, a, b);
 }*/
 
-namespace
-{
-	static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
-	{
-		nvDebugCheck(block != NULL);
-
-		int palette[4];
-		palette[0] = (block->col0.g << 2) | (block->col0.g >> 4);
-		palette[1] = (block->col1.g << 2) | (block->col1.g >> 4);
-		palette[2] = (2 * palette[0] + palette[1]) / 3;
-		palette[3] = (2 * palette[1] + palette[0]) / 3;
-
-		int totalError = 0;
-
-		for (int i = 0; i < 16; i++)
-		{
-			const int green = rgba.color(i).g;
-			
-			int error = abs(green - palette[0]);
-			error = min(error, abs(green - palette[1]));
-			error = min(error, abs(green - palette[2]));
-			error = min(error, abs(green - palette[3]));
-			
-			totalError += error;
-		}
-
-		return totalError;
-	}
-
-	static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4])
-	{
-		const int color0 = palette[0].g;
-		const int color1 = palette[1].g;
-		const int color2 = palette[2].g;
-		const int color3 = palette[3].g;
-		
-		uint indices = 0;
-		for (int i = 0; i < 16; i++)
-		{
-			const int color = rgba.color(i).g;
-			
-			uint d0 = abs(color0 - color);
-			uint d1 = abs(color1 - color);
-			uint d2 = abs(color2 - color);
-			uint d3 = abs(color3 - color);
-			
-			uint b0 = d0 > d3;
-			uint b1 = d1 > d2;
-			uint b2 = d0 > d2;
-			uint b3 = d1 > d3;
-			uint b4 = d2 > d3;
-			
-			uint x0 = b1 & b2;
-			uint x1 = b0 & b3;
-			uint x2 = b0 & b4;
-			
-			indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
-		}
-
-		return indices;
-	}
-
-} // namespace
-
 namespace
 {
 
@@ -505,79 +441,53 @@ namespace
 
 
 
-
-
-// Single color compressor, based on:
-// https://mollyrocket.com/forums/viewtopic.php?t=392
-void QuickCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock)
-{
-	dxtBlock->col0.r = OMatch5[c.r][0];
-	dxtBlock->col0.g = OMatch6[c.g][0];
-	dxtBlock->col0.b = OMatch5[c.b][0];
-	dxtBlock->col1.r = OMatch5[c.r][1];
-	dxtBlock->col1.g = OMatch6[c.g][1];
-	dxtBlock->col1.b = OMatch5[c.b][1];
-	dxtBlock->indices = 0xaaaaaaaa;
-
-	if (dxtBlock->col0.u < dxtBlock->col1.u)
-	{
-		swap(dxtBlock->col0.u, dxtBlock->col1.u);
-		dxtBlock->indices ^= 0x55555555;
-	}
-}
-
 void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
 {
-	// read block
-	Vector3 block[16];
-	extractColorBlockRGB(rgba, block);
-	
-	// find min and max colors
-	Vector3 maxColor, minColor;
-	findMinMaxColorsBox(block, 16, &maxColor, &minColor);
-	
-	selectDiagonal(block, 16, &maxColor, &minColor);
-	
-	insetBBox(&maxColor, &minColor);
-	
-	uint16 color0 = roundAndExpand(&maxColor);
-	uint16 color1 = roundAndExpand(&minColor);
-
-	if (color0 < color1)
+	if (rgba.isSingleColor())
 	{
-		swap(maxColor, minColor);
-		swap(color0, color1);
-	}
-
-	dxtBlock->col0 = Color16(color0);
-	dxtBlock->col1 = Color16(color1);
-	dxtBlock->indices = computeIndices4(block, maxColor, minColor);
-
-	optimizeEndPoints4(block, dxtBlock);
-}
-
-
-void QuickCompress::compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock)
-{
-	if (rgba.a == 0)
-	{
-		dxtBlock->col0.u = 0;
-		dxtBlock->col1.u = 0;
-		dxtBlock->indices = 0xFFFFFFFF;
+		OptimalCompress::compressDXT1(rgba.color(0), dxtBlock);
 	}
 	else
 	{
-		compressDXT1(rgba, dxtBlock);
+		// read block
+		Vector3 block[16];
+		extractColorBlockRGB(rgba, block);
+		
+		// find min and max colors
+		Vector3 maxColor, minColor;
+		findMinMaxColorsBox(block, 16, &maxColor, &minColor);
+		
+		selectDiagonal(block, 16, &maxColor, &minColor);
+		
+		insetBBox(&maxColor, &minColor);
+		
+		uint16 color0 = roundAndExpand(&maxColor);
+		uint16 color1 = roundAndExpand(&minColor);
+
+		if (color0 < color1)
+		{
+			swap(maxColor, minColor);
+			swap(color0, color1);
+		}
+
+		dxtBlock->col0 = Color16(color0);
+		dxtBlock->col1 = Color16(color1);
+		dxtBlock->indices = computeIndices4(block, maxColor, minColor);
+
+		optimizeEndPoints4(block, dxtBlock);
 	}
 }
 
+
 void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
 {
 	if (!rgba.hasAlpha())
 	{
 		compressDXT1(rgba, dxtBlock);
 	}
-	else
+	// @@ Handle single RGB, with varying alpha? We need tables for single color compressor in 3 color mode.
+	//else if (rgba.isSingleColorNoAlpha()) { ... }
+	else 
 	{
 		// read block
 		Vector3 block[16];
@@ -609,95 +519,14 @@ void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
 }
 
 
-// Brute force green channel compressor
-void QuickCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
-{
-	nvDebugCheck(block != NULL);
-	
-	uint8 ming = 63;
-	uint8 maxg = 0;
-	
-	// Get min/max green.
-	for (uint i = 0; i < 16; i++)
-	{
-		uint8 green = rgba.color(i).g >> 2;
-		ming = min(ming, green);
-		maxg = max(maxg, green);
-	}
-
-	block->col0.r = 31;
-	block->col1.r = 31;
-	block->col0.g = maxg;
-	block->col1.g = ming;
-	block->col0.b = 0;
-	block->col1.b = 0;
-
-	if (maxg - ming > 4)
-	{
-		int besterror = computeGreenError(rgba, block);
-		int bestg0 = maxg;
-		int bestg1 = ming;
-		
-		for (int g0 = ming+5; g0 < maxg; g0++)
-		{
-			for (int g1 = ming; g1 < g0-4; g1++)
-			{
-				if ((maxg-g0) + (g1-ming) > besterror)
-					continue;
-				
-				block->col0.g = g0;
-				block->col1.g = g1;
-				int error = computeGreenError(rgba, block);
-				
-				if (error < besterror)
-				{
-					besterror = error;
-					bestg0 = g0;
-					bestg1 = g1;
-				}
-			}
-		}
-		
-		block->col0.g = bestg0;
-		block->col1.g = bestg1;
-	}
-	
-	Color32 palette[4];
-	block->evaluatePalette(palette);
-	block->indices = computeGreenIndices(rgba, palette);
-}
-
-
-
-void QuickCompress::compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock)
-{
-	// @@ Round instead of truncate. When rounding take into account bit expansion.
-	dxtBlock->alpha0 = rgba.color(0).a >> 4;
-	dxtBlock->alpha1 = rgba.color(1).a >> 4;
-	dxtBlock->alpha2 = rgba.color(2).a >> 4;
-	dxtBlock->alpha3 = rgba.color(3).a >> 4;
-	dxtBlock->alpha4 = rgba.color(4).a >> 4;
-	dxtBlock->alpha5 = rgba.color(5).a >> 4;
-	dxtBlock->alpha6 = rgba.color(6).a >> 4;
-	dxtBlock->alpha7 = rgba.color(7).a >> 4;
-	dxtBlock->alpha8 = rgba.color(8).a >> 4;
-	dxtBlock->alpha9 = rgba.color(9).a >> 4;
-	dxtBlock->alphaA = rgba.color(10).a >> 4;
-	dxtBlock->alphaB = rgba.color(11).a >> 4;
-	dxtBlock->alphaC = rgba.color(12).a >> 4;
-	dxtBlock->alphaD = rgba.color(13).a >> 4;
-	dxtBlock->alphaE = rgba.color(14).a >> 4;
-	dxtBlock->alphaF = rgba.color(15).a >> 4;
-}
-
 void QuickCompress::compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock)
 {
 	compressDXT1(rgba, &dxtBlock->color);
-	compressDXT3A(rgba, &dxtBlock->alpha);
+	OptimalCompress::compressDXT3A(rgba, &dxtBlock->alpha);
 }
 
 
-void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock)
+void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount/*=8*/)
 {
 	uint8 alpha0 = 0;
 	uint8 alpha1 = 255;
@@ -716,8 +545,8 @@ void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtB
 	uint besterror = computeAlphaIndices(rgba, &block);
 	
 	AlphaBlockDXT5 bestblock = block;
-	
-	while(true)
+
+	for (int i = 0; i < iterationCount; i++)
 	{
 		optimizeAlpha8(rgba, &block);
 		uint error = computeAlphaIndices(rgba, &block);
@@ -741,9 +570,8 @@ void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtB
 	*dxtBlock = bestblock;
 }
 
-void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock)
+void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount/*=8*/)
 {
 	compressDXT1(rgba, &dxtBlock->color);
-	compressDXT5A(rgba, &dxtBlock->alpha);
+	compressDXT5A(rgba, &dxtBlock->alpha, iterationCount);
 }
-
diff --git a/src/nvtt/QuickCompressDXT.h b/src/nvtt/QuickCompressDXT.h
index dd11616..d5d262d 100644
--- a/src/nvtt/QuickCompressDXT.h
+++ b/src/nvtt/QuickCompressDXT.h
@@ -37,17 +37,13 @@ namespace nv
 
 	namespace QuickCompress
 	{
-		void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock);
 		void compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
-		void compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock);
 		void compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
-		void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);
 		
-		void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock);
 		void compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock);
 		
-		void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock);
-		void compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock);
+		void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount=8);
+		void compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount=8);
 	}
 } // nv namespace