Tag 2.0.6 for release.

2009-03-19 19:06:30 +00:00
parent 820eb374d5
commit de8f0153c0
306 changed files with 9379 additions and 20606 deletions
--- a/src/nvtt/CMakeLists.txt
+++ b/src/nvtt/CMakeLists.txt
@ -5,8 +5,8 @@ ADD_SUBDIRECTORY(squish)
 SET(NVTT_SRCS
 	nvtt.h 
 	nvtt.cpp
-	Context.h
-	Context.cpp
+	Compressor.h
+	Compressor.cpp
 	nvtt_wrapper.h
 	nvtt_wrapper.cpp
 	CompressDXT.h
@ -24,7 +24,6 @@ SET(NVTT_SRCS
 	InputOptions.cpp
 	OutputOptions.h
 	OutputOptions.cpp
-	Texture.h Texture.cpp
 	cuda/CudaUtils.h
 	cuda/CudaUtils.cpp
 	cuda/CudaMath.h
@ -44,7 +43,8 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

 ADD_DEFINITIONS(-DNVTT_EXPORTS)

-IF(NVTT_SHARED)	
+IF(NVTT_SHARED)
+	ADD_DEFINITIONS(-DNVTT_SHARED=1)
 	ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS})
 ELSE(NVTT_SHARED)
 	ADD_LIBRARY(nvtt ${NVTT_SRCS})
@ -60,5 +60,54 @@ INSTALL(TARGETS nvtt
 INSTALL(FILES nvtt.h DESTINATION include/nvtt)


-ADD_SUBDIRECTORY(tools)
-ADD_SUBDIRECTORY(tests)
+
+# Command-line tools and test executables
+ADD_EXECUTABLE(nvcompress tools/compress.cpp tools/cmdline.h)
+TARGET_LINK_LIBRARIES(nvcompress nvcore nvmath nvimage nvtt)
+
+ADD_EXECUTABLE(nvdecompress tools/decompress.cpp tools/cmdline.h)
+TARGET_LINK_LIBRARIES(nvdecompress nvcore nvmath nvimage)
+
+ADD_EXECUTABLE(nvddsinfo tools/ddsinfo.cpp tools/cmdline.h)
+TARGET_LINK_LIBRARIES(nvddsinfo nvcore nvmath nvimage)
+
+ADD_EXECUTABLE(nvimgdiff tools/imgdiff.cpp tools/cmdline.h)
+TARGET_LINK_LIBRARIES(nvimgdiff nvcore nvmath nvimage)
+
+ADD_EXECUTABLE(nvassemble tools/assemble.cpp tools/cmdline.h)
+TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage)
+
+ADD_EXECUTABLE(filtertest tests/filtertest.cpp tools/cmdline.h)
+TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage)
+
+ADD_EXECUTABLE(nvzoom tools/resize.cpp tools/cmdline.h)
+TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage)
+
+INSTALL(TARGETS nvcompress nvdecompress nvddsinfo nvimgdiff nvassemble nvzoom DESTINATION bin)
+
+# UI tools
+IF(QT4_FOUND AND NOT MSVC)
+	SET(QT_USE_QTOPENGL TRUE)
+	INCLUDE_DIRECTORIES(${QT_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
+	
+	SET(SRCS
+		tools/main.cpp
+		tools/configdialog.h
+		tools/configdialog.cpp)
+
+	SET(LIBS
+		nvtt
+		${QT_QTCORE_LIBRARY}
+		${QT_QTGUI_LIBRARY}
+		${QT_QTOPENGL_LIBRARY})
+
+	QT4_WRAP_UI(UICS tools/configdialog.ui)
+	QT4_WRAP_CPP(MOCS tools/configdialog.h)
+	#QT4_ADD_RESOURCES(RCCS tools/configdialog.rc)
+
+	ADD_EXECUTABLE(nvcompressui MACOSX_BUNDLE ${SRCS} ${UICS} ${MOCS})
+	TARGET_LINK_LIBRARIES(nvcompressui ${LIBS})
+
+ENDIF(QT4_FOUND AND NOT MSVC)
+
+
--- a/src/nvtt/CompressDXT.cpp
+++ b/src/nvtt/CompressDXT.cpp
@ -21,6 +21,13 @@
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 // OTHER DEALINGS IN THE SOFTWARE.

+#include <nvcore/Memory.h>
+
+#include <nvimage/Image.h>
+#include <nvimage/ColorBlock.h>
+#include <nvimage/BlockDXT.h>
+
+#include "nvtt.h"
 #include "CompressDXT.h"
 #include "QuickCompressDXT.h"
 #include "OptimalCompressDXT.h"
@ -29,33 +36,22 @@

 // squish
 #include "squish/colourset.h"
+//#include "squish/clusterfit.h"
 #include "squish/fastclusterfit.h"
 #include "squish/weightedclusterfit.h"

-#include <nvtt/nvtt.h>
-
-#include <nvcore/Memory.h>
-
-#include <nvimage/Image.h>
-#include <nvimage/ColorBlock.h>
-#include <nvimage/BlockDXT.h>
-

 // s3_quant
 #if defined(HAVE_S3QUANT)
-#include "extern/s3tc/s3_quant.h"
+#include "s3tc/s3_quant.h"
 #endif

 // ati tc
 #if defined(HAVE_ATITC)
-#include "extern/atitc/ATI_Compress.h"
-#endif
-
-// squish
-#if defined(HAVE_SQUISH)
-#include "extern/squish/squish.h"
+#include "atitc/ATI_Compress.h"
 #endif

+//#include <time.h>

 using namespace nv;
 using namespace nvtt;
@ -209,9 +205,9 @@ void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compre
 	ColorBlock rgba;
 	BlockDXT1 block;

-	nvsquish::WeightedClusterFit fit;
-	//nvsquish::ClusterFit fit;
-	//nvsquish::FastClusterFit fit;
+	//squish::WeightedClusterFit fit;
+	//squish::ClusterFit fit;
+	squish::FastClusterFit fit;
 	fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());

 	for (uint y = 0; y < h; y += 4) {
@ -225,8 +221,8 @@ void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compre
 			}
 			else
 			{
-				nvsquish::ColourSet colours((uint8 *)rgba.colors(), 0, true);
-				fit.SetColourSet(&colours, nvsquish::kDxt1);
+				squish::ColourSet colours((uint8 *)rgba.colors(), 0);
+				fit.SetColourSet(&colours, squish::kDxt1);
 				fit.Compress(&block);
 			}
 			
@ -246,7 +242,7 @@ void nv::SlowCompressor::compressDXT1a(const CompressionOptions::Private & compr
 	ColorBlock rgba;
 	BlockDXT1 block;

-	nvsquish::WeightedClusterFit fit;
+	squish::WeightedClusterFit fit;
 	fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());

 	for (uint y = 0; y < h; y += 4) {
@ -269,8 +265,8 @@ void nv::SlowCompressor::compressDXT1a(const CompressionOptions::Private & compr
 			}
 			else
 			{
-				nvsquish::ColourSet colours((uint8 *)rgba.colors(), nvsquish::kDxt1|nvsquish::kWeightColourByAlpha);
-				fit.SetColourSet(&colours, nvsquish::kDxt1);
+				squish::ColourSet colours((uint8 *)rgba.colors(), squish::kDxt1|squish::kWeightColourByAlpha);
+				fit.SetColourSet(&colours, squish::kDxt1);
 				fit.Compress(&block);
 			}
 			
@ -290,15 +286,10 @@ void nv::SlowCompressor::compressDXT3(const CompressionOptions::Private & compre
 	ColorBlock rgba;
 	BlockDXT3 block;
 	
-	nvsquish::WeightedClusterFit fit;
+	squish::WeightedClusterFit fit;
+	//squish::FastClusterFit fit;
 	fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());

-    int flags = 0;
-    if (m_alphaMode == AlphaMode_Transparency)
-    {
-        flags = nvsquish::kWeightColourByAlpha;
-    }
-
 	for (uint y = 0; y < h; y += 4) {
 		for (uint x = 0; x < w; x += 4) {
 			
@ -314,7 +305,7 @@ void nv::SlowCompressor::compressDXT3(const CompressionOptions::Private & compre
 			}
 			else
 			{
-				nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
+				squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
 				fit.SetColourSet(&colours, 0);
 				fit.Compress(&block.color);
 			}
@ -334,15 +325,9 @@ void nv::SlowCompressor::compressDXT5(const CompressionOptions::Private & compre
 	ColorBlock rgba;
 	BlockDXT5 block;

-	nvsquish::WeightedClusterFit fit;
+	squish::WeightedClusterFit fit;
 	fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());

-    int flags = 0;
-    if (m_alphaMode == AlphaMode_Transparency)
-    {
-        flags = nvsquish::kWeightColourByAlpha;
-    }
-
 	for (uint y = 0; y < h; y += 4) {
 		for (uint x = 0; x < w; x += 4) {
 			
@ -365,7 +350,7 @@ void nv::SlowCompressor::compressDXT5(const CompressionOptions::Private & compre
 			}
 			else
 			{
-				nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
+				squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
 				fit.SetColourSet(&colours, 0);
 				fit.Compress(&block.color);
 			}
@ -386,9 +371,6 @@ void nv::SlowCompressor::compressDXT5n(const CompressionOptions::Private & compr
 	ColorBlock rgba;
 	BlockDXT5 block;
 	
-	nvsquish::WeightedClusterFit fit;
-	fit.SetMetric(0, 1, 0);
-
 	for (uint y = 0; y < h; y += 4) {
 		for (uint x = 0; x < w; x += 4) {
 			
@ -407,18 +389,7 @@ void nv::SlowCompressor::compressDXT5n(const CompressionOptions::Private & compr
 			}
 			
 			// Compress Y.
-			//OptimalCompress::compressDXT1G(rgba, &block.color);
-
-			/*if (rgba.isSingleColor())
-			{
-				OptimalCompress::compressDXT1G(rgba.color(0), &block.color);
-			}
-			else*/
-			{
-				nvsquish::ColourSet colours((uint8 *)rgba.colors(), 0);
-				fit.SetColourSet(&colours, 0);
-				fit.Compress(&block.color);
-			}
+			OptimalCompress::compressDXT1G(rgba, &block.color);
 			
 			if (outputOptions.outputHandler != NULL) {
 				outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -624,27 +595,3 @@ void nv::atiCompressDXT1(const Image * image, const OutputOptions::Private & out
 }

 #endif // defined(HAVE_ATITC)
-
-#if defined(HAVE_SQUISH)
-
-void nv::squishCompressDXT1(const Image * image, const OutputOptions::Private & outputOptions)
-{
-	Image img(*image);
-	int count = img.width() * img.height();
-	for (int i = 0; i < count; i++)
-	{
-		Color32 c = img.pixel(i);
-		img.pixel(i) = Color32(c.b, c.g, c.r, c.a);
-	}
-
-	int size = squish::GetStorageRequirements(img.width(), img.height(), squish::kDxt1);
-	void * blocks = malloc(size);
-
-	squish::CompressImage((const squish::u8 *)img.pixels(), img.width(), img.height(), blocks, squish::kDxt1 | squish::kColourClusterFit);
-
-	if (outputOptions.outputHandler != NULL) {
-		outputOptions.outputHandler->writeData(blocks, size);
-	}
-}
-
-#endif // defined(HAVE_SQUISH)
--- a/src/nvtt/CompressDXT.h
+++ b/src/nvtt/CompressDXT.h
@ -32,14 +32,14 @@ namespace nv
 	class Image;
 	class FloatImage;

-	class FastCompressor
-	{
-	public:
-		FastCompressor();
-		~FastCompressor();
+	class FastCompressor
+	{
+	public:
+		FastCompressor();
+		~FastCompressor();
+
+		void setImage(const Image * image, nvtt::AlphaMode alphaMode);

-		void setImage(const Image * image, nvtt::AlphaMode alphaMode);
-
 		void compressDXT1(const nvtt::OutputOptions::Private & outputOptions);
 		void compressDXT1a(const nvtt::OutputOptions::Private & outputOptions);
 		void compressDXT3(const nvtt::OutputOptions::Private & outputOptions);
@ -47,9 +47,9 @@ namespace nv
 		void compressDXT5n(const nvtt::OutputOptions::Private & outputOptions);

 	private:
-		const Image * m_image;
-		nvtt::AlphaMode m_alphaMode;
-	};
+		const Image * m_image;
+		nvtt::AlphaMode m_alphaMode;
+	};

 	class SlowCompressor
 	{
@ -68,7 +68,7 @@ namespace nv
 		void compressBC5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);

 	private:
-		const Image * m_image;
+		const Image * m_image;
 		nvtt::AlphaMode m_alphaMode;
 	};

@ -81,10 +81,6 @@ namespace nv
 	void atiCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
 #endif

-#if defined(HAVE_SQUISH)
-	void squishCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
-#endif
-
 } // nv namespace


--- a/src/nvtt/CompressRGB.cpp
+++ b/src/nvtt/CompressRGB.cpp
@ -21,19 +21,16 @@
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 // OTHER DEALINGS IN THE SOFTWARE.

+#include <nvcore/Debug.h>
+
+#include <nvimage/Image.h>
+#include <nvimage/PixelFormat.h>
+#include <nvmath/Color.h>
+
 #include "CompressRGB.h"
 #include "CompressionOptions.h"
 #include "OutputOptions.h"

-#include <nvimage/Image.h>
-#include <nvimage/FloatImage.h>
-#include <nvimage/PixelFormat.h>
-
-#include <nvmath/Color.h>
-#include <nvmath/Half.h>
-
-#include <nvcore/Debug.h>
-
 using namespace nv;
 using namespace nvtt;

@ -69,53 +66,29 @@ void nv::compressRGB(const Image * image, const OutputOptions::Private & outputO
 	const uint w = image->width();
 	const uint h = image->height();

-	uint bitCount;
-	uint rmask, rshift, rsize;
-	uint gmask, gshift, gsize;
-	uint bmask, bshift, bsize;
-	uint amask, ashift, asize;
-
-	if (compressionOptions.bitcount != 0)
-	{
-		bitCount = compressionOptions.bitcount;
-		nvCheck(bitCount == 8 || bitCount == 16 || bitCount == 24 || bitCount == 32);
-
-		rmask = compressionOptions.rmask;
-		gmask = compressionOptions.gmask;
-		bmask = compressionOptions.bmask;
-		amask = compressionOptions.amask;
-
-		PixelFormat::maskShiftAndSize(rmask, &rshift, &rsize);
-		PixelFormat::maskShiftAndSize(gmask, &gshift, &gsize);
-		PixelFormat::maskShiftAndSize(bmask, &bshift, &bsize);
-		PixelFormat::maskShiftAndSize(amask, &ashift, &asize);
-	}
-	else
-	{
-		rsize = compressionOptions.rsize;
-		gsize = compressionOptions.gsize;
-		bsize = compressionOptions.bsize;
-		asize = compressionOptions.asize;
-
-		bitCount = rsize + gsize + bsize + asize;
-		nvCheck(bitCount <= 32);
-
-		ashift = 0;
-		bshift = ashift + asize;
-		gshift = bshift + bsize;
-		rshift = gshift + gsize;
-
-		rmask = ((1 << rsize) - 1) << rshift;
-		gmask = ((1 << gsize) - 1) << gshift;
-		bmask = ((1 << bsize) - 1) << bshift;
-		amask = ((1 << asize) - 1) << ashift;
-	}
+	const uint bitCount = compressionOptions.bitcount;
+	nvCheck(bitCount == 8 || bitCount == 16 || bitCount == 24 || bitCount == 32);

 	const uint byteCount = bitCount / 8;

+	const uint rmask = compressionOptions.rmask;
+	uint rshift, rsize;
+	PixelFormat::maskShiftAndSize(rmask, &rshift, &rsize);
+	
+	const uint gmask = compressionOptions.gmask;
+	uint gshift, gsize;
+	PixelFormat::maskShiftAndSize(gmask, &gshift, &gsize);
+	
+	const uint bmask = compressionOptions.bmask;
+	uint bshift, bsize;
+	PixelFormat::maskShiftAndSize(bmask, &bshift, &bsize);
+	
+	const uint amask = compressionOptions.amask;
+	uint ashift, asize;
+	PixelFormat::maskShiftAndSize(amask, &ashift, &asize);

 	// Determine pitch.
-	uint pitch = computePitch(w, bitCount);
+	uint pitch = computePitch(w, compressionOptions.bitcount);

 	uint8 * dst = (uint8 *)mem::malloc(pitch + 4);

@ -150,7 +123,7 @@ void nv::compressRGB(const Image * image, const OutputOptions::Private & outputO
 			}
 			
 			// Zero padding.
-			for (uint x = w; x < pitch; x++)
+			for (uint x = w * byteCount; x < pitch; x++)
 			{
 				*(dst + x) = 0;
 			}
@ -165,75 +138,3 @@ void nv::compressRGB(const Image * image, const OutputOptions::Private & outputO
 	mem::free(dst);
 }

-
-void nv::compressRGB(const FloatImage * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
-{
-	nvCheck(image != NULL);
-
-	const uint w = image->width();
-	const uint h = image->height();
-
-	const uint rsize = compressionOptions.rsize;
-	const uint gsize = compressionOptions.gsize;
-	const uint bsize = compressionOptions.bsize;
-	const uint asize = compressionOptions.asize;
-
-	nvCheck(rsize == 0 || rsize == 16 || rsize == 32);
-	nvCheck(gsize == 0 || gsize == 16 || gsize == 32);
-	nvCheck(bsize == 0 || bsize == 16 || bsize == 32);
-	nvCheck(asize == 0 || asize == 16 || asize == 32);
-
-	const uint bitCount = rsize + gsize + bsize + asize;
-	const uint byteCount = bitCount / 8;
-	const uint pitch = w * byteCount;
-
-	uint8 * dst = (uint8 *)mem::malloc(pitch);
-
-	for (uint y = 0; y < h; y++)
-	{
-		const float * rchannel = image->scanline(y, 0);
-		const float * gchannel = image->scanline(y, 1);
-		const float * bchannel = image->scanline(y, 2);
-		const float * achannel = image->scanline(y, 3);
-
-		union FLOAT
-		{
-			float f;
-			uint32 u;
-		};
-
-		uint8 * ptr = dst;
-
-		for (uint x = 0; x < w; x++)
-		{
-			FLOAT r, g, b, a;
-			r.f = rchannel[x];
-			g.f = gchannel[x];
-			b.f = bchannel[x];
-			a.f = achannel[x];
-
-			if (rsize == 32) *((uint32 *)ptr) = r.u;
-			else if (rsize == 16) *((uint16 *)ptr) = half_from_float(r.u);
-			ptr += rsize / 8;
-
-			if (gsize == 32) *((uint32 *)ptr) = g.u;
-			else if (gsize == 16) *((uint16 *)ptr) = half_from_float(g.u);
-			ptr += gsize / 8;
-
-			if (bsize == 32) *((uint32 *)ptr) = b.u;
-			else if (bsize == 16) *((uint16 *)ptr) = half_from_float(b.u);
-			ptr += bsize / 8;
-
-			if (asize == 32) *((uint32 *)ptr) = a.u;
-			else if (asize == 16) *((uint16 *)ptr) = half_from_float(a.u);
-			ptr += asize / 8;
-		}
-
-		if (outputOptions.outputHandler != NULL)
-		{
-			outputOptions.outputHandler->writeData(dst, pitch);
-		}
-	}
-
-	mem::free(dst);
-}
--- a/src/nvtt/CompressRGB.h
+++ b/src/nvtt/CompressRGB.h
@ -29,11 +29,9 @@
 namespace nv
 {
 	class Image;
-	class FloatImage;

 	// Pixel format converter.
 	void compressRGB(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
-	void compressRGB(const FloatImage * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
 	
 } // nv namespace

--- a/src/nvtt/CompressionOptions.cpp
+++ b/src/nvtt/CompressionOptions.cpp
@ -117,36 +117,8 @@ void CompressionOptions::setPixelFormat(uint bitcount, uint rmask, uint gmask, u
 	m.gmask = gmask;
 	m.bmask = bmask;
 	m.amask = amask;
-
-	m.rsize = 0;
-	m.gsize = 0;
-	m.bsize = 0;
-	m.asize = 0;
 }

-void CompressionOptions::setPixelFormat(uint8 rsize, uint8 gsize, uint8 bsize, uint8 asize)
-{
-	nvCheck(rsize <= 32 || gsize <= 32 || bsize <= 32 || asize <= 32);
-
-	m.bitcount = 0;
-	m.rmask = 0;
-	m.gmask = 0;
-	m.bmask = 0;
-	m.amask = 0;
-
-	m.rsize = rsize;
-	m.gsize = gsize;
-	m.bsize = bsize;
-	m.asize = asize;
-}
-
-/// Set pixel type.
-void CompressionOptions::setPixelType(PixelType pixelType)
-{
-	m.pixelType = pixelType;
-}
-
-
 /// Use external compressor.
 void CompressionOptions::setExternalCompressor(const char * name)
 {
--- a/src/nvtt/CompressionOptions.h
+++ b/src/nvtt/CompressionOptions.h
@ -45,12 +45,6 @@ namespace nvtt
 		uint gmask;
 		uint bmask;
 		uint amask;
-		uint8 rsize;
-		uint8 gsize;
-		uint8 bsize;
-		uint8 asize;
-		
-		PixelType pixelType;
 		
 		nv::String externalCompressor;

--- a/src/nvtt/Compressor.cpp
+++ b/src/nvtt/Compressor.cpp
@ -0,0 +1,854 @@
+// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
+// 
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+// 
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+
+#include <nvtt/nvtt.h>
+
+#include <nvcore/Memory.h>
+#include <nvcore/Ptr.h>
+
+#include <nvimage/DirectDrawSurface.h>
+#include <nvimage/ColorBlock.h>
+#include <nvimage/BlockDXT.h>
+#include <nvimage/Image.h>
+#include <nvimage/FloatImage.h>
+#include <nvimage/Filter.h>
+#include <nvimage/Quantize.h>
+#include <nvimage/NormalMap.h>
+#include <nvimage/PixelFormat.h>
+
+#include "Compressor.h"
+#include "InputOptions.h"
+#include "CompressionOptions.h"
+#include "OutputOptions.h"
+
+#include "CompressDXT.h"
+#include "CompressRGB.h"
+#include "cuda/CudaUtils.h"
+#include "cuda/CudaCompressDXT.h"
+
+
+using namespace nv;
+using namespace nvtt;
+
+
+namespace
+{
+	
+	// Size of one compressed 4x4 block for the given format (8 or 16).
+	// Any format not listed here yields 0.
+	static int blockSize(Format format)
+	{
+		const bool isSmallBlock =
+			format == Format_DXT1 ||
+			format == Format_DXT1a ||
+			format == Format_BC4;
+
+		if (isSmallBlock) {
+			return 8;
+		}
+
+		const bool isLargeBlock =
+			format == Format_DXT3 ||
+			format == Format_DXT5 ||
+			format == Format_DXT5n ||
+			format == Format_BC5;
+
+		return isLargeBlock ? 16 : 0;
+	}
+
+	// Length of one row of `w` pixels at `bitsize` bits per pixel.
+	// The pixel size is rounded up to whole bytes and the row is padded to a multiple of 4.
+	inline uint computePitch(uint w, uint bitsize)
+	{
+		const uint bytesPerPixel = (bitsize + 7) / 8;
+		const uint rowBytes = w * bytesPerPixel;
+
+		// Round up to the next 32 bit boundary.
+		return (rowBytes + 3) & ~3U;
+	}
+
+	// Storage size of a single w x h x d image.
+	// Format_RGBA uses the padded row pitch; every other format is counted in 4x4 blocks.
+	static int computeImageSize(uint w, uint h, uint d, uint bitCount, Format format)
+	{
+		if (format != Format_RGBA) {
+			// @@ Handle 3D textures. DXT and VTC have different behaviors.
+			const uint blocksWide = (w + 3) / 4;
+			const uint blocksHigh = (h + 3) / 4;
+			return blocksWide * blocksHigh * blockSize(format);
+		}
+
+		return d * h * computePitch(w, bitCount);
+	}
+
+} // namespace
+
+namespace nvtt
+{
+	// Mipmap could be:
+	// - a pointer to an input image.
+	// - a fixed point image.
+	// - a floating point image.
+	// Holds the image for the mipmap level currently being processed.
+	// At most one fixed-point source is active at a time: either a borrowed input image
+	// (m_inputImage) or an image held by this object (m_fixedImage). A float image
+	// (m_floatImage) may exist alongside for filtering.
+	struct Mipmap
+	{
+		Mipmap() : m_inputImage(NULL) {}
+		~Mipmap() {}
+
+		// Reference input image.
+		// Any fixed or float image currently held is replaced by NULL.
+		void setFromInput(const InputOptions::Private & inputOptions, uint idx)
+		{
+			m_inputImage = inputOptions.image(idx);
+			m_fixedImage = NULL;
+			m_floatImage = NULL;
+		}
+
+		// Assign and take ownership of given image.
+		// The input reference and the fixed image are reset, so the float image becomes the only representation.
+		void setImage(FloatImage * image)
+		{
+			m_inputImage = NULL;
+			m_fixedImage = NULL;
+			m_floatImage = image;
+		}
+		
+
+		// Convert linear float image to fixed image ready for compression.
+		// Does nothing when a fixed image (input or owned) is already available.
+		void toFixedImage(const InputOptions::Private & inputOptions)
+		{
+			if (this->asFixedImage() == NULL)
+			{
+				nvDebugCheck(m_floatImage != NULL);
+
+				// Normal maps and an output gamma of exactly 1.0 skip gamma correction.
+				if (inputOptions.isNormalMap || inputOptions.outputGamma == 1.0f)
+				{
+					m_fixedImage = m_floatImage->createImage();
+				}
+				else
+				{
+					m_fixedImage = m_floatImage->createImageGammaCorrect(inputOptions.outputGamma);
+				}
+			}
+		}
+
+		// Convert input image to linear float image.
+		// Does nothing when a float image already exists.
+		void toFloatImage(const InputOptions::Private & inputOptions)
+		{
+			if (m_floatImage == NULL)
+			{
+				nvDebugCheck(this->asFixedImage() != NULL);
+
+				m_floatImage = new FloatImage(this->asFixedImage());
+
+				if (inputOptions.isNormalMap)
+				{
+					// Expand normals to [-1, 1] range.
+					// NOTE(review): the expansion below is commented out, so normal maps are currently left unchanged here.
+				//	floatImage->expandNormals(0);
+				}
+				else if (inputOptions.inputGamma != 1.0f)
+				{
+					// Convert to linear space.
+					m_floatImage->toLinear(0, 3, inputOptions.inputGamma);
+				}
+			}
+		}
+
+		// Float image, or NULL when none has been created or assigned.
+		const FloatImage * asFloatImage() const
+		{
+			return m_floatImage.ptr();
+		}
+
+		// Mutable access to the float image, or NULL when there is none.
+		FloatImage * asFloatImage()
+		{
+			return m_floatImage.ptr();
+		}
+
+		// Fixed image to read from: the referenced input image when set, otherwise the held fixed image (may be NULL).
+		const Image * asFixedImage() const
+		{
+			if (m_inputImage != NULL) 
+			{
+				return m_inputImage;
+			}
+			return m_fixedImage.ptr();
+		}
+
+		// Fixed image that may be modified. A referenced input image is copied first and the reference dropped.
+		Image * asMutableFixedImage()
+		{
+			if (m_inputImage != NULL)
+			{
+				// Do not modify input image, create a copy.
+				m_fixedImage = new Image(*m_inputImage);
+				m_inputImage = NULL;
+			}
+			return m_fixedImage.ptr();
+		}
+
+		
+	private:
+		// Borrowed pointer into the input options; never modified through this struct.
+		const Image * m_inputImage;
+		// Fixed-point image held by this mipmap.
+		AutoPtr<Image> m_fixedImage;
+		// Floating point image held by this mipmap.
+		AutoPtr<FloatImage> m_floatImage;
+	};
+
+} // nvtt namespace
+
+
+/// Create the compressor and, when CUDA hardware is present, set up a CUDA compressor.
+/// CUDA stays disabled if the hardware is missing or the CUDA compressor is not valid.
+Compressor::Compressor() : m(*new Compressor::Private())
+{
+	// CUDA initialization.
+	m.cudaSupported = cuda::isHardwarePresent();
+	m.cudaEnabled = m.cudaSupported;
+
+	if (!m.cudaEnabled)
+	{
+		return;
+	}
+
+	// Select fastest CUDA device.
+	const int fastestDevice = cuda::getFastestDevice();
+	cuda::setDevice(fastestDevice);
+
+	m.cuda = new CudaCompressor();
+
+	if (m.cuda->isValid())
+	{
+		return;
+	}
+
+	// The CUDA compressor could not be used: fall back to the non-CUDA path.
+	m.cudaEnabled = false;
+	m.cuda = NULL;
+}
+
+/// Delete the private implementation object allocated in the constructor, then call cuda::exit().
+Compressor::~Compressor()
+{
+	delete &m;
+	cuda::exit();
+}
+
+
+/// Turn CUDA acceleration on or off.
+/// The request is only applied when CUDA hardware was detected. A CUDA compressor is created
+/// on demand, and acceleration is switched back off if that compressor is not valid.
+void Compressor::enableCudaAcceleration(bool enable)
+{
+	if (m.cudaSupported)
+	{
+		m.cudaEnabled = enable;
+	}
+
+	// Nothing more to do unless CUDA is enabled and no compressor exists yet.
+	const bool needsCompressor = m.cudaEnabled && m.cuda == NULL;
+	if (!needsCompressor)
+	{
+		return;
+	}
+
+	// Select fastest CUDA device.
+	const int fastestDevice = cuda::getFastestDevice();
+	cuda::setDevice(fastestDevice);
+
+	m.cuda = new CudaCompressor();
+
+	if (m.cuda->isValid())
+	{
+		return;
+	}
+
+	// The CUDA compressor could not be used: disable acceleration again.
+	m.cudaEnabled = false;
+	m.cuda = NULL;
+}
+
+/// Check if CUDA acceleration is enabled.
+/// Returns the current value of the private cudaEnabled flag.
+bool Compressor::isCudaAccelerationEnabled() const
+{
+	return m.cudaEnabled;
+}
+
+
+/// Compress the input texture with the given compression options.
+/// Forwards the private parts of the three option objects to Private::compress and returns its result.
+bool Compressor::process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const
+{
+	return m.compress(inputOptions.m, compressionOptions.m, outputOptions.m);
+}
+
+
+/// Estimate the size of compressing the input with the given options.
+/// Forwards the private parts of both option objects to Private::estimateSize and returns its result.
+int Compressor::estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const
+{
+	return m.estimateSize(inputOptions.m, compressionOptions.m);
+}
+
+
+
+
+/// Run the whole pipeline: open the output, write the DDS header, then compress every face.
+/// Returns false when the output cannot be opened, the header cannot be written, or a face fails.
+/// Once the output has been opened it is closed again on every path, including failures.
+bool Compressor::Private::compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
+{
+	// Make sure enums match.
+	nvStaticCheck(FloatImage::WrapMode_Clamp == (FloatImage::WrapMode)WrapMode_Clamp);
+	nvStaticCheck(FloatImage::WrapMode_Mirror == (FloatImage::WrapMode)WrapMode_Mirror);
+	nvStaticCheck(FloatImage::WrapMode_Repeat == (FloatImage::WrapMode)WrapMode_Repeat);
+
+	// Get output handler.
+	if (!outputOptions.openFile())
+	{
+		if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_FileOpen);
+		return false;
+	}
+	
+	inputOptions.computeTargetExtents();
+	
+	// Output DDS header.
+	bool succeeded = outputHeader(inputOptions, compressionOptions, outputOptions);
+
+	// Compress each face in turn; stop at the first failure.
+	for (uint f = 0; succeeded && f < inputOptions.faceCount; f++)
+	{
+		succeeded = compressMipmaps(f, inputOptions, compressionOptions, outputOptions);
+	}
+
+	// Always pair the successful openFile() above with closeFile(), so a failed
+	// header write or face compression does not leave the output open.
+	outputOptions.closeFile();
+	
+	return succeeded;
+}
+
+
+// Output DDS header.
+// Returns true without writing anything when there is no output handler or header output is
+// disabled; otherwise returns whether the handler accepted the header data.
+bool Compressor::Private::outputHeader(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
+{
+	// Output DDS header.
+	if (outputOptions.outputHandler == NULL || !outputOptions.outputHeader)
+	{
+		return true;
+	}
+
+	DDSHeader header;
+	
+	header.setWidth(inputOptions.targetWidth);
+	header.setHeight(inputOptions.targetHeight);
+	
+	int mipmapCount = inputOptions.realMipmapCount();
+	nvDebugCheck(mipmapCount > 0);
+	
+	header.setMipmapCount(mipmapCount);
+	
+	// Only 2D and cube textures are described; the 3D case is disabled below.
+	if (inputOptions.textureType == TextureType_2D) {
+		header.setTexture2D();
+	}
+	else if (inputOptions.textureType == TextureType_Cube) {
+		header.setTextureCube();
+	}		
+	/*else if (inputOptions.textureType == TextureType_3D) {
+		header.setTexture3D();
+		header.setDepth(inputOptions.targetDepth);
+	}*/
+	
+	if (compressionOptions.format == Format_RGBA)
+	{
+		// Uncompressed output: describe the row pitch and the channel masks.
+		header.setPitch(computePitch(inputOptions.targetWidth, compressionOptions.bitcount));
+		header.setPixelFormat(compressionOptions.bitcount, compressionOptions.rmask, compressionOptions.gmask, compressionOptions.bmask, compressionOptions.amask);
+	}
+	else
+	{
+		// Block-compressed output: describe the size of the top level image and the FourCC code.
+		header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, compressionOptions.format));
+		
+		// The normal flag is set for DXT1/DXT1a, DXT5n and BC5 when the input is a normal map.
+		if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a) {
+			header.setFourCC('D', 'X', 'T', '1');
+			if (inputOptions.isNormalMap) header.setNormalFlag(true);
+		}
+		else if (compressionOptions.format == Format_DXT3) {
+			header.setFourCC('D', 'X', 'T', '3');
+		}
+		else if (compressionOptions.format == Format_DXT5) {
+			header.setFourCC('D', 'X', 'T', '5');
+		}
+		else if (compressionOptions.format == Format_DXT5n) {
+			header.setFourCC('D', 'X', 'T', '5');
+			if (inputOptions.isNormalMap) header.setNormalFlag(true);
+		}
+		else if (compressionOptions.format == Format_BC4) {
+			header.setFourCC('A', 'T', 'I', '1');
+		}
+		else if (compressionOptions.format == Format_BC5) {
+			header.setFourCC('A', 'T', 'I', '2');
+			if (inputOptions.isNormalMap) header.setNormalFlag(true);
+		}
+	}
+	
+	// Swap bytes if necessary.
+	header.swapBytes();
+	
+	// The base header is 128 bytes; the DX10 extension adds 20 more.
+	uint headerSize = 128;
+	if (header.hasDX10Header())
+	{
+		nvStaticCheck(sizeof(DDSHeader) == 128 + 20);
+		headerSize = 128 + 20;
+	}
+
+	bool writeSucceed = outputOptions.outputHandler->writeData(&header, headerSize);
+	if (!writeSucceed && outputOptions.errorHandler != NULL)
+	{
+		outputOptions.errorHandler->error(Error_FileWrite);
+	}
+	
+	return writeSucceed;
+}
+
+
+/// Compress every mipmap level of face `f`, starting at the target extents and halving
+/// each dimension (never below 1) for the following level.
+/// Returns false as soon as a level cannot be initialised from the input, whether or not
+/// an error handler is installed; returns true once all levels have been processed.
+bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
+{
+	uint w = inputOptions.targetWidth;
+	uint h = inputOptions.targetHeight;
+	uint d = inputOptions.targetDepth;
+
+	// Reused across levels so that a level can be downsampled from the previous one.
+	Mipmap mipmap;
+
+	const uint mipmapCount = inputOptions.realMipmapCount();
+	nvDebugCheck(mipmapCount > 0);
+
+	for (uint m = 0; m < mipmapCount; m++)
+	{
+		if (outputOptions.outputHandler)
+		{
+			int size = computeImageSize(w, h, d, compressionOptions.bitcount, compressionOptions.format);
+			outputOptions.outputHandler->beginImage(size, w, h, d, f, m);
+		}
+
+		// @@ Where to do the color transform?
+		// - Color transform may not be linear, so we cannot do before computing mipmaps.
+		// - Should be done in linear space, that is, after gamma correction.
+
+		if (!initMipmap(mipmap, inputOptions, w, h, d, f, m))
+		{
+			// Reporting is optional, but the failure must always abort: the mipmap
+			// holds no usable image, so it must not reach quantization or compression.
+			if (outputOptions.errorHandler != NULL)
+			{
+				outputOptions.errorHandler->error(Error_InvalidInput);
+			}
+			return false;
+		}
+		
+		quantizeMipmap(mipmap, compressionOptions);
+
+		compressMipmap(mipmap, inputOptions, compressionOptions, outputOptions);
+
+		// Compute extents of next mipmap:
+		w = max(1U, w / 2);
+		h = max(1U, h / 2);
+		d = max(1U, d / 2);
+	}
+	
+	return true;
+}
+
+// Prepare `mipmap` for level `m` of face `f` with extents w x h x d.
+// Returns false only when no input image with data can be found for this face.
+bool Compressor::Private::initMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f, uint m) const
+{
+	// Look for an input image with exactly these extents.
+	int sourceIdx = findExactMipmap(inputOptions, w, h, d, f);
+	const bool hasExactInput = (sourceIdx != -1);
+
+	// Levels after the first are generated from the previous one when the input has no
+	// matching image, or when normal map conversion is enabled.
+	const bool generateFromPrevious = (m != 0) && (!hasExactInput || inputOptions.convertToNormalMap);
+
+	if (generateFromPrevious)
+	{
+		downsampleMipmap(mipmap, inputOptions);
+	}
+	else
+	{
+		if (!hasExactInput)
+		{
+			// No exact match: resize the closest input image instead.
+			sourceIdx = findClosestMipmap(inputOptions, w, h, d, f);
+
+			if (sourceIdx == -1)
+			{
+				return false;
+			}
+
+			mipmap.setFromInput(inputOptions, sourceIdx);
+
+			scaleMipmap(mipmap, inputOptions, w, h, d);
+		}
+		else
+		{
+			// Use the matching input image directly.
+			mipmap.setFromInput(inputOptions, sourceIdx);
+		}
+
+		processInputImage(mipmap, inputOptions);
+	}
+
+	// The compressors consume the fixed image representation.
+	mipmap.toFixedImage(inputOptions);
+
+	return true;
+}
+
+// Search the input images of face `f` for one whose extents are exactly w x h x d.
+// Returns its index when it carries data. Returns -1 when the match has no data, when an
+// image smaller in any dimension is reached first, or when nothing matches.
+int Compressor::Private::findExactMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const
+{
+	const int levelCount = int(inputOptions.mipmapCount);
+
+	for (int level = 0; level < levelCount; level++)
+	{
+		const int idx = f * inputOptions.mipmapCount + level;
+		const InputOptions::Private::InputImage & candidate = inputOptions.images[idx];
+
+		const bool sameExtents =
+			candidate.width == int(w) &&
+			candidate.height == int(h) &&
+			candidate.depth == int(d);
+
+		if (sameExtents)
+		{
+			return (candidate.data != NULL) ? idx : -1;
+		}
+
+		// Once an image is smaller in any dimension the search gives up.
+		const bool alreadySmaller =
+			candidate.width < int(w) ||
+			candidate.height < int(h) ||
+			candidate.depth < int(d);
+
+		if (alreadySmaller)
+		{
+			return -1;
+		}
+	}
+
+	return -1;
+}
+
+// Pick the input image of face `f` to resize to w x h x d.
+// Images without data are ignored. The scan keeps the most recent image whose summed extent
+// difference is not negative; at the first image with a negative difference it returns the
+// image kept so far, or that image itself if none was kept. Returns -1 when no image has data.
+int Compressor::Private::findClosestMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const
+{
+	int closestIdx = -1;
+	const int levelCount = int(inputOptions.mipmapCount);
+
+	for (int level = 0; level < levelCount; level++)
+	{
+		const int idx = f * inputOptions.mipmapCount + level;
+		const InputOptions::Private::InputImage & candidate = inputOptions.images[idx];
+
+		if (candidate.data == NULL)
+		{
+			continue;
+		}
+
+		const int difference = (candidate.width - w) + (candidate.height - h) + (candidate.depth - d);
+
+		if (difference < 0)
+		{
+			return (closestIdx == -1) ? idx : closestIdx;
+		}
+
+		closestIdx = idx;
+	}
+
+	return closestIdx;
+}
+
+// Replace the image held by `mipmap` with a downsampled version of itself, using the
+// mipmap filter selected in the input options, and optionally renormalize normal maps.
+void Compressor::Private::downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const
+{
+	// Filtering works on the floating point representation.
+	mipmap.toFloatImage(inputOptions);
+
+	const FloatImage * source = mipmap.asFloatImage();
+	const FloatImage::WrapMode wrapMode = (FloatImage::WrapMode)inputOptions.wrapMode;
+
+	if (inputOptions.mipmapFilter == MipmapFilter_Box)
+	{
+		// Use fast downsample.
+		mipmap.setImage(source->fastDownSample());
+	}
+	else if (inputOptions.mipmapFilter == MipmapFilter_Triangle)
+	{
+		TriangleFilter triangle;
+		mipmap.setImage(source->downSample(triangle, wrapMode));
+	}
+	else
+	{
+		// Every remaining filter value is handled as Kaiser.
+		nvDebugCheck(inputOptions.mipmapFilter == MipmapFilter_Kaiser);
+		KaiserFilter kaiser(inputOptions.kaiserWidth);
+		kaiser.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch);
+		mipmap.setImage(source->downSample(kaiser, wrapMode));
+	}
+
+	// Normalize mipmap.
+	const bool holdsNormals = inputOptions.isNormalMap || inputOptions.convertToNormalMap;
+	if (inputOptions.normalizeMipmaps && holdsNormals)
+	{
+		normalizeNormalMap(mipmap.asFloatImage());
+	}
+}
+
+/* Resizes the image held by `mipmap` to w x h.
+ *
+ * The image is first converted to its floating point representation and then
+ * resized with a box filter using the configured wrap mode. The depth
+ * argument `d` is accepted but not used by this implementation.
+ */
+void Compressor::Private::scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const
+{
+	mipmap.toFloatImage(inputOptions);
+
+	// @@ Add more filters.
+	// @@ Select different filters for downscaling and reconstruction.
+
+	// Resize image. 
+	BoxFilter boxFilter;
+	mipmap.setImage(mipmap.asFloatImage()->resize(boxFilter, w, h, (FloatImage::WrapMode)inputOptions.wrapMode));
+}
+
+
+/* Process an input image: Convert to normal map, normalize, or convert to linear space.
+ *
+ * Exactly one of three paths is taken, in this order of precedence:
+ *  - convertToNormalMap: the fixed image is turned into a normal map with
+ *    createNormalMap(), using heightFactors and bumpFrequencyScale.
+ *  - isNormalMap: when normalizeMipmaps is set, the image is normalized as a
+ *    floating point image; otherwise nothing is done.
+ *  - anything else: the image is converted to floating point only when the
+ *    input and output gamma differ.
+ */
+void Compressor::Private::processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const
+{
+	if (inputOptions.convertToNormalMap)
+	{
+		mipmap.toFixedImage(inputOptions);
+		
+		Vector4 heightScale = inputOptions.heightFactors;
+		mipmap.setImage(createNormalMap(mipmap.asFixedImage(), (FloatImage::WrapMode)inputOptions.wrapMode, heightScale, inputOptions.bumpFrequencyScale));
+	}
+	else if (inputOptions.isNormalMap)
+	{
+		if (inputOptions.normalizeMipmaps)
+		{
+			// No floating point image yet: build one from the fixed image and normalize that. Otherwise normalize the existing one in place.
+			if (mipmap.asFloatImage() == NULL)
+			{
+				FloatImage * floatImage = new FloatImage(mipmap.asFixedImage());
+				normalizeNormalMap(floatImage);
+				mipmap.setImage(floatImage);
+			}
+			else
+			{
+				normalizeNormalMap(mipmap.asFloatImage());
+				mipmap.setImage(mipmap.asFloatImage()); // NOTE(review): hands setImage() the pointer it already holds; confirm setImage() tolerates that.
+			}
+		}
+	}
+	else
+	{
+		if (inputOptions.inputGamma != inputOptions.outputGamma)
+		{
+			mipmap.toFloatImage(inputOptions);
+		}
+	}
+}
+
+
+/* Quantize the given mipmap according to the compression options.
+ *
+ * Works on the fixed image of `mipmap`, which must already exist. Two
+ * independent steps are applied:
+ *  1. binaryAlpha: alpha is thresholded at alphaThreshold, with
+ *     Floyd-Steinberg dithering when enableAlphaDithering is set.
+ *  2. enableColorDithering and/or enableAlphaDithering: the image is dithered
+ *     with Floyd-Steinberg to per-channel bit sizes derived from the target
+ *     format. Channels whose dithering is not enabled keep a size of 8.
+ */
+void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const
+{
+	nvDebugCheck(mipmap.asFixedImage() != NULL);
+
+	if (compressionOptions.binaryAlpha)
+	{
+		if (compressionOptions.enableAlphaDithering)
+		{
+			Quantize::FloydSteinberg_BinaryAlpha(mipmap.asMutableFixedImage(), compressionOptions.alphaThreshold);
+		}
+		else
+		{
+			Quantize::BinaryAlpha(mipmap.asMutableFixedImage(), compressionOptions.alphaThreshold);
+		}
+	}
+
+	if (compressionOptions.enableColorDithering || compressionOptions.enableAlphaDithering)
+	{
+		uint rsize = 8; // Defaults: 8 bits per channel.
+		uint gsize = 8;
+		uint bsize = 8;
+		uint asize = 8;
+
+		if (compressionOptions.enableColorDithering)
+		{
+			if (compressionOptions.format >= Format_DXT1 && compressionOptions.format <= Format_DXT5)
+			{
+				rsize = 5; // 5:6:5 color sizes for every format in the DXT1..DXT5 enum range.
+				gsize = 6;
+				bsize = 5;
+			}
+			else if (compressionOptions.format == Format_RGB)
+			{
+				uint rshift, gshift, bshift; // Shifts are computed but only the sizes are used.
+				PixelFormat::maskShiftAndSize(compressionOptions.rmask, &rshift, &rsize);
+				PixelFormat::maskShiftAndSize(compressionOptions.gmask, &gshift, &gsize);
+				PixelFormat::maskShiftAndSize(compressionOptions.bmask, &bshift, &bsize);
+			}
+		}
+
+		if (compressionOptions.enableAlphaDithering)
+		{
+			if (compressionOptions.format == Format_DXT3)
+			{
+				asize = 4;
+			}
+			else if (compressionOptions.format == Format_RGB)
+			{
+				uint ashift; // Shift is computed but only the size is used.
+				PixelFormat::maskShiftAndSize(compressionOptions.amask, &ashift, &asize);
+			}
+		}
+
+		if (compressionOptions.binaryAlpha)
+		{
+			asize = 8; // Already quantized.
+		}
+
+		Quantize::FloydSteinberg(mipmap.asMutableFixedImage(), rsize, gsize, bsize, asize);
+	}
+}
+
+
+/* Compress the given mipmap.
+ *
+ * Dispatches on compressionOptions.format and hands the fixed image of
+ * `mipmap` to the matching compressor, which writes through outputOptions:
+ *  - RGB / RGBA: compressRGB().
+ *  - DXT1: optional external compressors ("s3", "ati") when they are compiled
+ *    in and selected, otherwise fast / CUDA / slow depending on quality.
+ *  - DXT1a, DXT3, DXT5: fast for Quality_Fastest, otherwise CUDA or slow.
+ *    The DXT1a CUDA branch currently calls the slow compressor as well.
+ *  - DXT5n: fast or slow, no CUDA path.
+ *  - BC4, BC5: always the slow compressor, regardless of quality.
+ * A format that matches none of the branches produces no output.
+ *
+ * Always returns true.
+ */
+bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
+{
+	const Image * image = mipmap.asFixedImage();
+	nvDebugCheck(image != NULL);
+
+	FastCompressor fast; // Both CPU compressors are set up front, whichever branch ends up being used.
+	fast.setImage(image, inputOptions.alphaMode);
+
+	SlowCompressor slow;
+	slow.setImage(image, inputOptions.alphaMode);
+
+	const bool useCuda = cudaEnabled && image->width() * image->height() >= 512; // CUDA is only used for images of at least 512 pixels.
+
+	if (compressionOptions.format == Format_RGBA || compressionOptions.format == Format_RGB)
+	{
+		compressRGB(image, outputOptions, compressionOptions);
+	}
+	else if (compressionOptions.format == Format_DXT1)
+	{
+#if defined(HAVE_S3QUANT)
+		if (compressionOptions.externalCompressor == "s3")
+		{
+			s3CompressDXT1(image, outputOptions);
+		}
+		else
+#endif
+
+#if defined(HAVE_ATITC)
+		if (compressionOptions.externalCompressor == "ati")
+		{
+			atiCompressDXT1(image, outputOptions);
+		}
+		else
+#endif
+		if (compressionOptions.quality == Quality_Fastest) // Reached directly, or as the trailing else of the optional blocks above.
+		{
+			fast.compressDXT1(outputOptions);
+		}
+		else
+		{
+			if (useCuda)
+			{
+				nvDebugCheck(cudaSupported);
+				cuda->setImage(image, inputOptions.alphaMode);
+				cuda->compressDXT1(compressionOptions, outputOptions);
+			}
+			else
+			{
+				slow.compressDXT1(compressionOptions, outputOptions);
+			}
+		}
+	}
+	else if (compressionOptions.format == Format_DXT1a)
+	{
+		if (compressionOptions.quality == Quality_Fastest)
+		{
+			fast.compressDXT1a(outputOptions);
+		}
+		else
+		{
+			if (useCuda)
+			{
+				nvDebugCheck(cudaSupported);
+				/*cuda*/slow.compressDXT1a(compressionOptions, outputOptions); // CUDA call is commented out: same as the else branch.
+			}
+			else
+			{
+				slow.compressDXT1a(compressionOptions, outputOptions);
+			}
+		}
+	}
+	else if (compressionOptions.format == Format_DXT3)
+	{
+		if (compressionOptions.quality == Quality_Fastest)
+		{
+			fast.compressDXT3(outputOptions);
+		}
+		else
+		{
+			if (useCuda)
+			{
+				nvDebugCheck(cudaSupported);
+				cuda->setImage(image, inputOptions.alphaMode);
+				cuda->compressDXT3(compressionOptions, outputOptions);
+			}
+			else
+			{
+				slow.compressDXT3(compressionOptions, outputOptions);
+			}
+		}
+	}
+	else if (compressionOptions.format == Format_DXT5)
+	{
+		if (compressionOptions.quality == Quality_Fastest)
+		{
+			fast.compressDXT5(outputOptions);
+		}
+		else
+		{
+			if (useCuda)
+			{
+				nvDebugCheck(cudaSupported);
+				cuda->setImage(image, inputOptions.alphaMode);
+				cuda->compressDXT5(compressionOptions, outputOptions);
+			}
+			else
+			{
+				slow.compressDXT5(compressionOptions, outputOptions);
+			}
+		}
+	}
+	else if (compressionOptions.format == Format_DXT5n)
+	{
+		if (compressionOptions.quality == Quality_Fastest)
+		{
+			fast.compressDXT5n(outputOptions);
+		}
+		else
+		{
+			slow.compressDXT5n(compressionOptions, outputOptions);
+		}
+	}
+	else if (compressionOptions.format == Format_BC4)
+	{
+		slow.compressBC4(compressionOptions, outputOptions);
+	}
+	else if (compressionOptions.format == Format_BC5)
+	{
+		slow.compressBC5(compressionOptions, outputOptions);
+	}
+
+	return true;
+}
+
+/* Estimates the total size of the output for the given options.
+ *
+ * Recomputes the target extents, then adds up computeImageSize() for every
+ * face and every one of the realMipmapCount() levels, halving each extent per
+ * level with a lower bound of 1. The result is in whatever unit
+ * computeImageSize() returns (presumably bytes - TODO confirm).
+ */
+int Compressor::Private::estimateSize(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions) const
+{
+	const Format format = compressionOptions.format;
+	const uint bitCount = compressionOptions.bitcount;
+
+	inputOptions.computeTargetExtents(); // Refreshes targetWidth/Height/Depth read below.
+	
+	uint mipmapCount = inputOptions.realMipmapCount();
+	
+	int size = 0;
+	
+	for (uint f = 0; f < inputOptions.faceCount; f++)
+	{
+		uint w = inputOptions.targetWidth; // Every face starts again from the full target extents.
+		uint h = inputOptions.targetHeight;
+		uint d = inputOptions.targetDepth;
+		
+		for (uint m = 0; m < mipmapCount; m++)
+		{
+			size += computeImageSize(w, h, d, bitCount, format);
+			
+			// Compute extents of next mipmap:
+			w = max(1U, w / 2);
+			h = max(1U, h / 2);
+			d = max(1U, d / 2);
+		}
+	}
+	
+	return size;
+}
--- a/src/nvtt/Compressor.h
+++ b/src/nvtt/Compressor.h
@ -58,7 +58,6 @@ namespace nvtt

 		void downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
 		void scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const;
-		void premultiplyAlphaMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
 		void processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
 		void quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const;
 		bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
--- a/src/nvtt/Context.cpp
+++ b/src/nvtt/Context.cpp
--- a/src/nvtt/InputOptions.cpp
+++ b/src/nvtt/InputOptions.cpp
@ -23,11 +23,8 @@

 #include <string.h> // memcpy

-#include <nvcore/Containers.h> // nextPowerOfTwo
 #include <nvcore/Memory.h>

-#include <nvmath/Color.h>
-
 #include "nvtt.h"
 #include "InputOptions.h"

@ -104,8 +101,6 @@ void InputOptions::reset()
 	
 	m.colorTransform = ColorTransform_None;
 	m.linearTransform = Matrix(identity);
-	for (int i = 0; i < 4; i++) m.colorOffsets[i] = 0;
-	for (int i = 0; i < 4; i++) m.swizzleTransform[i] = i;

 	m.generateMipmaps = true;
 	m.maxLevel = -1;
@ -123,8 +118,6 @@ void InputOptions::reset()
 	
 	m.maxExtent = 0;
 	m.roundMode = RoundMode_None;
-
-	m.premultiplyAlpha = false;
 }


@ -168,8 +161,7 @@ void InputOptions::setTextureLayout(TextureType type, int width, int height, int
 			img.mipLevel = mipLevel;
 			img.face = f;
 			
-			img.uint8data = NULL;
-			img.floatdata = NULL;
+			img.data = NULL;
 			
 			w = max(1U, w / 2);
 			h = max(1U, h / 2);
@ -207,116 +199,14 @@ bool InputOptions::setMipmapData(const void * data, int width, int height, int d
 		return false;
 	}
 	
-	switch(m.inputFormat)
-	{
-		case InputFormat_BGRA_8UB:
-			if (Image * image = new nv::Image())
-			{
-				image->allocate(width, height);
-				memcpy(image->pixels(), data, width * height * 4);
-				m.images[idx].uint8data = image;
-			}
-			else
-			{
-				// @@ Out of memory error.
-				return false;
-			}
-			break;
-		case InputFormat_RGBA_32F:
-			if (FloatImage * image = new nv::FloatImage())
-			{
-				const float * floatData = (const float *)data;
-				image->allocate(4, width, height);
-				
-				for (int c = 0; c < 4; c++)
-				{
-					float * channel = image->channel(c);
-					for (int i = 0; i < width * height; i++)
-					{
-						channel[i] = floatData[i*4 + c];
-					}
-				}
-				
-				m.images[idx].floatdata = image;
-			}
-			else
-			{
-				// @@ Out of memory error.
-				return false;
-			}
-			break;
-		default:
-			return false;
-	}
+	m.images[idx].data = new nv::Image();
+	m.images[idx].data->allocate(width, height);
+	memcpy(m.images[idx].data->pixels(), data, width * height * 4); 
 	
 	return true;
 }


-// Copies data 
-bool InputOptions::setMipmapChannelData(const void * data, int channel, int width, int height, int depth /*= 1*/, int face /*= 0*/, int mipLevel /*= 0*/)
-{
-	nvCheck(depth == 1);
-	nvCheck(channel >= 0 && channel < 4);
-	
-	const int idx = face * m.mipmapCount + mipLevel;
-	
-	if (m.images[idx].width != width || m.images[idx].height != height || m.images[idx].depth != depth || m.images[idx].mipLevel != mipLevel || m.images[idx].face != face)
-	{
-		// Invalid dimension or index.
-		return false;
-	}
-	
-	// Allocate image if not allocated already.
-	if (m.inputFormat == InputFormat_BGRA_8UB)
-	{
-		m.images[idx].floatdata = NULL;
-		if (m.images[idx].uint8data == NULL)
-		{
-			m.images[idx].uint8data = new Image();
-			m.images[idx].uint8data->allocate(width, height);
-			m.images[idx].uint8data->fill(Color32(0,0,0,0));
-		}
-	}
-	else if (m.inputFormat == InputFormat_RGBA_32F)
-	{
-		m.images[idx].uint8data = NULL;
-		if (m.images[idx].floatdata == NULL)
-		{
-			m.images[idx].floatdata = new FloatImage();
-			m.images[idx].floatdata->allocate(4, width, height);
-			m.images[idx].floatdata->clear();
-		}
-
-		
-	}
-	else
-	{
-		m.images[idx].floatdata = NULL;
-		m.images[idx].uint8data = NULL;
-		return false;
-	}
-
-	// Copy channel data to image.
-	if (m.inputFormat == InputFormat_BGRA_8UB)
-	{
-		// @@ TODO
-	}
-	else if (m.inputFormat == InputFormat_RGBA_32F)
-	{
-		const float * floatData = (const float *)data;
-		float * channelPtr = m.images[idx].floatdata->channel(channel);
-
-		for (int i = 0; i < width * height; i++)
-		{
-			channelPtr[i] = floatData[i];
-		}
-	}
-
-	return true;
-}
-
-
 /// Describe the format of the input.
 void InputOptions::setFormat(InputFormat format)
 {
@ -411,32 +301,8 @@ void InputOptions::setLinearTransform(int channel, float w0, float w1, float w2,
 {
 	nvCheck(channel >= 0 && channel < 4);

-	m.linearTransform(channel, 0) = w0;
-	m.linearTransform(channel, 1) = w1;
-	m.linearTransform(channel, 2) = w2;
-	m.linearTransform(channel, 3) = w3;
-}
-
-void InputOptions::setLinearTransform(int channel, float w0, float w1, float w2, float w3, float offset)
-{
-	nvCheck(channel >= 0 && channel < 4);
-
-	setLinearTransform(channel, w0, w1, w2, w3);
-
-	m.colorOffsets[channel] = offset;
-}
-
-void InputOptions::setSwizzleTransform(int x, int y, int z, int w)
-{
-	nvCheck(x >= 0 && x <= 6);
-	nvCheck(y >= 0 && y <= 6);
-	nvCheck(z >= 0 && z <= 6);
-	nvCheck(w >= 0 && w <= 6);
-	
-	m.swizzleTransform[0] = x;
-	m.swizzleTransform[1] = y;
-	m.swizzleTransform[2] = z;
-	m.swizzleTransform[3] = w;
+	Vector4 w(w0, w1, w2, w3);
+	//m.linearTransform.setRow(channel, w);
 }

 void InputOptions::setMaxExtents(int e)
@ -450,10 +316,6 @@ void InputOptions::setRoundMode(RoundMode mode)
 	m.roundMode = mode;
 }

-void InputOptions::setPremultiplyAlpha(bool b)
-{
-	m.premultiplyAlpha = b;
-}

 void InputOptions::Private::computeTargetExtents() const
 {
@ -533,7 +395,7 @@ const Image * InputOptions::Private::image(uint face, uint mipmap) const
 	nvDebugCheck(image.face == face);
 	nvDebugCheck(image.mipLevel == mipmap);

-	return image.uint8data.ptr();
+	return image.data.ptr();
 }

 const Image * InputOptions::Private::image(uint idx) const
@ -542,14 +404,5 @@ const Image * InputOptions::Private::image(uint idx) const

 	const InputImage & image = this->images[idx];

-	return image.uint8data.ptr();
-}
-
-const FloatImage * InputOptions::Private::floatImage(uint idx) const
-{
-	nvDebugCheck(idx < faceCount * mipmapCount);
-
-	const InputImage & image = this->images[idx];
-
-	return image.floatdata.ptr();
+	return image.data.ptr();
 }
--- a/src/nvtt/InputOptions.h
+++ b/src/nvtt/InputOptions.h
@ -28,7 +28,6 @@
 #include <nvmath/Vector.h>
 #include <nvmath/Matrix.h>
 #include <nvimage/Image.h>
-#include <nvimage/FloatImage.h>
 #include "nvtt.h"

 namespace nvtt
@ -57,8 +56,6 @@ namespace nvtt
 		// Color transform.
 		ColorTransform colorTransform;
 		nv::Matrix linearTransform;
-		float colorOffsets[4];
-		uint swizzleTransform[4];
 		
 		// Mipmap generation options.
 		bool generateMipmaps;
@ -81,8 +78,6 @@ namespace nvtt
 		uint maxExtent;
 		RoundMode roundMode;
 		
-		bool premultiplyAlpha;
-
 		// @@ These are computed in nvtt::compress, so they should be mutable or stored elsewhere...
 		mutable uint targetWidth;
 		mutable uint targetHeight;
@ -94,9 +89,7 @@ namespace nvtt
 		int realMipmapCount() const;
 		
 		const nv::Image * image(uint face, uint mipmap) const;
-		const nv::Image * image(uint idx) const;
-
-		const nv::FloatImage * floatImage(uint idx) const;
+		const nv::Image * image(uint idx) const;

 	};

@ -105,8 +98,6 @@ namespace nvtt
 	{
 		InputImage() {}
 		
-		bool hasValidData() const { return uint8data != NULL || floatdata != NULL; }
-		
 		int mipLevel;
 		int face;
 		
@ -114,8 +105,7 @@ namespace nvtt
 		int height;
 		int depth;
 		
-		nv::AutoPtr<nv::Image> uint8data;
-		nv::AutoPtr<nv::FloatImage> floatdata;
+		nv::AutoPtr<nv::Image> data;
 	};

 } // nvtt namespace
--- a/src/nvtt/OptimalCompressDXT.cpp
+++ b/src/nvtt/OptimalCompressDXT.cpp
@ -21,17 +21,16 @@
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 // OTHER DEALINGS IN THE SOFTWARE.

-#include "OptimalCompressDXT.h"
-#include "SingleColorLookup.h"
+#include <nvcore/Containers.h> // swap
+
+#include <nvmath/Color.h>

 #include <nvimage/ColorBlock.h>
 #include <nvimage/BlockDXT.h>

-#include <nvmath/Color.h>
+#include "OptimalCompressDXT.h"
+#include "SingleColorLookup.h"

-#include <nvcore/Containers.h> // swap
-
-#include <limits.h>

 using namespace nv;
 using namespace OptimalCompress;
@ -40,37 +39,10 @@ using namespace OptimalCompress;

 namespace
 {
-	static int greenDistance(int g0, int g1)
-	{
-		//return abs(g0 - g1);
-		int d = g0 - g1;
-		return d * d;
-	}
-
-	static int alphaDistance(int a0, int a1)
-	{
-		//return abs(a0 - a1);
-		int d = a0 - a1;
-		return d * d;
-	}
-
-	static uint nearestGreen4(uint green, uint maxGreen, uint minGreen)
-	{
-		uint bias = maxGreen + (maxGreen - minGreen) / 6;
-
-		uint index = 0;
-		if (maxGreen - minGreen != 0) index = clamp(3 * (bias - green) / (maxGreen - minGreen), 0U, 3U);
-
-		return (index * minGreen + (3 - index) * maxGreen) / 3;
-	}
-
-	static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block, int bestError = INT_MAX)
+	static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
 	{
 		nvDebugCheck(block != NULL);

-	//	uint g0 = (block->col0.g << 2) | (block->col0.g >> 4);
-	//	uint g1 = (block->col1.g << 2) | (block->col1.g >> 4);
-
 		int palette[4];
 		palette[0] = (block->col0.g << 2) | (block->col0.g >> 4);
 		palette[1] = (block->col1.g << 2) | (block->col1.g >> 4);
@ -78,24 +50,17 @@ namespace
 		palette[3] = (2 * palette[1] + palette[0]) / 3;

 		int totalError = 0;
+
 		for (int i = 0; i < 16; i++)
 		{
 			const int green = rgba.color(i).g;
 			
-			int error = greenDistance(green, palette[0]);
-			error = min(error, greenDistance(green, palette[1]));
-			error = min(error, greenDistance(green, palette[2]));
-			error = min(error, greenDistance(green, palette[3]));
-
+			int error = abs(green - palette[0]);
+			error = min(error, abs(green - palette[1]));
+			error = min(error, abs(green - palette[2]));
+			error = min(error, abs(green - palette[3]));
+			
 			totalError += error;
-
-		//	totalError += nearestGreen4(green, g0, g1);
-
-			if (totalError > bestError)
-			{
-				// early out
-				return totalError;
-			}
 		}

 		return totalError;
@ -113,10 +78,10 @@ namespace
 		{
 			const int color = rgba.color(i).g;
 			
-			uint d0 = greenDistance(color0, color);
-			uint d1 = greenDistance(color1, color);
-			uint d2 = greenDistance(color2, color);
-			uint d3 = greenDistance(color3, color);
+			uint d0 = abs(color0 - color);
+			uint d1 = abs(color1 - color);
+			uint d2 = abs(color2 - color);
+			uint d3 = abs(color3 - color);
 			
 			uint b0 = d0 > d3;
 			uint b1 = d1 > d2;
@ -137,78 +102,49 @@ namespace
 	// Choose quantized color that produces less error. Used by DXT3 compressor.
 	inline static uint quantize4(uint8 a)
 	{
-		int q0 = max(int(a >> 4) - 1, 0);
+		int q0 = (a >> 4) - 1;
 		int q1 = (a >> 4);
-		int q2 = min(int(a >> 4) + 1, 0xF);
+		int q2 = (a >> 4) + 1;
 		
 		q0 = (q0 << 4) | q0;
 		q1 = (q1 << 4) | q1;
 		q2 = (q2 << 4) | q2;
 		
-		int d0 = alphaDistance(q0, a);
-		int d1 = alphaDistance(q1, a);
-		int d2 = alphaDistance(q2, a);
+		int d0 = abs(q0 - a);
+		int d1 = abs(q1 - a);
+		int d2 = abs(q2 - a);

 		if (d0 < d1 && d0 < d2) return q0 >> 4;
 		if (d1 < d2) return q1 >> 4;
 		return q2 >> 4;
 	}
 	
-	static uint nearestAlpha8(uint alpha, uint maxAlpha, uint minAlpha)
-	{
-		float bias = maxAlpha + float(maxAlpha - minAlpha) / (2.0f * 7.0f);
-		float scale = 7.0f / float(maxAlpha - minAlpha);
-
-		uint index = (uint)clamp((bias - float(alpha)) * scale, 0.0f, 7.0f);
-
-		return (index * minAlpha + (7 - index) * maxAlpha) / 7;
-	}
-
-	static uint computeAlphaError8(const ColorBlock & rgba, const AlphaBlockDXT5 * block, int bestError = INT_MAX)
-	{
-		int totalError = 0;
-
-		for (uint i = 0; i < 16; i++)
-		{
-			uint8 alpha = rgba.color(i).a;
-
-			totalError += alphaDistance(alpha, nearestAlpha8(alpha, block->alpha0, block->alpha1));
-
-			if (totalError > bestError)
-			{
-				// early out
-				return totalError;
-			}
-		}
-
-		return totalError;
-	}
-
-	static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block, int bestError = INT_MAX)
+	static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block)
 	{
 		uint8 alphas[8];
 		block->evaluatePalette(alphas);

-		int totalError = 0;
+		uint totalError = 0;

 		for (uint i = 0; i < 16; i++)
 		{
 			uint8 alpha = rgba.color(i).a;

-			int minDist = INT_MAX;
+			uint besterror = 256*256;
+			uint best;
 			for (uint p = 0; p < 8; p++)
 			{
-				int dist = alphaDistance(alpha, alphas[p]);
-				minDist = min(dist, minDist);
+				int d = alphas[p] - alpha;
+				uint error = d * d;
+
+				if (error < besterror)
+				{
+					besterror = error;
+					best = p;
+				}
 			}

-			totalError += minDist;
-
-			if (totalError > bestError)
-			{
-				// early out
-				return totalError;
-			}
+			totalError += besterror;
 		}

 		return totalError;
@ -223,21 +159,22 @@ namespace
 		{
 			uint8 alpha = rgba.color(i).a;

-			int minDist = INT_MAX;
-			int bestIndex = 8;
-			for (uint p = 0; p < 8; p++)
+			uint besterror = 256*256;
+			uint best = 8;
+			for(uint p = 0; p < 8; p++)
 			{
-				int dist = alphaDistance(alpha, alphas[p]);
+				int d = alphas[p] - alpha;
+				uint error = d * d;

-				if (dist < minDist)
+				if (error < besterror)
 				{
-					minDist = dist;
-					bestIndex = p;
+					besterror = error;
+					best = p;
 				}
 			}
-			nvDebugCheck(bestIndex < 8);
+			nvDebugCheck(best < 8);

-			block->setIndex(i, bestIndex);
+			block->setIndex(i, best);
 		}
 	}

@ -280,23 +217,6 @@ void OptimalCompress::compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock)
 	}
 }

-void OptimalCompress::compressDXT1G(uint8 g, BlockDXT1 * dxtBlock)
-{
-	dxtBlock->col0.r = 31;
-	dxtBlock->col0.g = OMatch6[g][0];
-	dxtBlock->col0.b = 0;
-	dxtBlock->col1.r = 31;
-	dxtBlock->col1.g = OMatch6[g][1];
-	dxtBlock->col1.b = 0;
-	dxtBlock->indices = 0xaaaaaaaa;
-
-	if (dxtBlock->col0.u < dxtBlock->col1.u)
-	{
-		swap(dxtBlock->col0.u, dxtBlock->col1.u);
-		dxtBlock->indices ^= 0x55555555;
-	}
-}
-

 // Brute force green channel compressor
 void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
@ -306,23 +226,12 @@ void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
 	uint8 ming = 63;
 	uint8 maxg = 0;
 	
-	bool isSingleColor = true;
-	uint8 singleColor = rgba.color(0).g;
-
 	// Get min/max green.
 	for (uint i = 0; i < 16; i++)
 	{
-		uint8 green = (rgba.color(i).g + 1) >> 2;
+		uint8 green = rgba.color(i).g >> 2;
 		ming = min(ming, green);
 		maxg = max(maxg, green);
-
-		if (rgba.color(i).g != singleColor) isSingleColor = false;
-	}
-
-	if (isSingleColor)
-	{
-		compressDXT1G(singleColor, block);
-		return;
 	}

 	block->col0.r = 31;
@ -332,38 +241,36 @@ void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
 	block->col0.b = 0;
 	block->col1.b = 0;

-	int bestError = computeGreenError(rgba, block);
-	int bestg0 = maxg;
-	int bestg1 = ming;
-
-	// Expand search space a bit.
-	const int greenExpand = 4;
-	ming = (ming <= greenExpand) ? 0 : ming - greenExpand;
-	maxg = (maxg >= 63-greenExpand) ? 63 : maxg + greenExpand;
-
-	for (int g0 = ming+1; g0 <= maxg; g0++)
+	if (maxg - ming > 4)
 	{
-		for (int g1 = ming; g1 < g0; g1++)
+		int besterror = computeGreenError(rgba, block);
+		int bestg0 = maxg;
+		int bestg1 = ming;
+		
+		for (int g0 = ming+5; g0 < maxg; g0++)
 		{
-			block->col0.g = g0;
-			block->col1.g = g1;
-			int error = computeGreenError(rgba, block, bestError);
-			
-			if (error < bestError)
+			for (int g1 = ming; g1 < g0-4; g1++)
 			{
-				bestError = error;
-				bestg0 = g0;
-				bestg1 = g1;
+				if ((maxg-g0) + (g1-ming) > besterror)
+					continue;
+				
+				block->col0.g = g0;
+				block->col1.g = g1;
+				int error = computeGreenError(rgba, block);
+				
+				if (error < besterror)
+				{
+					besterror = error;
+					bestg0 = g0;
+					bestg1 = g1;
+				}
 			}
 		}
+		
+		block->col0.g = bestg0;
+		block->col1.g = bestg1;
 	}
 	
-	block->col0.g = bestg0;
-	block->col1.g = bestg1;
-
-	nvDebugCheck(bestg0 == bestg1 || block->isFourColorMode());
-
-
 	Color32 palette[4];
 	block->evaluatePalette(palette);
 	block->indices = computeGreenIndices(rgba, palette);
@ -406,26 +313,42 @@ void OptimalCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dx
 	dxtBlock->alpha0 = maxa;
 	dxtBlock->alpha1 = mina;

+	/*int centroidDist = 256;
+	int centroid;
+
+	// Get the closest to the centroid.
+	for (uint i = 0; i < 16; i++)
+	{
+		uint8 alpha = rgba.color(i).a;
+		int dist = abs(alpha - (maxa + mina) / 2);
+		if (dist < centroidDist)
+		{
+			centroidDist = dist;
+			centroid = alpha;
+		}
+	}*/
+
 	if (maxa - mina > 8)
 	{
 		int besterror = computeAlphaError(rgba, dxtBlock);
 		int besta0 = maxa;
 		int besta1 = mina;

-		// Expand search space a bit.
-		const int alphaExpand = 8;
-		mina = (mina <= alphaExpand) ? 0 : mina - alphaExpand;
-		maxa = (maxa >= 255-alphaExpand) ? 255 : maxa + alphaExpand;
-
 		for (int a0 = mina+9; a0 < maxa; a0++)
 		{
 			for (int a1 = mina; a1 < a0-8; a1++)
+			//for (int a1 = mina; a1 < maxa; a1++)
 			{
-				nvDebugCheck(a0 - a1 > 8);
+				//nvCheck(abs(a1-a0) > 8);
+
+				//if (abs(a0 - a1) < 8) continue;
+				//if ((maxa-a0) + (a1-mina) + min(abs(centroid-a0), abs(centroid-a1)) > besterror)
+				if ((maxa-a0) + (a1-mina) > besterror)
+					continue;

 				dxtBlock->alpha0 = a0;
 				dxtBlock->alpha1 = a1;
-				int error = computeAlphaError(rgba, dxtBlock, besterror);
+				int error = computeAlphaError(rgba, dxtBlock);

 				if (error < besterror)
 				{
--- a/src/nvtt/OptimalCompressDXT.h
+++ b/src/nvtt/OptimalCompressDXT.h
@ -26,8 +26,6 @@

 #include <nvimage/nvimage.h>

-#include <nvmath/Color.h>
-
 namespace nv
 {
 	struct ColorBlock;
@ -41,7 +39,6 @@ namespace nv
 	{
 		void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock);
 		void compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock);
-		void compressDXT1G(uint8 g, BlockDXT1 * dxtBlock);
 		
 		void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);
 		void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock);
--- a/src/nvtt/OutputOptions.cpp
+++ b/src/nvtt/OutputOptions.cpp
@ -43,7 +43,6 @@ void OutputOptions::reset()
 	m.outputHandler = NULL;
 	m.errorHandler = NULL;
 	m.outputHeader = true;
-	m.container = Container_DDS;
 }


@ -73,12 +72,6 @@ void OutputOptions::setOutputHeader(bool outputHeader)
 	m.outputHeader = outputHeader;
 }

-/// Set container.
-void OutputOptions::setContainer(Container container)
-{
-	m.container = container;
-}
-

 bool OutputOptions::Private::openFile() const
 {
--- a/src/nvtt/OutputOptions.h
+++ b/src/nvtt/OutputOptions.h
@ -64,7 +64,6 @@ namespace nvtt
 		mutable OutputHandler * outputHandler;
 		ErrorHandler * errorHandler;
 		bool outputHeader;
-		Container container;
 		
 		bool openFile() const;
 		void closeFile() const;
--- a/src/nvtt/QuickCompressDXT.cpp
+++ b/src/nvtt/QuickCompressDXT.cpp
@ -21,10 +21,7 @@
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 // OTHER DEALINGS IN THE SOFTWARE.

-#include <nvcore/Containers.h> // swap
-
 #include <nvmath/Color.h>
-#include <nvmath/Fitting.h>

 #include <nvimage/ColorBlock.h>
 #include <nvimage/BlockDXT.h>
@ -133,7 +130,7 @@ inline static float colorDistance(Vector3::Arg c0, Vector3::Arg c1)
 	return dot(c0-c1, c0-c1);
 }

-inline static uint computeIndices4(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
+inline static uint computeIndices4(Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
 {
 	Vector3 palette[4];
 	palette[0] = maxColor;
@ -165,28 +162,6 @@ inline static uint computeIndices4(const Vector3 block[16], Vector3::Arg maxColo
 	return indices;
 }

-inline static float evaluatePaletteError4(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
-{
-	Vector3 palette[4];
-	palette[0] = maxColor;
-	palette[1] = minColor;
-	palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f);
-	palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f);
-	
-	float total = 0.0f;
-	for (int i = 0; i < 16; i++)
-	{
-		float d0 = colorDistance(palette[0], block[i]);
-		float d1 = colorDistance(palette[1], block[i]);
-		float d2 = colorDistance(palette[2], block[i]);
-		float d3 = colorDistance(palette[3], block[i]);
-
-		total += min(min(d0, d1), min(d2, d3));
-	}
-
-	return total;
-}
-
 inline static uint computeIndices3(const ColorBlock & rgba, Vector3::Arg maxColor, Vector3::Arg minColor)
 {
 	Vector3 palette[4];
@ -475,8 +450,7 @@ void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
 		// read block
 		Vector3 block[16];
 		extractColorBlockRGB(rgba, block);
-
-#if 1
+		
 		// find min and max colors
 		Vector3 maxColor, minColor;
 		findMinMaxColorsBox(block, 16, &maxColor, &minColor);
@ -484,31 +458,7 @@ void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
 		selectDiagonal(block, 16, &maxColor, &minColor);
 		
 		insetBBox(&maxColor, &minColor);
-#else
-		float weights[16] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
-		Vector3 cluster[4];
-		int count = Compute4Means(16, block, weights, Vector3(1, 1, 1), cluster);
-
-		Vector3 maxColor, minColor;
-		float bestError = FLT_MAX;
-
-		for (int i = 1; i < 4; i++)
-		{
-			for (int j = 0; j < i; j++)
-			{
-		        uint16 color0 = roundAndExpand(&cluster[i]);
-		        uint16 color1 = roundAndExpand(&cluster[j]);
-
-				float error = evaluatePaletteError4(block, cluster[i], cluster[j]);
-				if (error < bestError) {
-					bestError = error;
-					maxColor = cluster[i];
-					minColor = cluster[j];
-				}
-			}
-		}
-#endif
-
+		
 		uint16 color0 = roundAndExpand(&maxColor);
 		uint16 color1 = roundAndExpand(&minColor);

--- a/src/nvtt/Texture.cpp
+++ b/src/nvtt/Texture.cpp
@ -1,787 +0,0 @@
-// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#include "Texture.h"
-
-#include <nvmath/Vector.h>
-#include <nvmath/Matrix.h>
-#include <nvmath/Color.h>
-
-#include <nvimage/Filter.h>
-#include <nvimage/ImageIO.h>
-#include <nvimage/NormalMap.h>
-
-using namespace nv;
-using namespace nvtt;
-
-namespace
-{
-	// 1 -> 1, 2 -> 2, 3 -> 2, 4 -> 4, 5 -> 4, ...
-	static uint previousPowerOfTwo(const uint v)
-	{
-		return nextPowerOfTwo(v + 1) / 2;
-	}
-
-	static uint nearestPowerOfTwo(const uint v)
-	{
-		const uint np2 = nextPowerOfTwo(v);
-		const uint pp2 = previousPowerOfTwo(v);
-
-		if (np2 - v <= v - pp2)
-		{
-			return np2;
-		}
-		else
-		{
-			return pp2;
-		}
-	}
-}
-
-
-TexImage::TexImage() : m(new TexImage::Private())
-{
-}
-
-TexImage::TexImage(const TexImage & tex) : m(tex.m)
-{
-	m->addRef();
-}
-
-TexImage::~TexImage()
-{
-	m->release();
-	m = NULL;
-}
-
-void TexImage::operator=(const TexImage & tex)
-{
-	tex.m->addRef();
-	m = tex.m;
-	m->release();
-}
-
-void TexImage::detach()
-{
-	if (m->refCount() > 1)
-	{
-		m = new TexImage::Private(*m);
-		m->addRef();
-		nvDebugCheck(m->refCount() == 1);
-	}
-}
-
-void TexImage::setTextureType(TextureType type)
-{
-	if (m->type != type)
-	{
-		detach();
-
-		m->type = type;
-
-		if (type == TextureType_2D)
-		{
-			// @@ Free images.
-			m->imageArray.resize(1, NULL);
-		}
-		else
-		{
-			nvCheck (type == TextureType_Cube);
-			m->imageArray.resize(6, NULL);
-		}
-	}
-}
-
-void TexImage::setWrapMode(WrapMode wrapMode)
-{
-	if (m->wrapMode != wrapMode)
-	{
-		detach();
-		m->wrapMode = wrapMode;
-	}
-}
-
-void TexImage::setAlphaMode(AlphaMode alphaMode)
-{
-	if (m->alphaMode != alphaMode)
-	{
-		detach();
-		m->alphaMode = alphaMode;
-	}
-}
-
-void TexImage::setNormalMap(bool isNormalMap)
-{
-	if (m->isNormalMap != isNormalMap)
-	{
-		detach();
-		m->isNormalMap = isNormalMap;
-	}
-}
-
-int TexImage::width() const
-{
-	if (m->imageArray.count() > 0)
-	{
-		return m->imageArray[0]->width();
-	}
-	return 0;
-}
-
-int TexImage::height() const
-{
-	if (m->imageArray.count() > 0)
-	{
-		return m->imageArray[0]->height();
-	}
-	return 0;
-}
-
-int TexImage::depth() const
-{
-	return 0;
-}
-
-int TexImage::faceCount() const
-{
-	return m->imageArray.count();
-}
-
-TextureType TexImage::textureType() const
-{
-	return m->type;
-}
-
-WrapMode TexImage::wrapMode() const
-{
-	return m->wrapMode;
-}
-
-AlphaMode TexImage::alphaMode() const
-{
-	return m->alphaMode;
-}
-
-bool TexImage::isNormalMap() const
-{
-	return m->isNormalMap;
-}
-
-bool TexImage::load(const char * fileName)
-{
-	// @@ Add support for DDS textures!
-
-	AutoPtr<FloatImage> img(ImageIO::loadFloat(fileName));
-
-	if (img == NULL)
-	{
-		return false;
-	}
-
-	detach();
-
-	m->imageArray.resize(1);
-	m->imageArray[0] = img.release();
-
-	return true;
-}
-
-bool TexImage::setImage2D(InputFormat format, int w, int h, int idx, const void * restrict data)
-{
-	if (idx >= m->imageArray.count())
-	{
-		return false;
-	}
-
-	FloatImage * img = m->imageArray[idx];
-	if (img->width() != w || img->height() != h)
-	{
-		return false;
-	}
-
-	detach();
-
-	const int count = w * h;
-
-	float * restrict rdst = img->channel(0);
-	float * restrict gdst = img->channel(1);
-	float * restrict bdst = img->channel(2);
-	float * restrict adst = img->channel(3);
-
-	if (format == InputFormat_BGRA_8UB)
-	{
-		const Color32 * src = (const Color32 *)data;
-
-		try {
-			for (int i = 0; i < count; i++)
-			{
-				rdst[i] = src[i].r;
-				gdst[i] = src[i].g;
-				bdst[i] = src[i].b;
-				adst[i] = src[i].a;
-			}
-		}
-		catch(...) {
-			return false;
-		}
-	}
-	else if (format == InputFormat_RGBA_32F)
-	{
-		const float * src = (const float *)data;
-
-		try {
-			for (int i = 0; i < count; i++)
-			{
-				rdst[i] = src[4 * i + 0];
-				gdst[i] = src[4 * i + 1];
-				bdst[i] = src[4 * i + 2];
-				adst[i] = src[4 * i + 3];
-			}
-		}
-		catch(...) {
-			return false;
-		}
-	}
-
-	return true;
-}
-
-bool TexImage::setImage2D(InputFormat format, int w, int h, int idx, const void * restrict r, const void * restrict g, const void * restrict b, const void * restrict a)
-{
-	if (idx >= m->imageArray.count())
-	{
-		return false;
-	}
-
-	FloatImage * img = m->imageArray[idx];
-	if (img->width() != w || img->height() != h)
-	{
-		return false;
-	}
-
-	detach();
-
-	const int count = w * h;
-
-	float * restrict rdst = img->channel(0);
-	float * restrict gdst = img->channel(1);
-	float * restrict bdst = img->channel(2);
-	float * restrict adst = img->channel(3);
-
-	if (format == InputFormat_BGRA_8UB)
-	{
-		const uint8 * restrict rsrc = (const uint8 *)r;
-		const uint8 * restrict gsrc = (const uint8 *)g;
-		const uint8 * restrict bsrc = (const uint8 *)b;
-		const uint8 * restrict asrc = (const uint8 *)a;
-
-		try {
-			for (int i = 0; i < count; i++) rdst[i] = float(rsrc[i]) / 255.0f;
-			for (int i = 0; i < count; i++) gdst[i] = float(gsrc[i]) / 255.0f;
-			for (int i = 0; i < count; i++) bdst[i] = float(bsrc[i]) / 255.0f;
-			for (int i = 0; i < count; i++) adst[i] = float(asrc[i]) / 255.0f;
-		}
-		catch(...) {
-			return false;
-		}
-	}
-	else if (format == InputFormat_RGBA_32F)
-	{
-		const float * rsrc = (const float *)r;
-		const float * gsrc = (const float *)g;
-		const float * bsrc = (const float *)b;
-		const float * asrc = (const float *)a;
-
-		try {
-			memcpy(rdst, rsrc, count * sizeof(float));
-			memcpy(gdst, gsrc, count * sizeof(float));
-			memcpy(bdst, bsrc, count * sizeof(float));
-			memcpy(adst, asrc, count * sizeof(float));
-		}
-		catch(...) {
-			return false;
-		}
-	}
-
-	return true;
-}
-
-void TexImage::resize(int w, int h, ResizeFilter filter)
-{
-	if (m->imageArray.count() > 0)
-	{
-		if (w == m->imageArray[0]->width() && h == m->imageArray[0]->height()) return;
-	}
-
-	// @@ TODO: if cubemap, make sure w == h.
-
-	detach();
-
-	FloatImage::WrapMode wrapMode = (FloatImage::WrapMode)m->wrapMode;
-
-	foreach (i, m->imageArray)
-	{
-		if (m->imageArray[i] == NULL) continue;
-
-		if (m->alphaMode == AlphaMode_Transparency)
-		{
-			if (filter == ResizeFilter_Box)
-			{
-				BoxFilter filter;
-				m->imageArray[i]->resize(filter, w, h, wrapMode, 3);
-			}
-			else if (filter == ResizeFilter_Triangle)
-			{
-				TriangleFilter filter;
-				m->imageArray[i]->resize(filter, w, h, wrapMode, 3);
-			}
-			else if (filter == ResizeFilter_Kaiser)
-			{
-				//KaiserFilter filter(inputOptions.kaiserWidth);
-				//filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch);
-				KaiserFilter filter(3);
-				m->imageArray[i]->resize(filter, w, h, wrapMode, 3);
-			}
-			else //if (filter == ResizeFilter_Mitchell)
-			{
-				nvDebugCheck(filter == ResizeFilter_Mitchell);
-				MitchellFilter filter;
-				m->imageArray[i]->resize(filter, w, h, wrapMode, 3);
-			}
-		}
-		else
-		{
-			if (filter == ResizeFilter_Box)
-			{
-				BoxFilter filter;
-				m->imageArray[i]->resize(filter, w, h, wrapMode);
-			}
-			else if (filter == ResizeFilter_Triangle)
-			{
-				TriangleFilter filter;
-				m->imageArray[i]->resize(filter, w, h, wrapMode);
-			}
-			else if (filter == ResizeFilter_Kaiser)
-			{
-				//KaiserFilter filter(inputOptions.kaiserWidth);
-				//filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch);
-				KaiserFilter filter(3);
-				m->imageArray[i]->resize(filter, w, h, wrapMode);
-			}
-			else //if (filter == ResizeFilter_Mitchell)
-			{
-				nvDebugCheck(filter == ResizeFilter_Mitchell);
-				MitchellFilter filter;
-				m->imageArray[i]->resize(filter, w, h, wrapMode);
-			}
-		}
-	}
-}
-
-void TexImage::resize(int maxExtent, RoundMode roundMode, ResizeFilter filter)
-{
-	if (m->imageArray.count() > 0)
-	{
-		int w = m->imageArray[0]->width();
-		int h = m->imageArray[0]->height();
-
-		nvDebugCheck(w > 0);
-		nvDebugCheck(h > 0);
-
-		if (roundMode != RoundMode_None)
-		{
-			// rounded max extent should never be higher than original max extent.
-			maxExtent = previousPowerOfTwo(maxExtent);
-		}
-
-		// Scale extents without changing aspect ratio.
-		int maxwh = max(w, h);
-		if (maxExtent != 0 && maxwh > maxExtent)
-		{
-			w = max((w * maxExtent) / maxwh, 1);
-			h = max((h * maxExtent) / maxwh, 1);
-		}
-
-		// Round to power of two.
-		if (roundMode == RoundMode_ToNextPowerOfTwo)
-		{
-			w = nextPowerOfTwo(w);
-			h = nextPowerOfTwo(h);
-		}
-		else if (roundMode == RoundMode_ToNearestPowerOfTwo)
-		{
-			w = nearestPowerOfTwo(w);
-			h = nearestPowerOfTwo(h);
-		}
-		else if (roundMode == RoundMode_ToPreviousPowerOfTwo)
-		{
-			w = previousPowerOfTwo(w);
-			h = previousPowerOfTwo(h);
-		}
-
-		resize(w, h, filter);
-	}
-}
-
-bool TexImage::buildNextMipmap(MipmapFilter filter)
-{
-	if (m->imageArray.count() > 0)
-	{
-		int w = m->imageArray[0]->width();
-		int h = m->imageArray[0]->height();
-
-		nvDebugCheck(w > 0);
-		nvDebugCheck(h > 0);
-
-        if (w == 1 && h == 1)
-        {
-            return false;
-        }
-    }
-
-    detach();
-
-	FloatImage::WrapMode wrapMode = (FloatImage::WrapMode)m->wrapMode;
-
-	foreach (i, m->imageArray)
-	{
-		if (m->imageArray[i] == NULL) continue;
-
-		if (m->alphaMode == AlphaMode_Transparency)
-		{
-			if (filter == MipmapFilter_Box)
-			{
-				BoxFilter filter;
-				m->imageArray[i]->downSample(filter, wrapMode, 3);
-			}
-			else if (filter == MipmapFilter_Triangle)
-			{
-				TriangleFilter filter;
-				m->imageArray[i]->downSample(filter, wrapMode, 3);
-			}
-			else if (filter == MipmapFilter_Kaiser)
-			{
-				nvDebugCheck(filter == MipmapFilter_Kaiser);
-				//KaiserFilter filter(inputOptions.kaiserWidth);
-				//filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch);
-				KaiserFilter filter(3);
-				m->imageArray[i]->downSample(filter, wrapMode, 3);
-			}
-		}
-		else
-		{
-			if (filter == MipmapFilter_Box)
-			{
-				m->imageArray[i]->fastDownSample();
-			}
-			else if (filter == MipmapFilter_Triangle)
-			{
-				TriangleFilter filter;
-				m->imageArray[i]->downSample(filter, wrapMode);
-			}
-			else //if (filter == MipmapFilter_Kaiser)
-			{
-				nvDebugCheck(filter == MipmapFilter_Kaiser);
-				//KaiserFilter filter(inputOptions.kaiserWidth);
-				//filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch);
-				KaiserFilter filter(3);
-				m->imageArray[i]->downSample(filter, wrapMode);
-			}
-		}
-	}
-
-    return true;
-}
-
-// Color transforms.
-void TexImage::toLinear(float gamma)
-{
-	if (equal(gamma, 1.0f)) return;
-
-	detach();
-
-	foreach (i, m->imageArray)
-	{
-		if (m->imageArray[i] == NULL) continue;
-
-		m->imageArray[i]->toLinear(0, 3, gamma);
-	}
-}
-
-void TexImage::toGamma(float gamma)
-{
-	if (equal(gamma, 1.0f)) return;
-
-	detach();
-
-	foreach (i, m->imageArray)
-	{
-		if (m->imageArray[i] == NULL) continue;
-
-		m->imageArray[i]->toGamma(0, 3, gamma);
-	}
-}
-
-void TexImage::transform(const float w0[4], const float w1[4], const float w2[4], const float w3[4], const float offset[4])
-{
-	detach();
-
-	Matrix xform(
-		Vector4(w0[0], w0[1], w0[2], w0[3]),
-		Vector4(w1[0], w1[1], w1[2], w1[3]),
-		Vector4(w2[0], w2[1], w2[2], w2[3]),
-		Vector4(w3[0], w3[1], w3[2], w3[3]));
-
-	Vector4 voffset(offset[0], offset[1], offset[2], offset[3]);
-
-	foreach (i, m->imageArray)
-	{
-		if (m->imageArray[i] == NULL) continue;
-
-		m->imageArray[i]->transform(0, xform, voffset);
-	}
-}
-
-void TexImage::swizzle(int r, int g, int b, int a)
-{
-	if (r == 0 && g == 1 && b == 2 && a == 3) return;
-
-	detach();
-
-	foreach (i, m->imageArray)
-	{
-		if (m->imageArray[i] == NULL) continue;
-
-		m->imageArray[i]->swizzle(0, r, g, b, a);
-	}
-}
-
-void TexImage::scaleBias(int channel, float scale, float bias)
-{
-	if (equal(scale, 1.0f) && equal(bias, 0.0f)) return;
-
-	detach();
-
-	foreach (i, m->imageArray)
-	{
-		if (m->imageArray[i] == NULL) continue;
-
-		m->imageArray[i]->scaleBias(channel, 1, scale, bias);
-	}
-}
-
-void TexImage::blend(float red, float green, float blue, float alpha, float t)
-{
-	detach();
-
-	foreach (i, m->imageArray)
-	{
-		FloatImage * img = m->imageArray[i];
-		if (img == NULL) continue;
-
-		float * restrict r = img->channel(0);
-		float * restrict g = img->channel(1);
-		float * restrict b = img->channel(2);
-		float * restrict a = img->channel(3);
-
-		const int count = img->width() * img->height();
-		for (int i = 0; i < count; i++)
-		{
-			r[i] = lerp(r[i], red, t);
-			g[i] = lerp(g[i], green, t);
-			b[i] = lerp(b[i], blue, t);
-			a[i] = lerp(a[i], alpha, t);
-		}
-	}
-}
-
-void TexImage::premultiplyAlpha()
-{
-	detach();
-
-	foreach (i, m->imageArray)
-	{
-		FloatImage * img = m->imageArray[i];
-		if (img == NULL) continue;
-
-		float * restrict r = img->channel(0);
-		float * restrict g = img->channel(1);
-		float * restrict b = img->channel(2);
-		float * restrict a = img->channel(3);
-
-		const int count = img->width() * img->height();
-		for (int i = 0; i < count; i++)
-		{
-			r[i] *= a[i];
-			g[i] *= a[i];
-			b[i] *= a[i];
-		}
-	}
-}
-
-
-void TexImage::toGreyScale(float redScale, float greenScale, float blueScale, float alphaScale)
-{
-	detach();
-
-	foreach (i, m->imageArray)
-	{
-		FloatImage * img = m->imageArray[i];
-		if (img == NULL) continue;
-
-		float sum = redScale + greenScale + blueScale + alphaScale;
-		redScale /= sum;
-		greenScale /= sum;
-		blueScale /= sum;
-		alphaScale /= sum;
-
-		float * restrict r = img->channel(0);
-		float * restrict g = img->channel(1);
-		float * restrict b = img->channel(2);
-		float * restrict a = img->channel(3);
-
-		const int count = img->width() * img->height();
-		for (int i = 0; i < count; i++)
-		{
-			float grey = r[i] * redScale + g[i] * greenScale + b[i] * blueScale + a[i] * alphaScale;
-			a[i] = b[i] = g[i] = r[i] = grey;
-		}
-	}
-}
-
-// Draw colored border.
-void TexImage::setBorder(float r, float g, float b, float a)
-{
-	detach();
-
-	foreach (i, m->imageArray)
-	{
-		FloatImage * img = m->imageArray[i];
-		if (img == NULL) continue;
-
-		const int w = img->width();
-		const int h = img->height();
-
-		for (int i = 0; i < w; i++)
-		{
-			img->setPixel(r, i, 0, 0);
-			img->setPixel(g, i, 0, 1);
-			img->setPixel(b, i, 0, 2);
-			img->setPixel(a, i, 0, 3);
-
-			img->setPixel(r, i, h-1, 0);
-			img->setPixel(g, i, h-1, 1);
-			img->setPixel(b, i, h-1, 2);
-			img->setPixel(a, i, h-1, 3);
-		}
-
-		for (int i = 0; i < h; i++)
-		{
-			img->setPixel(r, 0, i, 0);
-			img->setPixel(g, 0, i, 1);
-			img->setPixel(b, 0, i, 2);
-			img->setPixel(a, 0, i, 3);
-
-			img->setPixel(r, w-1, i, 0);
-			img->setPixel(g, w-1, i, 1);
-			img->setPixel(b, w-1, i, 2);
-			img->setPixel(a, w-1, i, 3);
-		}
-	}
-}
-
-// Fill image with the given color.
-void TexImage::fill(float red, float green, float blue, float alpha)
-{
-	detach();
-
-	foreach (i, m->imageArray)
-	{
-		FloatImage * img = m->imageArray[i];
-		if (img == NULL) continue;
-
-		float * restrict r = img->channel(0);
-		float * restrict g = img->channel(1);
-		float * restrict b = img->channel(2);
-		float * restrict a = img->channel(3);
-
-		const int count = img->width() * img->height();
-		for (int i = 0; i < count; i++)
-		{
-			r[i] = red;
-			g[i] = green;
-			b[i] = blue;
-			a[i] = alpha;
-		}
-	}
-}
-
-// Set normal map options.
-void TexImage::toNormalMap(float sm, float medium, float big, float large)
-{
-	detach();
-
-	foreach (i, m->imageArray)
-	{
-		if (m->imageArray[i] == NULL) continue;
-
-		// @@ Not implemented.
-	}
-}
-
-void TexImage::toHeightMap()
-{
-	detach();
-
-	foreach (i, m->imageArray)
-	{
-		if (m->imageArray[i] == NULL) continue;
-
-		// @@ Not implemented.
-	}
-}
-
-void TexImage::normalizeNormalMap()
-{
-	//nvCheck(m->isNormalMap);
-
-	detach();
-
-	foreach (i, m->imageArray)
-	{
-		if (m->imageArray[i] == NULL) continue;
-
-		nv::normalizeNormalMap(m->imageArray[i]);
-	}
-}
-
-// Compress.
-void TexImage::outputCompressed(const CompressionOptions & compressionOptions, const OutputOptions & outputOptions)
-{
-	foreach (i, m->imageArray)
-	{
-		if (m->imageArray[i] == NULL) continue;
-
-		// @@ Not implemented.
-	}
-}
--- a/src/nvtt/Texture.h
+++ b/src/nvtt/Texture.h
@ -1,76 +0,0 @@
-// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#ifndef NV_TT_TEXTURE_H
-#define NV_TT_TEXTURE_H
-
-#include "nvtt.h"
-
-#include <nvcore/Containers.h>
-#include <nvcore/RefCounted.h>
-#include <nvcore/Ptr.h>
-
-#include <nvimage/Image.h>
-#include <nvimage/FloatImage.h>
-
-namespace nvtt
-{
-
-	struct TexImage::Private : public nv::RefCounted
-	{
-		Private()
-		{
-			type = TextureType_2D;
-			wrapMode = WrapMode_Mirror;
-			alphaMode = AlphaMode_None;
-			isNormalMap = false;
-
-			imageArray.resize(1, NULL);
-		}
-		Private(const Private & p)
-		{
-			type = p.type;
-			wrapMode = p.wrapMode;
-			alphaMode = p.alphaMode;
-			isNormalMap = p.isNormalMap;
-
-			imageArray = p.imageArray;
-		}
-		~Private()
-		{
-			// @@ Free images.
-		}
-
-		TextureType type;
-		WrapMode wrapMode;
-		AlphaMode alphaMode;
-		bool isNormalMap;
-
-		nv::Array<nv::FloatImage *> imageArray;
-	};
-
-	
-} // nvtt namespace
-
-
-#endif // NV_TT_TEXTURE_H
--- a/src/nvtt/cuda/Bitmaps.h
+++ b/src/nvtt/cuda/Bitmaps.h
@ -1117,770 +1117,3 @@ const static uint s_bitmapTable[992] =
 	0x55555557,
 	0x55555557,
 };
-
-
-/*
-void precomp()
-{
-	unsigned int bitmaps[1024];
-
-	int num = 0;
-
-	printf("const static uint s_bitmapTableCTX[704] =\n{\n");
-
-	for (int a = 1; a <= 15; a++)
-	{
-		  for (int b = a; b <= 15; b++)
-		  {
-				for (int c = b; c <= 15; c++)
-				{
-					int indices[16];
-
-					int i = 0;
-					for(; i < a; i++) {
-						indices[i] = 0;
-					}
-					for(; i < a+b; i++) {
-						indices[i] = 2;
-					}
-					for(; i < a+b+c; i++) {
-						indices[i] = 3;
-					}
-					for(; i < 16; i++) {
-						indices[i] = 1;
-					}
-
-					unsigned int bm = 0;
-					for(i = 0; i < 16; i++) {
-						bm |= indices[i] << (i * 2);
-					}
-
-					printf("\t0x%8X, // %d %d %d %d\n", bm, a-0, b-a, c-b, 16-c);
-
-					bitmaps[num] = bm;
-					num++;
-				}
-		  }
-	}
-
-	// Align to 32: 680 -> 704
-	while (num < 704)
-	{
-		printf("\t0x80000000,\n");
-
-		bitmaps[num] = 0x80000000; // 15 0 0 1;
-		num++;
-	}
-
-	printf("}; // num = %d\n", num);
-}
-*/
-
-const static uint s_bitmapTableCTX[704] =
-{
-	0x55555578, // 1 0 0 15
-	0x555555F8, // 1 0 1 14
-	0x555557F8, // 1 0 2 13
-	0x55555FF8, // 1 0 3 12
-	0x55557FF8, // 1 0 4 11
-	0x5555FFF8, // 1 0 5 10
-	0x5557FFF8, // 1 0 6 9
-	0x555FFFF8, // 1 0 7 8
-	0x557FFFF8, // 1 0 8 7
-	0x55FFFFF8, // 1 0 9 6
-	0x57FFFFF8, // 1 0 10 5
-	0x5FFFFFF8, // 1 0 11 4
-	0x7FFFFFF8, // 1 0 12 3
-	0xFFFFFFF8, // 1 0 13 2
-	0xFFFFFFF8, // 1 0 14 1
-	0x555557E8, // 1 1 0 14
-	0x55555FE8, // 1 1 1 13
-	0x55557FE8, // 1 1 2 12
-	0x5555FFE8, // 1 1 3 11
-	0x5557FFE8, // 1 1 4 10
-	0x555FFFE8, // 1 1 5 9
-	0x557FFFE8, // 1 1 6 8
-	0x55FFFFE8, // 1 1 7 7
-	0x57FFFFE8, // 1 1 8 6
-	0x5FFFFFE8, // 1 1 9 5
-	0x7FFFFFE8, // 1 1 10 4
-	0xFFFFFFE8, // 1 1 11 3
-	0xFFFFFFE8, // 1 1 12 2
-	0xFFFFFFE8, // 1 1 13 1
-	0x55557FA8, // 1 2 0 13
-	0x5555FFA8, // 1 2 1 12
-	0x5557FFA8, // 1 2 2 11
-	0x555FFFA8, // 1 2 3 10
-	0x557FFFA8, // 1 2 4 9
-	0x55FFFFA8, // 1 2 5 8
-	0x57FFFFA8, // 1 2 6 7
-	0x5FFFFFA8, // 1 2 7 6
-	0x7FFFFFA8, // 1 2 8 5
-	0xFFFFFFA8, // 1 2 9 4
-	0xFFFFFFA8, // 1 2 10 3
-	0xFFFFFFA8, // 1 2 11 2
-	0xFFFFFFA8, // 1 2 12 1
-	0x5557FEA8, // 1 3 0 12
-	0x555FFEA8, // 1 3 1 11
-	0x557FFEA8, // 1 3 2 10
-	0x55FFFEA8, // 1 3 3 9
-	0x57FFFEA8, // 1 3 4 8
-	0x5FFFFEA8, // 1 3 5 7
-	0x7FFFFEA8, // 1 3 6 6
-	0xFFFFFEA8, // 1 3 7 5
-	0xFFFFFEA8, // 1 3 8 4
-	0xFFFFFEA8, // 1 3 9 3
-	0xFFFFFEA8, // 1 3 10 2
-	0xFFFFFEA8, // 1 3 11 1
-	0x557FFAA8, // 1 4 0 11
-	0x55FFFAA8, // 1 4 1 10
-	0x57FFFAA8, // 1 4 2 9
-	0x5FFFFAA8, // 1 4 3 8
-	0x7FFFFAA8, // 1 4 4 7
-	0xFFFFFAA8, // 1 4 5 6
-	0xFFFFFAA8, // 1 4 6 5
-	0xFFFFFAA8, // 1 4 7 4
-	0xFFFFFAA8, // 1 4 8 3
-	0xFFFFFAA8, // 1 4 9 2
-	0xFFFFFAA8, // 1 4 10 1
-	0x57FFEAA8, // 1 5 0 10
-	0x5FFFEAA8, // 1 5 1 9
-	0x7FFFEAA8, // 1 5 2 8
-	0xFFFFEAA8, // 1 5 3 7
-	0xFFFFEAA8, // 1 5 4 6
-	0xFFFFEAA8, // 1 5 5 5
-	0xFFFFEAA8, // 1 5 6 4
-	0xFFFFEAA8, // 1 5 7 3
-	0xFFFFEAA8, // 1 5 8 2
-	0xFFFFEAA8, // 1 5 9 1
-	0x7FFFAAA8, // 1 6 0 9
-	0xFFFFAAA8, // 1 6 1 8
-	0xFFFFAAA8, // 1 6 2 7
-	0xFFFFAAA8, // 1 6 3 6
-	0xFFFFAAA8, // 1 6 4 5
-	0xFFFFAAA8, // 1 6 5 4
-	0xFFFFAAA8, // 1 6 6 3
-	0xFFFFAAA8, // 1 6 7 2
-	0xFFFFAAA8, // 1 6 8 1
-	0xFFFEAAA8, // 1 7 0 8
-	0xFFFEAAA8, // 1 7 1 7
-	0xFFFEAAA8, // 1 7 2 6
-	0xFFFEAAA8, // 1 7 3 5
-	0xFFFEAAA8, // 1 7 4 4
-	0xFFFEAAA8, // 1 7 5 3
-	0xFFFEAAA8, // 1 7 6 2
-	0xFFFEAAA8, // 1 7 7 1
-	0xFFFAAAA8, // 1 8 0 7
-	0xFFFAAAA8, // 1 8 1 6
-	0xFFFAAAA8, // 1 8 2 5
-	0xFFFAAAA8, // 1 8 3 4
-	0xFFFAAAA8, // 1 8 4 3
-	0xFFFAAAA8, // 1 8 5 2
-	0xFFFAAAA8, // 1 8 6 1
-	0xFFEAAAA8, // 1 9 0 6
-	0xFFEAAAA8, // 1 9 1 5
-	0xFFEAAAA8, // 1 9 2 4
-	0xFFEAAAA8, // 1 9 3 3
-	0xFFEAAAA8, // 1 9 4 2
-	0xFFEAAAA8, // 1 9 5 1
-	0xFFAAAAA8, // 1 10 0 5
-	0xFFAAAAA8, // 1 10 1 4
-	0xFFAAAAA8, // 1 10 2 3
-	0xFFAAAAA8, // 1 10 3 2
-	0xFFAAAAA8, // 1 10 4 1
-	0xFEAAAAA8, // 1 11 0 4
-	0xFEAAAAA8, // 1 11 1 3
-	0xFEAAAAA8, // 1 11 2 2
-	0xFEAAAAA8, // 1 11 3 1
-	0xFAAAAAA8, // 1 12 0 3
-	0xFAAAAAA8, // 1 12 1 2
-	0xFAAAAAA8, // 1 12 2 1
-	0xEAAAAAA8, // 1 13 0 2
-	0xEAAAAAA8, // 1 13 1 1
-	0xAAAAAAA8, // 1 14 0 1
-	0x55555FA0, // 2 0 0 14
-	0x55557FA0, // 2 0 1 13
-	0x5555FFA0, // 2 0 2 12
-	0x5557FFA0, // 2 0 3 11
-	0x555FFFA0, // 2 0 4 10
-	0x557FFFA0, // 2 0 5 9
-	0x55FFFFA0, // 2 0 6 8
-	0x57FFFFA0, // 2 0 7 7
-	0x5FFFFFA0, // 2 0 8 6
-	0x7FFFFFA0, // 2 0 9 5
-	0xFFFFFFA0, // 2 0 10 4
-	0xFFFFFFA0, // 2 0 11 3
-	0xFFFFFFA0, // 2 0 12 2
-	0xFFFFFFA0, // 2 0 13 1
-	0x5555FEA0, // 2 1 0 13
-	0x5557FEA0, // 2 1 1 12
-	0x555FFEA0, // 2 1 2 11
-	0x557FFEA0, // 2 1 3 10
-	0x55FFFEA0, // 2 1 4 9
-	0x57FFFEA0, // 2 1 5 8
-	0x5FFFFEA0, // 2 1 6 7
-	0x7FFFFEA0, // 2 1 7 6
-	0xFFFFFEA0, // 2 1 8 5
-	0xFFFFFEA0, // 2 1 9 4
-	0xFFFFFEA0, // 2 1 10 3
-	0xFFFFFEA0, // 2 1 11 2
-	0xFFFFFEA0, // 2 1 12 1
-	0x555FFAA0, // 2 2 0 12
-	0x557FFAA0, // 2 2 1 11
-	0x55FFFAA0, // 2 2 2 10
-	0x57FFFAA0, // 2 2 3 9
-	0x5FFFFAA0, // 2 2 4 8
-	0x7FFFFAA0, // 2 2 5 7
-	0xFFFFFAA0, // 2 2 6 6
-	0xFFFFFAA0, // 2 2 7 5
-	0xFFFFFAA0, // 2 2 8 4
-	0xFFFFFAA0, // 2 2 9 3
-	0xFFFFFAA0, // 2 2 10 2
-	0xFFFFFAA0, // 2 2 11 1
-	0x55FFEAA0, // 2 3 0 11
-	0x57FFEAA0, // 2 3 1 10
-	0x5FFFEAA0, // 2 3 2 9
-	0x7FFFEAA0, // 2 3 3 8
-	0xFFFFEAA0, // 2 3 4 7
-	0xFFFFEAA0, // 2 3 5 6
-	0xFFFFEAA0, // 2 3 6 5
-	0xFFFFEAA0, // 2 3 7 4
-	0xFFFFEAA0, // 2 3 8 3
-	0xFFFFEAA0, // 2 3 9 2
-	0xFFFFEAA0, // 2 3 10 1
-	0x5FFFAAA0, // 2 4 0 10
-	0x7FFFAAA0, // 2 4 1 9
-	0xFFFFAAA0, // 2 4 2 8
-	0xFFFFAAA0, // 2 4 3 7
-	0xFFFFAAA0, // 2 4 4 6
-	0xFFFFAAA0, // 2 4 5 5
-	0xFFFFAAA0, // 2 4 6 4
-	0xFFFFAAA0, // 2 4 7 3
-	0xFFFFAAA0, // 2 4 8 2
-	0xFFFFAAA0, // 2 4 9 1
-	0xFFFEAAA0, // 2 5 0 9
-	0xFFFEAAA0, // 2 5 1 8
-	0xFFFEAAA0, // 2 5 2 7
-	0xFFFEAAA0, // 2 5 3 6
-	0xFFFEAAA0, // 2 5 4 5
-	0xFFFEAAA0, // 2 5 5 4
-	0xFFFEAAA0, // 2 5 6 3
-	0xFFFEAAA0, // 2 5 7 2
-	0xFFFEAAA0, // 2 5 8 1
-	0xFFFAAAA0, // 2 6 0 8
-	0xFFFAAAA0, // 2 6 1 7
-	0xFFFAAAA0, // 2 6 2 6
-	0xFFFAAAA0, // 2 6 3 5
-	0xFFFAAAA0, // 2 6 4 4
-	0xFFFAAAA0, // 2 6 5 3
-	0xFFFAAAA0, // 2 6 6 2
-	0xFFFAAAA0, // 2 6 7 1
-	0xFFEAAAA0, // 2 7 0 7
-	0xFFEAAAA0, // 2 7 1 6
-	0xFFEAAAA0, // 2 7 2 5
-	0xFFEAAAA0, // 2 7 3 4
-	0xFFEAAAA0, // 2 7 4 3
-	0xFFEAAAA0, // 2 7 5 2
-	0xFFEAAAA0, // 2 7 6 1
-	0xFFAAAAA0, // 2 8 0 6
-	0xFFAAAAA0, // 2 8 1 5
-	0xFFAAAAA0, // 2 8 2 4
-	0xFFAAAAA0, // 2 8 3 3
-	0xFFAAAAA0, // 2 8 4 2
-	0xFFAAAAA0, // 2 8 5 1
-	0xFEAAAAA0, // 2 9 0 5
-	0xFEAAAAA0, // 2 9 1 4
-	0xFEAAAAA0, // 2 9 2 3
-	0xFEAAAAA0, // 2 9 3 2
-	0xFEAAAAA0, // 2 9 4 1
-	0xFAAAAAA0, // 2 10 0 4
-	0xFAAAAAA0, // 2 10 1 3
-	0xFAAAAAA0, // 2 10 2 2
-	0xFAAAAAA0, // 2 10 3 1
-	0xEAAAAAA0, // 2 11 0 3
-	0xEAAAAAA0, // 2 11 1 2
-	0xEAAAAAA0, // 2 11 2 1
-	0xAAAAAAA0, // 2 12 0 2
-	0xAAAAAAA0, // 2 12 1 1
-	0xAAAAAAA0, // 2 13 0 1
-	0x5557FA80, // 3 0 0 13
-	0x555FFA80, // 3 0 1 12
-	0x557FFA80, // 3 0 2 11
-	0x55FFFA80, // 3 0 3 10
-	0x57FFFA80, // 3 0 4 9
-	0x5FFFFA80, // 3 0 5 8
-	0x7FFFFA80, // 3 0 6 7
-	0xFFFFFA80, // 3 0 7 6
-	0xFFFFFA80, // 3 0 8 5
-	0xFFFFFA80, // 3 0 9 4
-	0xFFFFFA80, // 3 0 10 3
-	0xFFFFFA80, // 3 0 11 2
-	0xFFFFFA80, // 3 0 12 1
-	0x557FEA80, // 3 1 0 12
-	0x55FFEA80, // 3 1 1 11
-	0x57FFEA80, // 3 1 2 10
-	0x5FFFEA80, // 3 1 3 9
-	0x7FFFEA80, // 3 1 4 8
-	0xFFFFEA80, // 3 1 5 7
-	0xFFFFEA80, // 3 1 6 6
-	0xFFFFEA80, // 3 1 7 5
-	0xFFFFEA80, // 3 1 8 4
-	0xFFFFEA80, // 3 1 9 3
-	0xFFFFEA80, // 3 1 10 2
-	0xFFFFEA80, // 3 1 11 1
-	0x57FFAA80, // 3 2 0 11
-	0x5FFFAA80, // 3 2 1 10
-	0x7FFFAA80, // 3 2 2 9
-	0xFFFFAA80, // 3 2 3 8
-	0xFFFFAA80, // 3 2 4 7
-	0xFFFFAA80, // 3 2 5 6
-	0xFFFFAA80, // 3 2 6 5
-	0xFFFFAA80, // 3 2 7 4
-	0xFFFFAA80, // 3 2 8 3
-	0xFFFFAA80, // 3 2 9 2
-	0xFFFFAA80, // 3 2 10 1
-	0x7FFEAA80, // 3 3 0 10
-	0xFFFEAA80, // 3 3 1 9
-	0xFFFEAA80, // 3 3 2 8
-	0xFFFEAA80, // 3 3 3 7
-	0xFFFEAA80, // 3 3 4 6
-	0xFFFEAA80, // 3 3 5 5
-	0xFFFEAA80, // 3 3 6 4
-	0xFFFEAA80, // 3 3 7 3
-	0xFFFEAA80, // 3 3 8 2
-	0xFFFEAA80, // 3 3 9 1
-	0xFFFAAA80, // 3 4 0 9
-	0xFFFAAA80, // 3 4 1 8
-	0xFFFAAA80, // 3 4 2 7
-	0xFFFAAA80, // 3 4 3 6
-	0xFFFAAA80, // 3 4 4 5
-	0xFFFAAA80, // 3 4 5 4
-	0xFFFAAA80, // 3 4 6 3
-	0xFFFAAA80, // 3 4 7 2
-	0xFFFAAA80, // 3 4 8 1
-	0xFFEAAA80, // 3 5 0 8
-	0xFFEAAA80, // 3 5 1 7
-	0xFFEAAA80, // 3 5 2 6
-	0xFFEAAA80, // 3 5 3 5
-	0xFFEAAA80, // 3 5 4 4
-	0xFFEAAA80, // 3 5 5 3
-	0xFFEAAA80, // 3 5 6 2
-	0xFFEAAA80, // 3 5 7 1
-	0xFFAAAA80, // 3 6 0 7
-	0xFFAAAA80, // 3 6 1 6
-	0xFFAAAA80, // 3 6 2 5
-	0xFFAAAA80, // 3 6 3 4
-	0xFFAAAA80, // 3 6 4 3
-	0xFFAAAA80, // 3 6 5 2
-	0xFFAAAA80, // 3 6 6 1
-	0xFEAAAA80, // 3 7 0 6
-	0xFEAAAA80, // 3 7 1 5
-	0xFEAAAA80, // 3 7 2 4
-	0xFEAAAA80, // 3 7 3 3
-	0xFEAAAA80, // 3 7 4 2
-	0xFEAAAA80, // 3 7 5 1
-	0xFAAAAA80, // 3 8 0 5
-	0xFAAAAA80, // 3 8 1 4
-	0xFAAAAA80, // 3 8 2 3
-	0xFAAAAA80, // 3 8 3 2
-	0xFAAAAA80, // 3 8 4 1
-	0xEAAAAA80, // 3 9 0 4
-	0xEAAAAA80, // 3 9 1 3
-	0xEAAAAA80, // 3 9 2 2
-	0xEAAAAA80, // 3 9 3 1
-	0xAAAAAA80, // 3 10 0 3
-	0xAAAAAA80, // 3 10 1 2
-	0xAAAAAA80, // 3 10 2 1
-	0xAAAAAA80, // 3 11 0 2
-	0xAAAAAA80, // 3 11 1 1
-	0xAAAAAA80, // 3 12 0 1
-	0x55FFAA00, // 4 0 0 12
-	0x57FFAA00, // 4 0 1 11
-	0x5FFFAA00, // 4 0 2 10
-	0x7FFFAA00, // 4 0 3 9
-	0xFFFFAA00, // 4 0 4 8
-	0xFFFFAA00, // 4 0 5 7
-	0xFFFFAA00, // 4 0 6 6
-	0xFFFFAA00, // 4 0 7 5
-	0xFFFFAA00, // 4 0 8 4
-	0xFFFFAA00, // 4 0 9 3
-	0xFFFFAA00, // 4 0 10 2
-	0xFFFFAA00, // 4 0 11 1
-	0x5FFEAA00, // 4 1 0 11
-	0x7FFEAA00, // 4 1 1 10
-	0xFFFEAA00, // 4 1 2 9
-	0xFFFEAA00, // 4 1 3 8
-	0xFFFEAA00, // 4 1 4 7
-	0xFFFEAA00, // 4 1 5 6
-	0xFFFEAA00, // 4 1 6 5
-	0xFFFEAA00, // 4 1 7 4
-	0xFFFEAA00, // 4 1 8 3
-	0xFFFEAA00, // 4 1 9 2
-	0xFFFEAA00, // 4 1 10 1
-	0xFFFAAA00, // 4 2 0 10
-	0xFFFAAA00, // 4 2 1 9
-	0xFFFAAA00, // 4 2 2 8
-	0xFFFAAA00, // 4 2 3 7
-	0xFFFAAA00, // 4 2 4 6
-	0xFFFAAA00, // 4 2 5 5
-	0xFFFAAA00, // 4 2 6 4
-	0xFFFAAA00, // 4 2 7 3
-	0xFFFAAA00, // 4 2 8 2
-	0xFFFAAA00, // 4 2 9 1
-	0xFFEAAA00, // 4 3 0 9
-	0xFFEAAA00, // 4 3 1 8
-	0xFFEAAA00, // 4 3 2 7
-	0xFFEAAA00, // 4 3 3 6
-	0xFFEAAA00, // 4 3 4 5
-	0xFFEAAA00, // 4 3 5 4
-	0xFFEAAA00, // 4 3 6 3
-	0xFFEAAA00, // 4 3 7 2
-	0xFFEAAA00, // 4 3 8 1
-	0xFFAAAA00, // 4 4 0 8
-	0xFFAAAA00, // 4 4 1 7
-	0xFFAAAA00, // 4 4 2 6
-	0xFFAAAA00, // 4 4 3 5
-	0xFFAAAA00, // 4 4 4 4
-	0xFFAAAA00, // 4 4 5 3
-	0xFFAAAA00, // 4 4 6 2
-	0xFFAAAA00, // 4 4 7 1
-	0xFEAAAA00, // 4 5 0 7
-	0xFEAAAA00, // 4 5 1 6
-	0xFEAAAA00, // 4 5 2 5
-	0xFEAAAA00, // 4 5 3 4
-	0xFEAAAA00, // 4 5 4 3
-	0xFEAAAA00, // 4 5 5 2
-	0xFEAAAA00, // 4 5 6 1
-	0xFAAAAA00, // 4 6 0 6
-	0xFAAAAA00, // 4 6 1 5
-	0xFAAAAA00, // 4 6 2 4
-	0xFAAAAA00, // 4 6 3 3
-	0xFAAAAA00, // 4 6 4 2
-	0xFAAAAA00, // 4 6 5 1
-	0xEAAAAA00, // 4 7 0 5
-	0xEAAAAA00, // 4 7 1 4
-	0xEAAAAA00, // 4 7 2 3
-	0xEAAAAA00, // 4 7 3 2
-	0xEAAAAA00, // 4 7 4 1
-	0xAAAAAA00, // 4 8 0 4
-	0xAAAAAA00, // 4 8 1 3
-	0xAAAAAA00, // 4 8 2 2
-	0xAAAAAA00, // 4 8 3 1
-	0xAAAAAA00, // 4 9 0 3
-	0xAAAAAA00, // 4 9 1 2
-	0xAAAAAA00, // 4 9 2 1
-	0xAAAAAA00, // 4 10 0 2
-	0xAAAAAA00, // 4 10 1 1
-	0xAAAAAA00, // 4 11 0 1
-	0x7FFAA800, // 5 0 0 11
-	0xFFFAA800, // 5 0 1 10
-	0xFFFAA800, // 5 0 2 9
-	0xFFFAA800, // 5 0 3 8
-	0xFFFAA800, // 5 0 4 7
-	0xFFFAA800, // 5 0 5 6
-	0xFFFAA800, // 5 0 6 5
-	0xFFFAA800, // 5 0 7 4
-	0xFFFAA800, // 5 0 8 3
-	0xFFFAA800, // 5 0 9 2
-	0xFFFAA800, // 5 0 10 1
-	0xFFEAA800, // 5 1 0 10
-	0xFFEAA800, // 5 1 1 9
-	0xFFEAA800, // 5 1 2 8
-	0xFFEAA800, // 5 1 3 7
-	0xFFEAA800, // 5 1 4 6
-	0xFFEAA800, // 5 1 5 5
-	0xFFEAA800, // 5 1 6 4
-	0xFFEAA800, // 5 1 7 3
-	0xFFEAA800, // 5 1 8 2
-	0xFFEAA800, // 5 1 9 1
-	0xFFAAA800, // 5 2 0 9
-	0xFFAAA800, // 5 2 1 8
-	0xFFAAA800, // 5 2 2 7
-	0xFFAAA800, // 5 2 3 6
-	0xFFAAA800, // 5 2 4 5
-	0xFFAAA800, // 5 2 5 4
-	0xFFAAA800, // 5 2 6 3
-	0xFFAAA800, // 5 2 7 2
-	0xFFAAA800, // 5 2 8 1
-	0xFEAAA800, // 5 3 0 8
-	0xFEAAA800, // 5 3 1 7
-	0xFEAAA800, // 5 3 2 6
-	0xFEAAA800, // 5 3 3 5
-	0xFEAAA800, // 5 3 4 4
-	0xFEAAA800, // 5 3 5 3
-	0xFEAAA800, // 5 3 6 2
-	0xFEAAA800, // 5 3 7 1
-	0xFAAAA800, // 5 4 0 7
-	0xFAAAA800, // 5 4 1 6
-	0xFAAAA800, // 5 4 2 5
-	0xFAAAA800, // 5 4 3 4
-	0xFAAAA800, // 5 4 4 3
-	0xFAAAA800, // 5 4 5 2
-	0xFAAAA800, // 5 4 6 1
-	0xEAAAA800, // 5 5 0 6
-	0xEAAAA800, // 5 5 1 5
-	0xEAAAA800, // 5 5 2 4
-	0xEAAAA800, // 5 5 3 3
-	0xEAAAA800, // 5 5 4 2
-	0xEAAAA800, // 5 5 5 1
-	0xAAAAA800, // 5 6 0 5
-	0xAAAAA800, // 5 6 1 4
-	0xAAAAA800, // 5 6 2 3
-	0xAAAAA800, // 5 6 3 2
-	0xAAAAA800, // 5 6 4 1
-	0xAAAAA800, // 5 7 0 4
-	0xAAAAA800, // 5 7 1 3
-	0xAAAAA800, // 5 7 2 2
-	0xAAAAA800, // 5 7 3 1
-	0xAAAAA800, // 5 8 0 3
-	0xAAAAA800, // 5 8 1 2
-	0xAAAAA800, // 5 8 2 1
-	0xAAAAA800, // 5 9 0 2
-	0xAAAAA800, // 5 9 1 1
-	0xAAAAA800, // 5 10 0 1
-	0xFFAAA000, // 6 0 0 10
-	0xFFAAA000, // 6 0 1 9
-	0xFFAAA000, // 6 0 2 8
-	0xFFAAA000, // 6 0 3 7
-	0xFFAAA000, // 6 0 4 6
-	0xFFAAA000, // 6 0 5 5
-	0xFFAAA000, // 6 0 6 4
-	0xFFAAA000, // 6 0 7 3
-	0xFFAAA000, // 6 0 8 2
-	0xFFAAA000, // 6 0 9 1
-	0xFEAAA000, // 6 1 0 9
-	0xFEAAA000, // 6 1 1 8
-	0xFEAAA000, // 6 1 2 7
-	0xFEAAA000, // 6 1 3 6
-	0xFEAAA000, // 6 1 4 5
-	0xFEAAA000, // 6 1 5 4
-	0xFEAAA000, // 6 1 6 3
-	0xFEAAA000, // 6 1 7 2
-	0xFEAAA000, // 6 1 8 1
-	0xFAAAA000, // 6 2 0 8
-	0xFAAAA000, // 6 2 1 7
-	0xFAAAA000, // 6 2 2 6
-	0xFAAAA000, // 6 2 3 5
-	0xFAAAA000, // 6 2 4 4
-	0xFAAAA000, // 6 2 5 3
-	0xFAAAA000, // 6 2 6 2
-	0xFAAAA000, // 6 2 7 1
-	0xEAAAA000, // 6 3 0 7
-	0xEAAAA000, // 6 3 1 6
-	0xEAAAA000, // 6 3 2 5
-	0xEAAAA000, // 6 3 3 4
-	0xEAAAA000, // 6 3 4 3
-	0xEAAAA000, // 6 3 5 2
-	0xEAAAA000, // 6 3 6 1
-	0xAAAAA000, // 6 4 0 6
-	0xAAAAA000, // 6 4 1 5
-	0xAAAAA000, // 6 4 2 4
-	0xAAAAA000, // 6 4 3 3
-	0xAAAAA000, // 6 4 4 2
-	0xAAAAA000, // 6 4 5 1
-	0xAAAAA000, // 6 5 0 5
-	0xAAAAA000, // 6 5 1 4
-	0xAAAAA000, // 6 5 2 3
-	0xAAAAA000, // 6 5 3 2
-	0xAAAAA000, // 6 5 4 1
-	0xAAAAA000, // 6 6 0 4
-	0xAAAAA000, // 6 6 1 3
-	0xAAAAA000, // 6 6 2 2
-	0xAAAAA000, // 6 6 3 1
-	0xAAAAA000, // 6 7 0 3
-	0xAAAAA000, // 6 7 1 2
-	0xAAAAA000, // 6 7 2 1
-	0xAAAAA000, // 6 8 0 2
-	0xAAAAA000, // 6 8 1 1
-	0xAAAAA000, // 6 9 0 1
-	0xFAAA8000, // 7 0 0 9
-	0xFAAA8000, // 7 0 1 8
-	0xFAAA8000, // 7 0 2 7
-	0xFAAA8000, // 7 0 3 6
-	0xFAAA8000, // 7 0 4 5
-	0xFAAA8000, // 7 0 5 4
-	0xFAAA8000, // 7 0 6 3
-	0xFAAA8000, // 7 0 7 2
-	0xFAAA8000, // 7 0 8 1
-	0xEAAA8000, // 7 1 0 8
-	0xEAAA8000, // 7 1 1 7
-	0xEAAA8000, // 7 1 2 6
-	0xEAAA8000, // 7 1 3 5
-	0xEAAA8000, // 7 1 4 4
-	0xEAAA8000, // 7 1 5 3
-	0xEAAA8000, // 7 1 6 2
-	0xEAAA8000, // 7 1 7 1
-	0xAAAA8000, // 7 2 0 7
-	0xAAAA8000, // 7 2 1 6
-	0xAAAA8000, // 7 2 2 5
-	0xAAAA8000, // 7 2 3 4
-	0xAAAA8000, // 7 2 4 3
-	0xAAAA8000, // 7 2 5 2
-	0xAAAA8000, // 7 2 6 1
-	0xAAAA8000, // 7 3 0 6
-	0xAAAA8000, // 7 3 1 5
-	0xAAAA8000, // 7 3 2 4
-	0xAAAA8000, // 7 3 3 3
-	0xAAAA8000, // 7 3 4 2
-	0xAAAA8000, // 7 3 5 1
-	0xAAAA8000, // 7 4 0 5
-	0xAAAA8000, // 7 4 1 4
-	0xAAAA8000, // 7 4 2 3
-	0xAAAA8000, // 7 4 3 2
-	0xAAAA8000, // 7 4 4 1
-	0xAAAA8000, // 7 5 0 4
-	0xAAAA8000, // 7 5 1 3
-	0xAAAA8000, // 7 5 2 2
-	0xAAAA8000, // 7 5 3 1
-	0xAAAA8000, // 7 6 0 3
-	0xAAAA8000, // 7 6 1 2
-	0xAAAA8000, // 7 6 2 1
-	0xAAAA8000, // 7 7 0 2
-	0xAAAA8000, // 7 7 1 1
-	0xAAAA8000, // 7 8 0 1
-	0xAAAA0000, // 8 0 0 8
-	0xAAAA0000, // 8 0 1 7
-	0xAAAA0000, // 8 0 2 6
-	0xAAAA0000, // 8 0 3 5
-	0xAAAA0000, // 8 0 4 4
-	0xAAAA0000, // 8 0 5 3
-	0xAAAA0000, // 8 0 6 2
-	0xAAAA0000, // 8 0 7 1
-	0xAAAA0000, // 8 1 0 7
-	0xAAAA0000, // 8 1 1 6
-	0xAAAA0000, // 8 1 2 5
-	0xAAAA0000, // 8 1 3 4
-	0xAAAA0000, // 8 1 4 3
-	0xAAAA0000, // 8 1 5 2
-	0xAAAA0000, // 8 1 6 1
-	0xAAAA0000, // 8 2 0 6
-	0xAAAA0000, // 8 2 1 5
-	0xAAAA0000, // 8 2 2 4
-	0xAAAA0000, // 8 2 3 3
-	0xAAAA0000, // 8 2 4 2
-	0xAAAA0000, // 8 2 5 1
-	0xAAAA0000, // 8 3 0 5
-	0xAAAA0000, // 8 3 1 4
-	0xAAAA0000, // 8 3 2 3
-	0xAAAA0000, // 8 3 3 2
-	0xAAAA0000, // 8 3 4 1
-	0xAAAA0000, // 8 4 0 4
-	0xAAAA0000, // 8 4 1 3
-	0xAAAA0000, // 8 4 2 2
-	0xAAAA0000, // 8 4 3 1
-	0xAAAA0000, // 8 5 0 3
-	0xAAAA0000, // 8 5 1 2
-	0xAAAA0000, // 8 5 2 1
-	0xAAAA0000, // 8 6 0 2
-	0xAAAA0000, // 8 6 1 1
-	0xAAAA0000, // 8 7 0 1
-	0xAAA80000, // 9 0 0 7
-	0xAAA80000, // 9 0 1 6
-	0xAAA80000, // 9 0 2 5
-	0xAAA80000, // 9 0 3 4
-	0xAAA80000, // 9 0 4 3
-	0xAAA80000, // 9 0 5 2
-	0xAAA80000, // 9 0 6 1
-	0xAAA80000, // 9 1 0 6
-	0xAAA80000, // 9 1 1 5
-	0xAAA80000, // 9 1 2 4
-	0xAAA80000, // 9 1 3 3
-	0xAAA80000, // 9 1 4 2
-	0xAAA80000, // 9 1 5 1
-	0xAAA80000, // 9 2 0 5
-	0xAAA80000, // 9 2 1 4
-	0xAAA80000, // 9 2 2 3
-	0xAAA80000, // 9 2 3 2
-	0xAAA80000, // 9 2 4 1
-	0xAAA80000, // 9 3 0 4
-	0xAAA80000, // 9 3 1 3
-	0xAAA80000, // 9 3 2 2
-	0xAAA80000, // 9 3 3 1
-	0xAAA80000, // 9 4 0 3
-	0xAAA80000, // 9 4 1 2
-	0xAAA80000, // 9 4 2 1
-	0xAAA80000, // 9 5 0 2
-	0xAAA80000, // 9 5 1 1
-	0xAAA80000, // 9 6 0 1
-	0xAAA00000, // 10 0 0 6
-	0xAAA00000, // 10 0 1 5
-	0xAAA00000, // 10 0 2 4
-	0xAAA00000, // 10 0 3 3
-	0xAAA00000, // 10 0 4 2
-	0xAAA00000, // 10 0 5 1
-	0xAAA00000, // 10 1 0 5
-	0xAAA00000, // 10 1 1 4
-	0xAAA00000, // 10 1 2 3
-	0xAAA00000, // 10 1 3 2
-	0xAAA00000, // 10 1 4 1
-	0xAAA00000, // 10 2 0 4
-	0xAAA00000, // 10 2 1 3
-	0xAAA00000, // 10 2 2 2
-	0xAAA00000, // 10 2 3 1
-	0xAAA00000, // 10 3 0 3
-	0xAAA00000, // 10 3 1 2
-	0xAAA00000, // 10 3 2 1
-	0xAAA00000, // 10 4 0 2
-	0xAAA00000, // 10 4 1 1
-	0xAAA00000, // 10 5 0 1
-	0xAA800000, // 11 0 0 5
-	0xAA800000, // 11 0 1 4
-	0xAA800000, // 11 0 2 3
-	0xAA800000, // 11 0 3 2
-	0xAA800000, // 11 0 4 1
-	0xAA800000, // 11 1 0 4
-	0xAA800000, // 11 1 1 3
-	0xAA800000, // 11 1 2 2
-	0xAA800000, // 11 1 3 1
-	0xAA800000, // 11 2 0 3
-	0xAA800000, // 11 2 1 2
-	0xAA800000, // 11 2 2 1
-	0xAA800000, // 11 3 0 2
-	0xAA800000, // 11 3 1 1
-	0xAA800000, // 11 4 0 1
-	0xAA000000, // 12 0 0 4
-	0xAA000000, // 12 0 1 3
-	0xAA000000, // 12 0 2 2
-	0xAA000000, // 12 0 3 1
-	0xAA000000, // 12 1 0 3
-	0xAA000000, // 12 1 1 2
-	0xAA000000, // 12 1 2 1
-	0xAA000000, // 12 2 0 2
-	0xAA000000, // 12 2 1 1
-	0xAA000000, // 12 3 0 1
-	0xA8000000, // 13 0 0 3
-	0xA8000000, // 13 0 1 2
-	0xA8000000, // 13 0 2 1
-	0xA8000000, // 13 1 0 2
-	0xA8000000, // 13 1 1 1
-	0xA8000000, // 13 2 0 1
-	0xA0000000, // 14 0 0 2
-	0xA0000000, // 14 0 1 1
-	0xA0000000, // 14 1 0 1
-	0x80000000, // 15 0 0 1
-	0x80000000,
-	0x80000000,
-	0x80000000,
-	0x80000000,
-	0x80000000,
-	0x80000000,
-	0x80000000,
-	0x80000000,
-	0x80000000,
-	0x80000000,
-	0x80000000,
-	0x80000000,
-	0x80000000,
-	0x80000000,
-	0x80000000,
-	0x80000000,
-	0x80000000,
-	0x80000000,
-	0x80000000,
-	0x80000000,
-	0x80000000,
-	0x80000000,
-	0x80000000,
-	0x80000000,
-};
-
--- a/src/nvtt/cuda/CompressKernel.cu
+++ b/src/nvtt/cuda/CompressKernel.cu
@ -21,8 +21,10 @@
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 // OTHER DEALINGS IN THE SOFTWARE.

+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
 #include <math.h>
-#include <float.h> // FLT_MAX

 #include "CudaMath.h"

@ -51,57 +53,65 @@ __device__ inline void swap(T & a, T & b)
 __constant__ float3 kColorMetric = { 1.0f, 1.0f, 1.0f };
 __constant__ float3 kColorMetricSqr = { 1.0f, 1.0f, 1.0f };

-// Some kernels read the input through texture.
-texture<uchar4, 2, cudaReadModeNormalizedFloat> tex;


 ////////////////////////////////////////////////////////////////////////////////
 // Sort colors
 ////////////////////////////////////////////////////////////////////////////////
-__device__ void sortColors(const float * values, int * ranks)
+__device__ void sortColors(const float * values, int * cmp)
 {
-#if __DEVICE_EMULATION__
-    if (threadIdx.x == 0)
-    {
-        for (int tid = 0; tid < 16; tid++)
-        {
-            int rank = 0;
-            for (int i = 0; i < 16; i++)
-            {
-                rank += (values[i] < values[tid]);
-            }
-            
-            ranks[tid] = rank;
-        }
+	int tid = threadIdx.x;

-        // Resolve elements with the same index.
-        for (int i = 0; i < 15; i++)
-        {
-            for (int tid = 0; tid < 16; tid++)
-            {
-                if (tid > i && ranks[tid] == ranks[i]) ++ranks[tid];
-            }
-        }
-    }
+#if 1
+	cmp[tid] = (values[0] < values[tid]);
+	cmp[tid] += (values[1] < values[tid]);
+	cmp[tid] += (values[2] < values[tid]);
+	cmp[tid] += (values[3] < values[tid]);
+	cmp[tid] += (values[4] < values[tid]);
+	cmp[tid] += (values[5] < values[tid]);
+	cmp[tid] += (values[6] < values[tid]);
+	cmp[tid] += (values[7] < values[tid]);
+	cmp[tid] += (values[8] < values[tid]);
+	cmp[tid] += (values[9] < values[tid]);
+	cmp[tid] += (values[10] < values[tid]);
+	cmp[tid] += (values[11] < values[tid]);
+	cmp[tid] += (values[12] < values[tid]);
+	cmp[tid] += (values[13] < values[tid]);
+	cmp[tid] += (values[14] < values[tid]);
+	cmp[tid] += (values[15] < values[tid]);
+	
+	// Resolve ties: bump this rank past each earlier element (index < tid) that currently holds the same rank.
+	if (tid > 0 && cmp[tid] == cmp[0]) ++cmp[tid];
+	if (tid > 1 && cmp[tid] == cmp[1]) ++cmp[tid];
+	if (tid > 2 && cmp[tid] == cmp[2]) ++cmp[tid];
+	if (tid > 3 && cmp[tid] == cmp[3]) ++cmp[tid];
+	if (tid > 4 && cmp[tid] == cmp[4]) ++cmp[tid];
+	if (tid > 5 && cmp[tid] == cmp[5]) ++cmp[tid];
+	if (tid > 6 && cmp[tid] == cmp[6]) ++cmp[tid];
+	if (tid > 7 && cmp[tid] == cmp[7]) ++cmp[tid];
+	if (tid > 8 && cmp[tid] == cmp[8]) ++cmp[tid];
+	if (tid > 9 && cmp[tid] == cmp[9]) ++cmp[tid];
+	if (tid > 10 && cmp[tid] == cmp[10]) ++cmp[tid];
+	if (tid > 11 && cmp[tid] == cmp[11]) ++cmp[tid];
+	if (tid > 12 && cmp[tid] == cmp[12]) ++cmp[tid];
+	if (tid > 13 && cmp[tid] == cmp[13]) ++cmp[tid];
+	if (tid > 14 && cmp[tid] == cmp[14]) ++cmp[tid];
 #else
-    const int tid = threadIdx.x;

-    int rank = 0;
+	cmp[tid] = 0;

-    #pragma unroll
-    for (int i = 0; i < 16; i++)
-    {
-        rank += (values[i] < values[tid]);
-    }
-    
-    ranks[tid] = rank;
+	#pragma unroll
+	for (int i = 0; i < 16; i++)
+	{
+		cmp[tid] += (values[i] < values[tid]);
+	}

-    // Resolve elements with the same index.
-    #pragma unroll
-    for (int i = 0; i < 15; i++)
-    {
-        if (tid > i && ranks[tid] == ranks[i]) ++ranks[tid];
-    }
+	// Resolve ties; loop form of the unrolled path above, so the guard must be tid > i (only earlier elements count).
+	#pragma unroll
+	for (int i = 0; i < 15; i++)
+	{
+		if (tid > i && cmp[tid] == cmp[i]) ++cmp[tid];
+	}
 #endif
 }

@ -126,7 +136,9 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
 		colors[idx].x = ((c >> 16) & 0xFF) * (1.0f / 255.0f);
 		
 		// No need to synchronize, 16 < warp size.
-		__debugsync();
+#if __DEVICE_EMULATION__
+		} __debugsync(); if (idx < 16) {
+#endif
 		
 		// Sort colors along the best fit line.
 		colorSums(colors, sums);
@ -136,74 +148,17 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
 		
 		dps[idx] = dot(colors[idx], axis);
 		
-		__debugsync();
+#if __DEVICE_EMULATION__
+		} __debugsync(); if (idx < 16) {
+#endif
 		
 		sortColors(dps, xrefs);
 		
 		float3 tmp = colors[idx];
-		__debugsync();
 		colors[xrefs[idx]] = tmp;
 	}
-#if __DEVICE_EMULATION__
-	else
-	{
-		__debugsync();
-		__debugsync();
-		__debugsync();
-	}
-#endif
 }

-__device__ void loadColorBlockTex(uint bn, uint w, float3 colors[16], float3 sums[16], int xrefs[16], int * sameColor)
-{
-	const int bid = blockIdx.x;
-	const int idx = threadIdx.x;
-
-	__shared__ float dps[16];
-
-	if (idx < 16)
-	{
-		float x = 4 * ((bn + bid) % w) + idx % 4;
-		float y = 4 * ((bn + bid) / w) + idx / 4;
-
-		// Read color and copy to shared mem.
-		float4 c = tex2D(tex, x, y);
-
-		colors[idx].x = c.z;
-		colors[idx].y = c.y;
-		colors[idx].z = c.x;
-
-		// No need to synchronize, 16 < warp size.
-		__debugsync();
-		
-		// Sort colors along the best fit line.
-		colorSums(colors, sums);
-		float3 axis = bestFitLine(colors, sums[0], kColorMetric);
-		
-		*sameColor = (axis == make_float3(0, 0, 0));
-		
-		dps[idx] = dot(colors[idx], axis);
-		
-		__debugsync();
-		
-		sortColors(dps, xrefs);
-		
-		float3 tmp = colors[idx];
-		__debugsync();
-		colors[xrefs[idx]] = tmp;
-	}
-#if __DEVICE_EMULATION__
-	else
-	{
-		__debugsync();
-		__debugsync();
-		__debugsync();
-	}
-#endif
-
-}
-
-
 __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16], int * sameColor)
 {
 	const int bid = blockIdx.x;
@ -224,8 +179,11 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
 		
 		colors[idx] = rawColors[idx] * weights[idx];

+		
 		// No need to synchronize, 16 < warp size.
-		__debugsync();
+#if __DEVICE_EMULATION__
+		} __debugsync(); if (idx < 16) {
+#endif

 		// Sort colors along the best fit line.
 		colorSums(colors, sums);
@ -233,70 +191,23 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum

 		*sameColor = (axis == make_float3(0, 0, 0));

+		// Single color compressor needs unweighted colors.
+		if (*sameColor) colors[idx] = rawColors[idx];
+
 		dps[idx] = dot(rawColors[idx], axis);
 		
-		__debugsync();
+#if __DEVICE_EMULATION__
+		} __debugsync(); if (idx < 16) {
+#endif
 		
 		sortColors(dps, xrefs);
 		
 		float3 tmp = colors[idx];
-		float w = weights[idx];
-		__debugsync();
 		colors[xrefs[idx]] = tmp;
+		
+		float w = weights[idx];
 		weights[xrefs[idx]] = w;
 	}
-#if __DEVICE_EMULATION__
-	else
-	{
-		__debugsync();
-		__debugsync();
-		__debugsync();
-	}
-#endif
-}
-
-__device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sums[16], int xrefs[16], int * sameColor)
-{
-	const int bid = blockIdx.x;
-	const int idx = threadIdx.x;
-
-	__shared__ float dps[16];
-
-	if (idx < 16)
-	{
-		// Read color and copy to shared mem.
-		uint c = image[(bid) * 16 + idx];
-	
-		colors[idx].y = ((c >> 8) & 0xFF) * (1.0f / 255.0f);
-		colors[idx].x = ((c >> 16) & 0xFF) * (1.0f / 255.0f);
-		
-		// No need to synchronize, 16 < warp size.
-		__debugsync();
-
-		// Sort colors along the best fit line.
-		colorSums(colors, sums);
-		float2 axis = bestFitLine(colors, sums[0]);
-		
-		*sameColor = (axis == make_float2(0, 0));
-
-		dps[idx] = dot(colors[idx], axis);
-		
-		__debugsync();
-		
-		sortColors(dps, xrefs);
-		
-		float2 tmp = colors[idx];
-		__debugsync();
-		colors[xrefs[idx]] = tmp;
-	}
-#if __DEVICE_EMULATION__
-	else
-	{
-		__debugsync();
-		__debugsync();
-		__debugsync();
-	}
-#endif
 }


@ -305,33 +216,13 @@ __device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sum
 ////////////////////////////////////////////////////////////////////////////////
 inline __device__ float3 roundAndExpand565(float3 v, ushort * w)
 {
-	uint x = __float2uint_rn(__saturatef(v.x) * 31.0f);
-	uint y = __float2uint_rn(__saturatef(v.y) * 63.0f);
-	uint z = __float2uint_rn(__saturatef(v.z) * 31.0f);
-	*w = (x << 11) | (y << 5) | z;
-	v.x = __uint2float_rn(x) * 1.0f / 31.0f;
-	v.y = __uint2float_rn(y) * 1.0f / 63.0f;
-	v.z = __uint2float_rn(z) * 1.0f / 31.0f;
-	return v;
-}
-
-inline __device__ float2 roundAndExpand56(float2 v, ushort * w)
-{
-	uint x = __float2uint_rn(__saturatef(v.x) * 31.0f);
-	uint y = __float2uint_rn(__saturatef(v.y) * 63.0f);
-	*w = (x << 11) | (y << 5);
-	v.x = __uint2float_rn(x) * 1.0f / 31.0f;
-	v.y = __uint2float_rn(y) * 1.0f / 63.0f;
-	return v;
-}
-
-inline __device__ float2 roundAndExpand88(float2 v, ushort * w)
-{
-	uint x = __float2uint_rn(__saturatef(v.x) * 255.0f);
-	uint y = __float2uint_rn(__saturatef(v.y) * 255.0f);
-	*w = (x << 8) | y;
-	v.x = __uint2float_rn(x) * 1.0f / 255.0f;
-	v.y = __uint2float_rn(y) * 1.0f / 255.0f;
+	v.x = rintf(__saturatef(v.x) * 31.0f);
+	v.y = rintf(__saturatef(v.y) * 63.0f);
+	v.z = rintf(__saturatef(v.z) * 31.0f);
+	*w = ((ushort)v.x << 11) | ((ushort)v.y << 5) | (ushort)v.z;
+	v.x *= 0.03227752766457f; // approximate integer bit expansion; factor is close to, but not exactly, 1/31.
+	v.y *= 0.01583151765563f; // close to, but not exactly, 1/63.
+	v.z *= 0.03227752766457f; // same 5-bit factor as v.x.
 	return v;
 }

@ -578,114 +469,6 @@ __device__ float evalPermutation3(const float3 * colors, const float * weights,
 }
 */

-__device__ float evalPermutation4(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
-{
-	// Compute endpoints using least squares.
-	float2 alphax_sum = make_float2(0.0f, 0.0f);
-	uint akku = 0;
-
-	// Compute alpha & beta for this permutation.
-	#pragma unroll
-	for (int i = 0; i < 16; i++)
-	{
-		const uint bits = permutation >> (2*i);
-		
-		alphax_sum += alphaTable4[bits & 3] * colors[i];
-		akku += prods4[bits & 3];
-	}
-
-	float alpha2_sum = float(akku >> 16);
-	float beta2_sum = float((akku >> 8) & 0xff);
-	float alphabeta_sum = float(akku & 0xff);
-	float2 betax_sum = 9.0f * color_sum - alphax_sum;
-
-	const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
-
-	float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
-	float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
-	
-	// Round a, b to the closest 5-6 color and expand...
-	a = roundAndExpand56(a, start);
-	b = roundAndExpand56(b, end);
-
-	// compute the error
-	float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
-
-	return (1.0f / 9.0f) * (e.x + e.y);
-}
-
-__device__ float evalPermutation3(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
-{
-	// Compute endpoints using least squares.
-	float2 alphax_sum = make_float2(0.0f, 0.0f);
-	uint akku = 0;
-
-	// Compute alpha & beta for this permutation.
-	#pragma unroll
-	for (int i = 0; i < 16; i++)
-	{
-		const uint bits = permutation >> (2*i);
-
-		alphax_sum += alphaTable3[bits & 3] * colors[i];
-		akku += prods3[bits & 3];
-	}
-
-	float alpha2_sum = float(akku >> 16);
-	float beta2_sum = float((akku >> 8) & 0xff);
-	float alphabeta_sum = float(akku & 0xff);
-	float2 betax_sum = 4.0f * color_sum - alphax_sum;
-
-	const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
-
-	float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
-	float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
-	
-	// Round a, b to the closest 5-6 color and expand...
-	a = roundAndExpand56(a, start);
-	b = roundAndExpand56(b, end);
-
-	// compute the error
-	float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
-
-	return (1.0f / 4.0f) * (e.x + e.y);
-}
-
-__device__ float evalPermutationCTX(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
-{
-	// Compute endpoints using least squares.
-	float2 alphax_sum = make_float2(0.0f, 0.0f);
-	uint akku = 0;
-
-	// Compute alpha & beta for this permutation.
-	#pragma unroll
-	for (int i = 0; i < 16; i++)
-	{
-		const uint bits = permutation >> (2*i);
-		
-		alphax_sum += alphaTable4[bits & 3] * colors[i];
-		akku += prods4[bits & 3];
-	}
-
-	float alpha2_sum = float(akku >> 16);
-	float beta2_sum = float((akku >> 8) & 0xff);
-	float alphabeta_sum = float(akku & 0xff);
-	float2 betax_sum = 9.0f * color_sum - alphax_sum;
-
-	const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
-
-	float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
-	float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
-	
-	// Round a, b to the closest 8-8 color and expand...
-	a = roundAndExpand88(a, start);
-	b = roundAndExpand88(b, end);
-
-	// compute the error
-	float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
-
-	return (1.0f / 9.0f) * (e.x + e.y);
-}
-

 ////////////////////////////////////////////////////////////////////////////////
 // Evaluate all permutations
@ -814,67 +597,6 @@ __device__ void evalAllPermutations(const float3 * colors, const float * weights
 }
 */

-__device__ void evalAllPermutations(const float2 * colors, float2 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
-{
-	const int idx = threadIdx.x;
-	
-	float bestError = FLT_MAX;
-	
-	__shared__ uint s_permutations[160];
-
-	for(int i = 0; i < 16; i++)
-	{
-		int pidx = idx + NUM_THREADS * i;
-		if (pidx >= 992) break;
-		
-		ushort start, end;
-		uint permutation = permutations[pidx];
-		if (pidx < 160) s_permutations[pidx] = permutation;
-				
-		float error = evalPermutation4(colors, colorSum, permutation, &start, &end);
-		
-		if (error < bestError)
-		{
-			bestError = error;
-			bestPermutation = permutation;
-			bestStart = start;
-			bestEnd = end;
-		}
-	}
-
-	if (bestStart < bestEnd)
-	{
-		swap(bestEnd, bestStart);
-		bestPermutation ^= 0x55555555;	// Flip indices.
-	}
-
-	for(int i = 0; i < 3; i++)
-	{
-		int pidx = idx + NUM_THREADS * i;
-		if (pidx >= 160) break;
-		
-		ushort start, end;
-		uint permutation = s_permutations[pidx];
-		float error = evalPermutation3(colors, colorSum, permutation, &start, &end);
-		
-		if (error < bestError)
-		{
-			bestError = error;
-			bestPermutation = permutation;
-			bestStart = start;
-			bestEnd = end;
-			
-			if (bestStart > bestEnd)
-			{
-				swap(bestEnd, bestStart);
-				bestPermutation ^= (~bestPermutation >> 1) & 0x55555555;	// Flip indices.
-			}
-		}
-	}
-
-	errors[idx] = bestError;
-}
-
 __device__ void evalLevel4Permutations(const float3 * colors, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
 {
 	const int idx = threadIdx.x;
@ -943,40 +665,6 @@ __device__ void evalLevel4Permutations(const float3 * colors, const float * weig
 	errors[idx] = bestError;
 }

-__device__ void evalAllPermutationsCTX(const float2 * colors, float2 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
-{
-	const int idx = threadIdx.x;
-	
-	float bestError = FLT_MAX;
-	
-	for(int i = 0; i < 16; i++)
-	{
-		int pidx = idx + NUM_THREADS * i;
-		if (pidx >= 704) break;
-		
-		ushort start, end;
-		uint permutation = permutations[pidx];
-
-		float error = evalPermutationCTX(colors, colorSum, permutation, &start, &end);
-		
-		if (error < bestError)
-		{
-			bestError = error;
-			bestPermutation = permutation;
-			bestStart = start;
-			bestEnd = end;
-		}
-	}
-
-	if (bestStart < bestEnd)
-	{
-		swap(bestEnd, bestStart);
-		bestPermutation ^= 0x55555555;	// Flip indices.
-	}
-
-	errors[idx] = bestError;
-}
-

 ////////////////////////////////////////////////////////////////////////////////
 // Find index with minimum error
@ -1004,6 +692,7 @@ __device__ int findMinError(float * errors)
 			}
 		}
 	}
+
 #else
 	for(int d = NUM_THREADS/2; d > 32; d >>= 1)
 	{
@ -1086,11 +775,6 @@ __device__ void saveBlockDXT1(ushort start, ushort end, uint permutation, int xr
 	result[bid].y = indices;
 }

-__device__ void saveBlockCTX1(ushort start, ushort end, uint permutation, int xrefs[16], uint2 * result)
-{
-	saveBlockDXT1(start, end, permutation, xrefs, result);
-}
-
 __device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result)
 {
 	const int bid = blockIdx.x;
@ -1114,41 +798,6 @@ __device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result)
 	}
 }

-__device__ void saveSingleColorBlockDXT1(float2 color, uint2 * result)
-{
-	const int bid = blockIdx.x;
-
-	int r = color.x * 255;
-	int g = color.y * 255;
-
-	ushort color0 = (OMatch5[r][0] << 11) | (OMatch6[g][0] << 5);
-	ushort color1 = (OMatch5[r][1] << 11) | (OMatch6[g][1] << 5);
-
-	if (color0 < color1)
-	{
-		result[bid].x = (color0 << 16) | color1;
-		result[bid].y = 0xffffffff;
-	}
-	else
-	{
-		result[bid].x = (color1 << 16) | color0;
-		result[bid].y = 0xaaaaaaaa;
-	}
-}
-
-__device__ void saveSingleColorBlockCTX1(float2 color, uint2 * result)
-{
-	const int bid = blockIdx.x;
-
-	int r = color.x * 255;
-	int g = color.y * 255;
-
-	ushort color0 = (r << 8) | (g);
-	
-	result[bid].x = (color0 << 16) | color0;
-	result[bid].y = 0x00000000;
-}
-

 ////////////////////////////////////////////////////////////////////////////////
 // Compress color block
@ -1187,41 +836,6 @@ __global__ void compressDXT1(const uint * permutations, const uint * image, uint
 	}
 }

-__global__ void compressDXT1_Tex(uint bn, uint w, const uint * permutations, uint2 * result)
-{
-	__shared__ float3 colors[16];
-	__shared__ float3 sums[16];
-	__shared__ int xrefs[16];
-	__shared__ int sameColor;
-	
-	loadColorBlockTex(bn, w, colors, sums, xrefs, &sameColor);
-
-	__syncthreads();
-
-	if (sameColor)
-	{
-		if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
-		return;
-	}
-
-	ushort bestStart, bestEnd;
-	uint bestPermutation;
-
-	__shared__ float errors[NUM_THREADS];
-
-	evalAllPermutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
-	
-	// Use a parallel reduction to find minimum error.
-	const int minIdx = findMinError(errors);
-	
-	// Only write the result of the winner thread.
-	if (threadIdx.x == minIdx)
-	{
-		saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result);
-	}
-}
-
-
 __global__ void compressLevel4DXT1(const uint * permutations, const uint * image, uint2 * result)
 {
 	__shared__ float3 colors[16];
@ -1292,75 +906,6 @@ __global__ void compressWeightedDXT1(const uint * permutations, const uint * ima
 }


-__global__ void compressNormalDXT1(const uint * permutations, const uint * image, uint2 * result)
-{
-	__shared__ float2 colors[16];
-	__shared__ float2 sums[16];
-	__shared__ int xrefs[16];
-	__shared__ int sameColor;
-
-	loadColorBlock(image, colors, sums, xrefs, &sameColor);
-	
-	__syncthreads();
-
-	if (sameColor)
-	{
-		if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
-		return;
-	}
-
-	ushort bestStart, bestEnd;
-	uint bestPermutation;
-
-	__shared__ float errors[NUM_THREADS];
-
-	evalAllPermutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
-	
-	// Use a parallel reduction to find minimum error.
-	const int minIdx = findMinError(errors);
-	
-	// Only write the result of the winner thread.
-	if (threadIdx.x == minIdx)
-	{
-		saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result);
-	}
-}
-
-__global__ void compressCTX1(const uint * permutations, const uint * image, uint2 * result)
-{
-	__shared__ float2 colors[16];
-	__shared__ float2 sums[16];
-	__shared__ int xrefs[16];
-	__shared__ int sameColor;
-	
-	loadColorBlock(image, colors, sums, xrefs, &sameColor);
-	
-	__syncthreads();
-
-	if (sameColor)
-	{
-		if (threadIdx.x == 0) saveSingleColorBlockCTX1(colors[0], result);
-		return;
-	}
-
-	ushort bestStart, bestEnd;
-	uint bestPermutation;
-
-	__shared__ float errors[NUM_THREADS];
-
-	evalAllPermutationsCTX(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
-	
-	// Use a parallel reduction to find minimum error.
-	const int minIdx = findMinError(errors);
-	
-	// Only write the result of the winner thread.
-	if (threadIdx.x == minIdx)
-	{
-		saveBlockCTX1(bestStart, bestEnd, bestPermutation, xrefs, result);
-	}
-}
-
-
 /*
 __device__ float computeError(const float weights[16], uchar a0, uchar a1)
 {
@ -1539,125 +1084,6 @@ __global__ void compressDXT5(const uint * permutations, const uint * image, uint
 }
 */

-/*__device__ void evaluatePalette(uint alpha0, uint alpha1, uint alphas[8])
-{
-	alpha[0] = alpha0;
-	alpha[1] = alpha1;
-	alpha[2] = (6 * alpha[0] + 1 * alpha[1]) / 7;	// bit code 010
-	alpha[3] = (5 * alpha[0] + 2 * alpha[1]) / 7;	// bit code 011
-	alpha[4] = (4 * alpha[0] + 3 * alpha[1]) / 7;	// bit code 100
-	alpha[5] = (3 * alpha[0] + 4 * alpha[1]) / 7;	// bit code 101
-	alpha[6] = (2 * alpha[0] + 5 * alpha[1]) / 7;	// bit code 110
-	alpha[7] = (1 * alpha[0] + 6 * alpha[1]) / 7;	// bit code 111
-}
-
-__device__ uint computeAlphaError(const uint block[16], uint alpha0, uint alpha1, int bestError = INT_MAX)
-{
-	uint8 alphas[8];
-	evaluatePalette(alpha0, alpha1, alphas);
-
-	int totalError = 0;
-
-	for (uint i = 0; i < 16; i++)
-	{
-		uint8 alpha = block[i];
-
-		// @@ It should be possible to do this much faster.
-
-		int minDist = INT_MAX;
-		for (uint p = 0; p < 8; p++)
-		{
-			int dist = alphaDistance(alpha, alphas[p]);
-			minDist = min(dist, minDist);
-		}
-
-
-
-		totalError += minDist;
-
-		if (totalError > bestError)
-		{
-			// early out
-			return totalError;
-		}
-	}
-
-	return totalError;
-}
-
-
-void compressDXT5A(uint alpha[16])
-{
-	// Get min/max alpha.
-	for (uint i = 0; i < 16; i++)
-	{
-		mina = min(mina, alpha[i]);
-		maxa = max(maxa, alpha[i]);
-	}
-
-	dxtBlock->alpha0 = maxa;
-	dxtBlock->alpha1 = mina;
-
-	if (maxa - mina > 8)
-	{
-		int besterror = computeAlphaError(rgba, dxtBlock);
-		int besta0 = maxa;
-		int besta1 = mina;
-
-		// Expand search space a bit.
-		const int alphaExpand = 8;
-		mina = (mina <= alphaExpand) ? 0 : mina - alphaExpand;
-		maxa = (maxa <= 255-alphaExpand) ? 255 : maxa + alphaExpand;
-
-		for (int a0 = mina+9; a0 < maxa; a0++)
-		{
-			for (int a1 = mina; a1 < a0-8; a1++)
-			{
-				nvDebugCheck(a0 - a1 > 8);
-
-				dxtBlock->alpha0 = a0;
-				dxtBlock->alpha1 = a1;
-				int error = computeAlphaError(rgba, dxtBlock, besterror);
-
-				if (error < besterror)
-				{
-					besterror = error;
-					besta0 = a0;
-					besta1 = a1;
-				}
-			}
-		}
-
-		dxtBlock->alpha0 = besta0;
-		dxtBlock->alpha1 = besta1;
-	}
-}
-
-__global__ void compressDXT5n(uint blockNum, uint2 * d_result)
-{
-	uint idx = blockIdx.x * 128 + threadIdx.x;
-
-	if (idx >= blockNum)
-	{
-		return;
-	}
-
-	// @@ Ideally we would load the data to shared mem to achieve coalesced global mem access.
-	// @@ Blocks would require too much shared memory (8k) and limit occupancy.
-
-	// @@ Ideally we should use SIMD processing, multiple threads (4-8) processing the same block.
-	// That simplifies coalescing, and reduces divergence.
-
-	// @@ Experiment with texture. That's probably the most simple approach.
-
-	uint x[16];
-	uint y[16];
-
-
-}
-*/
-
-
 ////////////////////////////////////////////////////////////////////////////////
 // Setup kernel
 ////////////////////////////////////////////////////////////////////////////////
@ -1685,20 +1111,6 @@ extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result
 	compressDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
 }

-extern "C" void compressKernelDXT1_Tex(uint bn, uint blockNum, uint w, cudaArray * d_data, uint * d_result, uint * d_bitmaps)
-{
-	// Setup texture
-	tex.normalized = false;
-	tex.filterMode = cudaFilterModePoint;
-	tex.addressMode[0] = cudaAddressModeClamp;
-	tex.addressMode[1] = cudaAddressModeClamp;
-    
-	cudaBindTextureToArray(tex, d_data);
-
-	compressDXT1_Tex<<<blockNum, NUM_THREADS>>>(bn, w, d_bitmaps, (uint2 *)d_result);
-}
-
-
 extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
 {
 	compressLevel4DXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
@ -1708,26 +1120,3 @@ extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint *
 {
 	compressWeightedDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
 }
-
-extern "C" void compressNormalKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
-{
-	compressNormalDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
-}
-
-extern "C" void compressKernelCTX1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
-{
-	compressCTX1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
-}
-
-extern "C" void compressKernelDXT5n(uint blockNum, cudaArray * d_data, uint * d_result)
-{
-	// Setup texture
-	tex.normalized = false;
-	tex.filterMode = cudaFilterModePoint;
-	tex.addressMode[0] = cudaAddressModeClamp;
-	tex.addressMode[1] = cudaAddressModeClamp;
-    
-	cudaBindTextureToArray(tex, d_data);
-
-//	compressDXT5n<<<blockNum/128, 128>>>(blockNum, (uint2 *)d_result);
-}
--- a/src/nvtt/cuda/CudaCompressDXT.cpp
+++ b/src/nvtt/cuda/CudaCompressDXT.cpp
--- a/src/nvtt/cuda/CudaCompressDXT.h
+++ b/src/nvtt/cuda/CudaCompressDXT.h
@ -42,17 +42,12 @@ namespace nv
 		void setImage(const Image * image, nvtt::AlphaMode alphaMode);

 		void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
-		void compressDXT1_Tex(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
 		void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
 		void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
-		void compressDXT1n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
-		void compressCTX1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
-		void compressDXT5n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);

 	private:

 		uint * m_bitmapTable;
-		uint * m_bitmapTableCTX;
 		uint * m_data;
 		uint * m_result;
 		
--- a/src/nvtt/cuda/CudaMath.h
+++ b/src/nvtt/cuda/CudaMath.h
@ -26,6 +26,7 @@
 #ifndef CUDAMATH_H
 #define CUDAMATH_H

+#include <float.h>


 inline __device__ __host__ float3 operator *(float3 a, float3 b)
@ -86,69 +87,6 @@ inline __device__ __host__ bool operator ==(float3 a, float3 b)
 	return a.x == b.x && a.y == b.y && a.z == b.z;
 }

-
-// float2 operators
-inline __device__ __host__ float2 operator *(float2 a, float2 b)
-{
-    return make_float2(a.x*b.x, a.y*b.y);
-}
-
-inline __device__ __host__ float2 operator *(float f, float2 v)
-{
-    return make_float2(v.x*f, v.y*f);
-}
-
-inline __device__ __host__ float2 operator *(float2 v, float f)
-{
-    return make_float2(v.x*f, v.y*f);
-}
-
-inline __device__ __host__ float2 operator +(float2 a, float2 b)
-{
-    return make_float2(a.x+b.x, a.y+b.y);
-}
-
-inline __device__ __host__ void operator +=(float2 & b, float2 a)
-{
-    b.x += a.x;
-    b.y += a.y;
-}
-
-inline __device__ __host__ float2 operator -(float2 a, float2 b)
-{
-    return make_float2(a.x-b.x, a.y-b.y);
-}
-
-inline __device__ __host__ void operator -=(float2 & b, float2 a)
-{
-    b.x -= a.x;
-    b.y -= a.y;
-}
-
-inline __device__ __host__ float2 operator /(float2 v, float f)
-{
-    float inv = 1.0f / f;
-    return v * inv;
-}
-
-inline __device__ __host__ void operator /=(float2 & b, float f)
-{
-    float inv = 1.0f / f;
-    b.x *= inv;
-	b.y *= inv;
-}
-
-inline __device__ __host__ bool operator ==(float2 a, float2 b)
-{
-	return a.x == b.x && a.y == b.y;
-}
-
-
-inline __device__ __host__ float dot(float2 a, float2 b)
-{
-    return a.x * b.x + a.y * b.y;
-}
-
 inline __device__ __host__ float dot(float3 a, float3 b)
 {
    return a.x * b.x + a.y * b.y + a.z * b.z;
@ -305,89 +243,5 @@ inline __device__ float3 bestFitLine(const float3 * colors, float3 color_sum, fl
 	return firstEigenVector(covariance);
 }

-// @@ For 2D this may not be the most efficient method. It's a quadratic equation, right?
-inline __device__ __host__ float2 firstEigenVector2D( float matrix[3] )
-{
-	// @@ 8 iterations is probably more than enough.
-
-	float2 v = make_float2(1.0f, 1.0f);
-	for(int i = 0; i < 8; i++) {
-		float x = v.x * matrix[0] + v.y * matrix[1];
-		float y = v.x * matrix[1] + v.y * matrix[2];
-		float m = max(x, y);        
-		float iv = 1.0f / m;
-		if (m == 0.0f) iv = 0.0f;
-		v = make_float2(x*iv, y*iv);
-	}
-
-	return v;
-}
-
-inline __device__ void colorSums(const float2 * colors, float2 * sums)
-{
-#if __DEVICE_EMULATION__
-	float2 color_sum = make_float2(0.0f, 0.0f);
-	for (int i = 0; i < 16; i++)
-	{
-		color_sum += colors[i];
-	}
-
-	for (int i = 0; i < 16; i++)
-	{
-		sums[i] = color_sum;
-	}
-#else
-
-	const int idx = threadIdx.x;
-
-	sums[idx] = colors[idx];
-	sums[idx] += sums[idx^8];
-	sums[idx] += sums[idx^4];
-	sums[idx] += sums[idx^2];
-	sums[idx] += sums[idx^1];
-
-#endif
-}
-
-inline __device__ float2 bestFitLine(const float2 * colors, float2 color_sum)
-{
-	// Compute covariance matrix of the given colors.
-#if __DEVICE_EMULATION__
-	float covariance[3] = {0, 0, 0};
-	for (int i = 0; i < 16; i++)
-	{
-		float2 a = (colors[i] - color_sum * (1.0f / 16.0f));
-		covariance[0] += a.x * a.x;
-		covariance[1] += a.x * a.y;
-		covariance[2] += a.y * a.y;
-	}
-#else
-
-	const int idx = threadIdx.x;
-
-	float2 diff = (colors[idx] - color_sum * (1.0f / 16.0f));
-
-	__shared__ float covariance[16*3];
-
-	covariance[3 * idx + 0] = diff.x * diff.x;
-	covariance[3 * idx + 1] = diff.x * diff.y;
-	covariance[3 * idx + 2] = diff.y * diff.y;
-
-	for(int d = 8; d > 0; d >>= 1)
-	{
-		if (idx < d)
-		{
-			covariance[3 * idx + 0] += covariance[3 * (idx+d) + 0];
-			covariance[3 * idx + 1] += covariance[3 * (idx+d) + 1];
-			covariance[3 * idx + 2] += covariance[3 * (idx+d) + 2];
-		}
-	}
-
-#endif
-
-	// Compute first eigen vector.
-	return firstEigenVector2D(covariance);
-}
-

 #endif // CUDAMATH_H
--- a/src/nvtt/experimental/nvtt_experimental.cpp
+++ b/src/nvtt/experimental/nvtt_experimental.cpp
@ -1,60 +0,0 @@
-
-#include "nvtt_experimental.h"
-
-struct NvttTexture
-{
-	NvttTexture() :
-		m_constant(false),
-		m_image(NULL),
-		m_floatImage(NULL)
-	{
-	}
-	
-	~NvttTexture()
-	{
-		if (m_constant && m_image) m_image->unwrap();
-		delete m_image;
-		delete m_floatImage;
-	}
-	
-	bool m_constant;
-	Image * m_image;
-	FloatImage * m_floatImage;
-};
-
-NvttTexture * nvttCreateTexture() 
-{
-	return new NvttTexture();
-}
-	
-void nvttDestroyTexture(NvttTexture * tex)
-{
-	delete tex;
-}
-
-void nvttSetImageData(NvttImage * img, NvttInputFormat format, uint w, uint h, void * data)
-{
-	nvCheck(img != NULL);
-	
-	if (format == NVTT_InputFormat_BGRA_8UB)
-	{
-		img->m_constant = false;
-		img->m_image->allocate(w, h);
-		memcpy(img->m_image->pixels(), data, w * h * 4);
-	}
-	else
-	{
-		nvCheck(false);
-	}
-}
-
-void nvttCompressImage(NvttImage * img, NvttFormat format)
-{
-	nvCheck(img != NULL);
-
-	// @@ Invoke appropriate compressor.
-}
-
-
-
-#endif // NVTT_EXPERIMENTAL_H
--- a/src/nvtt/experimental/nvtt_experimental.h
+++ b/src/nvtt/experimental/nvtt_experimental.h
@ -1,103 +0,0 @@
-
-#ifndef NVTT_EXPERIMENTAL_H
-#define NVTT_EXPERIMENTAL_H
-
-#include <nvtt/nvtt.h>
-
-typedef struct NvttTexture NvttTexture;
-typedef struct NvttOutputOptions NvttOutputOptions;
-
-
-// Global functions
-void nvttInitialize(...);
-unsigned int nvttGetVersion();
-const char * nvttGetErrorString(unsigned int error);
-
-
-// Texture functions
-NvttTexture * nvttCreateTexture();
-void nvttDestroyTexture(NvttTexture * tex);
-
-void nvttSetTexture2D(NvttTexture * tex, NvttInputFormat format, uint w, uint h, uint idx, void * data);
-
-void nvttResize(NvttTexture * img, uint w, uint h);
-unsigned int nvttDownsample(NvttTexture * img);
-
-void nvttOutputCompressed(NvttTexture * img, NvttOutputFormat format);
-void nvttOutputPixelFormat(NvttTexture * img, NvttOutputFormat format);
-
-
-
-
-// How to control the compression parameters?
-
-// Using many arguments:
-// void nvttCompressImage(img, format, quality, r, g, b, a, ...);
-
-// Using existing compression option class:
-// compressionOptions = nvttCreateCompressionOptions();
-// nvttSetCompressionOptionsFormat(compressionOptions, format);
-// nvttSetCompressionOptionsQuality(compressionOptions, quality);
-// nvttSetCompressionOptionsQuality(compressionOptions, quality);
-// nvttSetCompressionOptionsColorWeights(compressionOptions, r, g, b, a);
-// ...
-// nvttCompressImage(img, compressionOptions);
-
-// Using thread local context state:
-// void nvttSetCompressionFormat(format);
-// void nvttSetCompressionQuality(quality);
-// void nvttSetCompressionColorWeights(r, g, b, a);
-// ...
-// nvttCompressImage(img);
-
-// Using thread local context state, but with GL style function arguments:
-// nvttCompressorParameteri(NVTT_FORMAT, format);
-// nvttCompressorParameteri(NVTT_QUALITY, quality);
-// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_RED, r);
-// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_GREEN, g);
-// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_BLUE, b);
-// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_ALPHA, a);
-// or nvttCompressorParameter4f(NVTT_COLOR_WEIGHTS, r, g, b, a);
-// ...
-// nvttCompressImage(img);
-
-// How do we get the compressed output?
-// - Using callbacks. (via new entrypoints, or through outputOptions)
-// - Return it explicitely from nvttCompressImage.
-// - Store it along the image, retrieve later explicitely with 'nvttGetCompressedData(img, ...)'
-
-/*
-
-// Global functions
-void nvttInitialize(...);
-unsigned int nvttGetVersion();
-const char * nvttGetErrorString(unsigned int error);
-
-// Context object
-void nvttCreateContext();
-void nvttDestroyContext();
-
-void nvttSetParameter1i(unsigned int name, int value);
-
-void nvttSetParameter1f(unsigned int name, float value);
-void nvttSetParameter2f(unsigned int name, float v0, float v1);
-void nvttSetParameter3f(unsigned int name, float v0, float v1, float v2);
-void nvttSetParameter4f(unsigned int name, float v0, float v1, float v2, float v3);
-
-// Image object
-NvttImage * nvttCreateImage();
-void nvttDestroyImage(NvttImage * img);
-
-void nvttSetImageData(NvttImage * image, NvttInputFormat format, unsigned int w, unsigned int h, void * data);
-
-void nvttSetImageParameter1i(NvttImage * image, unsigned int name, int value);
-void nvttSetImageParameter1f(NvttImage * image, unsigned int name, float value);
-
-void nvttResizeImage(NvttImage * image, unsigned int w, unsigned int h);
-void nvttQuantizeImage(NvttImage * image, bool dither, unsigned int rbits, unsigned int gbits, unsigned int bbits, unsigned int abits);
-void nvttCompressImage(NvttImage * image, void * buffer, int size);
-
-*/
-
-
-#endif // NVTT_EXPERIMENTAL_H
--- a/src/nvtt/experimental/test.cpp
+++ b/src/nvtt/experimental/test.cpp
@ -1,61 +0,0 @@
-
-#include "nvtt_experimental.h"
-
-/*
-Errors in the original API:
- Too many memory copies.
- Implementation too complicated.
- Error output should not be in output options.
- Data driven interface. Follows the dialog model. Provide all the data upfront.
-*/
-
-
-// Output texture with mipmaps
-void example0()
-{
-	CompressionOptions compressionOptions;
-	OutputOptions outputOptions;
-	
-	Texture img;
-	img.setTexture2D(format, w, h, 0, data);
-
-	Compressor context;
-	context.outputHeader(outputOptions);
-	context.outputCompressed(img, compressionOptions, outputOptions);
-
-	img.toLinear(2.2);	
-	while (img.downsample(NVTT_FILTER_BOX))
-	{
-		img.toGamma(2.2);	
-		outputCompressed(img, compressionOptions, outputOptions);		
-	}
-}
-
-
-// Output texture with colored mipmaps
-void example1()
-{
-	CompressionOptions compressionOptions;
-	OutputOptions outputOptions;
-	
-	Texture img;
-	img.setTexture2D(format, w, h, 0, data);
-
-	Compressor context;
-	context.outputHeader(outputOptions);
-	context.outputCompressed(img, compressionOptions, outputOptions);
-
-	img.toLinear(2.2);	
-	while (img.downsample(NVTT_FILTER_BOX))
-	{
-		img.toGamma(2.2);
-		
-		Texture mipmap = img;
-		mipmap.blend(color[i].r, color[i].g, color[i].b, 0.5f);
-		
-		context.outputCompressed(mipmap, compressionOptions, outputOptions);		
-	}
-}
-
-
-
--- a/src/nvtt/nvtt.cpp
+++ b/src/nvtt/nvtt.cpp
@ -42,8 +42,6 @@ const char * nvtt::errorString(Error e)
 			return "Error opening file";
 		case Error_FileWrite:
 			return "Error writing through output handler";
-        case Error_UnsupportedOutputFormat:
-            return "The container file does not support the selected output format";
 	}
 	
 	return "Invalid error";
--- a/src/nvtt/nvtt.h
+++ b/src/nvtt/nvtt.h
@ -47,15 +47,12 @@
 #	define NVTT_API
 #endif

-#define NVTT_VERSION 201
+#define NVTT_VERSION 200

-#define NVTT_FORBID_COPY(Class) \
+#define NVTT_DECLARE_PIMPL(Class) \
 	private: \
 		Class(const Class &); \
 		void operator=(const Class &); \
-	public:
-
-#define NVTT_DECLARE_PIMPL(Class) \
 	public: \
 		struct Private; \
 		Private & m
@ -64,9 +61,6 @@
 // Public interface.
 namespace nvtt
 {
-	// Forward declarations.
-	struct TexImage;
-	
 	/// Supported compression formats.
 	enum Format
 	{
@ -89,19 +83,6 @@ namespace nvtt
 		Format_BC3n = Format_DXT5n,
 		Format_BC4,     // ATI1
 		Format_BC5,     // 3DC, ATI2
-
-		Format_DXT1n,
-		Format_CTX1,
-	};
-
-	/// Pixel types.
-	enum PixelType
-	{
-		PixelType_UnsignedNorm,
-		PixelType_SignedNorm,
-		PixelType_UnsignedInt,
-		PixelType_SignedInt,
-		PixelType_Float,
 	};
 	
 	/// Quality modes.
@ -116,7 +97,6 @@ namespace nvtt
 	/// Compression options. This class describes the desired compression format and other compression settings.
 	struct CompressionOptions
 	{
-		NVTT_FORBID_COPY(CompressionOptions);
 		NVTT_DECLARE_PIMPL(CompressionOptions);

 		NVTT_API CompressionOptions();
@ -132,23 +112,10 @@ namespace nvtt

 		// Set color mask to describe the RGB/RGBA format.
 		NVTT_API void setPixelFormat(unsigned int bitcount, unsigned int rmask, unsigned int gmask, unsigned int bmask, unsigned int amask);
-		NVTT_API void setPixelFormat(unsigned char rsize, unsigned char gsize, unsigned char bsize, unsigned char asize);
-		
-		NVTT_API void setPixelType(PixelType pixelType);

 		NVTT_API void setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold = 127);
 	};

-	/* 
-	// DXGI_FORMAT_R16G16_FLOAT
-	compressionOptions.setPixelType(PixelType_Float);
-	compressionOptions.setPixelFormat2(16, 16, 0, 0);
-	
-	// DXGI_FORMAT_R32G32B32A32_FLOAT
-	compressionOptions.setPixelType(PixelType_Float);
-	compressionOptions.setPixelFormat2(32, 32, 32, 32);
-	*/
-	

 	/// Wrap modes.
 	enum WrapMode
@ -170,7 +137,8 @@ namespace nvtt
 	enum InputFormat
 	{
 		InputFormat_BGRA_8UB,
-		InputFormat_RGBA_32F,
+	//	InputFormat_RGBE_8UB,
+	//	InputFormat_BGRA_32F,
 	};
 	
 	/// Mipmap downsampling filters.
@ -181,23 +149,11 @@ namespace nvtt
 		MipmapFilter_Kaiser,    ///< Kaiser-windowed Sinc filter is the best downsampling filter.
 	};
 	
-	/// Texture resize filters.
-	enum ResizeFilter
-	{
-		ResizeFilter_Box,
-		ResizeFilter_Triangle,
-		ResizeFilter_Kaiser,
-		ResizeFilter_Mitchell,
-	};
-	
 	/// Color transformation.
 	enum ColorTransform
 	{
 		ColorTransform_None,
-		ColorTransform_Linear,      ///< Not implemented.
-		ColorTransform_Swizzle,     ///< Not implemented.
-		ColorTransform_YCoCg,       ///< Transform into r=Co, g=Cg, b=0, a=Y
-		ColorTransform_ScaledYCoCg, ///< Not implemented.
+		ColorTransform_Linear,
 	};
 	
 	/// Extents rounding mode.
@ -220,7 +176,6 @@ namespace nvtt
 	/// Input options. Specify format and layout of the input texture.
 	struct InputOptions
 	{
-		NVTT_FORBID_COPY(InputOptions);
 		NVTT_DECLARE_PIMPL(InputOptions);

 		NVTT_API InputOptions();
@ -235,7 +190,6 @@ namespace nvtt
 		
 		// Set mipmap data. Copies the data.
 		NVTT_API bool setMipmapData(const void * data, int w, int h, int d = 1, int face = 0, int mipmap = 0);
-		NVTT_API bool setMipmapChannelData(const void * data, int channel, int w, int h, int d = 1, int face = 0, int mipmap = 0);
 		
 		// Describe the format of the input.
 		NVTT_API void setFormat(InputFormat format);
@ -246,7 +200,7 @@ namespace nvtt
 		// Set gamma settings.
 		NVTT_API void setGamma(float inputGamma, float outputGamma);
 		
-		// Set texture wrapping mode.
+		// Set texture wrapping mode.
 		NVTT_API void setWrapMode(WrapMode mode);
 		
 		// Set mipmapping options.
@ -261,18 +215,13 @@ namespace nvtt
 		NVTT_API void setNormalFilter(float sm, float medium, float big, float large);
 		NVTT_API void setNormalizeMipmaps(bool b);
 		
-		// Set color transforms.
+		// Set color transforms. @@ Not implemented!
 		NVTT_API void setColorTransform(ColorTransform t);
 		NVTT_API void setLinearTransform(int channel, float w0, float w1, float w2, float w3);
-		NVTT_API void setLinearTransform(int channel, float w0, float w1, float w2, float w3, float offset);
-		NVTT_API void setSwizzleTransform(int x, int y, int z, int w);
 		
 		// Set resizing options.
 		NVTT_API void setMaxExtents(int d);
 		NVTT_API void setRoundMode(RoundMode mode);
-
-		// Set whether or not to premultiply color by alpha
-		NVTT_API void setPremultiplyAlpha(bool b);
 	};
 	
 	
@ -297,7 +246,6 @@ namespace nvtt
 		Error_CudaError,
  		Error_FileOpen,
  		Error_FileWrite,
-        Error_UnsupportedOutputFormat,
 	};
 	
 	/// Error handler.
@ -309,19 +257,11 @@ namespace nvtt
 		virtual void error(Error e) = 0;
 	};

-	/// Container.
-	enum Container
-	{
-		Container_DDS,
-		Container_DDS10,
-	};
-	

 	/// Output Options. This class holds pointers to the interfaces that are used to report the output of 
 	/// the compressor to the user.
 	struct OutputOptions
 	{
-		NVTT_FORBID_COPY(OutputOptions);
 		NVTT_DECLARE_PIMPL(OutputOptions);

 		NVTT_API OutputOptions();
@ -335,14 +275,12 @@ namespace nvtt
 		NVTT_API void setOutputHandler(OutputHandler * outputHandler);
 		NVTT_API void setErrorHandler(ErrorHandler * errorHandler);
 		NVTT_API void setOutputHeader(bool outputHeader);
-		NVTT_API void setContainer(Container container);
 	};


-	/// Context.
+	/// Texture compressor.
 	struct Compressor
 	{
-		NVTT_FORBID_COPY(Compressor);
 		NVTT_DECLARE_PIMPL(Compressor);

 		NVTT_API Compressor();
@ -356,90 +294,15 @@ namespace nvtt
 		
 		// Estimate the size of compressing the input with the given options.
 		NVTT_API int estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const;
-
-		// TexImage api
-		NVTT_API TexImage createTexImage();
-		NVTT_API int estimateSize(const TexImage & tex, const CompressionOptions & compressionOptions);
-		NVTT_API void outputCompressed(const TexImage & tex, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions);
 	};
-
-	// "Compressor" is deprecated. This should have been called "Context"
-	typedef Compressor Context;
-
 	
-	/// A texture mipmap.
-	struct TexImage
-	{
-		NVTT_API TexImage();
-		NVTT_API TexImage(const TexImage & tex);
-		NVTT_API ~TexImage();
-
-		NVTT_API void operator=(const TexImage & tex);
-
-		// Texture parameters.
-		NVTT_API void setTextureType(TextureType type);
-		NVTT_API void setWrapMode(WrapMode mode);
-		NVTT_API void setAlphaMode(AlphaMode alphaMode);
-		NVTT_API void setNormalMap(bool isNormalMap);
-
-		// Accessors.
-		NVTT_API int width() const;
-		NVTT_API int height() const;
-		NVTT_API int depth() const;
-		NVTT_API int faceCount() const;
-		NVTT_API TextureType textureType() const;
-		NVTT_API WrapMode wrapMode() const;
-		NVTT_API AlphaMode alphaMode() const;
-		NVTT_API bool isNormalMap() const;
-
-		// Texture data.
-		NVTT_API bool load(const char * fileName);
-		NVTT_API bool setImage2D(InputFormat format, int w, int h, int idx, const void * data);
-		NVTT_API bool setImage2D(InputFormat format, int w, int h, int idx, const void * r, const void * g, const void * b, const void * a);
-
-		// Resizing methods.
-		NVTT_API void resize(int w, int h, ResizeFilter filter);
-		NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter);
-		NVTT_API bool buildNextMipmap(MipmapFilter filter);
-
-		// Color transforms.
-		NVTT_API void toLinear(float gamma);
-		NVTT_API void toGamma(float gamma);
-		NVTT_API void transform(const float w0[4], const float w1[4], const float w2[4], const float w3[4], const float offset[4]);
-		NVTT_API void swizzle(int r, int g, int b, int a);
-		NVTT_API void scaleBias(int channel, float scale, float bias);
-		NVTT_API void blend(float r, float g, float b, float a, float t);
-		NVTT_API void premultiplyAlpha();
-		NVTT_API void toGreyScale(float redScale, float greenScale, float blueScale, float alphaScale);
-		NVTT_API void setBorder(float r, float g, float b, float a);
-		NVTT_API void fill(float r, float g, float b, float a);
-
-		// Set normal map options.
-		NVTT_API void toNormalMap(float sm, float medium, float big, float large);
-		NVTT_API void toHeightMap();
-		NVTT_API void normalizeNormalMap();
-
-		// Compress.
-		NVTT_API void outputCompressed(const CompressionOptions & compressionOptions, const OutputOptions & outputOptions);
-
-	private:
-		void detach();
-
-		struct Private;
-		Private * m;
-	};
-
-
+	
 	// Return string for the given error code.
 	NVTT_API const char * errorString(Error e);

 	// Return NVTT version.
 	NVTT_API unsigned int version();

-	// Set callbacks.
-	//NVTT_API void setErrorCallback(ErrorCallback callback);
-	//NVTT_API void setMemoryCallbacks(...);	
-	
 } // nvtt namespace

 #endif // NV_TT_H
--- a/src/nvtt/nvtt_wrapper.h
+++ b/src/nvtt/nvtt_wrapper.h
@ -47,7 +47,7 @@
 #	define NVTT_API
 #endif

-#define NVTT_VERSION 201
+#define NVTT_VERSION 200

 #ifdef __cplusplus
 typedef struct nvtt::InputOptions NvttInputOptions;
@ -156,7 +156,6 @@ typedef enum
 	NVTT_Error_Unknown,
 	NVTT_Error_FileOpen,
 	NVTT_Error_FileWrite,
-    NVTT_Error_UnsupportedOutputFormat,
 } NvttError;

 typedef enum
--- a/src/nvtt/squish/Makefile
+++ b/src/nvtt/squish/Makefile
@ -0,0 +1,31 @@
+
+include config
+
+SRC = alpha.cpp clusterfit.cpp colourblock.cpp colourfit.cpp colourset.cpp maths.cpp rangefit.cpp singlecolourfit.cpp squish.cpp
+
+OBJ = $(SRC:%.cpp=%.o)
+
+LIB = libsquish.a
+
+all : $(LIB)
+
+install : $(LIB)
+	install squish.h $(INSTALL_DIR)/include 
+	install libsquish.a $(INSTALL_DIR)/lib
+
+uninstall:
+	$(RM) $(INSTALL_DIR)/include/squish.h
+	$(RM) $(INSTALL_DIR)/lib/libsquish.a
+
+$(LIB) : $(OBJ)
+	$(AR) cr $@ $?
+	ranlib $@
+
+%.o : %.cpp
+	$(CXX) $(CPPFLAGS) -I. $(CXXFLAGS) -o$@ -c $<
+
+clean :
+	$(RM) $(OBJ) $(LIB)
+
+
+
--- a/src/nvtt/squish/clusterfit.cpp
+++ b/src/nvtt/squish/clusterfit.cpp
@ -28,7 +28,7 @@
 #include "colourblock.h"
 #include <cfloat>

-namespace nvsquish {
+namespace squish {

 ClusterFit::ClusterFit()
 {
--- a/src/nvtt/squish/clusterfit.h
+++ b/src/nvtt/squish/clusterfit.h
@ -23,15 +23,15 @@
 	
   -------------------------------------------------------------------------- */
   
-#ifndef NV_SQUISH_CLUSTERFIT_H
-#define NV_SQUISH_CLUSTERFIT_H
+#ifndef SQUISH_CLUSTERFIT_H
+#define SQUISH_CLUSTERFIT_H

 #include "squish.h"
 #include "maths.h"
 #include "simd.h"
 #include "colourfit.h"

-namespace nvsquish {
+namespace squish {

 class ClusterFit : public ColourFit
 {
--- a/src/nvtt/squish/colourblock.cpp
+++ b/src/nvtt/squish/colourblock.cpp
@ -25,7 +25,7 @@
   
 #include "colourblock.h"

-namespace nvsquish {
+namespace squish {

 static int FloatToInt( float a, int limit )
 {
--- a/src/nvtt/squish/colourblock.h
+++ b/src/nvtt/squish/colourblock.h
@ -23,13 +23,13 @@
 	
   -------------------------------------------------------------------------- */
   
-#ifndef NV_SQUISH_COLOURBLOCK_H
-#define NV_SQUISH_COLOURBLOCK_H
+#ifndef SQUISH_COLOURBLOCK_H
+#define SQUISH_COLOURBLOCK_H

 #include "squish.h"
 #include "maths.h"

-namespace nvsquish {
+namespace squish {

 void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
 void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
--- a/src/nvtt/squish/colourfit.cpp
+++ b/src/nvtt/squish/colourfit.cpp
@ -26,7 +26,7 @@
 #include "colourfit.h"
 #include "colourset.h"

-namespace nvsquish {
+namespace squish {

 ColourFit::ColourFit()
 {
--- a/src/nvtt/squish/colourfit.h
+++ b/src/nvtt/squish/colourfit.h
@ -23,13 +23,13 @@
 	
   -------------------------------------------------------------------------- */
   
-#ifndef NV_SQUISH_COLOURFIT_H
-#define NV_SQUISH_COLOURFIT_H
+#ifndef SQUISH_COLOURFIT_H
+#define SQUISH_COLOURFIT_H

 #include "squish.h"
 #include "maths.h"

-namespace nvsquish {
+namespace squish {

 class ColourSet;

--- a/src/nvtt/squish/colourset.cpp
+++ b/src/nvtt/squish/colourset.cpp
@ -25,7 +25,7 @@
   
 #include "colourset.h"

-namespace nvsquish {
+namespace squish {

 // @@ Add flags:
 // - MatchTransparent
--- a/src/nvtt/squish/colourset.h
+++ b/src/nvtt/squish/colourset.h
@ -23,14 +23,14 @@
 	
   -------------------------------------------------------------------------- */
   
-#ifndef NV_SQUISH_COLOURSET_H
-#define NV_SQUISH_COLOURSET_H
+#ifndef SQUISH_COLOURSET_H
+#define SQUISH_COLOURSET_H

 #include "squish.h"
 #include "maths.h"
 #include "simd.h"

-namespace nvsquish {
+namespace squish {

 /*! @brief Represents a set of block colours
 */
--- a/src/nvtt/squish/extra/squishgen2.cpp
+++ b/src/nvtt/squish/extra/squishgen2.cpp
@ -1,113 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-	Copyright (c) 2008 Ignacio Castano                      castano@gmail.com
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-
-#include <stdio.h>
-#include <float.h>
-#include <math.h>
-
-struct Precomp {
-	float alpha2_sum;
-	float beta2_sum;
-	float alphabeta_sum;
-	float factor;
-};
-
-
-int main()
-{
-	int i = 0;
-	
-	printf("struct Precomp {\n");
-	printf("\tfloat alpha2_sum;\n");
-	printf("\tfloat beta2_sum;\n");
-	printf("\tfloat alphabeta_sum;\n");
-	printf("\tfloat factor;\n");
-	printf("};\n\n");
-
-	printf("static const SQUISH_ALIGN_16 Precomp s_threeElement[153] = {\n");
-	
-	// Three element clusters:
-	for( int c0 = 0; c0 <= 16; c0++)	// At least two clusters.
-	{
-		for( int c1 = 0; c1 <=  16-c0; c1++)
-		{
-			int c2 = 16 - c0 - c1;
-
-			Precomp p;
-			p.alpha2_sum = c0 + c1 * 0.25f;
-			p.beta2_sum = c2 + c1 * 0.25f;
-			p.alphabeta_sum = c1 * 0.25f;
-			p.factor = 1.0f / (p.alpha2_sum * p.beta2_sum - p.alphabeta_sum * p.alphabeta_sum);
-
-			if (isfinite(p.factor))
-			{
-				printf("\t{ %ff, %ff, %ff, %ff }, // %d (%d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, p.factor, i, c0, c1, c2);
-			}
-			else
-			{
-				printf("\t{ %ff, %ff, %ff, FLT_MAX }, // %d (%d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, i, c0, c1, c2);
-			}
-			
-			i++;
-		}
-	}
-	printf("}; // %d three cluster elements\n\n", i);
-	
-	printf("static const SQUISH_ALIGN_16 Precomp s_fourElement[969] = {\n");
-
-	// Four element clusters:
-	i = 0;
-	for( int c0 = 0; c0 <= 16; c0++)
-	{
-		for( int c1 = 0; c1 <=  16-c0; c1++)
-		{
-			for( int c2 = 0; c2 <=  16-c0-c1; c2++)
-			{
-				int c3 = 16 - c0 - c1 - c2;
-
-				Precomp p;			
-				p.alpha2_sum = c0 + c1 * (4.0f/9.0f) + c2 * (1.0f/9.0f);
-				p.beta2_sum = c3 + c2 * (4.0f/9.0f) + c1 * (1.0f/9.0f);
-				p.alphabeta_sum = (c1 + c2) * (2.0f/9.0f);
-				p.factor = 1.0f / (p.alpha2_sum * p.beta2_sum - p.alphabeta_sum * p.alphabeta_sum);
-
-				if (isfinite(p.factor))
-				{
-					printf("\t{ %ff, %ff, %ff, %ff }, // %d (%d %d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, p.factor, i, c0, c1, c2, c3);
-				}
-				else
-				{
-					printf("\t{ %ff, %ff, %ff, FLT_MAX }, // %d (%d %d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, i, c0, c1, c2, c3);
-				}
-
-				i++;
-			}
-		}
-	}
-	printf("}; // %d four cluster elements\n\n", i);
-
-	return 0;
-}
--- a/src/nvtt/squish/fastclusterfit.cpp
+++ b/src/nvtt/squish/fastclusterfit.cpp
@ -31,7 +31,7 @@

 #include "fastclusterlookup.inl"

-namespace nvsquish {
+namespace squish {

 FastClusterFit::FastClusterFit()
 {
@ -129,8 +129,6 @@ void FastClusterFit::Compress3( void* block )
 	Vec4 const zero = VEC4_CONST(0.0f);
 	Vec4 const half = VEC4_CONST(0.5f);
 	Vec4 const two = VEC4_CONST(2.0);
-	Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
-	Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
 	 
 	// declare variables
 	Vec4 beststart = VEC4_CONST( 0.0f );
@ -162,22 +160,25 @@ void FastClusterFit::Compress3( void* block )
 			Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
 			Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
 			
-			// clamp to the grid
+			// clamp the output to [0, 1]
 			a = Min( one, Max( zero, a ) );
 			b = Min( one, Max( zero, b ) );
+			
+			// clamp to the grid
+			Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
+			Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
 			a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
 			b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
 			
-			// compute the error (we skip the constant xxsum)
-			Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
-			Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
-			Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
-			Vec4 e4 = MultiplyAdd( two, e3, e1 );
-
+			// compute the error
+			Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
+			Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
+			Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
+			
 			// apply the metric to the error term
-			Vec4 e5 = e4 * m_metricSqr;
-			Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
-
+			Vec4 e4 = e3 * m_metricSqr;
+			Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
+			
 			// keep the solution if it wins
 			if( CompareAnyLessThan( error, besterror ) )
 			{
@ -273,7 +274,7 @@ void FastClusterFit::Compress4( void* block )
 				Vec4 const factor = constants.SplatW();
 				i++;
 				
-				Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0));
+				Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird);
 				Vec4 const betax_sum = m_xsum - alphax_sum;
 				
 				Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
@ -285,19 +286,18 @@ void FastClusterFit::Compress4( void* block )
 				
 				// clamp to the grid
 				Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
-				Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
+				Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
 				a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
 				b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
 				
-				// compute the error (we skip the constant xxsum)
-				Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
-				Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
-				Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
-				Vec4 e4 = MultiplyAdd( two, e3, e1 );
-
+				// compute the error
+				Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
+				Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
+				Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
+				
 				// apply the metric to the error term
-				Vec4 e5 = e4 * m_metricSqr;
-				Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
+				Vec4 e4 = e3 * m_metricSqr;
+				Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
 				
 				// keep the solution if it wins
 				if( CompareAnyLessThan( error, besterror ) )
@ -370,12 +370,6 @@ void FastClusterFit::Compress4( void* block )

 void FastClusterFit::Compress3( void* block )
 {
-	Vec3 const one( 1.0f );
-	Vec3 const zero( 0.0f );
-	Vec3 const half( 0.5f );
-	Vec3 const grid( 31.0f, 63.0f, 31.0f );
-	Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
-
 	// declare variables
 	Vec3 beststart( 0.0f );
 	Vec3 bestend( 0.0f );
@ -405,9 +399,16 @@ void FastClusterFit::Compress3( void* block )
 			Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor;
 			Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor;
 			
-			// clamp to the grid
+			// clamp the output to [0, 1]
+			Vec3 const one( 1.0f );
+			Vec3 const zero( 0.0f );
 			a = Min( one, Max( zero, a ) );
 			b = Min( one, Max( zero, b ) );
+			
+			// clamp to the grid
+			Vec3 const grid( 31.0f, 63.0f, 31.0f );
+			Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
+			Vec3 const half( 0.5f );
 			a = Floor( grid*a + half )*gridrcp;
 			b = Floor( grid*b + half )*gridrcp;
 			
@ -476,12 +477,6 @@ void FastClusterFit::Compress3( void* block )

 void FastClusterFit::Compress4( void* block )
 {
-	Vec3 const one( 1.0f );
-	Vec3 const zero( 0.0f );
-	Vec3 const half( 0.5f );
-	Vec3 const grid( 31.0f, 63.0f, 31.0f );
-	Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
-
 	// declare variables
 	Vec3 beststart( 0.0f );
 	Vec3 bestend( 0.0f );
@ -516,9 +511,16 @@ void FastClusterFit::Compress4( void* block )
 				Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
 				Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
 				
-				// clamp to the grid
+				// clamp the output to [0, 1]
+				Vec3 const one( 1.0f );
+				Vec3 const zero( 0.0f );
 				a = Min( one, Max( zero, a ) );
 				b = Min( one, Max( zero, b ) );
+				
+				// clamp to the grid
+				Vec3 const grid( 31.0f, 63.0f, 31.0f );
+				Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
+				Vec3 const half( 0.5f );
 				a = Floor( grid*a + half )*gridrcp;
 				b = Floor( grid*b + half )*gridrcp;
 				
--- a/src/nvtt/squish/fastclusterfit.h
+++ b/src/nvtt/squish/fastclusterfit.h
@ -24,15 +24,15 @@
 	
   -------------------------------------------------------------------------- */
   
-#ifndef NV_SQUISH_FASTCLUSTERFIT_H
-#define NV_SQUISH_FASTCLUSTERFIT_H
+#ifndef SQUISH_FASTCLUSTERFIT_H
+#define SQUISH_FASTCLUSTERFIT_H

 #include "squish.h"
 #include "maths.h"
 #include "simd.h"
 #include "colourfit.h"

-namespace nvsquish {
+namespace squish {

 class FastClusterFit : public ColourFit
 {
--- a/src/nvtt/squish/maths.cpp
+++ b/src/nvtt/squish/maths.cpp
@ -26,7 +26,7 @@
 #include "maths.h"
 #include <cfloat>

-namespace nvsquish {
+namespace squish {

 Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights, Vec3::Arg metric )
 {
@ -59,189 +59,28 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weight
 	return covariance;
 }

-#if 1

 Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
 {
 	const int NUM = 8;

 	Vec3 v(1, 1, 1);
-	for (int i = 0; i < NUM; i++)
-    {
+	for(int i = 0; i < NUM; i++) {
 		float x = v.X() * matrix[0] + v.Y() * matrix[1] + v.Z() * matrix[2];
 		float y = v.X() * matrix[1] + v.Y() * matrix[3] + v.Z() * matrix[4];
 		float z = v.X() * matrix[2] + v.Y() * matrix[4] + v.Z() * matrix[5];
 		
 		float norm = std::max(std::max(x, y), z);
-
 		float iv = 1.0f / norm;
+		if (norm == 0.0f) {		// @@ I think this is not necessary in this case!!
+			return Vec3(0.0f);
+		}
+		
 		v = Vec3(x*iv, y*iv, z*iv);
 	}

 	return v;
 }

-#else
-
-static Vec3 GetMultiplicity1Evector( Sym3x3 const& matrix, float evalue )
-{
-        // compute M
-        Sym3x3 m;
-        m[0] = matrix[0] - evalue;
-        m[1] = matrix[1];
-        m[2] = matrix[2];
-        m[3] = matrix[3] - evalue;
-        m[4] = matrix[4];
-        m[5] = matrix[5] - evalue;
-
-        // compute U
-        Sym3x3 u;
-        u[0] = m[3]*m[5] - m[4]*m[4];
-        u[1] = m[2]*m[4] - m[1]*m[5];
-        u[2] = m[1]*m[4] - m[2]*m[3];
-        u[3] = m[0]*m[5] - m[2]*m[2];
-        u[4] = m[1]*m[2] - m[4]*m[0];
-        u[5] = m[0]*m[3] - m[1]*m[1];
-
-        // find the largest component
-        float mc = std::fabs( u[0] );
-        int mi = 0;
-        for( int i = 1; i < 6; ++i )
-        {
-                float c = std::fabs( u[i] );
-                if( c > mc )
-                {
-                        mc = c;
-                        mi = i;
-                }
-        }
-
-        // pick the column with this component
-        switch( mi )
-        {
-        case 0:
-                return Vec3( u[0], u[1], u[2] );
-
-        case 1:
-        case 3:
-                return Vec3( u[1], u[3], u[4] );
-
-        default:
-                return Vec3( u[2], u[4], u[5] );
-        }
-}
-
-static Vec3 GetMultiplicity2Evector( Sym3x3 const& matrix, float evalue )
-{
-        // compute M
-        Sym3x3 m;
-        m[0] = matrix[0] - evalue;
-        m[1] = matrix[1];
-        m[2] = matrix[2];
-        m[3] = matrix[3] - evalue;
-        m[4] = matrix[4];
-        m[5] = matrix[5] - evalue;
-
-        // find the largest component
-        float mc = std::fabs( m[0] );
-        int mi = 0;
-        for( int i = 1; i < 6; ++i )
-        {
-                float c = std::fabs( m[i] );
-                if( c > mc )
-                {
-                        mc = c;
-                        mi = i;
-                }
-        }
-
-        // pick the first eigenvector based on this index
-        switch( mi )
-        {
-        case 0:
-        case 1:
-                return Vec3( -m[1], m[0], 0.0f );
-
-        case 2:
-                return Vec3( m[2], 0.0f, -m[0] );
-
-        case 3:
-        case 4:
-                return Vec3( 0.0f, -m[4], m[3] );
-
-        default:
-                return Vec3( 0.0f, -m[5], m[4] );
-        }
-}
-
-Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
-{
-        // compute the cubic coefficients
-        float c0 = matrix[0]*matrix[3]*matrix[5] 
-                + 2.0f*matrix[1]*matrix[2]*matrix[4] 
-                - matrix[0]*matrix[4]*matrix[4] 
-                - matrix[3]*matrix[2]*matrix[2] 
-                - matrix[5]*matrix[1]*matrix[1];
-        float c1 = matrix[0]*matrix[3] + matrix[0]*matrix[5] + matrix[3]*matrix[5]
-                - matrix[1]*matrix[1] - matrix[2]*matrix[2] - matrix[4]*matrix[4];
-        float c2 = matrix[0] + matrix[3] + matrix[5];
-
-        // compute the quadratic coefficients
-        float a = c1 - ( 1.0f/3.0f )*c2*c2;
-        float b = ( -2.0f/27.0f )*c2*c2*c2 + ( 1.0f/3.0f )*c1*c2 - c0;
-
-        // compute the root count check
-        float Q = 0.25f*b*b + ( 1.0f/27.0f )*a*a*a;
-
-        // test the multiplicity
-        if( FLT_EPSILON < Q )
-        {
-                // only one root, which implies we have a multiple of the identity
-        return Vec3( 1.0f );
-        }
-        else if( Q < -FLT_EPSILON )
-        {
-                // three distinct roots
-                float theta = std::atan2( std::sqrt( -Q ), -0.5f*b );
-                float rho = std::sqrt( 0.25f*b*b - Q );
-
-                float rt = std::pow( rho, 1.0f/3.0f );
-                float ct = std::cos( theta/3.0f );
-                float st = std::sin( theta/3.0f );
-
-                float l1 = ( 1.0f/3.0f )*c2 + 2.0f*rt*ct;
-                float l2 = ( 1.0f/3.0f )*c2 - rt*( ct + ( float )sqrt( 3.0f )*st );
-                float l3 = ( 1.0f/3.0f )*c2 - rt*( ct - ( float )sqrt( 3.0f )*st );
-
-                // pick the larger
-                if( std::fabs( l2 ) > std::fabs( l1 ) )
-                        l1 = l2;
-                if( std::fabs( l3 ) > std::fabs( l1 ) )
-                        l1 = l3;
-
-                // get the eigenvector
-                return GetMultiplicity1Evector( matrix, l1 );
-        }
-        else // if( -FLT_EPSILON <= Q && Q <= FLT_EPSILON )
-        {
-                // two roots
-                float rt;
-                if( b < 0.0f )
-                        rt = -std::pow( -0.5f*b, 1.0f/3.0f );
-                else
-                        rt = std::pow( 0.5f*b, 1.0f/3.0f );
-                
-                float l1 = ( 1.0f/3.0f )*c2 + rt;               // repeated
-                float l2 = ( 1.0f/3.0f )*c2 - 2.0f*rt;
-                
-                // get the eigenvector
-                if( std::fabs( l1 ) > std::fabs( l2 ) )
-                        return GetMultiplicity2Evector( matrix, l1 );
-                else
-                        return GetMultiplicity1Evector( matrix, l2 );
-        }
-}
-#endif
-

 } // namespace squish
--- a/src/nvtt/squish/maths.h
+++ b/src/nvtt/squish/maths.h
@ -23,14 +23,14 @@
 	
   -------------------------------------------------------------------------- */
   
-#ifndef NV_SQUISH_MATHS_H
-#define NV_SQUISH_MATHS_H
+#ifndef SQUISH_MATHS_H
+#define SQUISH_MATHS_H

 #include <cmath>
 #include <algorithm>
 #include "config.h"

-namespace nvsquish {
+namespace squish {

 class Vec3
 {
--- a/src/nvtt/squish/simd.h
+++ b/src/nvtt/squish/simd.h
@ -23,8 +23,8 @@
 	
   -------------------------------------------------------------------------- */
   
-#ifndef NV_SQUISH_SIMD_H
-#define NV_SQUISH_SIMD_H
+#ifndef SQUISH_SIMD_H
+#define SQUISH_SIMD_H

 #include "maths.h"

--- a/src/nvtt/squish/simd_sse.h
+++ b/src/nvtt/squish/simd_sse.h
@ -23,8 +23,8 @@
 	
   -------------------------------------------------------------------------- */
   
-#ifndef NV_SQUISH_SIMD_SSE_H
-#define NV_SQUISH_SIMD_SSE_H
+#ifndef SQUISH_SIMD_SSE_H
+#define SQUISH_SIMD_SSE_H

 #include <xmmintrin.h>
 #if ( SQUISH_USE_SSE > 1 )
@ -35,7 +35,7 @@
 #define SQUISH_SSE_SPLAT( a )										\
 	( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) )

-namespace nvsquish {
+namespace squish {

 #define VEC4_CONST( X ) Vec4( _mm_set1_ps( X ) )

--- a/src/nvtt/squish/simd_ve.h
+++ b/src/nvtt/squish/simd_ve.h
@ -26,12 +26,10 @@
 #ifndef SQUISH_SIMD_VE_H
 #define SQUISH_SIMD_VE_H

-#ifndef __APPLE_ALTIVEC__
 #include <altivec.h>
 #undef bool
-#endif

-namespace nvsquish {
+namespace squish {

 #define VEC4_CONST( X ) Vec4( ( vector float )( X ) )

--- a/src/nvtt/squish/squish.h
+++ b/src/nvtt/squish/squish.h
@ -23,11 +23,11 @@
 	
   -------------------------------------------------------------------------- */
   
-#ifndef NV_SQUISH_H
-#define NV_SQUISH_H
+#ifndef SQUISH_H
+#define SQUISH_H

 //! All squish API functions live in this namespace.
-namespace nvsquish {
+namespace squish {

 // -----------------------------------------------------------------------------

--- a/src/nvtt/squish/squish.xcodeproj/project.pbxproj
+++ b/src/nvtt/squish/squish.xcodeproj/project.pbxproj
@ -0,0 +1,531 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 42;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		133FA0DC096A7B8E0050752E /* alpha.h in Headers */ = {isa = PBXBuildFile; fileRef = 133FA0DA096A7B8E0050752E /* alpha.h */; };
+		133FA0DD096A7B8E0050752E /* alpha.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 133FA0DB096A7B8E0050752E /* alpha.cpp */; };
+		1342B4160999DF1900152915 /* libsquish.a in Frameworks */ = {isa = PBXBuildFile; fileRef = D2AAC046055464E500DB518D /* libsquish.a */; };
+		1342B41A0999DF7000152915 /* squishpng.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1342B4190999DF7000152915 /* squishpng.cpp */; };
+		1342B43F0999E0CC00152915 /* squishtest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1342B43E0999E0CC00152915 /* squishtest.cpp */; };
+		1342B4420999E0EC00152915 /* libsquish.a in Frameworks */ = {isa = PBXBuildFile; fileRef = D2AAC046055464E500DB518D /* libsquish.a */; };
+		1350D71A092AA858005EE038 /* clusterfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D70B092AA857005EE038 /* clusterfit.cpp */; };
+		1350D71B092AA858005EE038 /* clusterfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D70C092AA858005EE038 /* clusterfit.h */; };
+		1350D71E092AA858005EE038 /* colourblock.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D70F092AA858005EE038 /* colourblock.cpp */; };
+		1350D71F092AA858005EE038 /* colourblock.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D710092AA858005EE038 /* colourblock.h */; };
+		1350D720092AA858005EE038 /* config.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D711092AA858005EE038 /* config.h */; };
+		1350D721092AA858005EE038 /* maths.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D712092AA858005EE038 /* maths.cpp */; };
+		1350D722092AA858005EE038 /* maths.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D713092AA858005EE038 /* maths.h */; };
+		1350D725092AA858005EE038 /* rangefit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D716092AA858005EE038 /* rangefit.cpp */; };
+		1350D726092AA858005EE038 /* rangefit.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D717092AA858005EE038 /* rangefit.h */; };
+		1350D727092AA858005EE038 /* squish.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D718092AA858005EE038 /* squish.cpp */; };
+		1350D728092AA858005EE038 /* squish.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D719092AA858005EE038 /* squish.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		139C21CF09ADAB0800A2500D /* squishgen.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 139C21CE09ADAB0800A2500D /* squishgen.cpp */; };
+		139C234F09B0602700A2500D /* singlecolourfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 139C234D09B0602700A2500D /* singlecolourfit.h */; };
+		139C235009B0602700A2500D /* singlecolourfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 139C234E09B0602700A2500D /* singlecolourfit.cpp */; };
+		13A7CCA40952BE63001C963A /* colourfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 13A7CCA20952BE63001C963A /* colourfit.h */; };
+		13A7CCA50952BE63001C963A /* colourfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 13A7CCA30952BE63001C963A /* colourfit.cpp */; };
+		13C4C7AD0941C18000AC5B89 /* colourset.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 13C4C7AB0941C18000AC5B89 /* colourset.cpp */; };
+		13C4C7AE0941C18000AC5B89 /* colourset.h in Headers */ = {isa = PBXBuildFile; fileRef = 13C4C7AC0941C18000AC5B89 /* colourset.h */; };
+		13CD64C2092BCF8A00488C97 /* simd.h in Headers */ = {isa = PBXBuildFile; fileRef = 13CD64C0092BCF8A00488C97 /* simd.h */; };
+		13D0DC910931F93A00909807 /* simd_ve.h in Headers */ = {isa = PBXBuildFile; fileRef = 13D0DC900931F93A00909807 /* simd_ve.h */; };
+		13D0DC970931F9D600909807 /* simd_sse.h in Headers */ = {isa = PBXBuildFile; fileRef = 13D0DC960931F9D600909807 /* simd_sse.h */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXContainerItemProxy section */
+		1342B52B099BF72F00152915 /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = D2AAC045055464E500DB518D;
+			remoteInfo = squish;
+		};
+		1342B58E099BF93D00152915 /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = D2AAC045055464E500DB518D;
+			remoteInfo = squish;
+		};
+/* End PBXContainerItemProxy section */
+
+/* Begin PBXFileReference section */
+		133FA0DA096A7B8E0050752E /* alpha.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alpha.h; sourceTree = "<group>"; };
+		133FA0DB096A7B8E0050752E /* alpha.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alpha.cpp; sourceTree = "<group>"; };
+		1342B4110999DE7F00152915 /* squishpng */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squishpng; sourceTree = BUILT_PRODUCTS_DIR; };
+		1342B4190999DF7000152915 /* squishpng.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = squishpng.cpp; path = extra/squishpng.cpp; sourceTree = "<group>"; };
+		1342B4370999E07C00152915 /* squishtest */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squishtest; sourceTree = BUILT_PRODUCTS_DIR; };
+		1342B43E0999E0CC00152915 /* squishtest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = squishtest.cpp; path = extra/squishtest.cpp; sourceTree = "<group>"; };
+		1350D70B092AA857005EE038 /* clusterfit.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = clusterfit.cpp; sourceTree = "<group>"; };
+		1350D70C092AA858005EE038 /* clusterfit.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = clusterfit.h; sourceTree = "<group>"; };
+		1350D70F092AA858005EE038 /* colourblock.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = colourblock.cpp; sourceTree = "<group>"; };
+		1350D710092AA858005EE038 /* colourblock.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = colourblock.h; sourceTree = "<group>"; };
+		1350D711092AA858005EE038 /* config.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = config.h; sourceTree = "<group>"; };
+		1350D712092AA858005EE038 /* maths.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = maths.cpp; sourceTree = "<group>"; };
+		1350D713092AA858005EE038 /* maths.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = maths.h; sourceTree = "<group>"; };
+		1350D716092AA858005EE038 /* rangefit.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = rangefit.cpp; sourceTree = "<group>"; };
+		1350D717092AA858005EE038 /* rangefit.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = rangefit.h; sourceTree = "<group>"; };
+		1350D718092AA858005EE038 /* squish.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = squish.cpp; sourceTree = "<group>"; };
+		1350D719092AA858005EE038 /* squish.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = squish.h; sourceTree = "<group>"; };
+		13906CE3096938880000A6A7 /* texture_compression_s3tc.txt */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = texture_compression_s3tc.txt; sourceTree = "<group>"; };
+		139C21C409ADAA7000A2500D /* squishgen */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squishgen; sourceTree = BUILT_PRODUCTS_DIR; };
+		139C21CE09ADAB0800A2500D /* squishgen.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = squishgen.cpp; path = extra/squishgen.cpp; sourceTree = "<group>"; };
+		139C234D09B0602700A2500D /* singlecolourfit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = singlecolourfit.h; sourceTree = "<group>"; };
+		139C234E09B0602700A2500D /* singlecolourfit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = singlecolourfit.cpp; sourceTree = "<group>"; };
+		139C236D09B060A900A2500D /* singlecolourlookup.inl */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = singlecolourlookup.inl; sourceTree = "<group>"; };
+		13A7CCA20952BE63001C963A /* colourfit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colourfit.h; sourceTree = "<group>"; };
+		13A7CCA30952BE63001C963A /* colourfit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colourfit.cpp; sourceTree = "<group>"; };
+		13C4C7AB0941C18000AC5B89 /* colourset.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = colourset.cpp; sourceTree = "<group>"; };
+		13C4C7AC0941C18000AC5B89 /* colourset.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = colourset.h; sourceTree = "<group>"; };
+		13CD64C0092BCF8A00488C97 /* simd.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simd.h; sourceTree = "<group>"; };
+		13D0DC900931F93A00909807 /* simd_ve.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simd_ve.h; sourceTree = "<group>"; };
+		13D0DC960931F9D600909807 /* simd_sse.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simd_sse.h; sourceTree = "<group>"; };
+		D2AAC046055464E500DB518D /* libsquish.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libsquish.a; sourceTree = BUILT_PRODUCTS_DIR; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		1342B40F0999DE7F00152915 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				1342B4160999DF1900152915 /* libsquish.a in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		1342B4350999E07C00152915 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				1342B4420999E0EC00152915 /* libsquish.a in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		139C21C209ADAA7000A2500D /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		D289987405E68DCB004EDB86 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		08FB7794FE84155DC02AAC07 /* squish */ = {
+			isa = PBXGroup;
+			children = (
+				08FB7795FE84155DC02AAC07 /* Source */,
+				C6A0FF2B0290797F04C91782 /* Documentation */,
+				1AB674ADFE9D54B511CA2CBB /* Products */,
+			);
+			name = squish;
+			sourceTree = "<group>";
+		};
+		08FB7795FE84155DC02AAC07 /* Source */ = {
+			isa = PBXGroup;
+			children = (
+				133FA0DB096A7B8E0050752E /* alpha.cpp */,
+				133FA0DA096A7B8E0050752E /* alpha.h */,
+				1350D70B092AA857005EE038 /* clusterfit.cpp */,
+				1350D70C092AA858005EE038 /* clusterfit.h */,
+				13A7CCA30952BE63001C963A /* colourfit.cpp */,
+				13A7CCA20952BE63001C963A /* colourfit.h */,
+				13C4C7AB0941C18000AC5B89 /* colourset.cpp */,
+				13C4C7AC0941C18000AC5B89 /* colourset.h */,
+				1350D70F092AA858005EE038 /* colourblock.cpp */,
+				1350D710092AA858005EE038 /* colourblock.h */,
+				13906CE3096938880000A6A7 /* texture_compression_s3tc.txt */,
+				1350D711092AA858005EE038 /* config.h */,
+				1350D712092AA858005EE038 /* maths.cpp */,
+				1350D713092AA858005EE038 /* maths.h */,
+				1350D716092AA858005EE038 /* rangefit.cpp */,
+				1350D717092AA858005EE038 /* rangefit.h */,
+				13CD64C0092BCF8A00488C97 /* simd.h */,
+				13D0DC960931F9D600909807 /* simd_sse.h */,
+				13D0DC900931F93A00909807 /* simd_ve.h */,
+				139C234E09B0602700A2500D /* singlecolourfit.cpp */,
+				139C234D09B0602700A2500D /* singlecolourfit.h */,
+				139C236D09B060A900A2500D /* singlecolourlookup.inl */,
+				1350D718092AA858005EE038 /* squish.cpp */,
+				1350D719092AA858005EE038 /* squish.h */,
+				139C21CE09ADAB0800A2500D /* squishgen.cpp */,
+				1342B4190999DF7000152915 /* squishpng.cpp */,
+				1342B43E0999E0CC00152915 /* squishtest.cpp */,
+			);
+			name = Source;
+			sourceTree = "<group>";
+		};
+		1AB674ADFE9D54B511CA2CBB /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				D2AAC046055464E500DB518D /* libsquish.a */,
+				1342B4110999DE7F00152915 /* squishpng */,
+				1342B4370999E07C00152915 /* squishtest */,
+				139C21C409ADAA7000A2500D /* squishgen */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		C6A0FF2B0290797F04C91782 /* Documentation */ = {
+			isa = PBXGroup;
+			children = (
+			);
+			name = Documentation;
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+		D2AAC043055464E500DB518D /* Headers */ = {
+			isa = PBXHeadersBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				1350D71B092AA858005EE038 /* clusterfit.h in Headers */,
+				1350D71F092AA858005EE038 /* colourblock.h in Headers */,
+				1350D720092AA858005EE038 /* config.h in Headers */,
+				1350D722092AA858005EE038 /* maths.h in Headers */,
+				1350D726092AA858005EE038 /* rangefit.h in Headers */,
+				1350D728092AA858005EE038 /* squish.h in Headers */,
+				13CD64C2092BCF8A00488C97 /* simd.h in Headers */,
+				13D0DC910931F93A00909807 /* simd_ve.h in Headers */,
+				13D0DC970931F9D600909807 /* simd_sse.h in Headers */,
+				13C4C7AE0941C18000AC5B89 /* colourset.h in Headers */,
+				13A7CCA40952BE63001C963A /* colourfit.h in Headers */,
+				133FA0DC096A7B8E0050752E /* alpha.h in Headers */,
+				139C234F09B0602700A2500D /* singlecolourfit.h in Headers */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+		1342B4100999DE7F00152915 /* squishpng */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 1342B4130999DE9F00152915 /* Build configuration list for PBXNativeTarget "squishpng" */;
+			buildPhases = (
+				1342B40E0999DE7F00152915 /* Sources */,
+				1342B40F0999DE7F00152915 /* Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+				1342B58F099BF93D00152915 /* PBXTargetDependency */,
+			);
+			name = squishpng;
+			productName = squishpng;
+			productReference = 1342B4110999DE7F00152915 /* squishpng */;
+			productType = "com.apple.product-type.tool";
+		};
+		1342B4360999E07C00152915 /* squishtest */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 1342B43B0999E0C000152915 /* Build configuration list for PBXNativeTarget "squishtest" */;
+			buildPhases = (
+				1342B4340999E07C00152915 /* Sources */,
+				1342B4350999E07C00152915 /* Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+				1342B52C099BF72F00152915 /* PBXTargetDependency */,
+			);
+			name = squishtest;
+			productName = squishtest;
+			productReference = 1342B4370999E07C00152915 /* squishtest */;
+			productType = "com.apple.product-type.tool";
+		};
+		139C21C309ADAA7000A2500D /* squishgen */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 139C21CB09ADAB0300A2500D /* Build configuration list for PBXNativeTarget "squishgen" */;
+			buildPhases = (
+				139C21C109ADAA7000A2500D /* Sources */,
+				139C21C209ADAA7000A2500D /* Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = squishgen;
+			productName = squishgen;
+			productReference = 139C21C409ADAA7000A2500D /* squishgen */;
+			productType = "com.apple.product-type.tool";
+		};
+		D2AAC045055464E500DB518D /* squish */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "squish" */;
+			buildPhases = (
+				D2AAC043055464E500DB518D /* Headers */,
+				D2AAC044055464E500DB518D /* Sources */,
+				D289987405E68DCB004EDB86 /* Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = squish;
+			productName = squish;
+			productReference = D2AAC046055464E500DB518D /* libsquish.a */;
+			productType = "com.apple.product-type.library.static";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		08FB7793FE84155DC02AAC07 /* Project object */ = {
+			isa = PBXProject;
+			buildConfigurationList = 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "squish" */;
+			hasScannedForEncodings = 1;
+			mainGroup = 08FB7794FE84155DC02AAC07 /* squish */;
+			projectDirPath = "";
+			targets = (
+				D2AAC045055464E500DB518D /* squish */,
+				1342B4100999DE7F00152915 /* squishpng */,
+				1342B4360999E07C00152915 /* squishtest */,
+				139C21C309ADAA7000A2500D /* squishgen */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXSourcesBuildPhase section */
+		1342B40E0999DE7F00152915 /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				1342B41A0999DF7000152915 /* squishpng.cpp in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		1342B4340999E07C00152915 /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				1342B43F0999E0CC00152915 /* squishtest.cpp in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		139C21C109ADAA7000A2500D /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				139C21CF09ADAB0800A2500D /* squishgen.cpp in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		D2AAC044055464E500DB518D /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				1350D71A092AA858005EE038 /* clusterfit.cpp in Sources */,
+				1350D71E092AA858005EE038 /* colourblock.cpp in Sources */,
+				1350D721092AA858005EE038 /* maths.cpp in Sources */,
+				1350D725092AA858005EE038 /* rangefit.cpp in Sources */,
+				1350D727092AA858005EE038 /* squish.cpp in Sources */,
+				13C4C7AD0941C18000AC5B89 /* colourset.cpp in Sources */,
+				13A7CCA50952BE63001C963A /* colourfit.cpp in Sources */,
+				133FA0DD096A7B8E0050752E /* alpha.cpp in Sources */,
+				139C235009B0602700A2500D /* singlecolourfit.cpp in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin PBXTargetDependency section */
+		1342B52C099BF72F00152915 /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = D2AAC045055464E500DB518D /* squish */;
+			targetProxy = 1342B52B099BF72F00152915 /* PBXContainerItemProxy */;
+		};
+		1342B58F099BF93D00152915 /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = D2AAC045055464E500DB518D /* squish */;
+			targetProxy = 1342B58E099BF93D00152915 /* PBXContainerItemProxy */;
+		};
+/* End PBXTargetDependency section */
+
+/* Begin XCBuildConfiguration section */
+		1342B4140999DE9F00152915 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				HEADER_SEARCH_PATHS = (
+					..,
+					/sw/include,
+				);
+				INSTALL_PATH = "$(HOME)/bin";
+				LIBRARY_SEARCH_PATHS = /sw/lib;
+				OTHER_LDFLAGS = "-lpng";
+				PRODUCT_NAME = squishpng;
+			};
+			name = Debug;
+		};
+		1342B4150999DE9F00152915 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				HEADER_SEARCH_PATHS = (
+					..,
+					/sw/include,
+				);
+				INSTALL_PATH = "$(HOME)/bin";
+				LIBRARY_SEARCH_PATHS = /sw/lib;
+				OTHER_LDFLAGS = "-lpng";
+				PRODUCT_NAME = squishpng;
+			};
+			name = Release;
+		};
+		1342B43C0999E0C000152915 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				HEADER_SEARCH_PATHS = ..;
+				INSTALL_PATH = "$(HOME)/bin";
+				PRODUCT_NAME = squishtest;
+			};
+			name = Debug;
+		};
+		1342B43D0999E0C000152915 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				HEADER_SEARCH_PATHS = ..;
+				INSTALL_PATH = "$(HOME)/bin";
+				PRODUCT_NAME = squishtest;
+			};
+			name = Release;
+		};
+		139C21CC09ADAB0300A2500D /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				HEADER_SEARCH_PATHS = ..;
+				INSTALL_PATH = "$(HOME)/bin";
+				PRODUCT_NAME = squishgen;
+			};
+			name = Debug;
+		};
+		139C21CD09ADAB0300A2500D /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				HEADER_SEARCH_PATHS = ..;
+				INSTALL_PATH = "$(HOME)/bin";
+				PRODUCT_NAME = squishgen;
+			};
+			name = Release;
+		};
+		1DEB91EC08733DB70010E9CD /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				COPY_PHASE_STRIP = NO;
+				GCC_PREPROCESSOR_DEFINITIONS = "SQUISH_USE_ALTIVEC=1";
+				INSTALL_PATH = /usr/local/lib;
+				OTHER_CFLAGS = "-maltivec";
+				PRODUCT_NAME = squish;
+				STRIP_INSTALLED_PRODUCT = NO;
+			};
+			name = Debug;
+		};
+		1DEB91ED08733DB70010E9CD /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_PREPROCESSOR_DEFINITIONS = "SQUISH_USE_ALTIVEC=1";
+				INSTALL_PATH = /usr/local/lib;
+				OTHER_CFLAGS = "-maltivec";
+				PRODUCT_NAME = squish;
+				STRIP_INSTALLED_PRODUCT = YES;
+			};
+			name = Release;
+		};
+		1DEB91F008733DB70010E9CD /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_DYNAMIC_NO_PIC = YES;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				GCC_TREAT_WARNINGS_AS_ERRORS = YES;
+				GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES;
+				GCC_WARN_PEDANTIC = YES;
+				GCC_WARN_SHADOW = YES;
+				GCC_WARN_SIGN_COMPARE = YES;
+				GCC_WARN_UNUSED_PARAMETER = YES;
+				GCC_WARN_UNUSED_VALUE = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				PREBINDING = NO;
+				SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+			};
+			name = Debug;
+		};
+		1DEB91F108733DB70010E9CD /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_DYNAMIC_NO_PIC = YES;
+				GCC_OPTIMIZATION_LEVEL = 3;
+				GCC_TREAT_WARNINGS_AS_ERRORS = YES;
+				GCC_UNROLL_LOOPS = YES;
+				GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES;
+				GCC_WARN_PEDANTIC = YES;
+				GCC_WARN_SHADOW = YES;
+				GCC_WARN_SIGN_COMPARE = YES;
+				GCC_WARN_UNUSED_PARAMETER = YES;
+				GCC_WARN_UNUSED_VALUE = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				PREBINDING = NO;
+				SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		1342B4130999DE9F00152915 /* Build configuration list for PBXNativeTarget "squishpng" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				1342B4140999DE9F00152915 /* Debug */,
+				1342B4150999DE9F00152915 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		1342B43B0999E0C000152915 /* Build configuration list for PBXNativeTarget "squishtest" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				1342B43C0999E0C000152915 /* Debug */,
+				1342B43D0999E0C000152915 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		139C21CB09ADAB0300A2500D /* Build configuration list for PBXNativeTarget "squishgen" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				139C21CC09ADAB0300A2500D /* Debug */,
+				139C21CD09ADAB0300A2500D /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "squish" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				1DEB91EC08733DB70010E9CD /* Debug */,
+				1DEB91ED08733DB70010E9CD /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "squish" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				1DEB91F008733DB70010E9CD /* Debug */,
+				1DEB91F108733DB70010E9CD /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
+}
--- a/src/nvtt/squish/texture_compression_s3tc.txt
+++ b/src/nvtt/squish/texture_compression_s3tc.txt
@ -0,0 +1,508 @@
+Name
+
+    EXT_texture_compression_s3tc
+
+Name Strings
+
+    GL_EXT_texture_compression_s3tc
+
+Contact
+
+    Pat Brown, NVIDIA Corporation (pbrown 'at' nvidia.com)
+
+Status
+
+    FINAL
+
+Version
+
+    1.1, 16 November 2001 (containing only clarifications relative to
+                           version 1.0, dated 7 July 2000)
+
+Number
+
+    198
+
+Dependencies
+
+    OpenGL 1.1 is required.
+
+    GL_ARB_texture_compression is required.
+
+    This extension is written against the OpenGL 1.2.1 Specification.
+
+Overview
+
+    This extension provides additional texture compression functionality
+    specific to S3's S3TC format (called DXTC in Microsoft's DirectX API),
+    subject to all the requirements and limitations described by the extension
+    GL_ARB_texture_compression.
+
+    This extension supports DXT1, DXT3, and DXT5 texture compression formats.
+    For the DXT1 image format, this specification supports an RGB-only mode
+    and a special RGBA mode with single-bit "transparent" alpha.
+
+IP Status
+
+    Contact S3 Incorporated (http://www.s3.com) regarding any intellectual
+    property issues associated with implementing this extension.
+
+    WARNING:  Vendors able to support S3TC texture compression in Direct3D
+    drivers do not necessarily have the right to use the same functionality in
+    OpenGL.
+
+Issues
+
+    (1) Should DXT2 and DXT4 (premultiplied alpha) formats be supported?
+
+        RESOLVED:  No -- insufficient interest.  Supporting DXT2 and DXT4
+        would require some rework to the TexEnv definition (maybe add a new
+        base internal format RGBA_PREMULTIPLIED_ALPHA) for these formats.
+        Note that the EXT_texture_env_combine extension (which extends normal
+        TexEnv modes) can be used to support textures with premultiplied alpha.
+
+    (2) Should generic "RGB_S3TC_EXT" and "RGBA_S3TC_EXT" enums be supported
+        or should we use only the DXT<n> enums?  
+
+        RESOLVED:  No.  A generic RGBA_S3TC_EXT is problematic because DXT3
+        and DXT5 are both nominally RGBA (and DXT1 with the 1-bit alpha is
+        also) yet one format must be chosen up front.
+
+    (3) Should TexSubImage support all block-aligned edits or just the minimal
+        functionality required by the ARB_texture_compression extension?
+
+        RESOLVED:  Allow all valid block-aligned edits.
+
+    (4) A pre-compressed image with a DXT1 format can be used as either an
+        RGB_S3TC_DXT1 or an RGBA_S3TC_DXT1 image.  If the image has
+        transparent texels, how are they treated in each format?
+
+        RESOLVED:  The renderer has to make sure that an RGB_S3TC_DXT1 format
+        is decoded as RGB (where alpha is effectively one for all texels),
+        while RGBA_S3TC_DXT1 is decoded as RGBA (where alpha is zero for all
+        texels with "transparent" encodings).  Otherwise, the formats are
+        identical.
+
+    (5) Is the encoding of the RGB components for DXT1 formats correct in this
+        spec?  MSDN documentation does not specify an RGB color for the
+        "transparent" encoding.  Is it really black?
+
+        RESOLVED:  Yes.  The specification for the DXT1 format initially
+        required black, but later changed that requirement to a
+        recommendation.  All vendors involved in the definition of this
+        specification support black.  In addition, specifying black has a
+        useful behavior.
+
+        When blending multiple texels (GL_LINEAR filtering), mixing opaque and
+        transparent samples is problematic.  Defining a black color on
+        transparent texels achieves a sensible result that works like a
+        texture with premultiplied alpha.  For example, if three opaque white
+        and one transparent sample are being averaged, the result would be a
+        75% intensity gray (with an alpha of 75%).  This is the same result on
+        the color channels as would be obtained using a white color, 75%
+        alpha, and a SRC_ALPHA blend factor.
+
+    (6) Is the encoding of the RGB components for DXT3 and DXT5 formats
+        correct in this spec?  MSDN documentation suggests that the RGB blocks
+        for DXT3 and DXT5 are decoded as described by the DXT1 format.
+
+        RESOLVED:  Yes -- this appears to be a bug in the MSDN documentation.
+        The specification for the DXT2-DXT5 formats requires decoding using the
+        opaque block encoding, regardless of the relative values of "color0"
+        and "color1".
+
+New Procedures and Functions
+
+    None.
+
+New Tokens
+
+    Accepted by the <internalformat> parameter of TexImage2D, CopyTexImage2D,
+    and CompressedTexImage2DARB and the <format> parameter of
+    CompressedTexSubImage2DARB:
+
+        COMPRESSED_RGB_S3TC_DXT1_EXT                   0x83F0
+        COMPRESSED_RGBA_S3TC_DXT1_EXT                  0x83F1
+        COMPRESSED_RGBA_S3TC_DXT3_EXT                  0x83F2
+        COMPRESSED_RGBA_S3TC_DXT5_EXT                  0x83F3
+
+Additions to Chapter 2 of the OpenGL 1.2.1 Specification (OpenGL Operation)
+
+    None.
+
+Additions to Chapter 3 of the OpenGL 1.2.1 Specification (Rasterization)
+
+    Add to Table 3.16.1:  Specific Compressed Internal Formats
+
+        Compressed Internal Format         Base Internal Format
+        ==========================         ====================
+        COMPRESSED_RGB_S3TC_DXT1_EXT       RGB
+        COMPRESSED_RGBA_S3TC_DXT1_EXT      RGBA
+        COMPRESSED_RGBA_S3TC_DXT3_EXT      RGBA
+        COMPRESSED_RGBA_S3TC_DXT5_EXT      RGBA
+
+    
+    Modify Section 3.8.2, Alternate Image Specification
+
+    (add to end of TexSubImage discussion, p.123 -- after edit from the
+    ARB_texture_compression spec)
+
+    If the internal format of the texture image being modified is
+    COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
+    COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the
+    texture is stored using one of the several S3TC compressed texture image
+    formats.  Such images are easily edited along 4x4 texel boundaries, so the
+    limitations on TexSubImage2D or CopyTexSubImage2D parameters are relaxed.
+    TexSubImage2D and CopyTexSubImage2D will result in an INVALID_OPERATION
+    error only if one of the following conditions occurs:
+
+        * <width> is not a multiple of four or equal to TEXTURE_WIDTH, 
+          unless <xoffset> and <yoffset> are both zero.
+        * <height> is not a multiple of four or equal to TEXTURE_HEIGHT,
+          unless <xoffset> and <yoffset> are both zero.
+        * <xoffset> or <yoffset> is not a multiple of four.
+
+    The contents of any 4x4 block of texels of an S3TC compressed texture
+    image that does not intersect the area being modified are preserved during
+    valid TexSubImage2D and CopyTexSubImage2D calls.
+
+
+    Add to Section 3.8.2, Alternate Image Specification (adding to the end of
+    the CompressedTexImage section introduced by the ARB_texture_compression
+    spec)
+
+    If <internalformat> is COMPRESSED_RGB_S3TC_DXT1_EXT,
+    COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
+    COMPRESSED_RGBA_S3TC_DXT5_EXT, the compressed texture is stored using one
+    of several S3TC compressed texture image formats.  The S3TC texture
+    compression algorithm supports only 2D images without borders.
+    CompressedTexImage1DARB and CompressedTexImage3DARB produce an
+    INVALID_ENUM error if <internalformat> is an S3TC format.
+    CompressedTexImage2DARB will produce an INVALID_OPERATION error if
+    <border> is non-zero.
+
+
+    Add to Section 3.8.2, Alternate Image Specification (adding to the end of
+    the CompressedTexSubImage section introduced by the
+    ARB_texture_compression spec)
+
+    If the internal format of the texture image being modified is
+    COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
+    COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the
+    texture is stored using one of the several S3TC compressed texture image
+    formats.  Since the S3TC texture compression algorithm supports only 2D
+    images, CompressedTexSubImage1DARB and CompressedTexSubImage3DARB produce
+    an INVALID_ENUM error if <format> is an S3TC format.  Since S3TC images
+    are easily edited along 4x4 texel boundaries, the limitations on
+    CompressedTexSubImage2D are relaxed.  CompressedTexSubImage2D will result
+    in an INVALID_OPERATION error only if one of the following conditions
+    occurs:
+
+        * <width> is not a multiple of four or equal to TEXTURE_WIDTH.
+        * <height> is not a multiple of four or equal to TEXTURE_HEIGHT.
+        * <xoffset> or <yoffset> is not a multiple of four.
+
+    The contents of any 4x4 block of texels of an S3TC compressed texture
+    image that does not intersect the area being modified are preserved during
+    valid TexSubImage2D and CopyTexSubImage2D calls.
+
+Additions to Chapter 4 of the OpenGL 1.2.1 Specification (Per-Fragment
+Operations and the Frame Buffer)
+
+    None.
+
+Additions to Chapter 5 of the OpenGL 1.2.1 Specification (Special Functions)
+
+    None.
+
+Additions to Chapter 6 of the OpenGL 1.2.1 Specification (State and
+State Requests)
+
+    None.
+
+Additions to Appendix A of the OpenGL 1.2.1 Specification (Invariance)
+
+    None.
+
+Additions to the AGL/GLX/WGL Specifications
+
+    None.
+
+GLX Protocol
+
+    None.
+
+Errors
+
+    INVALID_ENUM is generated by CompressedTexImage1DARB or
+    CompressedTexImage3DARB if <internalformat> is
+    COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
+    COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT.
+
+    INVALID_OPERATION is generated by CompressedTexImage2DARB if
+    <internalformat> is COMPRESSED_RGB_S3TC_DXT1_EXT,
+    COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
+    COMPRESSED_RGBA_S3TC_DXT5_EXT and <border> is not equal to zero.
+
+    INVALID_ENUM is generated by CompressedTexSubImage1DARB or
+    CompressedTexSubImage3DARB if <format> is COMPRESSED_RGB_S3TC_DXT1_EXT,
+    COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
+    COMPRESSED_RGBA_S3TC_DXT5_EXT.
+
+    INVALID_OPERATION is generated by TexSubImage2D, CopyTexSubImage2D, or
+    CompressedTexSubImage2D if TEXTURE_INTERNAL_FORMAT is
+    COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
+    COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT and any of
+    the following apply: <width> is not a multiple of four or equal to
+    TEXTURE_WIDTH; <height> is not a multiple of four or equal to
+    TEXTURE_HEIGHT; <xoffset> or <yoffset> is not a multiple of four.
+
+
+    The following restrictions from the ARB_texture_compression specification
+    do not apply to S3TC texture formats, since subimage modification is
+    straightforward as long as the subimage is properly aligned.
+
+    DELETE: INVALID_OPERATION is generated by TexSubImage1D, TexSubImage2D,
+    DELETE: TexSubImage3D, CopyTexSubImage1D, CopyTexSubImage2D, or
+    DELETE: CopyTexSubImage3D if the internal format of the texture image is
+    DELETE: compressed and <xoffset>, <yoffset>, or <zoffset> does not equal
+    DELETE: -b, where b is the value of TEXTURE_BORDER.
+
+    DELETE: INVALID_VALUE is generated by CompressedTexSubImage1DARB,
+    DELETE: CompressedTexSubImage2DARB, or CompressedTexSubImage3DARB if the
+    DELETE: entire texture image is not being edited:  if <xoffset>,
+    DELETE: <yoffset>, or <zoffset> is greater than -b, <xoffset> + <width> is
+    DELETE: less than w+b, <yoffset> + <height> is less than h+b, or <zoffset>
+    DELETE: + <depth> is less than d+b, where b is the value of
+    DELETE: TEXTURE_BORDER, w is the value of TEXTURE_WIDTH, h is the value of
+    DELETE: TEXTURE_HEIGHT, and d is the value of TEXTURE_DEPTH.
+
+    See also errors in the GL_ARB_texture_compression specification.
+
+New State
+
+    In the "Textures" state table, increment the TEXTURE_INTERNAL_FORMAT
+    subscript for Z by 4 in the "Type" row.
+
+New Implementation Dependent State
+
+    None
+
+Appendix
+
+    S3TC Compressed Texture Image Formats
+
+    Compressed texture images stored using the S3TC compressed image formats
+    are represented as a collection of 4x4 texel blocks, where each block
+    contains 64 or 128 bits of texel data.  The image is encoded as a normal
+    2D raster image in which each 4x4 block is treated as a single pixel.  If
+    an S3TC image has a width or height less than four, the data corresponding
+    to texels outside the image are irrelevant and undefined.
+
+    When an S3TC image with a width of <w>, height of <h>, and block size of
+    <blocksize> (8 or 16 bytes) is decoded, the corresponding image size (in
+    bytes) is:
+    
+        ceil(<w>/4) * ceil(<h>/4) * blocksize.
+
+    When decoding an S3TC image, the block containing the texel at offset
+    (<x>, <y>) begins at an offset (in bytes) relative to the base of the
+    image of:
+
+        blocksize * (ceil(<w>/4) * floor(<y>/4) + floor(<x>/4)).
+
+    The data corresponding to a specific texel (<x>, <y>) are extracted from a
+    4x4 texel block using a relative (x,y) value of
+    
+        (<x> modulo 4, <y> modulo 4).
+
+    There are four distinct S3TC image formats:
+
+    COMPRESSED_RGB_S3TC_DXT1_EXT:  Each 4x4 block of texels consists of 64
+    bits of RGB image data.  
+
+    Each RGB image data block is encoded as a sequence of 8 bytes, called (in
+    order of increasing address):
+
+            c0_lo, c0_hi, c1_lo, c1_hi, bits_0, bits_1, bits_2, bits_3
+
+        The 8 bytes of the block are decoded into three quantities:
+
+            color0 = c0_lo + c0_hi * 256
+            color1 = c1_lo + c1_hi * 256
+            bits   = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * bits_3))
+        
+        color0 and color1 are 16-bit unsigned integers that are unpacked to
+        RGB colors RGB0 and RGB1 as though they were 16-bit packed pixels with
+        a <format> of RGB and a type of UNSIGNED_SHORT_5_6_5.
+
+        bits is a 32-bit unsigned integer, from which a two-bit control code
+        is extracted for a texel at location (x,y) in the block using:
+
+            code(x,y) = bits[2*(4*y+x)+1..2*(4*y+x)+0]
+        
+        where bit 31 is the most significant and bit 0 is the least
+        significant bit.
+
+        The RGB color for a texel at location (x,y) in the block is given by:
+
+            RGB0,              if color0 > color1 and code(x,y) == 0
+            RGB1,              if color0 > color1 and code(x,y) == 1
+            (2*RGB0+RGB1)/3,   if color0 > color1 and code(x,y) == 2
+            (RGB0+2*RGB1)/3,   if color0 > color1 and code(x,y) == 3
+
+            RGB0,              if color0 <= color1 and code(x,y) == 0
+            RGB1,              if color0 <= color1 and code(x,y) == 1
+            (RGB0+RGB1)/2,     if color0 <= color1 and code(x,y) == 2
+            BLACK,             if color0 <= color1 and code(x,y) == 3
+
+        Arithmetic operations are done per component, and BLACK refers to an
+        RGB color where red, green, and blue are all zero.
+
+    Since this image has an RGB format, there is no alpha component and the
+    image is considered fully opaque.
+
+
+    COMPRESSED_RGBA_S3TC_DXT1_EXT:  Each 4x4 block of texels consists of 64
+    bits of RGB image data and minimal alpha information.  The RGB components
+    of a texel are extracted in the same way as COMPRESSED_RGB_S3TC_DXT1_EXT.
+ 
+        The alpha component for a texel at location (x,y) in the block is
+        given by:
+
+            0.0,               if color0 <= color1 and code(x,y) == 3
+            1.0,               otherwise
+
+        IMPORTANT:  When encoding an RGBA image into a format using 1-bit
+        alpha, any texels with an alpha component less than 0.5 end up with an
+        alpha of 0.0 and any texels with an alpha component greater than or
+        equal to 0.5 end up with an alpha of 1.0.  When encoding an RGBA image
+        into the COMPRESSED_RGBA_S3TC_DXT1_EXT format, the resulting red,
+        green, and blue components of any texels with a final alpha of 0.0
+        will automatically be zero (black).  If this behavior is not desired
+        by an application, it should not use COMPRESSED_RGBA_S3TC_DXT1_EXT.
+        This format will never be used when a generic compressed internal
+        format (Table 3.16.2) is specified, although the nearly identical
+        format COMPRESSED_RGB_S3TC_DXT1_EXT (above) may be.
+
+
+    COMPRESSED_RGBA_S3TC_DXT3_EXT:  Each 4x4 block of texels consists of 64
+    bits of uncompressed alpha image data followed by 64 bits of RGB image
+    data.  
+
+    Each RGB image data block is encoded according to the
+    COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code
+    bits always use the non-transparent encodings.  In other words, they are
+    treated as though color0 > color1, regardless of the actual values of
+    color0 and color1.
+
+    Each alpha image data block is encoded as a sequence of 8 bytes, called
+    (in order of increasing address):
+
+            a0, a1, a2, a3, a4, a5, a6, a7
+
+        The 8 bytes of the block are decoded into one 64-bit integer:
+
+            alpha = a0 + 256 * (a1 + 256 * (a2 + 256 * (a3 + 256 * (a4 +
+                         256 * (a5 + 256 * (a6 + 256 * a7))))))
+
+        alpha is a 64-bit unsigned integer, from which a four-bit alpha value
+        is extracted for a texel at location (x,y) in the block using:
+
+            alpha(x,y) = alpha[4*(4*y+x)+3..4*(4*y+x)+0]
+
+        where bit 63 is the most significant and bit 0 is the least
+        significant bit.
+
+        The alpha component for a texel at location (x,y) in the block is
+        given by alpha(x,y) / 15.
+
+ 
+    COMPRESSED_RGBA_S3TC_DXT5_EXT:  Each 4x4 block of texels consists of 64
+    bits of compressed alpha image data followed by 64 bits of RGB image data.
+
+    Each RGB image data block is encoded according to the
+    COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code
+    bits always use the non-transparent encodings.  In other words, they are
+    treated as though color0 > color1, regardless of the actual values of
+    color0 and color1.
+
+    Each alpha image data block is encoded as a sequence of 8 bytes, called
+    (in order of increasing address):
+
+        alpha0, alpha1, bits_0, bits_1, bits_2, bits_3, bits_4, bits_5
+
+        The alpha0 and alpha1 are 8-bit unsigned bytes converted to alpha
+        components by multiplying by 1/255.
+
+        The 6 "bits" bytes of the block are decoded into one 48-bit integer:
+
+          bits = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * (bits_3 + 
+                          256 * (bits_4 + 256 * bits_5))))
+
+        bits is a 48-bit unsigned integer, from which a three-bit control code
+        is extracted for a texel at location (x,y) in the block using:
+
+            code(x,y) = bits[3*(4*y+x)+2..3*(4*y+x)+0]
+
+        where bit 47 is the most significant and bit 0 is the least
+        significant bit.
+
+        The alpha component for a texel at location (x,y) in the block is
+        given by:
+
+              alpha0,                   code(x,y) == 0
+              alpha1,                   code(x,y) == 1
+
+              (6*alpha0 + 1*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 2
+              (5*alpha0 + 2*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 3
+              (4*alpha0 + 3*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 4
+              (3*alpha0 + 4*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 5
+              (2*alpha0 + 5*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 6
+              (1*alpha0 + 6*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 7
+
+              (4*alpha0 + 1*alpha1)/5,  alpha0 <= alpha1 and code(x,y) == 2
+              (3*alpha0 + 2*alpha1)/5,  alpha0 <= alpha1 and code(x,y) == 3
+              (2*alpha0 + 3*alpha1)/5,  alpha0 <= alpha1 and code(x,y) == 4
+              (1*alpha0 + 4*alpha1)/5,  alpha0 <= alpha1 and code(x,y) == 5
+              0.0,                      alpha0 <= alpha1 and code(x,y) == 6
+              1.0,                      alpha0 <= alpha1 and code(x,y) == 7
+
+
+Revision History
+
+    1.1,  11/16/01 pbrown:    Updated contact info, clarified where texels
+                              fall within a single block.
+
+    1.0,  07/07/00 prbrown1:  Published final version agreed to by working
+                              group members.
+
+    0.9,  06/24/00 prbrown1:  Documented that block-aligned TexSubImage calls
+                              do not modify existing texels outside the
+                              modified blocks.  Added caveat to allow for a
+                              (0,0)-anchored TexSubImage operation of
+                              arbitrary size.
+
+    0.7,  04/11/00 prbrown1:  Added issues on DXT1, DXT3, and DXT5 encodings
+                              where the MSDN documentation doesn't match what
+                              is really done.  Added enum values from the
+                              extension registry.
+
+    0.4,  03/28/00 prbrown1:  Updated to reflect final version of the
+                              ARB_texture_compression extension.  Allowed
+                              block-aligned TexSubImage calls.
+
+    0.3,  03/07/00 prbrown1:  Resolved issues pertaining to the format of RGB
+                              blocks in the DXT3 and DXT5 formats (they don't
+                              ever use the "transparent" encoding).  Fixed
+                              decoding of DXT1 blocks.  Pointed out issue of
+                              "transparent" texels in DXT1 encodings having
+                              different behaviors for RGB and RGBA internal
+                              formats.
+
+    0.2,  02/23/00 prbrown1:  Minor revisions; added several issues.
+
+    0.11, 02/17/00 prbrown1:  Slight modification to error semantics
+                              (INVALID_ENUM instead of INVALID_OPERATION).
+
+    0.1,  02/15/00 prbrown1:  Initial revision.
--- a/src/nvtt/squish/vs7/squish.sln
+++ b/src/nvtt/squish/vs7/squish.sln
@ -0,0 +1,39 @@
+Microsoft Visual Studio Solution File, Format Version 8.00
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squish", "squish\squish.vcproj", "{6A8518C3-D81A-4428-BD7F-C37933088AC1}"
+	ProjectSection(ProjectDependencies) = postProject
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squishpng", "squishpng\squishpng.vcproj", "{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}"
+	ProjectSection(ProjectDependencies) = postProject
+		{6A8518C3-D81A-4428-BD7F-C37933088AC1} = {6A8518C3-D81A-4428-BD7F-C37933088AC1}
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squishtest", "squishtest\squishtest.vcproj", "{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}"
+	ProjectSection(ProjectDependencies) = postProject
+		{6A8518C3-D81A-4428-BD7F-C37933088AC1} = {6A8518C3-D81A-4428-BD7F-C37933088AC1}
+	EndProjectSection
+EndProject
+Global
+	GlobalSection(SolutionConfiguration) = preSolution
+		Debug = Debug
+		Release = Release
+	EndGlobalSection
+	GlobalSection(ProjectConfiguration) = postSolution
+		{6A8518C3-D81A-4428-BD7F-C37933088AC1}.Debug.ActiveCfg = Debug|Win32
+		{6A8518C3-D81A-4428-BD7F-C37933088AC1}.Debug.Build.0 = Debug|Win32
+		{6A8518C3-D81A-4428-BD7F-C37933088AC1}.Release.ActiveCfg = Release|Win32
+		{6A8518C3-D81A-4428-BD7F-C37933088AC1}.Release.Build.0 = Release|Win32
+		{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Debug.ActiveCfg = Debug|Win32
+		{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Debug.Build.0 = Debug|Win32
+		{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Release.ActiveCfg = Release|Win32
+		{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Release.Build.0 = Release|Win32
+		{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Debug.ActiveCfg = Debug|Win32
+		{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Debug.Build.0 = Debug|Win32
+		{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Release.ActiveCfg = Release|Win32
+		{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Release.Build.0 = Release|Win32
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+	EndGlobalSection
+	GlobalSection(ExtensibilityAddIns) = postSolution
+	EndGlobalSection
+EndGlobal
--- a/src/nvtt/squish/vs7/squish/squish.vcproj
+++ b/src/nvtt/squish/vs7/squish/squish.vcproj
@ -0,0 +1,198 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="7.10"
+	Name="squish"
+	ProjectGUID="{6A8518C3-D81A-4428-BD7F-C37933088AC1}"
+	Keyword="Win32Proj">
+	<Platforms>
+		<Platform
+			Name="Win32"/>
+	</Platforms>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory="Debug"
+			IntermediateDirectory="Debug"
+			ConfigurationType="4"
+			CharacterSet="2">
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				AdditionalIncludeDirectories="..\.."
+				PreprocessorDefinitions="WIN32;_DEBUG;_LIB;SQUISH_USE_SSE=1"
+				MinimalRebuild="TRUE"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="3"
+				EnableEnhancedInstructionSet="1"
+				ForceConformanceInForLoopScope="TRUE"
+				UsePrecompiledHeader="0"
+				WarningLevel="4"
+				WarnAsError="TRUE"
+				Detect64BitPortabilityProblems="TRUE"
+				DebugInformationFormat="3"/>
+			<Tool
+				Name="VCCustomBuildTool"/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="$(OutDir)/squish.lib"/>
+			<Tool
+				Name="VCMIDLTool"/>
+			<Tool
+				Name="VCPostBuildEventTool"/>
+			<Tool
+				Name="VCPreBuildEventTool"/>
+			<Tool
+				Name="VCPreLinkEventTool"/>
+			<Tool
+				Name="VCResourceCompilerTool"/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"/>
+			<Tool
+				Name="VCManagedWrapperGeneratorTool"/>
+			<Tool
+				Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory="Release"
+			IntermediateDirectory="Release"
+			ConfigurationType="4"
+			CharacterSet="2"
+			WholeProgramOptimization="TRUE">
+			<Tool
+				Name="VCCLCompilerTool"
+				GlobalOptimizations="TRUE"
+				InlineFunctionExpansion="2"
+				FavorSizeOrSpeed="1"
+				OmitFramePointers="TRUE"
+				AdditionalIncludeDirectories="..\.."
+				PreprocessorDefinitions="WIN32;NDEBUG;_LIB;SQUISH_USE_SSE=1"
+				RuntimeLibrary="2"
+				ForceConformanceInForLoopScope="TRUE"
+				UsePrecompiledHeader="0"
+				WarningLevel="4"
+				WarnAsError="TRUE"
+				Detect64BitPortabilityProblems="TRUE"
+				DebugInformationFormat="3"/>
+			<Tool
+				Name="VCCustomBuildTool"/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="$(OutDir)/squish.lib"/>
+			<Tool
+				Name="VCMIDLTool"/>
+			<Tool
+				Name="VCPostBuildEventTool"/>
+			<Tool
+				Name="VCPreBuildEventTool"/>
+			<Tool
+				Name="VCPreLinkEventTool"/>
+			<Tool
+				Name="VCResourceCompilerTool"/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"/>
+			<Tool
+				Name="VCManagedWrapperGeneratorTool"/>
+			<Tool
+				Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
+			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
+			<File
+				RelativePath="..\..\alpha.cpp">
+			</File>
+			<File
+				RelativePath="..\..\clusterfit.cpp">
+			</File>
+			<File
+				RelativePath="..\..\colourblock.cpp">
+			</File>
+			<File
+				RelativePath="..\..\colourfit.cpp">
+			</File>
+			<File
+				RelativePath="..\..\colourset.cpp">
+			</File>
+			<File
+				RelativePath="..\..\maths.cpp">
+			</File>
+			<File
+				RelativePath="..\..\rangefit.cpp">
+			</File>
+			<File
+				RelativePath="..\..\singlecolourfit.cpp">
+			</File>
+			<File
+				RelativePath="..\..\squish.cpp">
+			</File>
+		</Filter>
+		<Filter
+			Name="Header Files"
+			Filter="h;hpp;hxx;hm;inl;inc;xsd"
+			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}">
+			<File
+				RelativePath="..\..\alpha.h">
+			</File>
+			<File
+				RelativePath="..\..\clusterfit.h">
+			</File>
+			<File
+				RelativePath="..\..\colourblock.h">
+			</File>
+			<File
+				RelativePath="..\..\colourfit.h">
+			</File>
+			<File
+				RelativePath="..\..\colourset.h">
+			</File>
+			<File
+				RelativePath="..\..\config.h">
+			</File>
+			<File
+				RelativePath="..\..\maths.h">
+			</File>
+			<File
+				RelativePath="..\..\rangefit.h">
+			</File>
+			<File
+				RelativePath="..\..\simd.h">
+			</File>
+			<File
+				RelativePath="..\..\simd_sse.h">
+			</File>
+			<File
+				RelativePath="..\..\simd_ve.h">
+			</File>
+			<File
+				RelativePath="..\..\singlecolourfit.h">
+			</File>
+			<File
+				RelativePath="..\..\singlecolourlookup.inl">
+			</File>
+			<File
+				RelativePath="..\..\squish.h">
+			</File>
+		</Filter>
+		<Filter
+			Name="Resource Files"
+			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
+			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}">
+			<File
+				RelativePath="..\..\texture_compression_s3tc.txt">
+			</File>
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
--- a/src/nvtt/squish/vs7/squishpng/squishpng.vcproj
+++ b/src/nvtt/squish/vs7/squishpng/squishpng.vcproj
@ -0,0 +1,140 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="7.10"
+	Name="squishpng"
+	ProjectGUID="{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}"
+	Keyword="Win32Proj">
+	<Platforms>
+		<Platform
+			Name="Win32"/>
+	</Platforms>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory="Debug"
+			IntermediateDirectory="Debug"
+			ConfigurationType="1"
+			CharacterSet="2">
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				AdditionalIncludeDirectories="..\.."
+				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
+				MinimalRebuild="TRUE"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="3"
+				ForceConformanceInForLoopScope="TRUE"
+				UsePrecompiledHeader="0"
+				WarningLevel="4"
+				WarnAsError="TRUE"
+				Detect64BitPortabilityProblems="TRUE"
+				DebugInformationFormat="3"/>
+			<Tool
+				Name="VCCustomBuildTool"/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="libpng13d.lib"
+				OutputFile="$(OutDir)/squishpng.exe"
+				LinkIncremental="2"
+				GenerateDebugInformation="TRUE"
+				ProgramDatabaseFile="$(OutDir)/squishpng.pdb"
+				SubSystem="1"
+				TargetMachine="1"/>
+			<Tool
+				Name="VCMIDLTool"/>
+			<Tool
+				Name="VCPostBuildEventTool"/>
+			<Tool
+				Name="VCPreBuildEventTool"/>
+			<Tool
+				Name="VCPreLinkEventTool"/>
+			<Tool
+				Name="VCResourceCompilerTool"/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"/>
+			<Tool
+				Name="VCWebDeploymentTool"/>
+			<Tool
+				Name="VCManagedWrapperGeneratorTool"/>
+			<Tool
+				Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory="Release"
+			IntermediateDirectory="Release"
+			ConfigurationType="1"
+			CharacterSet="2">
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalIncludeDirectories="..\.."
+				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
+				RuntimeLibrary="2"
+				ForceConformanceInForLoopScope="TRUE"
+				UsePrecompiledHeader="0"
+				WarningLevel="4"
+				WarnAsError="TRUE"
+				Detect64BitPortabilityProblems="TRUE"
+				DebugInformationFormat="3"/>
+			<Tool
+				Name="VCCustomBuildTool"/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="libpng13.lib"
+				OutputFile="$(OutDir)/squishpng.exe"
+				LinkIncremental="1"
+				GenerateDebugInformation="TRUE"
+				SubSystem="1"
+				OptimizeReferences="2"
+				EnableCOMDATFolding="2"
+				TargetMachine="1"/>
+			<Tool
+				Name="VCMIDLTool"/>
+			<Tool
+				Name="VCPostBuildEventTool"/>
+			<Tool
+				Name="VCPreBuildEventTool"/>
+			<Tool
+				Name="VCPreLinkEventTool"/>
+			<Tool
+				Name="VCResourceCompilerTool"/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"/>
+			<Tool
+				Name="VCWebDeploymentTool"/>
+			<Tool
+				Name="VCManagedWrapperGeneratorTool"/>
+			<Tool
+				Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
+			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
+			<File
+				RelativePath="..\..\extra\squishpng.cpp">
+			</File>
+		</Filter>
+		<Filter
+			Name="Header Files"
+			Filter="h;hpp;hxx;hm;inl;inc;xsd"
+			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}">
+		</Filter>
+		<Filter
+			Name="Resource Files"
+			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
+			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}">
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
--- a/src/nvtt/squish/vs7/squishtest/squishtest.vcproj
+++ b/src/nvtt/squish/vs7/squishtest/squishtest.vcproj
@ -0,0 +1,138 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="7.10"
+	Name="squishtest"
+	ProjectGUID="{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}"
+	Keyword="Win32Proj">
+	<Platforms>
+		<Platform
+			Name="Win32"/>
+	</Platforms>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory="Debug"
+			IntermediateDirectory="Debug"
+			ConfigurationType="1"
+			CharacterSet="2">
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				AdditionalIncludeDirectories="..\.."
+				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
+				MinimalRebuild="TRUE"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="3"
+				ForceConformanceInForLoopScope="TRUE"
+				UsePrecompiledHeader="0"
+				WarningLevel="4"
+				WarnAsError="TRUE"
+				Detect64BitPortabilityProblems="TRUE"
+				DebugInformationFormat="3"/>
+			<Tool
+				Name="VCCustomBuildTool"/>
+			<Tool
+				Name="VCLinkerTool"
+				OutputFile="$(OutDir)/squishtest.exe"
+				LinkIncremental="2"
+				GenerateDebugInformation="TRUE"
+				ProgramDatabaseFile="$(OutDir)/squishtest.pdb"
+				SubSystem="1"
+				TargetMachine="1"/>
+			<Tool
+				Name="VCMIDLTool"/>
+			<Tool
+				Name="VCPostBuildEventTool"/>
+			<Tool
+				Name="VCPreBuildEventTool"/>
+			<Tool
+				Name="VCPreLinkEventTool"/>
+			<Tool
+				Name="VCResourceCompilerTool"/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"/>
+			<Tool
+				Name="VCWebDeploymentTool"/>
+			<Tool
+				Name="VCManagedWrapperGeneratorTool"/>
+			<Tool
+				Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory="Release"
+			IntermediateDirectory="Release"
+			ConfigurationType="1"
+			CharacterSet="2">
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalIncludeDirectories="..\.."
+				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
+				RuntimeLibrary="2"
+				ForceConformanceInForLoopScope="TRUE"
+				UsePrecompiledHeader="0"
+				WarningLevel="4"
+				WarnAsError="TRUE"
+				Detect64BitPortabilityProblems="TRUE"
+				DebugInformationFormat="3"/>
+			<Tool
+				Name="VCCustomBuildTool"/>
+			<Tool
+				Name="VCLinkerTool"
+				OutputFile="$(OutDir)/squishtest.exe"
+				LinkIncremental="1"
+				GenerateDebugInformation="TRUE"
+				SubSystem="1"
+				OptimizeReferences="2"
+				EnableCOMDATFolding="2"
+				TargetMachine="1"/>
+			<Tool
+				Name="VCMIDLTool"/>
+			<Tool
+				Name="VCPostBuildEventTool"/>
+			<Tool
+				Name="VCPreBuildEventTool"/>
+			<Tool
+				Name="VCPreLinkEventTool"/>
+			<Tool
+				Name="VCResourceCompilerTool"/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"/>
+			<Tool
+				Name="VCWebDeploymentTool"/>
+			<Tool
+				Name="VCManagedWrapperGeneratorTool"/>
+			<Tool
+				Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
+			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
+			<File
+				RelativePath="..\..\extra\squishtest.cpp">
+			</File>
+		</Filter>
+		<Filter
+			Name="Header Files"
+			Filter="h;hpp;hxx;hm;inl;inc;xsd"
+			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}">
+		</Filter>
+		<Filter
+			Name="Resource Files"
+			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
+			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}">
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
--- a/src/nvtt/squish/weightedclusterfit.cpp
+++ b/src/nvtt/squish/weightedclusterfit.cpp
@ -30,7 +30,7 @@
 #include <cfloat>


-namespace nvsquish {
+namespace squish {

 WeightedClusterFit::WeightedClusterFit()
 {
@ -131,13 +131,10 @@ float WeightedClusterFit::GetBestError() const

 void WeightedClusterFit::Compress3( void* block )
 {
-    int const count = m_colours->GetCount();
 	Vec4 const one = VEC4_CONST(1.0f);
 	Vec4 const zero = VEC4_CONST(0.0f);
 	Vec4 const half(0.5f, 0.5f, 0.5f, 0.25f);
 	Vec4 const two = VEC4_CONST(2.0);
-	Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
-	Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
 	 
 	// declare variables
 	Vec4 beststart = VEC4_CONST( 0.0f );
@ -149,11 +146,11 @@ void WeightedClusterFit::Compress3( void* block )
 	int b0 = 0, b1 = 0;

 	// check all possible clusters for this total order
-	for( int c0 = 0; c0 <= count; c0++)
+	for( int c0 = 0; c0 <= 16; c0++)
 	{	
 		Vec4 x1 = zero;
 		
-		for( int c1 = 0; c1 <= count-c0; c1++)
+		for( int c1 = 0; c1 <= 16-c0; c1++)
 		{
 			Vec4 const x2 = m_xsum - x1 - x0;
 			
@ -176,21 +173,24 @@ void WeightedClusterFit::Compress3( void* block )
 			Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
 			Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
 			
-			// clamp to the grid
+			// clamp the output to [0, 1]
 			a = Min( one, Max( zero, a ) );
 			b = Min( one, Max( zero, b ) );
+			
+			// clamp to the grid
+			Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
+			Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
 			a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
 			b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
 			
-			// compute the error (we skip the constant xxsum)
-			Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
-			Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
-			Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
-			Vec4 e4 = MultiplyAdd( two, e3, e1 );
-
+			// compute the error
+			Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
+			Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
+			Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
+			
 			// apply the metric to the error term
-			Vec4 e5 = e4 * m_metricSqr;
-			Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
+			Vec4 e4 = e3 * m_metricSqr;
+			Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
 			
 			// keep the solution if it wins
 			if( CompareAnyLessThan( error, besterror ) )
@ -221,17 +221,17 @@ void WeightedClusterFit::Compress3( void* block )
 			for(; i < b0+b1; i++) {
 				bestindices[i] = 2;
 			}
-			for(; i < count; i++) {
+			for(; i < 16; i++) {
 				bestindices[i] = 1;
 			}
 		}
 		
 		// remap the indices
 		u8 ordered[16];
-		for( int i = 0; i < count; ++i )
+		for( int i = 0; i < 16; ++i )
 			ordered[m_order[i]] = bestindices[i];
 		
-		m_colours->RemapIndices( ordered, bestindices );
+		m_colours->RemapIndices( ordered, bestindices ); // Set alpha indices.


 		// save the block
@ -244,16 +244,12 @@ void WeightedClusterFit::Compress3( void* block )

 void WeightedClusterFit::Compress4( void* block )
 {
-    int const count = m_colours->GetCount();
 	Vec4 const one = VEC4_CONST(1.0f);
 	Vec4 const zero = VEC4_CONST(0.0f);
 	Vec4 const half = VEC4_CONST(0.5f);
 	Vec4 const two = VEC4_CONST(2.0);
 	Vec4 const onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
 	Vec4 const twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
-    Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
-	Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
-	Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
 	
 	// declare variables
 	Vec4 beststart = VEC4_CONST( 0.0f );
@ -264,30 +260,30 @@ void WeightedClusterFit::Compress4( void* block )
 	int b0 = 0, b1 = 0, b2 = 0;

 	// check all possible clusters for this total order
-	for( int c0 = 0; c0 <= count; c0++)
+	for( int c0 = 0; c0 <= 16; c0++)
 	{	
 		Vec4 x1 = zero;
 		
-		for( int c1 = 0; c1 <= count-c0; c1++)
+		for( int c1 = 0; c1 <= 16-c0; c1++)
 		{	
 			Vec4 x2 = zero;
 			
-			for( int c2 = 0; c2 <= count-c0-c1; c2++)
+			for( int c2 = 0; c2 <= 16-c0-c1; c2++)
 			{
 				Vec4 const x3 = m_xsum - x2 - x1 - x0;
 				
 				//Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
 				//float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
-                Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum
+				Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird); // alphax_sum, alpha2_sum
 				Vec4 const alpha2_sum = alphax_sum.SplatW();
 				
 				//Vec3 const betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f);
 				//float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
-				Vec4 const betax_sum = MultiplyAdd(x2, twothirds, MultiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum
+				Vec4 const betax_sum = x3 + MultiplyAdd(x2, twothirds, x1 * onethird); // betax_sum, beta2_sum
 				Vec4 const beta2_sum = betax_sum.SplatW();
 				
-				//float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
-                Vec4 const alphabeta_sum = twonineths*( x1 + x2 ).SplatW(); // alphabeta_sum
+				//float const alphabeta_sum = w1 * (2.0f/9.0f) + w2 * (2.0f/9.0f);
+				Vec4 const alphabeta_sum = two * (x1 * onethird + x2 * onethird).SplatW(); // alphabeta_sum
 				
 				// float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
 				Vec4 const factor = Reciprocal( NegativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
@ -295,21 +291,24 @@ void WeightedClusterFit::Compress4( void* block )
 				Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
 				Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
 				
-				// clamp to the grid
+				// clamp the output to [0, 1]
 				a = Min( one, Max( zero, a ) );
 				b = Min( one, Max( zero, b ) );
+				
+				// clamp to the grid
+				Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
+				Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
 				a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
 				b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
 				
-				// compute the error (we skip the constant xxsum)
-				Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
-				Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
-				Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
-				Vec4 e4 = MultiplyAdd( two, e3, e1 );
-
+				// compute the error
+				Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
+				Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
+				Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
+				
 				// apply the metric to the error term
-				Vec4 e5 = e4 * m_metricSqr;
-				Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
+				Vec4 e4 = e3 * m_metricSqr;
+				Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
 				
 				// keep the solution if it wins
 				if( CompareAnyLessThan( error, besterror ) )
@ -347,20 +346,18 @@ void WeightedClusterFit::Compress4( void* block )
 			for(; i < b0+b1+b2; i++) {
 				bestindices[i] = 3;
 			}
-			for(; i < count; i++) {
+			for(; i < 16; i++) {
 				bestindices[i] = 1;
 			}
 		}
 		
 		// remap the indices
 		u8 ordered[16];
-		for( int i = 0; i < count; ++i )
+		for( int i = 0; i < 16; ++i )
 			ordered[m_order[i]] = bestindices[i];
 		
-        m_colours->RemapIndices( ordered, bestindices );
-
 		// save the block
-		WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
+		WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), ordered, block );
 		
 		// save the error
 		m_besterror = besterror;
@ -371,13 +368,6 @@ void WeightedClusterFit::Compress4( void* block )

 void WeightedClusterFit::Compress3( void* block )
 {
-    int const count = m_colours->GetCount();
-	Vec3 const one( 1.0f );
-	Vec3 const zero( 0.0f );
-	Vec3 const half( 0.5f );
-    Vec3 const grid( 31.0f, 63.0f, 31.0f );
-    Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
-
 	// declare variables
 	Vec3 beststart( 0.0f );
 	Vec3 bestend( 0.0f );
@ -389,12 +379,12 @@ void WeightedClusterFit::Compress3( void* block )
 	int b0 = 0, b1 = 0;

 	// check all possible clusters for this total order
-	for( int c0 = 0; c0 <= count; c0++)
+	for( int c0 = 0; c0 <= 16; c0++)
 	{	
 		Vec3 x1(0.0f);
 		float w1 = 0.0f;
 		
-		for( int c1 = 0; c1 <= count-c0; c1++)
+		for( int c1 = 0; c1 <= 16-c0; c1++)
 		{	
 			float w2 = m_wsum - w0 - w1;
 			
@ -410,9 +400,16 @@ void WeightedClusterFit::Compress3( void* block )
 			Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor;
 			Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor;
 			
-			// clamp to the grid
+			// clamp the output to [0, 1]
+			Vec3 const one( 1.0f );
+			Vec3 const zero( 0.0f );
 			a = Min( one, Max( zero, a ) );
 			b = Min( one, Max( zero, b ) );
+			
+			// clamp to the grid
+			Vec3 const grid( 31.0f, 63.0f, 31.0f );
+			Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
+			Vec3 const half( 0.5f );
 			a = Floor( grid*a + half )*gridrcp;
 			b = Floor( grid*b + half )*gridrcp;
 			
@ -453,20 +450,18 @@ void WeightedClusterFit::Compress3( void* block )
 			for(; i < b0+b1; i++) {
 				bestindices[i] = 2;
 			}
-			for(; i < count; i++) {
+			for(; i < 16; i++) {
 				bestindices[i] = 1;
 			}
 		}
 		
 		// remap the indices
 		u8 ordered[16];
-		for( int i = 0; i < count; ++i )
+		for( int i = 0; i < 16; ++i )
 			ordered[m_order[i]] = bestindices[i];
 		
-        m_colours->RemapIndices( ordered, bestindices );
-
 		// save the block
-		WriteColourBlock3( beststart, bestend, bestindices, block );
+		WriteColourBlock3( beststart, bestend, ordered, block );
 		
 		// save the error
 		m_besterror = besterror;
@ -475,13 +470,6 @@ void WeightedClusterFit::Compress3( void* block )

 void WeightedClusterFit::Compress4( void* block )
 {
-    int const count = m_colours->GetCount();
-	Vec3 const one( 1.0f );
-	Vec3 const zero( 0.0f );
-	Vec3 const half( 0.5f );
-	Vec3 const grid( 31.0f, 63.0f, 31.0f );
-	Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
-
 	// declare variables
 	Vec3 beststart( 0.0f );
 	Vec3 bestend( 0.0f );
@ -492,17 +480,17 @@ void WeightedClusterFit::Compress4( void* block )
 	int b0 = 0, b1 = 0, b2 = 0;

 	// check all possible clusters for this total order
-	for( int c0 = 0; c0 <= count; c0++)
+	for( int c0 = 0; c0 <= 16; c0++)
 	{	
 		Vec3 x1(0.0f);
 		float w1 = 0.0f;
 		
-		for( int c1 = 0; c1 <= count-c0; c1++)
+		for( int c1 = 0; c1 <= 16-c0; c1++)
 		{	
 			Vec3 x2(0.0f);
 			float w2 = 0.0f;
 			
-			for( int c2 = 0; c2 <= count-c0-c1; c2++)
+			for( int c2 = 0; c2 <= 16-c0-c1; c2++)
 			{
 				float w3 = m_wsum - w0 - w1 - w2;
 				
@ -517,9 +505,16 @@ void WeightedClusterFit::Compress4( void* block )
 				Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
 				Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
 				
-				// clamp to the grid
+				// clamp the output to [0, 1]
+				Vec3 const one( 1.0f );
+				Vec3 const zero( 0.0f );
 				a = Min( one, Max( zero, a ) );
 				b = Min( one, Max( zero, b ) );
+				
+				// clamp to the grid
+				Vec3 const grid( 31.0f, 63.0f, 31.0f );
+				Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
+				Vec3 const half( 0.5f );
 				a = Floor( grid*a + half )*gridrcp;
 				b = Floor( grid*b + half )*gridrcp;
 				
@ -568,20 +563,18 @@ void WeightedClusterFit::Compress4( void* block )
 			for(; i < b0+b1+b2; i++) {
 				bestindices[i] = 3;
 			}
-			for(; i < count; i++) {
+			for(; i < 16; i++) {
 				bestindices[i] = 1;
 			}
 		}
 		
 		// remap the indices
 		u8 ordered[16];
-		for( int i = 0; i < count; ++i )
+		for( int i = 0; i < 16; ++i )
 			ordered[m_order[i]] = bestindices[i];
-
-        m_colours->RemapIndices( ordered, bestindices );
 		
 		// save the block
-		WriteColourBlock4( beststart, bestend, bestindices, block );
+		WriteColourBlock4( beststart, bestend, ordered, block );

 		// save the error
 		m_besterror = besterror;
--- a/src/nvtt/squish/weightedclusterfit.h
+++ b/src/nvtt/squish/weightedclusterfit.h
@ -24,15 +24,15 @@
 	
   -------------------------------------------------------------------------- */
   
-#ifndef NV_SQUISH_WEIGHTEDCLUSTERFIT_H
-#define NV_SQUISH_WEIGHTEDCLUSTERFIT_H
+#ifndef SQUISH_WEIGHTEDCLUSTERFIT_H
+#define SQUISH_WEIGHTEDCLUSTERFIT_H

 #include "squish.h"
 #include "maths.h"
 #include "simd.h"
 #include "colourfit.h"

-namespace nvsquish {
+namespace squish {

 class WeightedClusterFit : public ColourFit
 {
--- a/src/nvtt/tests/CMakeLists.txt
+++ b/src/nvtt/tests/CMakeLists.txt
@ -1,28 +0,0 @@
-
-ADD_EXECUTABLE(filtertest filtertest.cpp ../tools/cmdline.h)
-TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage)
-
-ADD_EXECUTABLE(nvtestsuite testsuite.cpp)
-TARGET_LINK_LIBRARIES(nvtestsuite nvcore nvmath nvimage nvtt)
-ADD_TEST(NVTT.TestSuite.Kodak.cuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 0 -out output-cuda-kodak)
-ADD_TEST(NVTT.TestSuite.Waterloo.cuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 1 -out output-cuda-waterloo)
-ADD_TEST(NVTT.TestSuite.Epic.cuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 2 -out output-cuda-epic)
-ADD_TEST(NVTT.TestSuite.Kodak.nocuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 0 -nocuda -out output-nocuda-kodak)
-ADD_TEST(NVTT.TestSuite.Waterloo.nocuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 1 -nocuda -out output-nocuda-waterloo)
-ADD_TEST(NVTT.TestSuite.Epic.nocuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 2 -nocuda -out output-nocuda-epic)
-
-IF (CUDA_FOUND)
-	ADD_EXECUTABLE(driverapitest driverapi.cpp)
-	TARGET_LINK_LIBRARIES(driverapitest nvcore nvmath nvimage)
-ENDIF (CUDA_FOUND)
-
-ADD_EXECUTABLE(imperativeapi imperativeapi.cpp)
-TARGET_LINK_LIBRARIES(imperativeapi nvcore nvmath nvimage nvtt)
-
-INSTALL(TARGETS nvtestsuite DESTINATION bin)
- 
-#include_directories("/usr/include/ffmpeg/")
-#ADD_EXECUTABLE(nvmpegenc tools/mpegenc.cpp tools/cmdline.h)
-#TARGET_LINK_LIBRARIES(nvmpegenc nvcore nvmath nvimage avcodec z)
-#INSTALL(TARGETS nvmpegenc DESTINATION bin)
-
--- a/src/nvtt/tests/driverapi.cpp
+++ b/src/nvtt/tests/driverapi.cpp
@ -1,144 +0,0 @@
-
-#include <nvcore/Library.h>
-
-#include <cuda.h>
-#include <stdio.h>
-
-// Typedefs
-typedef CUresult CUDAAPI (*cuInitPtr)( unsigned int Flags );	
-
-typedef CUresult  CUDAAPI (*cuDeviceGetPtr)(CUdevice *device, int ordinal);
-typedef CUresult  CUDAAPI (*cuDeviceGetCountPtr)(int *count);
-typedef CUresult  CUDAAPI (*cuDeviceGetNamePtr)(char *name, int len, CUdevice dev);
-typedef CUresult  CUDAAPI (*cuDeviceComputeCapabilityPtr)(int *major, int *minor, CUdevice dev);
-typedef CUresult  CUDAAPI (*cuDeviceTotalMemPtr)(unsigned int *bytes, CUdevice dev);
-typedef CUresult  CUDAAPI (*cuDeviceGetPropertiesPtr)(CUdevprop *prop, CUdevice dev);
-typedef CUresult  CUDAAPI (*cuDeviceGetAttributePtr)(int *pi, CUdevice_attribute attrib, CUdevice dev);
-
-typedef CUresult  CUDAAPI (*cuCtxCreatePtr)(CUcontext *pctx, unsigned int flags, CUdevice dev );
-typedef CUresult  CUDAAPI (*cuCtxDestroyPtr)( CUcontext ctx );
-typedef CUresult  CUDAAPI (*cuCtxAttachPtr)(CUcontext *pctx, unsigned int flags);
-typedef CUresult  CUDAAPI (*cuCtxDetachPtr)(CUcontext ctx);
-typedef CUresult  CUDAAPI (*cuCtxPushCurrentPtr)( CUcontext ctx );
-typedef CUresult  CUDAAPI (*cuCtxPopCurrentPtr)( CUcontext *pctx );
-typedef CUresult  CUDAAPI (*cuCtxGetDevicePtr)(CUdevice *device);
-typedef CUresult  CUDAAPI (*cuCtxSynchronizePtr)(void);
-
-
-// A compressor inits CUDA and creates a context for each device.
-// 
-
-struct CudaDevice
-{
-	CUdevice device;
-	CUcontext context;
-};
-
-struct CudaContext
-{
-	CudaContext()
-	{
-		printf("CudaContext()\n");
-
-	#if NV_OS_WIN32
-		Library nvcuda("nvcuda.dll");
-	#else
-		Library nvcuda(NV_LIBRARY_NAME(cuda));
-	#endif
-
-		cuInit = (cuInitPtr)nvcuda.bindSymbol("cuInit");
-
-		cuDeviceGet = (cuDeviceGetPtr)nvcuda.bindSymbol("cuDeviceGet");
-		cuDeviceGetCount = (cuDeviceGetCountPtr)nvcuda.bindSymbol("cuDeviceGetCount");
-		cuDeviceGetName = (cuDeviceGetNamePtr)nvcuda.bindSymbol("cuDeviceGetName");
-		cuDeviceComputeCapability = (cuDeviceComputeCapabilityPtr)nvcuda.bindSymbol("cuDeviceComputeCapability");
-		cuDeviceTotalMem = (cuDeviceTotalMemPtr)nvcuda.bindSymbol("cuDeviceTotalMem");
-		cuDeviceGetProperties = (cuDeviceGetPropertiesPtr)nvcuda.bindSymbol("cuDeviceGetProperties");
-		cuDeviceGetAttribute = (cuDeviceGetAttributePtr)nvcuda.bindSymbol("cuDeviceGetAttribute");
-		
-		cuCtxCreate = (cuCtxCreatePtr)nvcuda.bindSymbol("cuCtxCreate");
-		cuCtxDestroy = (cuCtxDestroyPtr)nvcuda.bindSymbol("cuCtxDestroy");
-		cuCtxAttach = (cuCtxAttachPtr)nvcuda.bindSymbol("cuCtxAttach");
-		cuCtxDetach = (cuCtxDetachPtr)nvcuda.bindSymbol("cuCtxDetach");
-		cuCtxPushCurrent = (cuCtxPushCurrentPtr)nvcuda.bindSymbol("cuCtxPushCurrent");
-		cuCtxPopCurrent = (cuCtxPopCurrentPtr)nvcuda.bindSymbol("cuCtxPopCurrent");
-		cuCtxGetDevice = (cuCtxGetDevicePtr)nvcuda.bindSymbol("cuCtxGetDevice");
-		cuCtxSynchronize = (cuCtxSynchronizePtr)nvcuda.bindSymbol("cuCtxSynchronize");
-	
-		CUresult status = cuInit(0);
-	
-		if (status == CUDA_SUCCESS)
-		{
-			printf("cuInit succeeded.\n");
-		}
-		
-		m_deviceCount = 0;
-		cuDeviceGetCount(&m_deviceCount);
-		
-		printf("%d devices found.\n", m_deviceCount);
-		
-		if (m_deviceCount > 0)
-		{
-			m_devices = new CudaDevice[m_deviceCount];
-			
-			uint flags = CU_CTX_SCHED_AUTO;
-			if (m_deviceCount > 1) flags = CU_CTX_SCHED_YIELD;
-			
-			for (int i = 0; i < m_deviceCount; i++)
-			{
-				cuDeviceGet(&m_devices[i].device, i);
-				cuCtxCreate(&m_devices[i].context, flags, m_devices[i].device);
-			
-				cuCtxDestroy(m_devices[i].context);
-			}
-			
-		}
-	}
-	
-	~CudaContext()
-	{
-		printf("~CudaContext()\n");
-
-		if (m_deviceCount > 0)
-		{
-			for (int i = 0; i < m_deviceCount; i++)
-			{
-				cuCtxDestroy(m_devices[i].context);
-			}		
-
-			delete [] m_devices;
-		}
-	}
-	
-
-public:	
-	cuInitPtr cuInit;
-
-	cuDeviceGetPtr cuDeviceGet;
-	cuDeviceGetCountPtr cuDeviceGetCount;
-	cuDeviceGetNamePtr cuDeviceGetName;
-	cuDeviceComputeCapabilityPtr cuDeviceComputeCapability;
-	cuDeviceTotalMemPtr cuDeviceTotalMem;
-	cuDeviceGetPropertiesPtr cuDeviceGetProperties;
-	cuDeviceGetAttributePtr cuDeviceGetAttribute;
-
-	cuCtxCreatePtr cuCtxCreate;
-	cuCtxDestroyPtr cuCtxDestroy;
-	cuCtxAttachPtr cuCtxAttach;
-	cuCtxDetachPtr cuCtxDetach;
-	cuCtxPushCurrentPtr cuCtxPushCurrent;
-	cuCtxPopCurrentPtr  cuCtxPopCurrent;
-	cuCtxGetDevicePtr cuCtxGetDevice;
-	cuCtxSynchronizePtr cuCtxSynchronize;
-
-	int m_deviceCount;
-	CudaDevice * m_devices;
-};
-
-int main(void)
-{
-	CudaContext ctx;
-//	cuInit(0);
-
-	return 0;
-}
--- a/src/nvtt/tests/imperativeapi.cpp
+++ b/src/nvtt/tests/imperativeapi.cpp
@ -1,56 +0,0 @@
-// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#include <nvtt/nvtt.h>
-
-#include <stdlib.h>
-
-
-int main(int argc, char *argv[])
-{
-	nvtt::CompressionOptions compressionOptions;
-	compressionOptions.setFormat(nvtt::Format_BC1);
-
-	nvtt::OutputOptions outputOptions;
-	outputOptions.setFileName("output.dds");
-
-	nvtt::Context context;
-	nvtt::TexImage image = context.createTexImage();
-
-	image.load("kodim01.png");
-
-	float gamma = 2.2;
-	image.toLinear(gamma);
-
-	while (image.buildNextMipmap(nvtt::MipmapFilter_Box))
-	{
-		nvtt::TexImage tmpImage = image;
-		tmpImage.toGamma(gamma);
-
-		context.outputCompressed(tmpImage, compressionOptions, outputOptions);
-	//	tmpImage.outputCompressed(compressionOptions, outputOptions);
-	}
-
-	return EXIT_SUCCESS;
-}
-
--- a/src/nvtt/tests/mpegenc.cpp
+++ b/src/nvtt/tests/mpegenc.cpp
@ -1,344 +0,0 @@
-// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#include <nvcore/StrLib.h>
-#include <nvcore/StdStream.h>
-
-#include <nvmath/Color.h>
-
-#include <nvimage/Image.h>
-#include <nvimage/DirectDrawSurface.h>
-
-#include <nvtt/nvtt.h>
-
-#include "cmdline.h"
-
-extern "C" {
-#include <libavcodec/avcodec.h>
-//#include <libavformat/avformat.h>
-}
-
-// http://ffmpeg.mplayerhq.hu/general.html
-// http://cekirdek.pardus.org.tr/~ismail/ffmpeg-docs/apiexample_8c-source.html
-
-
-using namespace nv;
-
-static float s_quality = 0.5f;
-
-static AVFrame * createPicture(const Image & image)
-{
-	const uint w = image.width();
-	const uint h = image.height();
-	const uint size = w * h;
-	
-	AVFrame * picture = avcodec_alloc_frame();
-	
-	uint8_t * buffer = (uint8_t *)malloc((size * 3) / 2);
-	
-	picture->data[0] = buffer;
-	picture->data[1] = buffer + size;
-	picture->data[2] = buffer + size + size / 4;
-	picture->linesize[0] = w;
-	picture->linesize[1] = w / 2;
-	picture->linesize[2] = w / 2;
-
-	memset(buffer, 0, (size * 3) / 2);
-	
-	// Convert image to YCbCr 4:2:0
-	
-	// Y
-	for (uint y=0;y<h;y++)
-	{
-		for (uint x=0;x<w;x++)
-		{
-			Color32 c = image.pixel(x, y);
-			
-			float R = (1 / 255.0f) * c.r;
-			float G = (1 / 255.0f) * c.g;
-			float B = (1 / 255.0f) * c.b;
-			
-			//float Y = 0.299f * R + 0.587f * G + 0.114f * B;
-			float Y = 16  + (65.481f  * R + 128.553f * G +  24.966f * B);
-			
-			picture->data[0][y * picture->linesize[0] + x] = (uint8)clamp(Y, 0.0f, 255.0f);
-		}
-	}
-
-	// Cb and Cr
-	for (uint y=0;y<h/2;y++)
-	{
-		for (uint x=0;x<w/2;x++)
-		{
-			Color32 c0 = image.pixel(2*x+0, 2*y+0);
-			Color32 c1 = image.pixel(2*x+1, 2*y+0);
-			Color32 c2 = image.pixel(2*x+0, 2*y+1);
-			Color32 c3 = image.pixel(2*x+1, 2*y+1);
-
-			float R = (1 / 255.0f) * 0.25f * (c0.r + c1.r + c2.r + c3.r);
-			float G = (1 / 255.0f) * 0.25f * (c0.g + c1.g + c2.g + c3.g);
-			float B = (1 / 255.0f) * 0.25f * (c0.b + c1.b + c2.b + c3.b);
-			
-			//float Pb = - 0.168736f * R - 0.331264f * G + 0.5f * B;
-			//float Pr = + 0.5f * R - 0.418688f * G - 0.081312f * B;
-			float Cb = 128 + (-37.797f * R - 74.203f * G + 112.0f * B);
-			float Cr = 128 + (112.0f * R - 93.786 * G - 18.214f * B);
-			
-			picture->data[1][y * picture->linesize[1] + x] = (uint8)clamp(Cb, 0.0f, 255.0f);;
-			picture->data[2][y * picture->linesize[2] + x] = (uint8)clamp(Cr, 0.0f, 255.0f);;
-		}
-	}
-	
-	return picture;
-}
-
-static void pgm_save(unsigned char *buf, int wrap, int xsize, int ysize, const char * filename)
-{
-	FILE * f = fopen(filename, "w");
-	fprintf(f,"P5\n%d %d\n%d\n",xsize, ysize, 255);
-	
-	for (int i = 0; i < ysize; i++)
-		fwrite(buf + i * wrap,1,xsize,f);
-	
-	fclose(f);
-}
-
-static void savePicture(const AVFrame * picture, int w, int h)
-{
-	// @@ Combine planes.
-	pgm_save(picture->data[0], picture->linesize[0], w, h, "test_y.pgm");
-	pgm_save(picture->data[1], picture->linesize[1], w/2, h/2, "test_u.pgm");
-	pgm_save(picture->data[2], picture->linesize[2], w/2, h/2, "test_v.pgm");
-}
-
-static double psnr(double d) {
-	return -10.0*log(d)/log(10.0);
-}
-
-
-static void encodeFrame(const Image & image, CodecID format, Array<uint8> & frame)
-{
-	AVFrame * picture = createPicture(image);
-	
-	AVCodec * encoder = avcodec_find_encoder(format);
-
-	if (encoder == NULL)
-	{
-		printf("MPEG encoder not found.\n");
-		exit(1);
-	}
-
-	AVCodecContext * encoder_context = avcodec_alloc_context();
-
-	//encoder_context->me_method = 0;
-	encoder_context->width = image.width();
-	encoder_context->height = image.height();
-	encoder_context->pix_fmt = PIX_FMT_YUV420P;
-	//encoder_context->pix_fmt = PIX_FMT_YUV422P;
-	//encoder_context->pix_fmt = PIX_FMT_YUVJ420P;
-	
-	encoder_context->time_base = (AVRational){1,25};   // required parameter. 25 fps?
-	encoder_context->bit_rate = 400000;   // Quality?
-	//encoder_context->bit_rate = 200000;   // Default
-	//encoder_context->bit_rate_tolerance = 20000;
-	//encoder_context->qmin = ?;
-	//encoder_context->qmax = ?;
-	//encoder_context->qcompress = ?;
-	//encoder_context->qblur = ?;
-	
-	encoder_context->flags |= CODEC_FLAG_PSNR;
-	encoder_context->qcompress = s_quality;
-	//encoder_context->qblur = 1.0f;
-	//encoder_context->global_quality = FF_QP2LAMBDA * 0;
-	//encoder_context->max_qdiff = 3;
-	
-
-
-	
-	// Intra frames only
-	encoder_context->gop_size = 0;
-
-	if (avcodec_open(encoder_context, encoder) < 0)
-	{
-		printf("MPEG encoder initialization failed.\n");
-		exit(1);
-	}
-
-	frame.resize(1024 * 1024, 0);	// resize and initialize to 0.
-	
-	int out_size = avcodec_encode_video(encoder_context, frame.mutableBuffer(), frame.size(), picture);
-	frame.resize(out_size);
-	
-	// Append sequence end code.
-	frame.append(0x00);
-	frame.append(0x00);
-	frame.append(0x01);
-	frame.append(0xb7);
-	
-	int in_size = image.width() * image.height() * 3;
-	printf("Image size %d -> %d (1:%d)\n", in_size, out_size, in_size/out_size);
-	printf("PSNR = %4.2f\n", psnr(encoder_context->coded_frame->error[0]/(encoder_context->width*encoder_context->height*255.0*255.0)));
-
-	
-	avcodec_close(encoder_context);
-	av_free(encoder_context);
-	av_free(picture);
-}
-
-static void decodeFrame(const Array<uint8> & frame, CodecID format)
-{
-	AVCodec * decoder = avcodec_find_decoder(format);
-	if (decoder == NULL) {
-		printf("MPEG decoder not found.\n");
-		exit(1);
-	}
-
-	AVCodecContext * decoder_context = avcodec_alloc_context();
-	AVFrame * picture = avcodec_alloc_frame();
-	
-	if (decoder->capabilities & CODEC_CAP_TRUNCATED)
-		decoder_context->flags |= CODEC_FLAG_TRUNCATED; /* we do not send complete frames */
-	
-	
-	if (avcodec_open(decoder_context, decoder) < 0) {
-		printf("MPEG decoder initialization failed.\n");
-		exit(1);
-	}
-	
-	//memset(picture->data[0], 0, in_size / 2);
-	
-	int got_picture = 0;
-	int len = avcodec_decode_video(decoder_context, picture, &got_picture, frame.buffer(), frame.size());
-	
-	printf("decoded %d bytes\n", len);
-	
-	if (len < 0) {
-		printf("Error while decoding frame.\n");
-		exit(1);
-	}
-	
-	if (!got_picture) {
-		printf("Did not get any picture.\n");
-		exit(1);
-	}
-	
-	//nvDebugCheck(outbuf_size == len);
-	//nvDebugCheck(got_picture == true);
-
-	savePicture(picture, decoder_context->width, decoder_context->height);
-	
-	avcodec_close(decoder_context);
-	av_free(decoder_context);
-	av_free(picture);
-}
-
-
-
-int main(int argc, char *argv[])
-{
-	MyAssertHandler assertHandler;
-	MyMessageHandler messageHandler;
-
-	nv::Path input;
-	nv::Path output;
-
-	// Parse arguments.
-	for (int i = 1; i < argc; i++)
-	{
-		if (strcmp(argv[i], "-q") == 0 || strcmp(argv[i], "--quality") == 0)
-		{
-			if (i+1 < argc && argv[i+1][0] != '-')
-			{
-				s_quality = atof(argv[i+1]);
-				i++;
-			}
-		}
-			
-		else if (argv[i][0] != '-')
-		{
-			input = argv[i];
-
-			if (i+1 < argc && argv[i+1][0] != '-')
-			{
-				output = argv[i+1];
-				i++;
-			}
-			else
-			{
-				output.copy(input.str());
-				output.stripExtension();
-				output.append(".mpeg");
-			}
-
-			break;
-		}
-	}
-
-	printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007-2008\n\n");
-
-	if (input.isNull())
-	{
-		printf("usage: nvmpegcompress [options] infile [outfile]\n\n");
-		
-		return 1;
-	}
-
-	// Load image.
-	Image image;
-	if (!image.load(input))
-	{
-		fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
-		return 1;
-	}
-	
-	// Initialize codecs.
-	avcodec_init();
-	avcodec_register_all();
-
-	//CodecID format = CODEC_ID_MPEG1VIDEO;
-	CodecID format = CODEC_ID_MPEG2VIDEO;
-	//CodecID format = CODEC_ID_MJPEG;
-	//CodecID format = CODEC_ID_THEORA;
-	//CodecID format = CODEC_ID_H264;
-	
-	// Encode frame.
-	Array<uint8> frame;
-	encodeFrame(image, format, frame);
-
-	// Save resulting I-frame.
-	StdOutputStream outputStream(output.str());
-	if (outputStream.isError())
-	{
-		printf("Error opening '%s' for writing.\n", output.str());
-		return 1;
-	}
-
-	outputStream.serialize(frame.mutableBuffer(), frame.size());
-
-	//decodeFrame(frame, format);
-	
-	// @@ Compare image against original, and compute RMS.
-	
-	return 0;
-}
-
--- a/src/nvtt/tests/testsuite.cpp
+++ b/src/nvtt/tests/testsuite.cpp
@ -1,435 +0,0 @@
-// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#include <nvtt/nvtt.h>
-#include <nvimage/Image.h>
-#include <nvimage/ImageIO.h>
-#include <nvimage/BlockDXT.h>
-#include <nvimage/ColorBlock.h>
-#include <nvcore/Ptr.h>
-#include <nvcore/Debug.h>
-#include <nvcore/StrLib.h>
-#include <nvcore/StdStream.h>
-#include <nvcore/TextWriter.h>
-#include <nvcore/FileSystem.h>
-
-#include <stdlib.h> // free
-#include <string.h> // memcpy
-#include <time.h> // clock
-
-
-using namespace nv;
-
-// Kodak image set
-static const char * s_kodakImageSet[] = {
-	"kodim01.png",
-	"kodim02.png",
-	"kodim03.png",
-	"kodim04.png",
-	"kodim05.png",
-	"kodim06.png",
-	"kodim07.png",
-	"kodim08.png",
-	"kodim09.png",
-	"kodim10.png",
-	"kodim11.png",
-	"kodim12.png",
-	"kodim13.png",
-	"kodim14.png",
-	"kodim15.png",
-	"kodim16.png",
-	"kodim17.png",
-	"kodim18.png",
-	"kodim19.png",
-	"kodim20.png",
-	"kodim21.png",
-	"kodim22.png",
-	"kodim23.png",
-	"kodim24.png",
-};
-
-// Waterloo image set
-static const char * s_waterlooImageSet[] = {
-	"clegg.png",
-	"frymire.png",
-	"lena.png",
-	"monarch.png",
-	"peppers.png",
-	"sail.png",
-	"serrano.png",
-	"tulips.png",
-};
-
-// Epic image set
-static const char * s_epicImageSet[] = {
-	"Bradley1.png",
-	"Gradient.png",
-	"MoreRocks.png",
-	"Wall.png",
-	"Rainbow.png",
-	"Text.png",
-};
-
-// Farbrausch
-static const char * s_farbrauschImageSet[] = {
-	"t.2d.pn02.bmp",
-	"t.aircondition.01.bmp",
-	"t.bricks.02.bmp",
-	"t.bricks.05.bmp",
-	"t.concrete.cracked.01.bmp",
-	"t.envi.colored02.bmp",
-	"t.envi.colored03.bmp",
-	"t.font.01.bmp",
-	"t.sewers.01.bmp",
-	"t.train.03.bmp",
-	"t.yello.01.bmp",
-};
-
-struct ImageSet
-{
-	const char ** fileNames;
-	int fileCount;
-};
-
-static ImageSet s_imageSets[] = {
-	{s_kodakImageSet, sizeof(s_kodakImageSet)/sizeof(s_kodakImageSet[0])},
-	{s_waterlooImageSet, sizeof(s_waterlooImageSet)/sizeof(s_waterlooImageSet[0])},
-	{s_epicImageSet, sizeof(s_epicImageSet)/sizeof(s_epicImageSet[0])},
-	{s_farbrauschImageSet, sizeof(s_farbrauschImageSet)/sizeof(s_farbrauschImageSet[0])},
-};
-const int s_imageSetCount = sizeof(s_imageSets)/sizeof(s_imageSets[0]);
-
-
-struct MyOutputHandler : public nvtt::OutputHandler
-{
-	MyOutputHandler() : m_data(NULL), m_ptr(NULL) {}
-	~MyOutputHandler()
-	{
-		free(m_data);
-	}
-
-	virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel)
-	{
-		m_size = size;
-		m_width = width;
-		m_height = height;
-		free(m_data);
-		m_data = (unsigned char *)malloc(size);
-		m_ptr = m_data;
-	}
-	
-	virtual bool writeData(const void * data, int size)
-	{
-		memcpy(m_ptr, data, size);
-		m_ptr += size;
-		return true;
-	}
-
-	Image * decompress(nvtt::Format format)
-	{
-		int bw = (m_width + 3) / 4;
-		int bh = (m_height + 3) / 4;
-
-		AutoPtr<Image> img( new Image() );
-		img->allocate(m_width, m_height);
-
-		if (format == nvtt::Format_BC1)
-		{
-			BlockDXT1 * block = (BlockDXT1 *)m_data;
-
-			for (int y = 0; y < bh; y++)
-			{
-				for (int x = 0; x < bw; x++)
-				{
-					ColorBlock colors;
-					block->decodeBlock(&colors);
-
-					for (int yy = 0; yy < 4; yy++)
-					{
-						for (int xx = 0; xx < 4; xx++)
-						{
-							Color32 c = colors.color(xx, yy);
-
-							if (x * 4 + xx < m_width && y * 4 + yy < m_height)
-							{
-								img->pixel(x * 4 + xx, y * 4 + yy) = c;
-							}
-						}
-					}
-
-					block++;
-				}
-			}
-		}
-
-		return img.release();
-	}
-
-	int m_size;
-	int m_width;
-	int m_height;
-	unsigned char * m_data;
-	unsigned char * m_ptr;
-};
-
-
-float rmsError(const Image * a, const Image * b)
-{
-	nvCheck(a != NULL);
-	nvCheck(b != NULL);
-	nvCheck(a->width() == b->width());
-	nvCheck(a->height() == b->height());
-
-	int mse = 0;
-
-	const uint count = a->width() * a->height();
-
-	for (uint i = 0; i < count; i++)
-	{
-		Color32 c0 = a->pixel(i);
-		Color32 c1 = b->pixel(i);
-
-		int r = c0.r - c1.r;
-		int g = c0.g - c1.g;
-		int b = c0.b - c1.b;
-		//int a = c0.a - c1.a;
-
-		mse += r * r;
-		mse += g * g;
-		mse += b * b;
-	}
-
-	return sqrtf(float(mse) / count);
-}
-
-
-int main(int argc, char *argv[])
-{
-	const uint version = nvtt::version();
-	const uint major = version / 100;
-	const uint minor = version % 100;
-	
-	printf("NVIDIA Texture Tools %u.%u - Copyright NVIDIA Corporation 2007 - 2008\n\n", major, minor);
-	
-	int set = 0;
-	bool fast = false;
-	bool nocuda = false;
-	bool showHelp = false;
-	const char * basePath = "";
-	const char * outPath = "output";
-	const char * regressPath = NULL;
-	
-	// Parse arguments.
-	for (int i = 1; i < argc; i++)
-	{
-		if (strcmp("-set", argv[i]) == 0)
-		{
-			if (i+1 < argc && argv[i+1][0] != '-') {
-				set = atoi(argv[i+1]);
-				i++;
-			}
-		}
-		else if (strcmp("-fast", argv[i]) == 0)
-		{
-			fast = true;
-		}
-		else if (strcmp("-nocuda", argv[i]) == 0)
-		{
-			nocuda = true;
-		}
-		else if (strcmp("-help", argv[i]) == 0)
-		{
-			showHelp = true;
-		}
-		else if (strcmp("-path", argv[i]) == 0)
-		{
-			if (i+1 < argc && argv[i+1][0] != '-') {
-				basePath = argv[i+1];
-				i++;
-			}
-		}
-		else if (strcmp("-out", argv[i]) == 0)
-		{
-			if (i+1 < argc && argv[i+1][0] != '-') {
-				outPath = argv[i+1];
-				i++;
-			}
-		}
-		else if (strcmp("-regress", argv[i]) == 0)
-		{
-			if (i+1 < argc && argv[i+1][0] != '-') {
-				regressPath = argv[i+1];
-				i++;
-			}
-		}
-	}
-
-	if (showHelp)
-	{
-		printf("usage: nvtestsuite [options]\n\n");
-		
-		printf("Input options:\n");
-		printf("  -path <path>\tInput image path.\n");
-		printf("  -set [0:2]\tImage set.\n");
-		printf("  -regress <path>\tRegression directory.\n");
-
-		printf("Compression options:\n");
-		printf("  -fast          \tFast compression.\n");
-		printf("  -nocuda        \tDo not use cuda compressor.\n");
-		
-		printf("Output options:\n");
-		printf("  -out <path>    \tOutput directory.\n");
-
-		return 1;
-	}
-	
-	nvtt::InputOptions inputOptions;
-	inputOptions.setMipmapGeneration(false);
-
-	nvtt::CompressionOptions compressionOptions;
-	compressionOptions.setFormat(nvtt::Format_BC1);
-	if (fast)
-	{
-		compressionOptions.setQuality(nvtt::Quality_Fastest);
-	}
-	else
-	{
-		compressionOptions.setQuality(nvtt::Quality_Production);
-	}
-
-	nvtt::OutputOptions outputOptions;
-	outputOptions.setOutputHeader(false);
-
-	MyOutputHandler outputHandler;
-	outputOptions.setOutputHandler(&outputHandler);
-
-	nvtt::Context context;
-	context.enableCudaAcceleration(!nocuda);
-
-	FileSystem::changeDirectory(basePath);
-	FileSystem::createDirectory(outPath);
-
-	Path csvFileName;
-	csvFileName.format("%s/result.csv", outPath);
-	StdOutputStream csvStream(csvFileName);
-	TextWriter csvWriter(&csvStream);
-
-	float totalTime = 0;
-	float totalRMSE = 0;
-	int failedTests = 0;
-	float totalDiff = 0;
-
-	const char ** fileNames = s_imageSets[set].fileNames;
-	int fileCount = s_imageSets[set].fileCount;
-
-	for (int i = 0; i < fileCount; i++)
-	{
-		AutoPtr<Image> img( new Image() );
-		
-		if (!img->load(fileNames[i]))
-		{
-			printf("Input image '%s' not found.\n", fileNames[i]);
-			return EXIT_FAILURE;
-		}
-
-		inputOptions.setTextureLayout(nvtt::TextureType_2D, img->width(), img->height());
-		inputOptions.setMipmapData(img->pixels(), img->width(), img->height());
-
-		printf("Compressing: \t'%s'\n", fileNames[i]);
-
-		clock_t start = clock();
-
-		context.process(inputOptions, compressionOptions, outputOptions);
-
-		clock_t end = clock();
-		printf("  Time: \t%.3f sec\n", float(end-start) / CLOCKS_PER_SEC);
-		totalTime += float(end-start);
-
-		AutoPtr<Image> img_out( outputHandler.decompress(nvtt::Format_BC1) );
-
-		Path outputFileName;
-		outputFileName.format("%s/%s", outPath, fileNames[i]);
-		outputFileName.stripExtension();
-		outputFileName.append(".png");
-		if (!ImageIO::save(outputFileName, img_out.ptr()))
-		{
-			printf("Error saving file '%s'.\n", outputFileName.str());
-		}
-
-		float rmse = rmsError(img.ptr(), img_out.ptr());
-		totalRMSE += rmse;
-
-		printf("  RMSE:  \t%.4f\n", rmse);
-
-		// Output csv file
-		csvWriter << "\"" << fileNames[i] << "\"," << rmse << "\n";
-
-		if (regressPath != NULL)
-		{
-			Path regressFileName;
-			regressFileName.format("%s/%s", regressPath, fileNames[i]);
-			regressFileName.stripExtension();
-			regressFileName.append(".png");
-
-			AutoPtr<Image> img_reg( new Image() );
-			if (!img_reg->load(regressFileName.str()))
-			{
-				printf("Regression image '%s' not found.\n", regressFileName.str());
-				return EXIT_FAILURE;
-			}
-
-			float rmse_reg = rmsError(img.ptr(), img_reg.ptr());
-
-			float diff = rmse_reg - rmse;
-			totalDiff += diff;
-
-			const char * text = "PASSED";
-			if (equal(diff, 0)) text = "PASSED";
-			else if (diff < 0) {
-				text = "FAILED";
-				failedTests++;
-			}
-
-			printf("  Diff: \t%.4f (%s)\n", diff, text);
-		}
-
-		fflush(stdout);
-	}
-
-	totalRMSE /= fileCount;
-	totalDiff /= fileCount;
-
-	printf("Total Results:\n");
-	printf("  Total Time: \t%.3f sec\n", totalTime / CLOCKS_PER_SEC);
-	printf("  Average RMSE:\t%.4f\n", totalRMSE);
-
-	if (regressPath != NULL)
-	{
-		printf("Regression Results:\n");
-		printf("  Diff: %.4f\n", totalDiff);
-		printf("  %d/%d tests failed.\n", failedTests, fileCount);
-	}
-
-	return EXIT_SUCCESS;
-}
-
--- a/src/nvtt/tools/CMakeLists.txt
+++ b/src/nvtt/tools/CMakeLists.txt
@ -1,63 +0,0 @@
-
-
-ADD_EXECUTABLE(nvcompress compress.cpp cmdline.h)
-TARGET_LINK_LIBRARIES(nvcompress nvcore nvmath nvimage nvtt)
-
-ADD_EXECUTABLE(nvdecompress decompress.cpp cmdline.h)
-TARGET_LINK_LIBRARIES(nvdecompress nvcore nvmath nvimage)
-
-ADD_EXECUTABLE(nvddsinfo ddsinfo.cpp cmdline.h)
-TARGET_LINK_LIBRARIES(nvddsinfo nvcore nvmath nvimage)
-
-ADD_EXECUTABLE(nvimgdiff imgdiff.cpp cmdline.h)
-TARGET_LINK_LIBRARIES(nvimgdiff nvcore nvmath nvimage)
-
-ADD_EXECUTABLE(nvassemble assemble.cpp cmdline.h)
-TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage)
-
-ADD_EXECUTABLE(nvzoom resize.cpp cmdline.h)
-TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage)
-
-ADD_EXECUTABLE(nv-gnome-thumbnailer thumbnailer.cpp cmdline.h)
-TARGET_LINK_LIBRARIES(nv-gnome-thumbnailer nvcore nvmath nvimage)
-  
-INSTALL(TARGETS nvcompress nvdecompress nvddsinfo nvimgdiff nvassemble nvzoom nv-gnome-thumbnailer DESTINATION bin)
- 
- # Use gconftool-2 to install gnome thumbnailer
-FIND_PROGRAM(GCONFTOOL2 gconftool-2)
-
-IF(GCONFTOOL2)
-	CONFIGURE_FILE(nvtt-thumbnailer.schema.in ${CMAKE_CURRENT_BINARY_DIR}/nvtt-thumbnailer.schema)
-
- 	INSTALL(CODE "MESSAGE(STATUS \"Installing thumbnailer schema\")")
- 	#gconftool-2 --get-default-source
- 	INSTALL(CODE "EXECUTE_PROCESS(COMMAND ${GCONFTOOL2} --get-default-source OUTPUT_VARIABLE GCONF_CONFIG_SOURCE OUTPUT_STRIP_TRAILING_WHITESPACE)")
- 	INSTALL(CODE "set(ENV{GCONF_CONFIG_SOURCE} \"\${GCONF_CONFIG_SOURCE}\")")
- 	INSTALL(CODE "EXECUTE_PROCESS(COMMAND ${GCONFTOOL2} --makefile-install-rule ${CMAKE_CURRENT_BINARY_DIR}/nvtt-thumbnailer.schema)")
-ENDIF(GCONFTOOL2)
-
-# UI tools
-IF(QT4_FOUND) # AND NOT MSVC)
-	SET(QT_USE_QTOPENGL TRUE)
-	INCLUDE_DIRECTORIES(${QT_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
-	
-	SET(SRCS
-		ui/main.cpp
-		ui/configdialog.h
-		ui/configdialog.cpp)
-
-	SET(LIBS
-		nvtt
-		${QT_QTCORE_LIBRARY}
-		${QT_QTGUI_LIBRARY}
-		${QT_QTOPENGL_LIBRARY})
-
-	QT4_WRAP_UI(UICS ui/configdialog.ui)
-	QT4_WRAP_CPP(MOCS ui/configdialog.h)
-	#QT4_ADD_RESOURCES(RCCS ui/configdialog.rc)
-
-	ADD_EXECUTABLE(nvcompressui MACOSX_BUNDLE ${SRCS} ${UICS} ${MOCS})
-	TARGET_LINK_LIBRARIES(nvcompressui ${LIBS})
-
-ENDIF(QT4_FOUND) # AND NOT MSVC)
-
--- a/src/nvtt/tools/compress.cpp
+++ b/src/nvtt/tools/compress.cpp
@ -21,20 +21,20 @@
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 // OTHER DEALINGS IN THE SOFTWARE.

-#include "cmdline.h"
+#include <nvcore/StrLib.h>
+#include <nvcore/StdStream.h>
+
+#include <nvimage/Image.h>
+#include <nvimage/DirectDrawSurface.h>

 #include <nvtt/nvtt.h>

-#include <nvimage/Image.h>    // @@ It might be a good idea to use FreeImage directly instead of ImageIO.
-#include <nvimage/ImageIO.h>
-#include <nvimage/FloatImage.h>
-#include <nvimage/DirectDrawSurface.h>
+#include "cmdline.h"

-#include <nvcore/Ptr.h>
-#include <nvcore/StrLib.h>
-#include <nvcore/StdStream.h>
-#include <nvcore/FileSystem.h>
-#include <nvcore/Timer.h>
+#include <time.h> // clock
+
+//#define WINDOWS_LEAN_AND_MEAN
+//#include <windows.h> // TIMER


 struct MyOutputHandler : public nvtt::OutputHandler
@ -87,10 +87,7 @@ struct MyErrorHandler : public nvtt::ErrorHandler
 {
 	virtual void error(nvtt::Error e)
 	{
-#if _DEBUG
 		nvDebugBreak();
-#endif
-		printf("Error: '%s'\n", nvtt::errorString(e));
 	}
 };

@ -134,24 +131,18 @@ int main(int argc, char *argv[])
 	MyAssertHandler assertHandler;
 	MyMessageHandler messageHandler;

-	bool alpha = false;
 	bool normal = false;
 	bool color2normal = false;
 	bool wrapRepeat = false;
 	bool noMipmaps = false;
 	bool fast = false;
 	bool nocuda = false;
+	bool silent = false;
 	bool bc1n = false;
 	nvtt::Format format = nvtt::Format_BC1;
-	bool premultiplyAlpha = false;
-	nvtt::MipmapFilter mipmapFilter = nvtt::MipmapFilter_Box;
-	bool loadAsFloat = false;

 	const char * externalCompressor = NULL;

-	bool silent = false;
-	bool dds10 = false;
-
 	nv::Path input;
 	nv::Path output;

@ -163,10 +154,6 @@ int main(int argc, char *argv[])
 		if (strcmp("-color", argv[i]) == 0)
 		{
 		}
-		else if (strcmp("-alpha", argv[i]) == 0)
-		{
-			alpha = true;
-		}
 		else if (strcmp("-normal", argv[i]) == 0)
 		{
 			normal = true;
@ -186,23 +173,6 @@ int main(int argc, char *argv[])
 		{
 			noMipmaps = true;
 		}
-		else if (strcmp("-premula", argv[i]) == 0)
-		{
-			premultiplyAlpha = true;
-		}
-		else if (strcmp("-mipfilter", argv[i]) == 0)
-		{
-			if (i+1 == argc) break;
-			i++;
-
-			if (strcmp("box", argv[i]) == 0) mipmapFilter = nvtt::MipmapFilter_Box;
-			else if (strcmp("triangle", argv[i]) == 0) mipmapFilter = nvtt::MipmapFilter_Triangle;
-			else if (strcmp("kaiser", argv[i]) == 0) mipmapFilter = nvtt::MipmapFilter_Kaiser;
-		}
-		else if (strcmp("-float", argv[i]) == 0)
-		{
-			loadAsFloat = true;
-		}

 		// Compression options.
 		else if (strcmp("-fast", argv[i]) == 0)
@ -260,15 +230,11 @@ int main(int argc, char *argv[])
 			}
 		}

-		// Output options
+		// Misc options
 		else if (strcmp("-silent", argv[i]) == 0)
 		{
 			silent = true;
 		}
-		else if (strcmp("-dds10", argv[i]) == 0)
-		{
-			dds10 = true;
-		}

 		else if (argv[i][0] != '-')
 		{
@ -288,27 +254,19 @@ int main(int argc, char *argv[])
 		}
 	}

-	const uint version = nvtt::version();
-	const uint major = version / 100;
-	const uint minor = version % 100;
-	
-
-	printf("NVIDIA Texture Tools %u.%u - Copyright NVIDIA Corporation 2007\n\n", major, minor);
+	printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");

 	if (input.isNull())
 	{
 		printf("usage: nvcompress [options] infile [outfile]\n\n");
 		
 		printf("Input options:\n");
-		printf("  -color     \tThe input image is a color map (default).\n");
-		printf("  -alpha     \tThe input image has an alpha channel used for transparency.\n");
-		printf("  -normal    \tThe input image is a normal map.\n");
-		printf("  -tonormal  \tConvert input to normal map.\n");
-		printf("  -clamp     \tClamp wrapping mode (default).\n");
-		printf("  -repeat    \tRepeat wrapping mode.\n");
-		printf("  -nomips    \tDisable mipmap generation.\n");
-		printf("  -premula   \tPremultiply alpha into color channel.\n");
-		printf("  -mipfilter \tMipmap filter. One of the following: box, triangle, kaiser.\n\n");
+		printf("  -color   \tThe input image is a color map (default).\n");
+		printf("  -normal  \tThe input image is a normal map.\n");
+		printf("  -tonormal\tConvert input to normal map.\n");
+		printf("  -clamp   \tClamp wrapping mode (default).\n");
+		printf("  -repeat  \tRepeat wrapping mode.\n");
+		printf("  -nomips  \tDisable mipmap generation.\n\n");

 		printf("Compression options:\n");
 		printf("  -fast    \tFast compression.\n");
@ -323,19 +281,10 @@ int main(int argc, char *argv[])
 		printf("  -bc4     \tBC4 format (ATI1)\n");
 		printf("  -bc5     \tBC5 format (3Dc/ATI2)\n\n");
 		
-		printf("Output options:\n");
-		printf("  -silent  \tDo not output progress messages\n");
-		printf("  -dds10   \tUse DirectX 10 DDS format\n\n");
-
 		return 1;
 	}

-	// Make sure input file exists.
-	if (!nv::FileSystem::exists(input.str()))
-	{
-		fprintf(stderr, "The file '%s' does not exist.\n", input.str());
-		return 1;
-	}
+	// @@ Make sure input file exists.
 	
 	// Set input options.
 	nvtt::InputOptions inputOptions;
@ -377,7 +326,7 @@ int main(int argc, char *argv[])
 		{
 			for (uint m = 0; m < mipmapCount; m++)
 			{
-				dds.mipmap(&mipmap, f, m);	// @@ Load as float.
+				dds.mipmap(&mipmap, f, m);
 				
 				inputOptions.setMipmapData(mipmap.pixels(), mipmap.width(), mipmap.height(), 1, f, m);
 			}
@ -385,42 +334,16 @@ int main(int argc, char *argv[])
 	}
 	else
 	{
-		if (nv::strCaseCmp(input.extension(), ".exr") == 0)
+		// Regular image.
+		nv::Image image;
+		if (!image.load(input))
 		{
-			loadAsFloat = true;
-		}
-
-		if (loadAsFloat)
-		{
-			nv::AutoPtr<nv::FloatImage> image(nv::ImageIO::loadFloat(input));
-
-			if (image == NULL)
-			{
-				fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
-				return 1;
-			}
-			
-			inputOptions.setFormat(nvtt::InputFormat_RGBA_32F);
-			inputOptions.setTextureLayout(nvtt::TextureType_2D, image->width(), image->height());
-
-			for (uint i = 0; i < image->componentNum(); i++)
-			{
-				inputOptions.setMipmapChannelData(image->channel(i), i, image->width(), image->height());
-			}
-		}
-		else
-		{
-			// Regular image.
-			nv::Image image;
-			if (!image.load(input))
-			{
-				fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
-				return 1;
-			}
-			
-			inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height());
-			inputOptions.setMipmapData(image.pixels(), image.width(), image.height());
+			fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
+			return 1;
 		}
+		
+		inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height());
+		inputOptions.setMipmapData(image.pixels(), image.width(), image.height());
 	}

 	if (wrapRepeat)
@ -432,15 +355,6 @@ int main(int argc, char *argv[])
 		inputOptions.setWrapMode(nvtt::WrapMode_Clamp);
 	}

-	if (alpha)
-	{
-		inputOptions.setAlphaMode(nvtt::AlphaMode_Transparency);
-	}
-	else
-	{
-		inputOptions.setAlphaMode(nvtt::AlphaMode_None);
-	}
-
 	if (normal)
 	{
 		setNormalMap(inputOptions);
@ -459,23 +373,8 @@ int main(int argc, char *argv[])
 		inputOptions.setMipmapGeneration(false);
 	}

-	if (premultiplyAlpha)
-	{
-		inputOptions.setPremultiplyAlpha(true);
-		inputOptions.setAlphaMode(nvtt::AlphaMode_Premultiplied);
-	}
-	
-	inputOptions.setMipmapFilter(mipmapFilter);
-
 	nvtt::CompressionOptions compressionOptions;
 	compressionOptions.setFormat(format);
-
-	/*if (format == nvtt::Format_RGBA)
-	{
-		compressionOptions.setPixelType(nvtt::PixelType_Float);
-		compressionOptions.setPixelFormat(16, 16, 16, 16);
-	}*/
-
 	if (fast)
 	{
 		compressionOptions.setQuality(nvtt::Quality_Fastest);
@ -506,20 +405,20 @@ int main(int argc, char *argv[])
 		return 1;
 	}

-	nvtt::Context context;
-	context.enableCudaAcceleration(!nocuda);
+	nvtt::Compressor compressor;
+	compressor.enableCudaAcceleration(!nocuda);

 	printf("CUDA acceleration ");
-	if (context.isCudaAccelerationEnabled())
+	if (compressor.isCudaAccelerationEnabled())
 	{
 		printf("ENABLED\n\n");
 	}
 	else
 	{
 		printf("DISABLED\n\n");
-	}
+	}	
 	
-	outputHandler.setTotal(context.estimateSize(inputOptions, compressionOptions));
+	outputHandler.setTotal(compressor.estimateSize(inputOptions, compressionOptions));
 	outputHandler.setDisplayProgress(!silent);

 	nvtt::OutputOptions outputOptions;
@ -527,24 +426,30 @@ int main(int argc, char *argv[])
 	outputOptions.setOutputHandler(&outputHandler);
 	outputOptions.setErrorHandler(&errorHandler);
 	
-	if (dds10)
-	{
-		outputOptions.setContainer(nvtt::Container_DDS10);
-	}
-
 //	printf("Press ENTER.\n");
 //	fflush(stdout);
 //	getchar();

-	Timer timer;
-	timer.start();
-	
-	bool success = context.process(inputOptions, compressionOptions, outputOptions);
+/*	LARGE_INTEGER temp;
+	QueryPerformanceFrequency((LARGE_INTEGER*) &temp);
+	double freq = ((double) temp.QuadPart) / 1000.0;

-	if (success)
-	{
-		printf("\rtime taken: %.3f seconds\n", float(timer.elapsed()) / 1000.0f);
-	}
+    LARGE_INTEGER start_time;
+    QueryPerformanceCounter((LARGE_INTEGER*) &start_time);
+*/
+	clock_t start = clock();
+	
+	compressor.process(inputOptions, compressionOptions, outputOptions);
+/*
+	LARGE_INTEGER end_time;
+	QueryPerformanceCounter((LARGE_INTEGER*) &end_time);
+
+	float diff_time = (float) (((double) end_time.QuadPart - (double) start_time.QuadPart) / freq);
+	printf("\rtime taken: %.3f seconds\n", diff_time/1000);
+*/
+
+	clock_t end = clock();
+	printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
 	
 	return 0;
 }
--- a/src/nvtt/tools/ui/configdialog.cpp
+++ b/src/nvtt/tools/ui/configdialog.cpp
--- a/src/nvtt/tools/ui/configdialog.h
+++ b/src/nvtt/tools/ui/configdialog.h
--- a/src/nvtt/tools/ui/configdialog.ui
+++ b/src/nvtt/tools/ui/configdialog.ui
@ -12,6 +12,9 @@
  <property name="windowTitle" >
   <string>NVIDIA Texture Tools</string>
  </property>
+  <property name="windowIcon" >
+   <iconset/>
+  </property>
  <property name="sizeGripEnabled" >
   <bool>true</bool>
  </property>
--- a/src/nvtt/tools/decompress.cpp
+++ b/src/nvtt/tools/decompress.cpp
@ -31,161 +31,41 @@

 #include "cmdline.h"

-#include <time.h> // clock
-
 int main(int argc, char *argv[])
 {
 	MyAssertHandler assertHandler;
 	MyMessageHandler messageHandler;

-	bool forcenormal = false;
-	bool mipmaps = false;
-	bool faces = false;
-	bool savePNG = false;
-
-	nv::Path input;
-	nv::Path output;
-
-	// Parse arguments.
-	for (int i = 1; i < argc; i++)
- 	{
-		if (strcmp("-forcenormal", argv[i]) == 0)
-		{
-			forcenormal = true;
-		}
-		else if (strcmp("-mipmaps", argv[i]) == 0)
-		{
-			mipmaps = true;
-		}
-		else if (strcmp("-faces", argv[i]) == 0)
-		{
-			faces = true;
-		}
-		else if (strcmp("-format", argv[i]) == 0)
-		{
-			if (i+1 == argc) break;
-			i++;
-
-#ifdef HAVE_PNG
-			if (strcmp("png", argv[i]) == 0) savePNG = true;
-			else 
-#endif
-			if (strcmp("tga", argv[i]) == 0) savePNG = false;
-			else
-			{
-				fprintf(stderr, "Unsupported output format '%s', defaulting to 'tga'.\n", argv[i]);
-				savePNG = false;
-			}
-		}
-		else if (argv[i][0] != '-')
-		{
-			input = argv[i];
-
-			if (i+1 < argc && argv[i+1][0] != '-')
-			{
-				output = argv[i+1];
-			}
-			else
-			{
-				output.copy(input.str());
-			}
-
-			break;
-		}
-	}
-	
-	printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
-
-	if (input.isNull())
+	if (argc != 2)
 	{
-		printf("usage: nvdecompress [options] infile [outfile]\n\n");
-
-		printf("Note: the .tga or .png extension is forced on outfile\n\n");
-
-		printf("Input options:\n");
-		printf("  -forcenormal    \tThe input image is a normal map.\n");
-		printf("  -mipmaps        \tDecompress all mipmaps.\n");
-		printf("  -faces          \tDecompress all faces.\n");
-		printf("  -format <format>\tOutput format ('tga' or 'png').\n");
-
- 		return 1;
- 	}
+		printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
+		printf("usage: nvdecompress 'ddsfile'\n\n");
+		return 1;
+	}

 	// Load surface.
-	nv::DirectDrawSurface dds(input);
+	nv::DirectDrawSurface dds(argv[1]);
 	if (!dds.isValid())
 	{
-		fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str());
-		return 1;
-	}
-
-	if (!dds.isSupported() || dds.isTexture3D())
-	{
-		fprintf(stderr, "The file '%s' is not a supported DDS file.\n", input.str());
+		printf("The file '%s' is not a valid DDS file.\n", argv[1]);
 		return 1;
 	}
 	
-	uint faceCount;
-	if (dds.isTexture2D())
-	{
-		faceCount = 1;
-	}
-	else
-	{
-		nvCheck(dds.isTextureCube());
-		faceCount = 6;
+	nv::Path name(argv[1]);
+	name.stripExtension();
+	name.append(".tga");
+	
+	nv::StdOutputStream stream(name.str());
+	if (stream.isError()) {
+		printf("Error opening '%s' for writting\n", name.str());
+		return 1;
 	}
 	
-	uint mipmapCount = dds.mipmapCount();
-	
-	clock_t start = clock();
- 
-	// apply arguments
-	if (forcenormal)
-	{
-		dds.setNormalFlag(true);
-	}
-	if (!faces)
-	{
-		faceCount = 1;
-	}
-	if (!mipmaps)
-	{
-		mipmapCount = 1;
-	}
+	// @@ TODO: Add command line options to output mipmaps, cubemap faces, etc.
+	nv::Image img;
+	dds.mipmap(&img, 0, 0); // get first image
+	nv::ImageIO::saveTGA(stream, &img);

-	nv::Image mipmap;	
-	nv::Path name;
-
-	// strip extension, we force the tga extension
-	output.stripExtension();
-
-	// extract faces and mipmaps
-	for (uint f = 0; f < faceCount; f++)
-	{
-		for (uint m = 0; m < mipmapCount; m++)
-		{
-			dds.mipmap(&mipmap, f, m);
-	
-			// set output filename, if we are doing faces and/or mipmaps
-			name.copy(output);
-			if (faces) name.appendFormat("_face%d", f);
-			if (mipmaps) name.appendFormat("_mipmap%d", m);
-			name.append(savePNG ? ".png" : ".tga");
-			
-			nv::StdOutputStream stream(name.str());
-			if (stream.isError()) {
-				fprintf(stderr, "Error opening '%s' for writting\n", name.str());
-				return 1;
-			}
-			
-			nv::ImageIO::save(name, stream, &mipmap);
-		}
-	}
-
-	clock_t end = clock();
-	printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
-	
 	return 0;
 }

--- a/src/nvtt/tools/ui/main.cpp
+++ b/src/nvtt/tools/ui/main.cpp
--- a/src/nvtt/tools/nvtt-thumbnailer.schema.in
+++ b/src/nvtt/tools/nvtt-thumbnailer.schema.in
@ -1,26 +0,0 @@
-<gconfschemafile>
-  <schemalist>
-<schema>
-<key>/schemas/desktop/gnome/thumbnailers/image@x-dds/enable</key>
-<applyto>/desktop/gnome/thumbnailers/image@x-dds/enable</applyto>
-<owner>nvtt-thumbnailer</owner>
-<type>bool</type>
-<default>true</default>
-<locale name="C">
-<short></short>
-<long></long>
-</locale>
-</schema>
-<schema>
-<key>/schemas/desktop/gnome/thumbnailers/image@x-dds/command</key>
-<applyto>/desktop/gnome/thumbnailers/image@x-dds/command</applyto>
-<owner>nvtt-thumbnailer</owner>
-<type>string</type>
-<default>@CMAKE_INSTALL_PREFIX@/bin/nv-gnome-thumbnailer -s %s %i %o</default>
-<locale name="C">
-<short></short>
-<long></long>
-</locale>
-</schema>
-  </schemalist>
-</gconfschemafile>
--- a/src/nvtt/tools/resize.cpp
+++ b/src/nvtt/tools/resize.cpp
@ -176,7 +176,7 @@ int main(int argc, char *argv[])
 	result->setFormat(nv::Image::Format_ARGB);

 	nv::StdOutputStream stream(output);
-	nv::ImageIO::save(output, stream, result.ptr());
+	nv::ImageIO::saveTGA(stream, result.ptr());	// @@ Add generic save function. Add support for png too.
 	
 	return 0;
 }
--- a/src/nvtt/tools/thumbnailer.cpp
+++ b/src/nvtt/tools/thumbnailer.cpp
@ -1,158 +0,0 @@
-// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#include <nvcore/Ptr.h>
-#include <nvcore/StrLib.h>
-#include <nvcore/StdStream.h>
-#include <nvcore/Containers.h>
-
-#include <nvimage/Image.h>
-#include <nvimage/ImageIO.h>
-#include <nvimage/FloatImage.h>
-#include <nvimage/Filter.h>
-#include <nvimage/DirectDrawSurface.h>
-
-#include <nvmath/Color.h>
-#include <nvmath/Vector.h>
-
-#include <math.h>
-
-#include "cmdline.h"
-
-static bool loadImage(nv::Image & image, const char * fileName)
-{
-	if (nv::strCaseCmp(nv::Path::extension(fileName), ".dds") == 0)
-	{
-		nv::DirectDrawSurface dds(fileName);
-		if (!dds.isValid())
-		{
-			fprintf(stderr, "The file '%s' is not a valid DDS file.\n", fileName);
-			return false;
-		}
-		
-		dds.mipmap(&image, 0, 0); // get first image
-	}
-	else
-	{
-		// Regular image.
-		if (!image.load(fileName))
-		{
-			fprintf(stderr, "The file '%s' is not a supported image type.\n", fileName);
-			return false;
-		}
-	}
-
-	return true;
-}
-
-
-int main(int argc, char *argv[])
-{
-	//MyAssertHandler assertHandler;
-	MyMessageHandler messageHandler;
-
-	float gamma = 2.2f;
-	nv::Path input;
-	nv::Path output;
-	int size = 128;
-	
-	// Parse arguments.
-	for (int i = 1; i < argc; i++)
-	{
-		// Input options.
-		if (strcmp("-s", argv[i]) == 0)
-		{
-			if (i+1 < argc && argv[i+1][0] != '-') {
-				size = (int)atoi(argv[i+1]);
-				i++;
-			}
-		}
-		else if (argv[i][0] != '-')
-		{
-			input = argv[i];
-
-			if (i+1 < argc && argv[i+1][0] != '-') {
-				output = argv[i+1];
-			}
-			else
-			{
-				fprintf(stderr, "No output filename.\n");
-				return 1;
-			}
-
-			break;
-		}
-	}
-
-	if (input.isNull() || output.isNull())
-	{
-		printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");	
-		
-		printf("usage: nv-gnome-thumbnailer [options] input output\n\n");
-		
-		printf("Options:\n");
-		printf("  -s size\tThumbnail size (default = 128)\n");
-
-		return 1;
-	}
-	
-	nv::Image image;
-	if (!loadImage(image, input)) return 1;
-
-	nv::ImageIO::ImageMetaData metaData;
-	metaData.tagMap.add("Thumb::Image::Width", nv::StringBuilder().number (image.width()));
-	metaData.tagMap.add("Thumb::Image::Height", nv::StringBuilder().number (image.height()));
-
-	if ((image.width() > size) || (image.height() > size))
-	{
-		nv::FloatImage fimage(&image);
-		fimage.toLinear(0, 3, gamma);
-
-		uint thumbW, thumbH;
-		if (image.width() > image.height())
-		{
-			thumbW = size;
-			thumbH = uint ((float (image.height()) / float (image.width())) * size);
-		}
-		else
-		{
-			thumbW = uint ((float (image.width()) / float (image.height())) * size);
-			thumbH = size;
-		}
-		nv::AutoPtr<nv::FloatImage> fresult(fimage.resize(nv::BoxFilter(), thumbW, thumbH, nv::FloatImage::WrapMode_Clamp));
-		
-		nv::AutoPtr<nv::Image> result(fresult->createImageGammaCorrect(gamma));
-		result->setFormat(nv::Image::Format_ARGB);
-
-		nv::StdOutputStream stream(output);
-		nv::ImageIO::save(output, stream, result.ptr(), &metaData);
-	}
-	else
-	{
-		nv::StdOutputStream stream(output);
-		nv::ImageIO::save(output, stream, &image, &metaData);
-	}
-	
-	return 0;
-}
-