Tag 2.0.8 for release.

2010-05-14 18:01:41 +00:00
parent f6a39d6eab
commit eb01ca604f
375 changed files with 12760 additions and 28091 deletions
--- a/src/nvtt/CMakeLists.txt
+++ b/src/nvtt/CMakeLists.txt
@ -5,17 +5,14 @@ ADD_SUBDIRECTORY(squish)
 SET(NVTT_SRCS
 	nvtt.h 
 	nvtt.cpp
-	Context.h
-	Context.cpp
+	Compressor.h
+	Compressor.cpp
 	nvtt_wrapper.h
 	nvtt_wrapper.cpp
-	Compressor.h
-	CompressorDXT.h
-	CompressorDXT.cpp
-	CompressorRGB.h
-	CompressorRGB.cpp
-	CompressorRGBE.h
-	CompressorRGBE.cpp
+	CompressDXT.h
+	CompressDXT.cpp
+	CompressRGB.h
+	CompressRGB.cpp
 	QuickCompressDXT.h
 	QuickCompressDXT.cpp
 	OptimalCompressDXT.h
@ -27,27 +24,27 @@ SET(NVTT_SRCS
 	InputOptions.cpp
 	OutputOptions.h
 	OutputOptions.cpp
-	TexImage.h TexImage.cpp
 	cuda/CudaUtils.h
 	cuda/CudaUtils.cpp
 	cuda/CudaMath.h
-	cuda/BitmapTable.h
-	cuda/CudaCompressorDXT.h
-	cuda/CudaCompressorDXT.cpp)
+	cuda/Bitmaps.h
+	cuda/CudaCompressDXT.h
+	cuda/CudaCompressDXT.cpp)

-IF (CUDA_FOUND)
+IF(CUDA_FOUND)
 	ADD_DEFINITIONS(-DHAVE_CUDA)
-	CUDA_COMPILE(CUDA_SRCS cuda/CompressKernel.cu)
+	WRAP_CUDA(CUDA_SRCS cuda/CompressKernel.cu)
 	SET(NVTT_SRCS ${NVTT_SRCS} ${CUDA_SRCS})
 	SET(LIBS ${LIBS} ${CUDA_LIBRARIES})
-	INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS})
-ENDIF (CUDA_FOUND)
+	INCLUDE_DIRECTORIES(${CUDA_INCLUDE_PATH})
+ENDIF(CUDA_FOUND)

 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

 ADD_DEFINITIONS(-DNVTT_EXPORTS)

-IF(NVTT_SHARED)	
+IF(NVTT_SHARED)
+	ADD_DEFINITIONS(-DNVTT_SHARED=1)
 	ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS})
 ELSE(NVTT_SHARED)
 	ADD_LIBRARY(nvtt ${NVTT_SRCS})
@ -63,5 +60,54 @@ INSTALL(TARGETS nvtt
 INSTALL(FILES nvtt.h DESTINATION include/nvtt)


-ADD_SUBDIRECTORY(tools)
-ADD_SUBDIRECTORY(tests)
+
+# Command-line tools and the filter test; every target also lists tools/cmdline.h as a source.
+ADD_EXECUTABLE(nvcompress tools/compress.cpp tools/cmdline.h)
+TARGET_LINK_LIBRARIES(nvcompress nvcore nvmath nvimage nvtt) # the only target in this group linked against nvtt
+
+ADD_EXECUTABLE(nvdecompress tools/decompress.cpp tools/cmdline.h)
+TARGET_LINK_LIBRARIES(nvdecompress nvcore nvmath nvimage)
+
+ADD_EXECUTABLE(nvddsinfo tools/ddsinfo.cpp tools/cmdline.h)
+TARGET_LINK_LIBRARIES(nvddsinfo nvcore nvmath nvimage)
+
+ADD_EXECUTABLE(nvimgdiff tools/imgdiff.cpp tools/cmdline.h)
+TARGET_LINK_LIBRARIES(nvimgdiff nvcore nvmath nvimage)
+
+ADD_EXECUTABLE(nvassemble tools/assemble.cpp tools/cmdline.h)
+TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage)
+
+ADD_EXECUTABLE(filtertest tests/filtertest.cpp tools/cmdline.h) # built from tests/, unlike the other targets here
+TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage)
+
+ADD_EXECUTABLE(nvzoom tools/resize.cpp tools/cmdline.h) # target name is nvzoom although the source file is resize.cpp
+TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage)
+
+INSTALL(TARGETS nvcompress nvdecompress nvddsinfo nvimgdiff nvassemble nvzoom DESTINATION bin) # filtertest is not part of this install list
+
+# UI tools: nvcompressui is configured only when QT4_FOUND is set and the compiler is not MSVC.
+IF(QT4_FOUND AND NOT MSVC)
+	SET(QT_USE_QTOPENGL TRUE)
+	INCLUDE_DIRECTORIES(${QT_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
+	
+	SET(SRCS
+		tools/main.cpp
+		tools/configdialog.h
+		tools/configdialog.cpp)
+
+	SET(LIBS # NOTE(review): replaces the LIBS value assembled earlier in this file (e.g. the CUDA libraries) - confirm nothing later relies on it
+		nvtt
+		${QT_QTCORE_LIBRARY}
+		${QT_QTGUI_LIBRARY}
+		${QT_QTOPENGL_LIBRARY})
+
+	QT4_WRAP_UI(UICS tools/configdialog.ui)
+	QT4_WRAP_CPP(MOCS tools/configdialog.h)
+	#QT4_ADD_RESOURCES(RCCS tools/configdialog.rc)
+
+	ADD_EXECUTABLE(nvcompressui MACOSX_BUNDLE ${SRCS} ${UICS} ${MOCS}) # sources plus the generated UI and moc outputs from the two wrap calls above
+	TARGET_LINK_LIBRARIES(nvcompressui ${LIBS})
+
+ENDIF(QT4_FOUND AND NOT MSVC)
+
+
--- a/src/nvtt/CompressDXT.cpp
+++ b/src/nvtt/CompressDXT.cpp
@ -0,0 +1,597 @@
+// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
+// 
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+// 
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+
+#include <nvcore/Memory.h>
+
+#include <nvimage/Image.h>
+#include <nvimage/ColorBlock.h>
+#include <nvimage/BlockDXT.h>
+
+#include "nvtt.h"
+#include "CompressDXT.h"
+#include "QuickCompressDXT.h"
+#include "OptimalCompressDXT.h"
+#include "CompressionOptions.h"
+#include "OutputOptions.h"
+
+// squish
+#include "squish/colourset.h"
+//#include "squish/clusterfit.h"
+#include "squish/fastclusterfit.h"
+#include "squish/weightedclusterfit.h"
+
+
+// s3_quant
+#if defined(HAVE_S3QUANT)
+#include "s3tc/s3_quant.h"
+#endif
+
+// ati tc
+#if defined(HAVE_ATITC)
+#include "atitc/ATI_Compress.h"
+#endif
+
+//#include <time.h>
+
+using namespace nv;
+using namespace nvtt;
+
+
+// Starts with no image attached. The compress methods dereference m_image,
+// so setImage() has to be called before any of them.
+nv::FastCompressor::FastCompressor()
+{
+	m_image = NULL;
+	m_alphaMode = AlphaMode_None;
+}
+
+// Nothing to release: the compressor only keeps the pointer it was handed.
+nv::FastCompressor::~FastCompressor()
+{
+}
+
+// Remembers the source image (pointer only, no copy) and the alpha mode.
+void nv::FastCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
+{
+	m_alphaMode = alphaMode;
+	m_image = image;
+}
+
+// Encodes the current image with QuickCompress::compressDXT1, block by block,
+// and passes every encoded block to the output handler when one is set.
+void nv::FastCompressor::compressDXT1(const OutputOptions::Private & outputOptions)
+{
+	const uint width = m_image->width();
+	const uint height = m_image->height();
+
+	ColorBlock pixels;
+	BlockDXT1 encoded;
+
+	// Visit the image in steps of four pixels along both axes, rows outermost.
+	for (uint by = 0; by < height; by += 4)
+	{
+		for (uint bx = 0; bx < width; bx += 4)
+		{
+			pixels.init(m_image, bx, by);
+			QuickCompress::compressDXT1(pixels, &encoded);
+
+			// Without a handler the encoded block is simply dropped.
+			if (outputOptions.outputHandler == NULL) continue;
+			outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
+		}
+	}
+}
+
+
+// Encodes the current image with QuickCompress::compressDXT1a, block by block,
+// and passes every encoded block to the output handler when one is set.
+void nv::FastCompressor::compressDXT1a(const OutputOptions::Private & outputOptions)
+{
+	const uint width = m_image->width();
+	const uint height = m_image->height();
+
+	ColorBlock pixels;
+	BlockDXT1 encoded;
+
+	// Visit the image in steps of four pixels along both axes, rows outermost.
+	for (uint by = 0; by < height; by += 4)
+	{
+		for (uint bx = 0; bx < width; bx += 4)
+		{
+			pixels.init(m_image, bx, by);
+			QuickCompress::compressDXT1a(pixels, &encoded);
+
+			// Without a handler the encoded block is simply dropped.
+			if (outputOptions.outputHandler == NULL) continue;
+			outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
+		}
+	}
+}
+
+
+// Encodes the current image with QuickCompress::compressDXT3, block by block,
+// and passes every encoded block to the output handler when one is set.
+void nv::FastCompressor::compressDXT3(const nvtt::OutputOptions::Private & outputOptions)
+{
+	const uint width = m_image->width();
+	const uint height = m_image->height();
+
+	ColorBlock pixels;
+	BlockDXT3 encoded;
+
+	// Visit the image in steps of four pixels along both axes, rows outermost.
+	for (uint by = 0; by < height; by += 4)
+	{
+		for (uint bx = 0; bx < width; bx += 4)
+		{
+			pixels.init(m_image, bx, by);
+			QuickCompress::compressDXT3(pixels, &encoded);
+
+			// Without a handler the encoded block is simply dropped.
+			if (outputOptions.outputHandler == NULL) continue;
+			outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
+		}
+	}
+}
+
+
+// Encodes the current image with QuickCompress::compressDXT5, block by block,
+// and passes every encoded block to the output handler when one is set.
+void nv::FastCompressor::compressDXT5(const nvtt::OutputOptions::Private & outputOptions)
+{
+	const uint width = m_image->width();
+	const uint height = m_image->height();
+
+	ColorBlock pixels;
+	BlockDXT5 encoded;
+
+	// Visit the image in steps of four pixels along both axes, rows outermost.
+	for (uint by = 0; by < height; by += 4)
+	{
+		for (uint bx = 0; bx < width; bx += 4)
+		{
+			pixels.init(m_image, bx, by);
+			QuickCompress::compressDXT5(pixels, &encoded, 0);
+
+			// Without a handler the encoded block is simply dropped.
+			if (outputOptions.outputHandler == NULL) continue;
+			outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
+		}
+	}
+}
+
+
+// Same as compressDXT5, except that each block is passed through
+// ColorBlock::swizzleDXT5n() before it is encoded.
+void nv::FastCompressor::compressDXT5n(const nvtt::OutputOptions::Private & outputOptions)
+{
+	const uint width = m_image->width();
+	const uint height = m_image->height();
+
+	ColorBlock pixels;
+	BlockDXT5 encoded;
+
+	// Visit the image in steps of four pixels along both axes, rows outermost.
+	for (uint by = 0; by < height; by += 4)
+	{
+		for (uint bx = 0; bx < width; bx += 4)
+		{
+			pixels.init(m_image, bx, by);
+			pixels.swizzleDXT5n();
+			QuickCompress::compressDXT5(pixels, &encoded, 0);
+
+			// Without a handler the encoded block is simply dropped.
+			if (outputOptions.outputHandler == NULL) continue;
+			outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
+		}
+	}
+}
+
+
+// Starts with no image attached. The compress methods dereference m_image,
+// so setImage() has to be called before any of them.
+nv::SlowCompressor::SlowCompressor()
+{
+	m_image = NULL;
+	m_alphaMode = AlphaMode_None;
+}
+
+// Nothing to release: the compressor only keeps the pointer it was handed.
+nv::SlowCompressor::~SlowCompressor()
+{
+}
+
+// Remembers the source image (pointer only, no copy) and the alpha mode.
+void nv::SlowCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
+{
+	m_alphaMode = alphaMode;
+	m_image = image;
+}
+
+// Encodes the current image as DXT1. Single-colour blocks go through
+// OptimalCompress::compressDXT1; every other block is fitted with squish's
+// WeightedClusterFit using the colour weights from compressionOptions.
+void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
+{
+	const uint width = m_image->width();
+	const uint height = m_image->height();
+
+	ColorBlock pixels;
+	BlockDXT1 encoded;
+
+	// One fitter serves all blocks; only its colour set changes per block.
+	squish::WeightedClusterFit clusterFit;
+	clusterFit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
+
+	for (uint by = 0; by < height; by += 4)
+	{
+		for (uint bx = 0; bx < width; bx += 4)
+		{
+			pixels.init(m_image, bx, by);
+
+			if (!pixels.isSingleColor())
+			{
+				squish::ColourSet colours((uint8 *)pixels.colors(), 0, true);
+				clusterFit.SetColourSet(&colours, squish::kDxt1);
+				clusterFit.Compress(&encoded);
+			}
+			else
+			{
+				OptimalCompress::compressDXT1(pixels.color(0), &encoded);
+			}
+
+			// Without a handler the encoded block is simply dropped.
+			if (outputOptions.outputHandler == NULL) continue;
+			outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
+		}
+	}
+}
+
+
+// Encodes the current image as DXT1 with 1-bit alpha. A texel counts as
+// transparent when its alpha is below 128. Blocks that are entirely
+// transparent, or opaque and single-coloured, go through
+// OptimalCompress::compressDXT1a; the rest are fitted with WeightedClusterFit.
+void nv::SlowCompressor::compressDXT1a(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
+{
+	const uint width = m_image->width();
+	const uint height = m_image->height();
+
+	ColorBlock pixels;
+	BlockDXT1 encoded;
+
+	squish::WeightedClusterFit clusterFit;
+	clusterFit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
+
+	for (uint by = 0; by < height; by += 4)
+	{
+		for (uint bx = 0; bx < width; bx += 4)
+		{
+			pixels.init(m_image, bx, by);
+
+			// Count the texels of this block that fall below the alpha threshold.
+			uint transparentCount = 0;
+			for (uint i = 0; i < 16; i++)
+			{
+				if (pixels.color(i).a < 128) transparentCount++;
+			}
+			const bool anyAlpha = (transparentCount != 0);
+			const bool allAlpha = (transparentCount == 16);
+
+			// '&&' binds tighter than '||'; the parentheses only spell that out.
+			if ((!anyAlpha && pixels.isSingleColor()) || allAlpha)
+			{
+				OptimalCompress::compressDXT1a(pixels.color(0), &encoded);
+			}
+			else
+			{
+				squish::ColourSet colours((uint8 *)pixels.colors(), squish::kDxt1|squish::kWeightColourByAlpha);
+				clusterFit.SetColourSet(&colours, squish::kDxt1);
+				clusterFit.Compress(&encoded);
+			}
+
+			// Without a handler the encoded block is simply dropped.
+			if (outputOptions.outputHandler == NULL) continue;
+			outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
+		}
+	}
+}
+
+
+// Encodes the current image as DXT3: the alpha half of each block comes from
+// OptimalCompress::compressDXT3A, the colour half from either
+// OptimalCompress::compressDXT1 (single-colour blocks) or WeightedClusterFit.
+void nv::SlowCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
+{
+	const uint width = m_image->width();
+	const uint height = m_image->height();
+
+	ColorBlock pixels;
+	BlockDXT3 encoded;
+
+	squish::WeightedClusterFit clusterFit;
+	clusterFit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
+
+	for (uint by = 0; by < height; by += 4)
+	{
+		for (uint bx = 0; bx < width; bx += 4)
+		{
+			pixels.init(m_image, bx, by);
+
+			// Explicit alpha first.
+			OptimalCompress::compressDXT3A(pixels, &encoded.alpha);
+
+			// Then colour.
+			if (!pixels.isSingleColor())
+			{
+				squish::ColourSet colours((uint8 *)pixels.colors(), squish::kWeightColourByAlpha);
+				clusterFit.SetColourSet(&colours, 0);
+				clusterFit.Compress(&encoded.color);
+			}
+			else
+			{
+				OptimalCompress::compressDXT1(pixels.color(0), &encoded.color);
+			}
+
+			// Without a handler the encoded block is simply dropped.
+			if (outputOptions.outputHandler == NULL) continue;
+			outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
+		}
+	}
+}
+
+// Encodes the current image as DXT5. Alpha uses the optimal DXT5A coder at
+// Quality_Highest and the quick one otherwise; colour uses
+// OptimalCompress::compressDXT1 for single-colour blocks and
+// WeightedClusterFit for the rest.
+void nv::SlowCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
+{
+	const uint width = m_image->width();
+	const uint height = m_image->height();
+
+	ColorBlock pixels;
+	BlockDXT5 encoded;
+
+	squish::WeightedClusterFit clusterFit;
+	clusterFit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
+
+	// The quality setting does not change while the image is being encoded.
+	const bool useOptimalAlpha = (compressionOptions.quality == Quality_Highest);
+
+	for (uint by = 0; by < height; by += 4)
+	{
+		for (uint bx = 0; bx < width; bx += 4)
+		{
+			pixels.init(m_image, bx, by);
+
+			// Alpha.
+			if (useOptimalAlpha)
+			{
+				OptimalCompress::compressDXT5A(pixels, &encoded.alpha);
+			}
+			else
+			{
+				QuickCompress::compressDXT5A(pixels, &encoded.alpha);
+			}
+
+			// Colour.
+			if (!pixels.isSingleColor())
+			{
+				squish::ColourSet colours((uint8 *)pixels.colors(), squish::kWeightColourByAlpha);
+				clusterFit.SetColourSet(&colours, 0);
+				clusterFit.Compress(&encoded.color);
+			}
+			else
+			{
+				OptimalCompress::compressDXT1(pixels.color(0), &encoded.color);
+			}
+
+			// Without a handler the encoded block is simply dropped.
+			if (outputOptions.outputHandler == NULL) continue;
+			outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
+		}
+	}
+}
+
+
+// Encodes the current image as DXT5n. Each block is swizzled with
+// ColorBlock::swizzleDXT5n(); the alpha half is then coded with the DXT5A
+// coder (optimal at Quality_Highest, quick otherwise) and the colour half
+// with OptimalCompress::compressDXT1G.
+void nv::SlowCompressor::compressDXT5n(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
+{
+	const uint width = m_image->width();
+	const uint height = m_image->height();
+
+	ColorBlock pixels;
+	BlockDXT5 encoded;
+
+	// The quality setting does not change while the image is being encoded.
+	const bool useOptimalAlpha = (compressionOptions.quality == Quality_Highest);
+
+	for (uint by = 0; by < height; by += 4)
+	{
+		for (uint bx = 0; bx < width; bx += 4)
+		{
+			pixels.init(m_image, bx, by);
+			pixels.swizzleDXT5n();
+
+			// X component, stored in the alpha half.
+			if (useOptimalAlpha)
+			{
+				OptimalCompress::compressDXT5A(pixels, &encoded.alpha);
+			}
+			else
+			{
+				QuickCompress::compressDXT5A(pixels, &encoded.alpha);
+			}
+
+			// Y component, stored in the colour half.
+			OptimalCompress::compressDXT1G(pixels, &encoded.color);
+
+			// Without a handler the encoded block is simply dropped.
+			if (outputOptions.outputHandler == NULL) continue;
+			outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
+		}
+	}
+}
+
+
+// Encodes the current image as BC4: one AlphaBlockDXT5 per block, produced by
+// the optimal DXT5A coder at Quality_Highest and by the quick one otherwise.
+void nv::SlowCompressor::compressBC4(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
+{
+	const uint width = m_image->width();
+	const uint height = m_image->height();
+
+	ColorBlock pixels;
+	AlphaBlockDXT5 encoded;
+
+	// The quality setting does not change while the image is being encoded.
+	const bool useOptimal = (compressionOptions.quality == Quality_Highest);
+
+	for (uint by = 0; by < height; by += 4)
+	{
+		for (uint bx = 0; bx < width; bx += 4)
+		{
+			pixels.init(m_image, bx, by);
+
+			if (useOptimal)
+			{
+				OptimalCompress::compressDXT5A(pixels, &encoded);
+			}
+			else
+			{
+				QuickCompress::compressDXT5A(pixels, &encoded);
+			}
+
+			// Without a handler the encoded block is simply dropped.
+			if (outputOptions.outputHandler == NULL) continue;
+			outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
+		}
+	}
+}
+
+
+// Encodes the current image as BC5 (BlockATI2). Each block is loaded twice,
+// once prepared with splatX() and once with splatY(); both copies are coded
+// with the DXT5A coder (optimal at Quality_Highest, quick otherwise) into the
+// x and y halves of the output block.
+void nv::SlowCompressor::compressBC5(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
+{
+	const uint width = m_image->width();
+	const uint height = m_image->height();
+
+	ColorBlock xPixels;
+	ColorBlock yPixels;
+
+	BlockATI2 encoded;
+
+	// The quality setting does not change while the image is being encoded.
+	const bool useOptimal = (compressionOptions.quality == Quality_Highest);
+
+	for (uint by = 0; by < height; by += 4)
+	{
+		for (uint bx = 0; bx < width; bx += 4)
+		{
+			xPixels.init(m_image, bx, by);
+			xPixels.splatX();
+
+			yPixels.init(m_image, bx, by);
+			yPixels.splatY();
+
+			if (useOptimal)
+			{
+				OptimalCompress::compressDXT5A(xPixels, &encoded.x);
+				OptimalCompress::compressDXT5A(yPixels, &encoded.y);
+			}
+			else
+			{
+				QuickCompress::compressDXT5A(xPixels, &encoded.x);
+				QuickCompress::compressDXT5A(yPixels, &encoded.y);
+			}
+
+			// Without a handler the encoded block is simply dropped.
+			if (outputOptions.outputHandler == NULL) continue;
+			outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
+		}
+	}
+}
+
+
+#if defined(HAVE_S3QUANT)
+
+// Compresses `image` to DXT1 with the s3_quant coder (CodeRGBBlock).
+//
+// Every block is coded twice, once with inLevel = 4 and once with
+// inLevel = 3; whichever result has the smaller blockError() is written to
+// the output handler (ties go to the four-level result). When the whole image
+// has been processed the mean error per block is printed to stdout.
+//
+// image          source image; must not be NULL.
+// outputOptions  its outputHandler, when set, receives each encoded block.
+void nv::s3CompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
+{
+	const uint w = image->width();
+	const uint h = image->height();
+	
+	// Sum of the errors of the blocks that were actually emitted.
+	float error = 0.0f;
+
+	BlockDXT1 dxtBlock3;
+	BlockDXT1 dxtBlock4;
+	ColorBlock block;
+
+	for (uint y = 0; y < h; y += 4) {
+		for (uint x = 0; x < w; x += 4) {
+			block.init(image, x, y);
+
+			// Init rgb block: 16 texels, channels rescaled from 0..255 to 0..1.
+			RGBBlock rgbBlock;
+			rgbBlock.n = 16;
+			for (uint i = 0; i < 16; i++) {
+				rgbBlock.colorChannel[i][0] = clamp(float(block.color(i).r) / 255.0f, 0.0f, 1.0f);
+				rgbBlock.colorChannel[i][1] = clamp(float(block.color(i).g) / 255.0f, 0.0f, 1.0f);
+				rgbBlock.colorChannel[i][2] = clamp(float(block.color(i).b) / 255.0f, 0.0f, 1.0f);
+			}
+			rgbBlock.weight[0] = 1.0f;
+			rgbBlock.weight[1] = 1.0f;
+			rgbBlock.weight[2] = 1.0f;
+
+			// First pass: four-level coding.
+			rgbBlock.inLevel = 4;
+			CodeRGBBlock(&rgbBlock);
+
+			// Copy results to DXT block.
+			dxtBlock4.col0.r = rgbBlock.endPoint[0][0];
+			dxtBlock4.col0.g = rgbBlock.endPoint[0][1];
+			dxtBlock4.col0.b = rgbBlock.endPoint[0][2];
+
+			dxtBlock4.col1.r = rgbBlock.endPoint[1][0];
+			dxtBlock4.col1.g = rgbBlock.endPoint[1][1];
+			dxtBlock4.col1.b = rgbBlock.endPoint[1][2];
+
+			dxtBlock4.setIndices(rgbBlock.index);
+
+			// Keep col0 >= col1 for the four-level block; the XOR toggles the
+			// low bit of every index pair to follow the swapped endpoints.
+			if (dxtBlock4.col0.u < dxtBlock4.col1.u) {
+				swap(dxtBlock4.col0.u, dxtBlock4.col1.u);
+				dxtBlock4.indices ^= 0x55555555;
+			}
+
+			uint error4 = blockError(block, dxtBlock4);
+
+			// Second pass: three-level coding of the same texels.
+			rgbBlock.inLevel = 3;
+
+			CodeRGBBlock(&rgbBlock);
+
+			// Copy results to DXT block.
+			dxtBlock3.col0.r = rgbBlock.endPoint[0][0];
+			dxtBlock3.col0.g = rgbBlock.endPoint[0][1];
+			dxtBlock3.col0.b = rgbBlock.endPoint[0][2];
+
+			dxtBlock3.col1.r = rgbBlock.endPoint[1][0];
+			dxtBlock3.col1.g = rgbBlock.endPoint[1][1];
+			dxtBlock3.col1.b = rgbBlock.endPoint[1][2];
+
+			dxtBlock3.setIndices(rgbBlock.index);
+
+			// Keep col0 <= col1 for the three-level block; only index pairs
+			// whose high bit is clear get their low bit toggled.
+			if (dxtBlock3.col0.u > dxtBlock3.col1.u) {
+				swap(dxtBlock3.col0.u, dxtBlock3.col1.u);
+				dxtBlock3.indices ^= (~dxtBlock3.indices  >> 1) & 0x55555555;
+			}
+
+			uint error3 = blockError(block, dxtBlock3);
+
+			// Emit whichever coding has the smaller error.
+			if (error3 < error4) {
+				error += error3;
+
+				if (outputOptions.outputHandler != NULL) {
+					outputOptions.outputHandler->writeData(&dxtBlock3, sizeof(dxtBlock3));
+				}
+			}
+			else {
+				error += error4;
+
+				if (outputOptions.outputHandler != NULL) {
+					outputOptions.outputHandler->writeData(&dxtBlock4, sizeof(dxtBlock4));
+				}
+			}
+		}
+	}
+
+	// Number of blocks the loops above visited. Each factor needs its own
+	// parentheses: `(w+3)/4 * (h+3)/4` groups as `(((w+3)/4) * (h+3)) / 4`,
+	// which is not the block count (w = 12, h = 7 gives 7 instead of 6).
+	const uint blockCount = ((w + 3) / 4) * ((h + 3) / 4);
+
+	// An empty image has no blocks; report zero instead of dividing by zero.
+	printf("error = %f\n", (blockCount > 0) ? error / blockCount : 0.0f);
+}
+
+#endif // defined(HAVE_S3QUANT)
+
+
+#if defined(HAVE_ATITC)
+
+// Compresses `image` to DXT1 with the ATI_Compress library and hands the
+// whole compressed buffer to the output handler in a single writeData() call.
+//
+// image          source image; its pixels are passed to the library as
+//                ATI_TC_FORMAT_ARGB_8888 with a pitch of width * 4 bytes.
+// outputOptions  its outputHandler, when set, receives the compressed data.
+void nv::atiCompressDXT1(const Image * image, const OutputOptions::Private & outputOptions)
+{
+	// Init source texture
+	ATI_TC_Texture srcTexture;
+	srcTexture.dwSize = sizeof(srcTexture);
+	srcTexture.dwWidth = image->width();
+	srcTexture.dwHeight = image->height();
+	srcTexture.dwPitch = image->width() * 4;
+	srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
+	srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
+	srcTexture.pData = (ATI_TC_BYTE*) image->pixels();
+
+	// Init dest texture; the library reports how large the buffer must be.
+	ATI_TC_Texture destTexture;
+	destTexture.dwSize = sizeof(destTexture);
+	destTexture.dwWidth = image->width();
+	destTexture.dwHeight = image->height();
+	destTexture.dwPitch = 0;
+	destTexture.format = ATI_TC_FORMAT_DXT1;
+	destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
+	destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);
+
+	// Compress
+	ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL);
+
+	if (outputOptions.outputHandler != NULL) {
+		outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
+	}
+
+	// The destination buffer belongs to this function and was never released.
+	// Other writeData() callers in this library reuse or free their buffers
+	// right after the call, so the handler is not expected to keep the pointer.
+	mem::free(destTexture.pData);
+}
+
+#endif // defined(HAVE_ATITC)
--- a/src/nvtt/CompressDXT.h
+++ b/src/nvtt/CompressDXT.h
@ -0,0 +1,87 @@
+// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
+// 
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+// 
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+
+#ifndef NV_TT_COMPRESSDXT_H
+#define NV_TT_COMPRESSDXT_H
+
+#include <nvimage/nvimage.h>
+#include "nvtt.h"
+
+namespace nv
+{
+	class Image;
+	class FloatImage;
+
+	class FastCompressor // DXT encoder whose methods call the QuickCompress routines (see CompressDXT.cpp).
+	{
+	public:
+		FastCompressor(); // Starts with m_image == NULL and AlphaMode_None.
+		~FastCompressor();
+
+		void setImage(const Image * image, nvtt::AlphaMode alphaMode); // Stores the pointer only; call before any compress method.
+
+		void compressDXT1(const nvtt::OutputOptions::Private & outputOptions); // Each method writes its encoded blocks to outputOptions.outputHandler, when set.
+		void compressDXT1a(const nvtt::OutputOptions::Private & outputOptions);
+		void compressDXT3(const nvtt::OutputOptions::Private & outputOptions);
+		void compressDXT5(const nvtt::OutputOptions::Private & outputOptions);
+		void compressDXT5n(const nvtt::OutputOptions::Private & outputOptions); // Like compressDXT5, after ColorBlock::swizzleDXT5n().
+
+	private:
+		const Image * m_image; // Not owned; the destructor does not release it.
+		nvtt::AlphaMode m_alphaMode; // Set by setImage(); the compress methods in CompressDXT.cpp do not read it.
+	};
+
+	class SlowCompressor // DXT/BC encoder built on OptimalCompress and squish's WeightedClusterFit (see CompressDXT.cpp).
+	{
+	public:
+		SlowCompressor(); // Starts with m_image == NULL and AlphaMode_None.
+		~SlowCompressor();
+
+		void setImage(const Image * image, nvtt::AlphaMode alphaMode); // Stores the pointer only; call before any compress method.
+
+		void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); // Each method writes its encoded blocks to outputOptions.outputHandler, when set.
+		void compressDXT1a(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); // Texels with alpha below 128 count as transparent.
+		void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
+		void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); // Alpha coder depends on compressionOptions.quality.
+		void compressDXT5n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
+		void compressBC4(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); // Emits one AlphaBlockDXT5 per block.
+		void compressBC5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); // Emits one BlockATI2 per block.
+
+	private:
+		const Image * m_image; // Not owned; the destructor does not release it.
+		nvtt::AlphaMode m_alphaMode; // Set by setImage(); the compress methods in CompressDXT.cpp do not read it.
+	};
+
+	// External compressors; each one is declared only when its HAVE_* macro is defined.
+#if defined(HAVE_S3QUANT)
+	void s3CompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
+#endif
+	
+#if defined(HAVE_ATITC)
+	void atiCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
+#endif
+
+} // nv namespace
+
+
+#endif // NV_TT_COMPRESSDXT_H
--- a/src/nvtt/CompressRGB.cpp
+++ b/src/nvtt/CompressRGB.cpp
@ -0,0 +1,140 @@
+// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
+// 
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+// 
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+
+#include <nvcore/Debug.h>
+
+#include <nvimage/Image.h>
+#include <nvimage/PixelFormat.h>
+#include <nvmath/Color.h>
+
+#include "CompressRGB.h"
+#include "CompressionOptions.h"
+#include "OutputOptions.h"
+
+using namespace nv;
+using namespace nvtt;
+
+namespace 
+{
+
+	// Size in bytes of one output row: `w` pixels of `bitsize` bits each
+	// (rounded up to whole bytes per pixel), padded to a multiple of four.
+	inline uint computePitch(uint w, uint bitsize)
+	{
+		const uint bytesPerPixel = (bitsize + 7) / 8;
+		const uint rowBytes = w * bytesPerPixel;
+
+		// Align to 32 bits.
+		const uint bumped = rowBytes + 3;
+		return bumped - (bumped % 4);
+	}
+
+	// Copies `w` pixels of four bytes each from src to dst unchanged.
+	inline void convert_to_a8r8g8b8(const void * src, void * dst, uint w)
+	{
+		const uint byteCount = 4 * w;
+		memcpy(dst, src, byteCount);
+	}
+
+	// Copies `w` pixels of four bytes each from src to dst unchanged.
+	inline void convert_to_x8r8g8b8(const void * src, void * dst, uint w)
+	{
+		const uint byteCount = 4 * w;
+		memcpy(dst, src, byteCount);
+	}
+
+} // namespace
+
+
+// Pixel format converter.
+// Writes the image one scanline at a time in the layout described by
+// compressionOptions (bit count plus one mask per channel). Every row is
+// computePitch() bytes long and is passed to the output handler when set.
+void nv::compressRGB(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
+{
+	nvCheck(image != NULL);
+
+	const uint width = image->width();
+	const uint height = image->height();
+
+	const uint bitCount = compressionOptions.bitcount;
+	nvCheck(bitCount == 8 || bitCount == 16 || bitCount == 24 || bitCount == 32);
+
+	const uint bytesPerPixel = bitCount / 8;
+
+	// Split every channel mask into a shift and a size.
+	const uint rmask = compressionOptions.rmask;
+	uint rshift, rsize;
+	PixelFormat::maskShiftAndSize(rmask, &rshift, &rsize);
+
+	const uint gmask = compressionOptions.gmask;
+	uint gshift, gsize;
+	PixelFormat::maskShiftAndSize(gmask, &gshift, &gsize);
+
+	const uint bmask = compressionOptions.bmask;
+	uint bshift, bsize;
+	PixelFormat::maskShiftAndSize(bmask, &bshift, &bsize);
+
+	const uint amask = compressionOptions.amask;
+	uint ashift, asize;
+	PixelFormat::maskShiftAndSize(amask, &ashift, &asize);
+
+	// These two 32-bit layouts are copied row by row by the convert_to_*
+	// helpers; the format does not change between rows, so test it once.
+	const bool rgbMasksMatch = (bitCount == 32 && rmask == 0xFF0000 && gmask == 0xFF00 && bmask == 0xFF);
+	const bool isA8R8G8B8 = rgbMasksMatch && amask == 0xFF000000;
+	const bool isX8R8G8B8 = rgbMasksMatch && amask == 0;
+
+	// Determine pitch.
+	const uint pitch = computePitch(width, compressionOptions.bitcount);
+
+	// Scratch buffer holding one output row.
+	uint8 * const row = (uint8 *)mem::malloc(pitch + 4);
+
+	for (uint y = 0; y < height; y++)
+	{
+		const Color32 * src = image->scanline(y);
+
+		if (isA8R8G8B8)
+		{
+			convert_to_a8r8g8b8(src, row, width);
+		}
+		else if (isX8R8G8B8)
+		{
+			convert_to_x8r8g8b8(src, row, width);
+		}
+		else
+		{
+			// Generic pixel format conversion.
+			uint8 * out = row;
+			for (uint x = 0; x < width; x++)
+			{
+				const Color32 & texel = src[x];
+
+				uint c = 0;
+				c |= PixelFormat::convert(texel.r, 8, rsize) << rshift;
+				c |= PixelFormat::convert(texel.g, 8, gsize) << gshift;
+				c |= PixelFormat::convert(texel.b, 8, bsize) << bshift;
+				c |= PixelFormat::convert(texel.a, 8, asize) << ashift;
+
+				// Output one byte at a time, lowest byte first.
+				for (uint i = 0; i < bytesPerPixel; i++)
+				{
+					*out++ = (c >> (i * 8)) & 0xFF;
+				}
+			}
+
+			// Zero padding.
+			for (uint i = width * bytesPerPixel; i < pitch; i++)
+			{
+				row[i] = 0;
+			}
+		}
+
+		if (outputOptions.outputHandler != NULL)
+		{
+			outputOptions.outputHandler->writeData(row, pitch);
+		}
+	}
+
+	mem::free(row);
+}
+
--- a/src/nvtt/CompressorRGBE.h
+++ b/src/nvtt/CompressorRGBE.h
@ -21,20 +21,19 @@
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 // OTHER DEALINGS IN THE SOFTWARE.

-#ifndef NV_TT_COMPRESSORRGBE_H
-#define NV_TT_COMPRESSORRGBE_H
+#ifndef NV_TT_COMPRESSRGB_H
+#define NV_TT_COMPRESSRGB_H

 #include "nvtt.h"
-#include "Compressor.h"

 namespace nv
 {
-    struct CompressorRGBE : public CompressorInterface
-	{
-		virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
-	};
+	class Image;

+	// Pixel format converter.
+	void compressRGB(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
+	
 } // nv namespace


-#endif // NV_TT_COMPRESSORRGBE_H
+#endif // NV_TT_COMPRESSRGB_H
--- a/src/nvtt/CompressionOptions.cpp
+++ b/src/nvtt/CompressionOptions.cpp
@ -55,12 +55,6 @@ void CompressionOptions::reset()
 	m.rmask = 0x00FF0000;
 	m.amask = 0xFF000000;

-	m.rsize = 8;
-	m.gsize = 8;
-	m.bsize = 8;
-	m.asize = 8;
-	m.pixelType = PixelType_UnsignedNorm;
-
 	m.enableColorDithering = false;
 	m.enableAlphaDithering = false;
 	m.binaryAlpha = false;
@ -123,36 +117,8 @@ void CompressionOptions::setPixelFormat(uint bitcount, uint rmask, uint gmask, u
 	m.gmask = gmask;
 	m.bmask = bmask;
 	m.amask = amask;
-
-	m.rsize = 0;
-	m.gsize = 0;
-	m.bsize = 0;
-	m.asize = 0;
 }

-void CompressionOptions::setPixelFormat(uint8 rsize, uint8 gsize, uint8 bsize, uint8 asize)
-{
-	nvCheck(rsize <= 32 || gsize <= 32 || bsize <= 32 || asize <= 32);
-
-	m.bitcount = 0;
-	m.rmask = 0;
-	m.gmask = 0;
-	m.bmask = 0;
-	m.amask = 0;
-
-	m.rsize = rsize;
-	m.gsize = gsize;
-	m.bsize = bsize;
-	m.asize = asize;
-}
-
-/// Set pixel type.
-void CompressionOptions::setPixelType(PixelType pixelType)
-{
-	m.pixelType = pixelType;
-}
-
-
 /// Use external compressor.
 void CompressionOptions::setExternalCompressor(const char * name)
 {
--- a/src/nvtt/CompressionOptions.h
+++ b/src/nvtt/CompressionOptions.h
@ -45,12 +45,6 @@ namespace nvtt
 		uint gmask;
 		uint bmask;
 		uint amask;
-		uint8 rsize;
-		uint8 gsize;
-		uint8 bsize;
-		uint8 asize;
-		
-		PixelType pixelType;
 		
 		nv::String externalCompressor;

@ -59,15 +53,6 @@ namespace nvtt
 		bool enableAlphaDithering;
 		bool binaryAlpha;
 		int alphaThreshold;			// reference value used for binary alpha quantization.
-
-		uint getBitCount() const
-		{
-			if (format == Format_RGBA) {
-				if (bitcount != 0) return bitcount;
-				else return rsize + gsize + bsize + asize;
-			}
-			return 0;
-		}
 	};

 } // nvtt namespace
--- a/src/nvtt/Compressor.cpp
+++ b/src/nvtt/Compressor.cpp
@ -0,0 +1,853 @@
+// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
+// 
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+// 
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+
+#include <nvtt/nvtt.h>
+
+#include <nvcore/Memory.h>
+#include <nvcore/Ptr.h>
+
+#include <nvimage/DirectDrawSurface.h>
+#include <nvimage/ColorBlock.h>
+#include <nvimage/BlockDXT.h>
+#include <nvimage/Image.h>
+#include <nvimage/FloatImage.h>
+#include <nvimage/Filter.h>
+#include <nvimage/Quantize.h>
+#include <nvimage/NormalMap.h>
+#include <nvimage/PixelFormat.h>
+
+#include "Compressor.h"
+#include "InputOptions.h"
+#include "CompressionOptions.h"
+#include "OutputOptions.h"
+
+#include "CompressDXT.h"
+#include "CompressRGB.h"
+#include "cuda/CudaUtils.h"
+#include "cuda/CudaCompressDXT.h"
+
+
+using namespace nv;
+using namespace nvtt;
+
+
+namespace
+{
+
+	// Size in bytes of one compressed block of the given format.
+	// Formats that are not listed here yield 0.
+	static int blockSize(Format format)
+	{
+		const bool isSmallBlock =
+			format == Format_DXT1 ||
+			format == Format_DXT1a ||
+			format == Format_BC4;
+
+		if (isSmallBlock) {
+			return 8;
+		}
+
+		const bool isLargeBlock =
+			format == Format_DXT3 ||
+			format == Format_DXT5 ||
+			format == Format_DXT5n ||
+			format == Format_BC5;
+
+		return isLargeBlock ? 16 : 0;
+	}
+
+	// Number of bytes in one row of w pixels of bitsize bits each.
+	// The pixel size is rounded up to whole bytes and the row to a multiple of 4 bytes.
+	inline uint computePitch(uint w, uint bitsize)
+	{
+		const uint bytesPerPixel = (bitsize + 7) / 8;
+		const uint rowBytes = w * bytesPerPixel;
+
+		// Round up to the next 32 bit boundary.
+		const uint alignedWords = (rowBytes + 3) / 4;
+		return alignedWords * 4;
+	}
+
+	// Size in bytes of a single w x h x d image stored in the given format.
+	// bitCount is only used for Format_RGBA.
+	static int computeImageSize(uint w, uint h, uint d, uint bitCount, Format format)
+	{
+		if (format != Format_RGBA) {
+			// One block per 4x4 tile; a partially covered tile counts as a whole one.
+			// @@ Handle 3D textures. DXT and VTC have different behaviors.
+			const uint blocksAcross = (w + 3) / 4;
+			const uint blocksDown = (h + 3) / 4;
+			return blocksAcross * blocksDown * blockSize(format);
+		}
+
+		// Uncompressed: d slices of h rows, each row padded by computePitch.
+		return d * h * computePitch(w, bitCount);
+	}
+
+} // namespace
+
+namespace nvtt
+{
+	// Mipmap could be:
+	// - a pointer to an input image.
+	// - a fixed point image.
+	// - a floating point image.
+	struct Mipmap
+	{
+		Mipmap() : m_inputImage(NULL) {}
+		~Mipmap() {}
+
+		// Reference input image. The image is not owned; any fixed or float
+		// image held so far is released.
+		void setFromInput(const InputOptions::Private & inputOptions, uint idx)
+		{
+			m_inputImage = inputOptions.image(idx);
+			m_fixedImage = NULL;
+			m_floatImage = NULL;
+		}
+
+		// Assign and take ownership of given image. The input reference and
+		// any fixed image held so far are dropped.
+		void setImage(FloatImage * image)
+		{
+			m_inputImage = NULL;
+			m_fixedImage = NULL;
+			m_floatImage = image;
+		}
+
+
+		// Convert linear float image to fixed image ready for compression.
+		// Does nothing when no float image is available. Gamma correction is
+		// skipped for normal maps and when the output gamma is 1.
+		void toFixedImage(const InputOptions::Private & inputOptions)
+		{
+			if (m_floatImage != NULL) // apfaffe - We should check that we have a float image, if so convert it!
+			{
+				if (inputOptions.isNormalMap || inputOptions.outputGamma == 1.0f)
+				{
+					m_fixedImage = m_floatImage->createImage();
+				}
+				else
+				{
+					m_fixedImage = m_floatImage->createImageGammaCorrect(inputOptions.outputGamma);
+				}
+			}
+		}
+
+		// Convert input image to linear float image.
+		// Does nothing when a float image already exists; otherwise a fixed
+		// image (processed or input) must be available.
+		void toFloatImage(const InputOptions::Private & inputOptions)
+		{
+			if (m_floatImage == NULL)
+			{
+				nvDebugCheck(this->asFixedImage() != NULL);
+
+				m_floatImage = new FloatImage(this->asFixedImage());
+
+				if (inputOptions.isNormalMap)
+				{
+					// Expand normals to [-1, 1] range.
+					//	floatImage->expandNormals(0);
+				}
+				else if (inputOptions.inputGamma != 1.0f)
+				{
+					// Convert to linear space.
+					m_floatImage->toLinear(0, 3, inputOptions.inputGamma);
+				}
+			}
+		}
+
+		// Float image, or NULL when there is none.
+		const FloatImage * asFloatImage() const
+		{
+			return m_floatImage.ptr();
+		}
+
+		FloatImage * asFloatImage()
+		{
+			return m_floatImage.ptr();
+		}
+
+		// Fixed image to read from: the processed image when there is one,
+		// otherwise the referenced input image (which may be NULL).
+		const Image * asFixedImage() const
+		{
+			// - apfaffe - switched logic to return the 'processed image' rather than the input!
+			if (m_fixedImage != NULL && m_fixedImage.ptr() != NULL)
+			{
+				return m_fixedImage.ptr();
+			}
+			return m_inputImage;
+		}
+
+		// Fixed image that may be modified in place. The input image is never
+		// handed out for modification: it is copied first when no processed
+		// image exists yet.
+		Image * asMutableFixedImage()
+		{
+			if (m_inputImage != NULL)
+			{
+				// Keep an existing processed image (e.g. the gamma corrected result of
+				// toFixedImage); replacing it with the raw input would discard that
+				// processing and disagree with what asFixedImage returns.
+				if (m_fixedImage == NULL)
+				{
+					// Do not modify input image, create a copy.
+					m_fixedImage = new Image(*m_inputImage);
+				}
+				m_inputImage = NULL;
+			}
+			return m_fixedImage.ptr();
+		}
+
+
+	private:
+		const Image * m_inputImage;			// Referenced, not owned.
+		AutoPtr<Image> m_fixedImage;		// Owned.
+		AutoPtr<FloatImage> m_floatImage;	// Owned.
+	};
+
+} // nvtt namespace
+
+
+/// Create a compressor. CUDA acceleration is requested right away when CUDA
+/// hardware is reported as present.
+Compressor::Compressor() : m(*new Compressor::Private())
+{
+	// Start from a well defined "CUDA off" state.
+	m.cudaDevice = -1;
+	m.cudaEnabled = false;
+
+	// Detect the hardware, then try to enable acceleration accordingly.
+	m.cudaSupported = cuda::isHardwarePresent();
+	enableCudaAcceleration(m.cudaSupported);
+}
+
+/// Disable CUDA acceleration (a no-op unless it is currently enabled) and then
+/// free the private implementation object allocated by the constructor.
+Compressor::~Compressor()
+{
+	enableCudaAcceleration(false);
+	delete &m;
+}
+
+
+/// Enable or disable CUDA acceleration. Nothing happens when CUDA hardware was
+/// not detected, or when the requested state is already the current one.
+void Compressor::enableCudaAcceleration(bool enable)
+{
+	if (!m.cudaSupported || m.cudaEnabled == enable)
+	{
+		return;
+	}
+
+	if (!enable)
+	{
+		// Switching off: release the CUDA compressor, then leave the device.
+		m.cudaEnabled = false;
+		m.cuda = NULL;
+
+		if (m.cudaDevice != -1)
+		{
+			cuda::exitDevice();
+		}
+
+		return;
+	}
+
+	// Switching on. Init the CUDA device. This may return -1 if CUDA was already initialized by the app.
+	m.cudaEnabled = cuda::initDevice(&m.cudaDevice);
+
+	if (!m.cudaEnabled)
+	{
+		return;
+	}
+
+	// Device is up: create the compressor, and undo everything if it is not usable.
+	m.cuda = new CudaCompressor();
+
+	if (!m.cuda->isValid())
+	{
+		enableCudaAcceleration(false);
+	}
+}
+
+/// Check if CUDA acceleration is enabled.
+/// Reports the state left by the last enableCudaAcceleration call, which stays
+/// false when the hardware is missing or device initialization failed.
+bool Compressor::isCudaAccelerationEnabled() const
+{
+	return m.cudaEnabled;
+}
+
+
+/// Compress the input texture with the given compression options.
+/// Forwards the private parts of the three option objects to the implementation
+/// and returns its result.
+bool Compressor::process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const
+{
+	return m.compress(inputOptions.m, compressionOptions.m, outputOptions.m);
+}
+
+
+/// Estimate the size of compressing the input with the given options.
+/// Forwards the private parts of the option objects to the implementation and
+/// returns its result.
+int Compressor::estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const
+{
+	return m.estimateSize(inputOptions.m, compressionOptions.m);
+}
+
+
+
+
+// Run the whole pipeline: open the output, write the header and compress the
+// mipmaps of every face. Returns false when the output cannot be opened (the
+// error handler, if any, is notified with Error_FileOpen), when the header
+// cannot be written, or when the mipmaps of a face cannot be compressed.
+bool Compressor::Private::compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
+{
+	// Make sure enums match.
+	nvStaticCheck(FloatImage::WrapMode_Clamp == (FloatImage::WrapMode)WrapMode_Clamp);
+	nvStaticCheck(FloatImage::WrapMode_Mirror == (FloatImage::WrapMode)WrapMode_Mirror);
+	nvStaticCheck(FloatImage::WrapMode_Repeat == (FloatImage::WrapMode)WrapMode_Repeat);
+
+	// Get output handler.
+	if (!outputOptions.openFile())
+	{
+		if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_FileOpen);
+		return false;
+	}
+
+	inputOptions.computeTargetExtents();
+
+	// Output DDS header.
+	bool succeeded = outputHeader(inputOptions, compressionOptions, outputOptions);
+
+	// Stop at the first face that fails.
+	for (uint f = 0; succeeded && f < inputOptions.faceCount; f++)
+	{
+		succeeded = compressMipmaps(f, inputOptions, compressionOptions, outputOptions);
+	}
+
+	// Pair the successful openFile() above with closeFile() on every path, so a
+	// failed header write or face does not leave the output open.
+	outputOptions.closeFile();
+
+	return succeeded;
+}
+
+
+// Build the DDS header for the target texture and pass it to the output handler.
+// Returns true when there is nothing to write (no output handler, or header
+// output disabled) or when the write succeeds. When the write fails the error
+// handler, if any, is notified with Error_FileWrite and false is returned.
+bool Compressor::Private::outputHeader(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
+{
+	// Skip the header when nobody receives it or when it was not requested.
+	if (outputOptions.outputHandler == NULL || !outputOptions.outputHeader)
+	{
+		return true;
+	}
+
+	DDSHeader header;
+
+	header.setWidth(inputOptions.targetWidth);
+	header.setHeight(inputOptions.targetHeight);
+
+	int mipmapCount = inputOptions.realMipmapCount();
+	nvDebugCheck(mipmapCount > 0);
+
+	header.setMipmapCount(mipmapCount);
+
+	// Only 2D and cube textures are handled; 3D support is commented out below.
+	if (inputOptions.textureType == TextureType_2D) {
+		header.setTexture2D();
+	}
+	else if (inputOptions.textureType == TextureType_Cube) {
+		header.setTextureCube();
+	}		
+	/*else if (inputOptions.textureType == TextureType_3D) {
+	header.setTexture3D();
+	header.setDepth(inputOptions.targetDepth);
+	}*/
+
+	if (compressionOptions.format == Format_RGBA)
+	{
+		// Uncompressed output is described by its row pitch and channel masks.
+		header.setPitch(computePitch(inputOptions.targetWidth, compressionOptions.bitcount));
+		header.setPixelFormat(compressionOptions.bitcount, compressionOptions.rmask, compressionOptions.gmask, compressionOptions.bmask, compressionOptions.amask);
+	}
+	else
+	{
+		// Compressed output is described by the size of the top level image and a FourCC.
+		header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, compressionOptions.format));
+
+		if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a) {
+			header.setFourCC('D', 'X', 'T', '1');
+			if (inputOptions.isNormalMap) header.setNormalFlag(true);
+		}
+		else if (compressionOptions.format == Format_DXT3) {
+			header.setFourCC('D', 'X', 'T', '3');
+		}
+		else if (compressionOptions.format == Format_DXT5) {
+			header.setFourCC('D', 'X', 'T', '5');
+		}
+		else if (compressionOptions.format == Format_DXT5n) {
+			// Same FourCC as DXT5; only the normal flag tells them apart.
+			header.setFourCC('D', 'X', 'T', '5');
+			if (inputOptions.isNormalMap) header.setNormalFlag(true);
+		}
+		else if (compressionOptions.format == Format_BC4) {
+			header.setFourCC('A', 'T', 'I', '1');
+		}
+		else if (compressionOptions.format == Format_BC5) {
+			header.setFourCC('A', 'T', 'I', '2');
+			if (inputOptions.isNormalMap) header.setNormalFlag(true);
+		}
+	}
+
+	// Swap bytes if necessary.
+	header.swapBytes();
+
+	// 128 bytes are written, plus 20 more when the header carries a DX10 extension.
+	uint headerSize = 128;
+	if (header.hasDX10Header())
+	{
+		nvStaticCheck(sizeof(DDSHeader) == 128 + 20);
+		headerSize = 128 + 20;
+	}
+
+	bool writeSucceed = outputOptions.outputHandler->writeData(&header, headerSize);
+	if (!writeSucceed && outputOptions.errorHandler != NULL)
+	{
+		outputOptions.errorHandler->error(Error_FileWrite);
+	}
+
+	return writeSucceed;
+}
+
+
+// Produce, quantize and compress every mipmap level of face f. The first level
+// uses the target extents; each following level halves every dimension, never
+// going below 1. Returns false as soon as a level cannot be initialized from the
+// input; the error handler, if any, is notified with Error_InvalidInput first.
+bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
+{
+	uint w = inputOptions.targetWidth;
+	uint h = inputOptions.targetHeight;
+	uint d = inputOptions.targetDepth;
+
+	// Reused across levels so that a level can be downsampled from the previous one.
+	Mipmap mipmap;
+
+	const uint mipmapCount = inputOptions.realMipmapCount();
+	nvDebugCheck(mipmapCount > 0);
+
+	for (uint m = 0; m < mipmapCount; m++)
+	{
+		if (outputOptions.outputHandler)
+		{
+			// Announce the image that is about to be written.
+			int size = computeImageSize(w, h, d, compressionOptions.bitcount, compressionOptions.format);
+			outputOptions.outputHandler->beginImage(size, w, h, d, f, m);
+		}
+
+		// @@ Where to do the color transform?
+		// - Color transform may not be linear, so we cannot do before computing mipmaps.
+		// - Should be done in linear space, that is, after gamma correction.
+
+		if (!initMipmap(mipmap, inputOptions, w, h, d, f, m))
+		{
+			if (outputOptions.errorHandler != NULL)
+			{
+				outputOptions.errorHandler->error(Error_InvalidInput);
+			}
+
+			// Abort whether or not an error handler is installed: the mipmap was not
+			// set up for this level, so it must not be quantized or compressed.
+			return false;
+		}
+
+		quantizeMipmap(mipmap, compressionOptions);
+
+		compressMipmap(mipmap, inputOptions, compressionOptions, outputOptions);
+
+		// Compute extents of next mipmap:
+		w = max(1U, w / 2);
+		h = max(1U, h / 2);
+		d = max(1U, d / 2);
+	}
+
+	return true;
+}
+
+// Fill the mipmap with the image for level m of face f at extents w x h x d.
+// Levels after the first are downsampled from the current mipmap contents when
+// the input has no image of exactly that size, or when normal map conversion is
+// enabled. Otherwise the image comes from the input, resized from the closest
+// available one if needed. Returns false when the input has no usable image.
+bool Compressor::Private::initMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f, uint m) const
+{
+	int inputIdx = findExactMipmap(inputOptions, w, h, d, f);
+	const bool exactMatch = (inputIdx != -1);
+
+	const bool generateFromPrevious = (m != 0) && (!exactMatch || inputOptions.convertToNormalMap);
+
+	if (generateFromPrevious)
+	{
+		downsampleMipmap(mipmap, inputOptions);
+	}
+	else
+	{
+		if (!exactMatch)
+		{
+			// Fall back to the closest input image; it is resized further down.
+			inputIdx = findClosestMipmap(inputOptions, w, h, d, f);
+
+			if (inputIdx == -1)
+			{
+				return false;
+			}
+		}
+
+		mipmap.setFromInput(inputOptions, inputIdx);
+
+		if (!exactMatch)
+		{
+			scaleMipmap(mipmap, inputOptions, w, h, d);
+		}
+
+		processInputImage(mipmap, inputOptions);
+	}
+
+	// Whatever the source was, end up with a fixed image ready for compression.
+	mipmap.toFixedImage(inputOptions);
+
+	return true;
+}
+
+// Index of the input image of face f whose extents are exactly w x h x d, or -1.
+// The search stops at the first image with matching extents (returning -1 if it
+// has no data) and at the first image that is smaller than the requested
+// extents in any dimension.
+int Compressor::Private::findExactMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const
+{
+	const int width = int(w);
+	const int height = int(h);
+	const int depth = int(d);
+
+	for (int m = 0; m < int(inputOptions.mipmapCount); m++)
+	{
+		int idx = f * inputOptions.mipmapCount + m;
+		const InputOptions::Private::InputImage & candidate = inputOptions.images[idx];
+
+		const bool sameExtents = candidate.width == width && candidate.height == height && candidate.depth == depth;
+		if (sameExtents)
+		{
+			return (candidate.data != NULL) ? idx : -1;
+		}
+
+		const bool tooSmall = candidate.width < width || candidate.height < height || candidate.depth < depth;
+		if (tooSmall)
+		{
+			break;
+		}
+	}
+
+	return -1;
+}
+
+// Index of the input image of face f to resize to w x h x d, or -1 when the
+// face has no image with data. Images with data are visited in order; the last
+// one visited before the summed extent difference turns negative is chosen, or
+// that first negative one when nothing came before it.
+int Compressor::Private::findClosestMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const
+{
+	int closest = -1;
+
+	for (int m = 0; m < int(inputOptions.mipmapCount); m++)
+	{
+		int idx = f * inputOptions.mipmapCount + m;
+		const InputOptions::Private::InputImage & candidate = inputOptions.images[idx];
+
+		if (candidate.data == NULL)
+		{
+			continue;
+		}
+
+		int difference = (candidate.width - w) + (candidate.height - h) + (candidate.depth - d);
+
+		if (difference < 0)
+		{
+			return (closest == -1) ? idx : closest;
+		}
+
+		closest = idx;
+	}
+
+	return closest;
+}
+
+// Replace the contents of the mipmap with a downsampled version of itself, using
+// the mipmap filter selected in the input options. Normal maps are renormalized
+// afterwards when mipmap normalization is enabled.
+void Compressor::Private::downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const
+{
+	// Make sure that floating point linear representation is available.
+	mipmap.toFloatImage(inputOptions);
+
+	const FloatImage * source = mipmap.asFloatImage();
+	const FloatImage::WrapMode wrapMode = (FloatImage::WrapMode)inputOptions.wrapMode;
+
+	FloatImage * downsampled = NULL;
+
+	if (inputOptions.mipmapFilter == MipmapFilter_Box)
+	{
+		// Use fast downsample.
+		downsampled = source->fastDownSample();
+	}
+	else if (inputOptions.mipmapFilter == MipmapFilter_Triangle)
+	{
+		TriangleFilter filter;
+		downsampled = source->downSample(filter, wrapMode);
+	}
+	else /*if (inputOptions.mipmapFilter == MipmapFilter_Kaiser)*/
+	{
+		nvDebugCheck(inputOptions.mipmapFilter == MipmapFilter_Kaiser);
+		KaiserFilter filter(inputOptions.kaiserWidth);
+		filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch);
+		downsampled = source->downSample(filter, wrapMode);
+	}
+
+	// The mipmap takes ownership of the new image.
+	mipmap.setImage(downsampled);
+
+	// Normalize mipmap.
+	const bool holdsNormals = inputOptions.isNormalMap || inputOptions.convertToNormalMap;
+	if (holdsNormals && inputOptions.normalizeMipmaps)
+	{
+		normalizeNormalMap(mipmap.asFloatImage());
+	}
+}
+
+
+// Resize the mipmap to w x h with a box filter. The depth d is not used.
+void Compressor::Private::scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const
+{
+	// Resizing works on the float representation.
+	mipmap.toFloatImage(inputOptions);
+
+	// @@ Add more filters.
+	// @@ Select different filters for downscaling and reconstruction.
+
+	BoxFilter boxFilter;
+	const FloatImage::WrapMode wrapMode = (FloatImage::WrapMode)inputOptions.wrapMode;
+
+	FloatImage * resized = mipmap.asFloatImage()->resize(boxFilter, w, h, wrapMode);
+	mipmap.setImage(resized);
+}
+
+
+// Prepare an image that came from the input. Depending on the input options it
+// is converted to a normal map, renormalized as a normal map, or converted to a
+// linear float image when input and output gamma differ.
+void Compressor::Private::processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const
+{
+	if (inputOptions.convertToNormalMap)
+	{
+		// The normal map is built from the fixed image.
+		mipmap.toFixedImage(inputOptions);
+
+		Vector4 heightScale = inputOptions.heightFactors;
+		mipmap.setImage(createNormalMap(mipmap.asFixedImage(), (FloatImage::WrapMode)inputOptions.wrapMode, heightScale, inputOptions.bumpFrequencyScale));
+		return;
+	}
+
+	if (inputOptions.isNormalMap)
+	{
+		if (!inputOptions.normalizeMipmaps)
+		{
+			return;
+		}
+
+		// Normalize the existing float image in place, or create one from the fixed image.
+		FloatImage * normals = mipmap.asFloatImage();
+		if (normals == NULL)
+		{
+			normals = new FloatImage(mipmap.asFixedImage());
+		}
+
+		normalizeNormalMap(normals);
+		mipmap.setImage(normals);
+		return;
+	}
+
+	if (inputOptions.inputGamma != inputOptions.outputGamma)
+	{
+		mipmap.toFloatImage(inputOptions);
+	}
+}
+
+
+// Apply the binary alpha and dithering steps requested by the compression options
+// to the fixed image of the mipmap. Nothing is touched when none is requested.
+void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const
+{
+	nvDebugCheck(mipmap.asFixedImage() != NULL);
+
+	const Format format = compressionOptions.format;
+	const bool ditherColor = compressionOptions.enableColorDithering;
+	const bool ditherAlpha = compressionOptions.enableAlphaDithering;
+
+	if (compressionOptions.binaryAlpha)
+	{
+		Image * image = mipmap.asMutableFixedImage();
+
+		if (ditherAlpha)
+		{
+			Quantize::FloydSteinberg_BinaryAlpha(image, compressionOptions.alphaThreshold);
+		}
+		else
+		{
+			Quantize::BinaryAlpha(image, compressionOptions.alphaThreshold);
+		}
+	}
+
+	if (!ditherColor && !ditherAlpha)
+	{
+		return;
+	}
+
+	// Target bit count per channel: 8 unless the format or the masks say otherwise.
+	uint rsize = 8;
+	uint gsize = 8;
+	uint bsize = 8;
+	uint asize = 8;
+
+	if (ditherColor)
+	{
+		if (format >= Format_DXT1 && format <= Format_DXT5)
+		{
+			rsize = 5;
+			gsize = 6;
+			bsize = 5;
+		}
+		else if (format == Format_RGB)
+		{
+			// Take the sizes from the channel masks; the shifts are not needed.
+			uint rshift, gshift, bshift;
+			PixelFormat::maskShiftAndSize(compressionOptions.rmask, &rshift, &rsize);
+			PixelFormat::maskShiftAndSize(compressionOptions.gmask, &gshift, &gsize);
+			PixelFormat::maskShiftAndSize(compressionOptions.bmask, &bshift, &bsize);
+		}
+	}
+
+	if (ditherAlpha)
+	{
+		if (format == Format_DXT3)
+		{
+			asize = 4;
+		}
+		else if (format == Format_RGB)
+		{
+			uint ashift;
+			PixelFormat::maskShiftAndSize(compressionOptions.amask, &ashift, &asize);
+		}
+	}
+
+	if (compressionOptions.binaryAlpha)
+	{
+		asize = 8; // Already quantized.
+	}
+
+	Quantize::FloydSteinberg(mipmap.asMutableFixedImage(), rsize, gsize, bsize, asize);
+}
+
+
+// Compress the fixed image of the given mipmap in the format selected by the
+// compression options and send the result through the output options.
+// Per format, the compressor is chosen from the quality setting and from
+// whether CUDA is enabled. Always returns true.
+bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
+{
+	const Image * image = mipmap.asFixedImage();
+	nvDebugCheck(image != NULL);
+
+	// Both CPU compressors are set up front, whichever format ends up being used.
+	FastCompressor fast;
+	fast.setImage(image, inputOptions.alphaMode);
+
+	SlowCompressor slow;
+	slow.setImage(image, inputOptions.alphaMode);
+
+	// CUDA is only used for images of at least 512 pixels.
+	const bool useCuda = cudaEnabled && image->width() * image->height() >= 512;
+
+	if (compressionOptions.format == Format_RGBA || compressionOptions.format == Format_RGB)
+	{
+		compressRGB(image, outputOptions, compressionOptions);
+	}
+	else if (compressionOptions.format == Format_DXT1)
+	{
+		// The optional external compressors below each end in a dangling 'else',
+		// so the chain falls through to the built-in compressors when they are
+		// compiled out or not selected by name.
+#if defined(HAVE_S3QUANT)
+		if (compressionOptions.externalCompressor == "s3")
+		{
+			s3CompressDXT1(image, outputOptions);
+		}
+		else
+#endif
+
+#if defined(HAVE_ATITC)
+			if (compressionOptions.externalCompressor == "ati")
+			{
+				atiCompressDXT1(image, outputOptions);
+			}
+			else
+#endif
+				if (compressionOptions.quality == Quality_Fastest)
+				{
+					fast.compressDXT1(outputOptions);
+				}
+				else
+				{
+					if (useCuda)
+					{
+						nvDebugCheck(cudaSupported);
+						cuda->setImage(image, inputOptions.alphaMode);
+						cuda->compressDXT1(compressionOptions, outputOptions);
+					}
+					else
+					{
+						slow.compressDXT1(compressionOptions, outputOptions);
+					}
+				}
+	}
+	else if (compressionOptions.format == Format_DXT1a)
+	{
+		if (compressionOptions.quality == Quality_Fastest)
+		{
+			fast.compressDXT1a(outputOptions);
+		}
+		else
+		{
+			if (useCuda)
+			{
+				// The CUDA call is commented out here, so both branches use the slow CPU compressor.
+				nvDebugCheck(cudaSupported);
+				/*cuda*/slow.compressDXT1a(compressionOptions, outputOptions);
+			}
+			else
+			{
+				slow.compressDXT1a(compressionOptions, outputOptions);
+			}
+		}
+	}
+	else if (compressionOptions.format == Format_DXT3)
+	{
+		if (compressionOptions.quality == Quality_Fastest)
+		{
+			fast.compressDXT3(outputOptions);
+		}
+		else
+		{
+			if (useCuda)
+			{
+				nvDebugCheck(cudaSupported);
+				cuda->setImage(image, inputOptions.alphaMode);
+				cuda->compressDXT3(compressionOptions, outputOptions);
+			}
+			else
+			{
+				slow.compressDXT3(compressionOptions, outputOptions);
+			}
+		}
+	}
+	else if (compressionOptions.format == Format_DXT5)
+	{
+		if (compressionOptions.quality == Quality_Fastest)
+		{
+			fast.compressDXT5(outputOptions);
+		}
+		else
+		{
+			if (useCuda)
+			{
+				nvDebugCheck(cudaSupported);
+				cuda->setImage(image, inputOptions.alphaMode);
+				cuda->compressDXT5(compressionOptions, outputOptions);
+			}
+			else
+			{
+				slow.compressDXT5(compressionOptions, outputOptions);
+			}
+		}
+	}
+	else if (compressionOptions.format == Format_DXT5n)
+	{
+		// No CUDA path for this format.
+		if (compressionOptions.quality == Quality_Fastest)
+		{
+			fast.compressDXT5n(outputOptions);
+		}
+		else
+		{
+			slow.compressDXT5n(compressionOptions, outputOptions);
+		}
+	}
+	else if (compressionOptions.format == Format_BC4)
+	{
+		// BC4 and BC5 always use the slow compressor, regardless of quality.
+		slow.compressBC4(compressionOptions, outputOptions);
+	}
+	else if (compressionOptions.format == Format_BC5)
+	{
+		slow.compressBC5(compressionOptions, outputOptions);
+	}
+
+	return true;
+}
+
+
+// Total size in bytes of all mipmap levels of all faces for the given options.
+// Only sizes are added up; no image data is read. The header is not included.
+int Compressor::Private::estimateSize(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions) const
+{
+	inputOptions.computeTargetExtents();
+
+	const uint levelCount = inputOptions.realMipmapCount();
+
+	int total = 0;
+
+	for (uint face = 0; face < inputOptions.faceCount; face++)
+	{
+		// Every face starts again from the target extents.
+		uint width = inputOptions.targetWidth;
+		uint height = inputOptions.targetHeight;
+		uint depth = inputOptions.targetDepth;
+
+		for (uint level = 0; level < levelCount; level++)
+		{
+			total += computeImageSize(width, height, depth, compressionOptions.bitcount, compressionOptions.format);
+
+			// Halve each extent for the next level, never going below 1.
+			width = max(1U, width / 2);
+			height = max(1U, height / 2);
+			depth = max(1U, depth / 2);
+		}
+	}
+
+	return total;
+}
--- a/src/nvtt/Compressor.h
+++ b/src/nvtt/Compressor.h
@ -1,4 +1,4 @@
-// Copyright Ignacio Castano <icastano@nvidia.com> 2009
+// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
 // 
 // Permission is hereby granted, free of charge, to any person
 // obtaining a copy of this software and associated documentation
@ -24,17 +24,57 @@
 #ifndef NV_TT_COMPRESSOR_H
 #define NV_TT_COMPRESSOR_H

-#include <nvcore/nvcore.h> // uint
+#include <nvcore/Ptr.h>
+
+#include <nvtt/cuda/CudaCompressDXT.h>
+
 #include "nvtt.h"

 namespace nv
 {
-	struct CompressorInterface
+	class Image;
+}
+
+namespace nvtt
+{
+	struct Mipmap;
+
+	struct Compressor::Private
 	{
-		virtual ~CompressorInterface() {}
-		virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) = 0;
+		Private() {}
+
+		bool compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
+		int estimateSize(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions) const;
+
+	private:
+
+		bool outputHeader(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
+		bool compressMipmaps(uint f, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
+
+		bool initMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f, uint m) const;
+
+		int findExactMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const;
+		int findClosestMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const;
+
+		void downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
+		void scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const;
+		void processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
+		void quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const;
+		bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
+
+
+
+	public:
+
+		bool cudaSupported;
+		bool cudaEnabled;
+		int cudaDevice;
+
+		nv::AutoPtr<nv::CudaCompressor> cuda;
+
 	};

-} // nv namespace
+} // nvtt namespace

-#endif // NV_TT_COMPRESSOR_H
+
+#endif // NV_TT_COMPRESSOR_H
--- a/src/nvtt/CompressorDXT.cpp
+++ b/src/nvtt/CompressorDXT.cpp
@ -1,676 +0,0 @@
-// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#include "CompressorDXT.h"
-#include "QuickCompressDXT.h"
-#include "OptimalCompressDXT.h"
-#include "CompressionOptions.h"
-#include "OutputOptions.h"
-
-// squish
-#include "squish/colourset.h"
-#include "squish/fastclusterfit.h"
-#include "squish/weightedclusterfit.h"
-
-#include "nvtt.h"
-
-#include "nvcore/Memory.h"
-
-#include "nvimage/Image.h"
-#include "nvimage/ColorBlock.h"
-#include "nvimage/BlockDXT.h"
-
-
-// s3_quant
-#if defined(HAVE_S3QUANT)
-#include "s3tc/s3_quant.h"
-#endif
-
-// ati tc
-#if defined(HAVE_ATITC)
-typedef int BOOL;
-typedef _W64 unsigned long ULONG_PTR;
-typedef ULONG_PTR DWORD_PTR;
-#include "atitc/ATI_Compress.h"
-#endif
-
-// squish
-#if defined(HAVE_SQUISH)
-//#include "squish/squish.h"
-#include "squish-1.10/squish.h"
-#endif
-
-// d3dx
-#if defined(HAVE_D3DX)
-#include <d3dx9.h>
-#endif
-
-// stb
-#if defined(HAVE_STB)
-#define STB_DEFINE
-#include "stb/stb_dxt.h"
-#endif
-
-// OpenMP
-#if defined(HAVE_OPENMP)
-#include <omp.h>
-#endif
-
-using namespace nv;
-using namespace nvtt;
-
-
-// Compresses a w x h image in 4x4 blocks and hands the encoded blocks to
-// outputOptions.outputHandler (nothing is written when no handler is set).
-//
-// `data` is read as 32-bit words for InputFormat_BGRA_8UB and as floats
-// otherwise (InputFormat_RGBA_32F is asserted in that case). Each block is
-// encoded by the subclass's compressBlock() into blockSize() bytes.
-// Builds without HAVE_OPENMP, and images with fewer than 16 blocks, take the
-// single-threaded path.
-void FixedBlockCompressor::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
-{
-	const uint bs = blockSize();
-	// Block counts, rounded up so partial blocks at the right/bottom edge are included.
-	const uint bw = (w + 3) / 4;
-	const uint bh = (h + 3) / 4;
-	// Total encoded size in bytes; only used by the OpenMP path below.
-	const uint size = bs * bw * bh;
-
-#if defined(HAVE_OPENMP)
-	bool singleThreaded = false;
-#else
-	bool singleThreaded = true;
-#endif
-
-	// Use a single thread to compress small textures.
-	if (bw * bh < 16) singleThreaded = true;
-
-	if (singleThreaded)
-	{
-		// One block of scratch space is enough: each block is written out as soon as it is encoded.
-		nvDebugCheck(bs <= 16);
-		uint8 mem[16];
-
-		for (int y = 0; y < int(h); y += 4) {
-			for (uint x = 0; x < w; x += 4) {
-
-				ColorBlock rgba;
-				if (inputFormat == nvtt::InputFormat_BGRA_8UB) {
-					rgba.init(w, h, (uint *)data, x, y);
-				}
-				else {
-					nvDebugCheck(inputFormat == nvtt::InputFormat_RGBA_32F);
-					rgba.init(w, h, (float *)data, x, y);
-				}
-
-				compressBlock(rgba, alphaMode, compressionOptions, mem);
-
-				if (outputOptions.outputHandler != NULL) {
-					outputOptions.outputHandler->writeData(mem, bs);
-				}
-			}
-		}
-	}
-#if defined(HAVE_OPENMP)
-	else
-	{
-		// Encode every block into one buffer, then write the buffer with a single call.
-		uint8 * mem = new uint8[size];
-
-	#pragma omp parallel
-		{
-	#pragma omp for
-			for (int i = 0; i < int(bw*bh); i++)
-			{
-				// Recover the block coordinates from the linear block index.
-				const uint x = i % bw;
-				const uint y = i / bw;
-
-				ColorBlock rgba;
-				if (inputFormat == nvtt::InputFormat_BGRA_8UB) {
-					rgba.init(w, h, (uint *)data, 4*x, 4*y);
-				}
-				else {
-					nvDebugCheck(inputFormat == nvtt::InputFormat_RGBA_32F);
-					rgba.init(w, h, (float *)data, 4*x, 4*y);
-				}
-
-				// Each iteration writes to its own slot, addressed by block position.
-				uint8 * ptr = mem + (y * bw + x) * bs;
-				compressBlock(rgba, alphaMode, compressionOptions, ptr);
-			} // omp for
-		} // omp parallel
-
-		if (outputOptions.outputHandler != NULL) {
-			outputOptions.outputHandler->writeData(mem, size);
-		}
-
-		delete [] mem;
-	}
-#endif
-}
-
-
-// Encodes one color block as DXT1 with the quick compressor.
-// alphaMode and compressionOptions are accepted for interface parity but not read.
-void FastCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
-{
-	// Construct the block in place inside the caller-supplied buffer, then fill it.
-	BlockDXT1 * const dxt1 = new(output) BlockDXT1;
-	QuickCompress::compressDXT1(rgba, dxt1);
-}
-
-// Encodes one color block as DXT1a with the quick compressor.
-// alphaMode and compressionOptions are accepted for interface parity but not read.
-void FastCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
-{
-	// Construct the block in place inside the caller-supplied buffer, then fill it.
-	BlockDXT1 * const dxt1a = new(output) BlockDXT1;
-	QuickCompress::compressDXT1a(rgba, dxt1a);
-}
-
-// Encodes one color block as DXT3 with the quick compressor.
-// alphaMode and compressionOptions are accepted for interface parity but not read.
-void FastCompressorDXT3::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
-{
-	// Construct the block in place inside the caller-supplied buffer, then fill it.
-	BlockDXT3 * const dxt3 = new(output) BlockDXT3;
-	QuickCompress::compressDXT3(rgba, dxt3);
-}
-
-// Encodes one color block as DXT5 with the quick compressor.
-// alphaMode and compressionOptions are accepted for interface parity but not read.
-void FastCompressorDXT5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
-{
-	// Construct the block in place inside the caller-supplied buffer, then fill it.
-	BlockDXT5 * const dxt5 = new(output) BlockDXT5;
-	QuickCompress::compressDXT5(rgba, dxt5);
-}
-
-// Quick DXT5n encoding: the block's channels are rearranged in place (this
-// modifies the caller's ColorBlock) and the result is encoded as plain DXT5.
-void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
-{
-	BlockDXT5 * const dxt5 = new(output) BlockDXT5;
-
-	// Channel layout after the swizzle: 0xFF, G, 0, R
-	rgba.swizzle(4, 1, 5, 0);
-
-	QuickCompress::compressDXT5(rgba, dxt5);
-}
-
-// Quick BC4 encoding: the red channel is copied into alpha (modifying the
-// caller's ColorBlock) and encoded with the DXT5 alpha compressor.
-void FastCompressorBC4::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
-{
-	// Copy red to alpha
-	rgba.swizzle(0, 1, 2, 0);
-
-	BlockATI1 * const ati1 = new(output) BlockATI1;
-	QuickCompress::compressDXT5A(rgba, &ati1->alpha);
-}
-
-// Quick BC5 encoding: red and then green are copied into alpha in turn
-// (modifying the caller's ColorBlock), and each is encoded with the DXT5
-// alpha compressor into the x and y halves of the block.
-void FastCompressorBC5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
-{
-	BlockATI2 * const ati2 = new(output) BlockATI2;
-
-	// First half: copy red to alpha and encode it as x.
-	rgba.swizzle(0, 1, 2, 0);
-	QuickCompress::compressDXT5A(rgba, &ati2->x);
-
-	// Second half: copy green to alpha and encode it as y.
-	rgba.swizzle(0, 1, 2, 1);
-	QuickCompress::compressDXT5A(rgba, &ati2->y);
-}
-
-
-// Encodes one color block as DXT1. Single-color blocks go through
-// OptimalCompress; everything else is fitted with squish's weighted cluster
-// fit, using the per-channel weights from compressionOptions.colorWeight.
-// alphaMode is not read.
-void NormalCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
-{
-	nvsquish::WeightedClusterFit clusterFit;
-	clusterFit.SetMetric(
-		compressionOptions.colorWeight.x(),
-		compressionOptions.colorWeight.y(),
-		compressionOptions.colorWeight.z());
-
-	if (rgba.isSingleColor())
-	{
-		// Only the first color is needed when all of them are the same.
-		OptimalCompress::compressDXT1(rgba.color(0), new(output) BlockDXT1);
-		return;
-	}
-
-	nvsquish::ColourSet colourSet((uint8 *)rgba.colors(), 0);
-	clusterFit.SetColourSet(&colourSet, nvsquish::kDxt1);
-	clusterFit.Compress(output);
-}
-
-
-// Encodes one color block as DXT1a. Blocks that can be described by their
-// first color alone go through OptimalCompress; the rest are fitted with
-// squish's weighted cluster fit.
-void NormalCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
-{
-	// Count the colors whose alpha is below the 128 threshold.
-	uint belowThreshold = 0;
-	for (uint i = 0; i < 16; i++)
-	{
-		if (rgba.color(i).a < 128) belowThreshold++;
-	}
-	const bool anyBelow = (belowThreshold > 0);
-	const bool allBelow = (belowThreshold == 16);
-
-	const bool uniformColor = rgba.isSingleColor();
-
-	// Either every alpha is below the threshold, or none is and the block is a
-	// single color: both cases are encoded from the first color.
-	if (allBelow || (uniformColor && !anyBelow))
-	{
-		OptimalCompress::compressDXT1a(rgba.color(0), new(output) BlockDXT1);
-		return;
-	}
-
-	nvsquish::WeightedClusterFit clusterFit;
-	clusterFit.SetMetric(
-		compressionOptions.colorWeight.x(),
-		compressionOptions.colorWeight.y(),
-		compressionOptions.colorWeight.z());
-
-	// Colors are weighted by alpha only when the image uses alpha as transparency.
-	int colourFlags = nvsquish::kDxt1;
-	if (alphaMode == nvtt::AlphaMode_Transparency)
-	{
-		colourFlags |= nvsquish::kWeightColourByAlpha;
-	}
-
-	nvsquish::ColourSet colourSet((uint8 *)rgba.colors(), colourFlags);
-	clusterFit.SetColourSet(&colourSet, nvsquish::kDxt1);
-	clusterFit.Compress(output);
-}
-
-
-// Encodes one color block as DXT3: explicit alpha through OptimalCompress,
-// then color either through OptimalCompress (single-color blocks) or squish's
-// weighted cluster fit.
-void NormalCompressorDXT3::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
-{
-	BlockDXT3 * const dxt3 = new(output) BlockDXT3;
-
-	// Alpha half of the block.
-	OptimalCompress::compressDXT3A(rgba, &dxt3->alpha);
-
-	// Color half of the block.
-	if (rgba.isSingleColor())
-	{
-		OptimalCompress::compressDXT1(rgba.color(0), &dxt3->color);
-		return;
-	}
-
-	nvsquish::WeightedClusterFit clusterFit;
-	clusterFit.SetMetric(
-		compressionOptions.colorWeight.x(),
-		compressionOptions.colorWeight.y(),
-		compressionOptions.colorWeight.z());
-
-	// Colors are weighted by alpha only when the image uses alpha as transparency.
-	const int colourFlags = (alphaMode == nvtt::AlphaMode_Transparency) ? nvsquish::kWeightColourByAlpha : 0;
-
-	nvsquish::ColourSet colourSet((uint8 *)rgba.colors(), colourFlags);
-	clusterFit.SetColourSet(&colourSet, 0);
-	clusterFit.Compress(&dxt3->color);
-}
-
-
-// Encodes one color block as DXT5. Alpha uses the optimal compressor at
-// Quality_Highest and the quick one otherwise; color uses OptimalCompress for
-// single-color blocks and squish's weighted cluster fit for the rest.
-void NormalCompressorDXT5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
-{
-	BlockDXT5 * const dxt5 = new(output) BlockDXT5;
-
-	// Alpha half of the block.
-	const bool highestQuality = (compressionOptions.quality == Quality_Highest);
-	if (highestQuality) OptimalCompress::compressDXT5A(rgba, &dxt5->alpha);
-	else QuickCompress::compressDXT5A(rgba, &dxt5->alpha);
-
-	// Color half of the block.
-	if (rgba.isSingleColor())
-	{
-		OptimalCompress::compressDXT1(rgba.color(0), &dxt5->color);
-		return;
-	}
-
-	nvsquish::WeightedClusterFit clusterFit;
-	clusterFit.SetMetric(
-		compressionOptions.colorWeight.x(),
-		compressionOptions.colorWeight.y(),
-		compressionOptions.colorWeight.z());
-
-	// Colors are weighted by alpha only when the image uses alpha as transparency.
-	const int colourFlags = (alphaMode == nvtt::AlphaMode_Transparency) ? nvsquish::kWeightColourByAlpha : 0;
-
-	nvsquish::ColourSet colourSet((uint8 *)rgba.colors(), colourFlags);
-	clusterFit.SetColourSet(&colourSet, 0);
-	clusterFit.Compress(&dxt5->color);
-}
-
-
-// Encodes one block as DXT5n. The channels are rearranged in place (this
-// modifies the caller's ColorBlock) so that X ends up in alpha and Y stays in
-// green; X is then stored in the DXT5 alpha half and Y in the color half.
-void NormalCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
-{
-	const bool highestQuality = (compressionOptions.quality == Quality_Highest);
-
-	// Channel layout after the swizzle: 0xFF, G, 0, R
-	rgba.swizzle(4, 1, 5, 0);
-
-	BlockDXT5 * const dxt5 = new(output) BlockDXT5;
-
-	// X component.
-	if (highestQuality) OptimalCompress::compressDXT5A(rgba, &dxt5->alpha);
-	else QuickCompress::compressDXT5A(rgba, &dxt5->alpha);
-
-	// Y component.
-	if (highestQuality)
-	{
-		OptimalCompress::compressDXT1G(rgba, &dxt5->color);
-		return;
-	}
-
-	if (rgba.isSingleColor())
-	{
-		OptimalCompress::compressDXT1G(rgba.color(0), &dxt5->color);
-		return;
-	}
-
-	// Only the green channel contributes to the fit metric.
-	nvsquish::WeightedClusterFit clusterFit;
-	clusterFit.SetMetric(0, 1, 0);
-
-	const int colourFlags = (alphaMode == nvtt::AlphaMode_Transparency) ? nvsquish::kWeightColourByAlpha : 0;
-
-	nvsquish::ColourSet colourSet((uint8 *)rgba.colors(), colourFlags);
-	clusterFit.SetColourSet(&colourSet, 0);
-	clusterFit.Compress(&dxt5->color);
-}
-
-
-// BC4 encoding with the optimal alpha compressor: the red channel is copied
-// into alpha (modifying the caller's ColorBlock) and encoded from there.
-void ProductionCompressorBC4::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
-{
-	// Copy red to alpha
-	rgba.swizzle(0, 1, 2, 0);
-
-	BlockATI1 * const ati1 = new(output) BlockATI1;
-	OptimalCompress::compressDXT5A(rgba, &ati1->alpha);
-}
-
-// BC5 encoding with the optimal alpha compressor: red and then green are
-// copied into alpha in turn (modifying the caller's ColorBlock), and each is
-// encoded into the x and y halves of the block.
-void ProductionCompressorBC5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
-{
-	BlockATI2 * const ati2 = new(output) BlockATI2;
-
-	// First half: copy red to alpha and encode it as x.
-	rgba.swizzle(0, 1, 2, 0);
-	OptimalCompress::compressDXT5A(rgba, &ati2->x);
-
-	// Second half: copy green to alpha and encode it as y.
-	rgba.swizzle(0, 1, 2, 1);
-	OptimalCompress::compressDXT5A(rgba, &ati2->y);
-}
-
-
-
-#if defined(HAVE_S3QUANT)
-
-// Compresses the image to DXT1 with the s3_quant encoder (CodeRGBBlock).
-//
-// Every 4x4 block is encoded twice, once with inLevel = 4 and once with
-// inLevel = 3; blockError() is evaluated for both candidates and the one with
-// the smaller error is written to outputOptions.outputHandler (if set).
-// alphaMode and compressionOptions are not read.
-// NOTE(review): `error` accumulates the chosen per-block errors but is never
-// read or returned.
-// NOTE(review): block.init() is called here with inputFormat as first
-// argument, unlike the ColorBlock::init(w, h, ptr, x, y) calls elsewhere in
-// this file; confirm that overload exists.
-void S3CompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
-{
-	float error = 0.0f;
-
-	BlockDXT1 dxtBlock3;
-	BlockDXT1 dxtBlock4;
-	ColorBlock block;
-
-	for (uint y = 0; y < h; y += 4) {
-		for (uint x = 0; x < w; x += 4) {
-			block.init(inputFormat, w, h, data, x, y);
-
-			// Init rgb block.
-			// Colors are handed to the encoder as floats in [0, 1], all three channels weighted equally.
-			RGBBlock rgbBlock;
-			rgbBlock.n = 16;
-			for (uint i = 0; i < 16; i++) {
-				rgbBlock.colorChannel[i][0] = clamp(float(block.color(i).r) / 255.0f, 0.0f, 1.0f);
-				rgbBlock.colorChannel[i][1] = clamp(float(block.color(i).g) / 255.0f, 0.0f, 1.0f);
-				rgbBlock.colorChannel[i][2] = clamp(float(block.color(i).b) / 255.0f, 0.0f, 1.0f);
-			}
-			rgbBlock.weight[0] = 1.0f;
-			rgbBlock.weight[1] = 1.0f;
-			rgbBlock.weight[2] = 1.0f;
-
-			// First candidate: inLevel = 4.
-			rgbBlock.inLevel = 4;
-			CodeRGBBlock(&rgbBlock);
-
-			// Copy results to DXT block.
-			dxtBlock4.col0.r = rgbBlock.endPoint[0][0];
-			dxtBlock4.col0.g = rgbBlock.endPoint[0][1];
-			dxtBlock4.col0.b = rgbBlock.endPoint[0][2];
-
-			dxtBlock4.col1.r = rgbBlock.endPoint[1][0];
-			dxtBlock4.col1.g = rgbBlock.endPoint[1][1];
-			dxtBlock4.col1.b = rgbBlock.endPoint[1][2];
-
-			dxtBlock4.setIndices(rgbBlock.index);
-
-			// Keep col0 >= col1 for this candidate. After swapping the endpoints, flipping the
-			// low bit of every 2-bit index exchanges 0<->1 and 2<->3.
-			if (dxtBlock4.col0.u < dxtBlock4.col1.u) {
-				swap(dxtBlock4.col0.u, dxtBlock4.col1.u);
-				dxtBlock4.indices ^= 0x55555555;
-			}
-
-			uint error4 = blockError(block, dxtBlock4);
-
-			// Second candidate: inLevel = 3, reusing the same input colors.
-			rgbBlock.inLevel = 3;
-
-			CodeRGBBlock(&rgbBlock);
-
-			// Copy results to DXT block.
-			dxtBlock3.col0.r = rgbBlock.endPoint[0][0];
-			dxtBlock3.col0.g = rgbBlock.endPoint[0][1];
-			dxtBlock3.col0.b = rgbBlock.endPoint[0][2];
-
-			dxtBlock3.col1.r = rgbBlock.endPoint[1][0];
-			dxtBlock3.col1.g = rgbBlock.endPoint[1][1];
-			dxtBlock3.col1.b = rgbBlock.endPoint[1][2];
-
-			dxtBlock3.setIndices(rgbBlock.index);
-
-			// Keep col0 <= col1 for this candidate. After swapping the endpoints, the low bit is
-			// flipped only where the high bit of the index is clear: 0<->1, while 2 and 3 are kept.
-			if (dxtBlock3.col0.u > dxtBlock3.col1.u) {
-				swap(dxtBlock3.col0.u, dxtBlock3.col1.u);
-				dxtBlock3.indices ^= (~dxtBlock3.indices  >> 1) & 0x55555555;
-			}
-
-			uint error3 = blockError(block, dxtBlock3);
-
-			// Emit the candidate with the smaller error; ties go to the inLevel = 4 candidate.
-			if (error3 < error4) {
-				error += error3;
-
-				if (outputOptions.outputHandler != NULL) {
-					outputOptions.outputHandler->writeData(&dxtBlock3, sizeof(dxtBlock3));
-				}
-			}
-			else {
-				error += error4;
-
-				if (outputOptions.outputHandler != NULL) {
-					outputOptions.outputHandler->writeData(&dxtBlock4, sizeof(dxtBlock4));
-				}
-			}
-		}
-	}
-}
-
-#endif // defined(HAVE_S3QUANT)
-
-
-#if defined(HAVE_ATITC)
-
-// Compresses the image to DXT1 with the ATI_Compress library and hands the
-// whole compressed buffer to the output handler (if set) in a single write.
-// alphaMode and compressionOptions are not read.
-void AtiCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
-{
-	const bool isByteInput = (inputFormat == nvtt::InputFormat_BGRA_8UB);
-
-	// Describe the caller's pixels: 4 bytes per pixel for BGRA_8UB input, 16 otherwise.
-	ATI_TC_Texture source;
-	source.dwSize = sizeof(source);
-	source.dwWidth = w;
-	source.dwHeight = h;
-	source.dwPitch = isByteInput ? (w * 4) : (w * 16);
-	source.format = isByteInput ? ATI_TC_FORMAT_ARGB_8888 : ATI_TC_FORMAT_ARGB_32F;
-	source.dwDataSize = ATI_TC_CalculateBufferSize(&source);
-	source.pData = (ATI_TC_BYTE*) data;
-
-	// Describe the DXT1 result and allocate the number of bytes the library asks for.
-	ATI_TC_Texture compressed;
-	compressed.dwSize = sizeof(compressed);
-	compressed.dwWidth = w;
-	compressed.dwHeight = h;
-	compressed.dwPitch = 0;
-	compressed.format = ATI_TC_FORMAT_DXT1;
-	compressed.dwDataSize = ATI_TC_CalculateBufferSize(&compressed);
-	compressed.pData = (ATI_TC_BYTE*) mem::malloc(compressed.dwDataSize);
-
-	// Plain settings: no channel or adaptive weighting, no DXT1 alpha,
-	// normal speed, multi-threading left enabled.
-	ATI_TC_CompressOptions settings;
-	settings.dwSize = sizeof(settings);
-	settings.bUseChannelWeighting = false;
-	settings.bUseAdaptiveWeighting = false;
-	settings.bDXT1UseAlpha = false;
-	settings.nCompressionSpeed = ATI_TC_Speed_Normal;
-	settings.bDisableMultiThreading = false;
-
-	ATI_TC_ConvertTexture(&source, &compressed, &settings, NULL, NULL, NULL);
-
-	if (outputOptions.outputHandler != NULL) {
-		outputOptions.outputHandler->writeData(compressed.pData, compressed.dwDataSize);
-	}
-
-	mem::free(compressed.pData);
-}
-
-// Compresses the image to DXT5 with the ATI_Compress library, passing no
-// options structure, and hands the whole compressed buffer to the output
-// handler (if set) in a single write.
-// alphaMode and compressionOptions are not read.
-void AtiCompressorDXT5::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
-{
-	const bool isByteInput = (inputFormat == nvtt::InputFormat_BGRA_8UB);
-
-	// Describe the caller's pixels: 4 bytes per pixel for BGRA_8UB input, 16 otherwise.
-	ATI_TC_Texture source;
-	source.dwSize = sizeof(source);
-	source.dwWidth = w;
-	source.dwHeight = h;
-	source.dwPitch = isByteInput ? (w * 4) : (w * 16);
-	source.format = isByteInput ? ATI_TC_FORMAT_ARGB_8888 : ATI_TC_FORMAT_ARGB_32F;
-	source.dwDataSize = ATI_TC_CalculateBufferSize(&source);
-	source.pData = (ATI_TC_BYTE*) data;
-
-	// Describe the DXT5 result and allocate the number of bytes the library asks for.
-	ATI_TC_Texture compressed;
-	compressed.dwSize = sizeof(compressed);
-	compressed.dwWidth = w;
-	compressed.dwHeight = h;
-	compressed.dwPitch = 0;
-	compressed.format = ATI_TC_FORMAT_DXT5;
-	compressed.dwDataSize = ATI_TC_CalculateBufferSize(&compressed);
-	compressed.pData = (ATI_TC_BYTE*) mem::malloc(compressed.dwDataSize);
-
-	ATI_TC_ConvertTexture(&source, &compressed, NULL, NULL, NULL, NULL);
-
-	if (outputOptions.outputHandler != NULL) {
-		outputOptions.outputHandler->writeData(compressed.pData, compressed.dwDataSize);
-	}
-
-	mem::free(compressed.pData);
-}
-
-#endif // defined(HAVE_ATITC)
-
-#if defined(HAVE_SQUISH)
-
-// Placeholder for a squish-backed DXT1 compressor. The whole implementation
-// below is commented out, so this function currently does nothing at run time
-// and writes no data to the output handler; only the compile-time TODO
-// message remains active.
-void SquishCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
-{
-#pragma message(NV_FILE_LINE "TODO: Convert input to fixed point ABGR format instead of ARGB")
-	/*
-	Image img(*image);
-	int count = img.width() * img.height();
-	for (int i = 0; i < count; i++)
-	{
-		Color32 c = img.pixel(i);
-		img.pixel(i) = Color32(c.b, c.g, c.r, c.a);
-	}
-
-	int size = squish::GetStorageRequirements(img.width(), img.height(), squish::kDxt1);
-	void * blocks = mem::malloc(size);
-
-	squish::CompressImage((const squish::u8 *)img.pixels(), img.width(), img.height(), blocks, squish::kDxt1 | squish::kColourClusterFit);
-
-	if (outputOptions.outputHandler != NULL) {
-		outputOptions.outputHandler->writeData(blocks, size);
-	}
-
-	mem::free(blocks);
-	*/
-}
-
-#endif // defined(HAVE_SQUISH)
-
-
-#if defined(HAVE_D3DX)
-
-// Compresses the image to DXT1 by loading it into a DXT1 texture through D3DX
-// on a reference device, then writes the texture's surface contents to
-// outputOptions.outputHandler (if set).
-//
-// `data` is interpreted as A8R8G8B8 for InputFormat_BGRA_8UB and as
-// A32B32G32R32F otherwise. alphaMode and compressionOptions are not read.
-//
-// If any Direct3D/D3DX call fails, nothing is written and the function
-// returns after releasing whatever it had acquired. Every interface obtained
-// here (d3d, device, texture, surface) is released before returning.
-void D3DXCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
-{
-	IDirect3D9 * d3d = Direct3DCreate9(D3D_SDK_VERSION);
-	if (d3d == NULL)
-	{
-		// No Direct3D runtime available; nothing to clean up yet.
-		return;
-	}
-
-	D3DPRESENT_PARAMETERS presentParams;
-	ZeroMemory(&presentParams, sizeof(presentParams));
-	presentParams.Windowed = TRUE;
-	presentParams.SwapEffect = D3DSWAPEFFECT_COPY;
-	presentParams.BackBufferWidth = 8;
-	presentParams.BackBufferHeight = 8;
-	presentParams.BackBufferFormat = D3DFMT_UNKNOWN;
-
-	IDirect3DDevice9 * device = NULL;
-	IDirect3DTexture9 * texture = NULL;
-	IDirect3DSurface9 * surface = NULL;
-
-	// Each step runs only if the previous one succeeded, so no NULL interface is ever dereferenced.
-	HRESULT err = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GetDesktopWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING, &presentParams, &device);
-
-	if (SUCCEEDED(err))
-	{
-		err = D3DXCreateTexture(device, w, h, 1, 0, D3DFMT_DXT1, D3DPOOL_SYSTEMMEM, &texture);
-	}
-
-	if (SUCCEEDED(err))
-	{
-		err = texture->GetSurfaceLevel(0, &surface);
-	}
-
-	if (SUCCEEDED(err))
-	{
-		// Source rectangle covering the whole input image.
-		RECT srcRect;
-		srcRect.left = 0;
-		srcRect.top = 0;
-		srcRect.bottom = h;
-		srcRect.right = w;
-
-		if (inputFormat == nvtt::InputFormat_BGRA_8UB)
-		{
-			err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A8R8G8B8, w * 4, NULL, &srcRect, D3DX_DEFAULT, 0);
-		}
-		else
-		{
-			err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A32B32G32R32F, w * 16, NULL, &srcRect, D3DX_DEFAULT, 0);
-		}
-	}
-
-	if (SUCCEEDED(err))
-	{
-		D3DLOCKED_RECT locked;
-		ZeroMemory(&locked, sizeof(locked));
-
-		err = surface->LockRect(&locked, NULL, D3DLOCK_READONLY);
-
-		// Only read pBits (and unlock) when the lock was actually obtained.
-		if (SUCCEEDED(err))
-		{
-			if (outputOptions.outputHandler != NULL) {
-				int size = locked.Pitch * ((h + 3) / 4);
-				outputOptions.outputHandler->writeData(locked.pBits, size);
-			}
-
-			surface->UnlockRect();
-		}
-	}
-
-	// Release in reverse order of acquisition; the texture was previously leaked.
-	if (surface != NULL) surface->Release();
-	if (texture != NULL) texture->Release();
-	if (device != NULL) device->Release();
-	d3d->Release();
-}
-
-#endif // defined(HAVE_D3DX)
-
-
-#if defined(HAVE_STB)
-
-// Encodes one color block as DXT1 with stb_dxt. Red and blue are exchanged in
-// place first, which modifies the caller's ColorBlock.
-// alphaMode and compressionOptions are not read.
-void StbCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
-{
-	// Swap R and B
-	rgba.swizzle(2, 1, 0, 3);
-
-	unsigned char * const dst = (unsigned char *)output;
-	unsigned char * const src = (unsigned char *)rgba.colors();
-	stb_compress_dxt_block(dst, src, 0, 0);
-}
-
-
-#endif // defined(HAVE_STB)
--- a/src/nvtt/CompressorDXT.h
+++ b/src/nvtt/CompressorDXT.h
@ -1,179 +0,0 @@
-// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#ifndef NV_TT_COMPRESSORDXT_H
-#define NV_TT_COMPRESSORDXT_H
-
-#include <nvcore/nvcore.h>
-#include "nvtt.h"
-#include "Compressor.h"
-
-// Declarations of the CPU block compressors. Compressors that encode one
-// block at a time derive from FixedBlockCompressor; the optional external
-// ones that take the whole image derive directly from CompressorInterface.
-namespace nv
-{
-	struct ColorBlock;
-
-	// Base for compressors that encode independent fixed-size blocks.
-	// compress() drives the per-block loop; subclasses supply compressBlock(),
-	// which writes blockSize() bytes to `output`, and blockSize() itself.
-	struct FixedBlockCompressor : public CompressorInterface
-	{
-		virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
-
-		virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0;
-		virtual uint blockSize() const = 0;
-	};
-
-
-	// Fast CPU compressors.
-	// Block sizes below are in bytes: 8 for DXT1/DXT1a/BC4, 16 for DXT3/DXT5/DXT5n/BC5.
-	struct FastCompressorDXT1 : public FixedBlockCompressor
-	{
-		virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
-		virtual uint blockSize() const { return 8; }
-	};
-
-	struct FastCompressorDXT1a : public FixedBlockCompressor
-	{
-		virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
-		virtual uint blockSize() const { return 8; }
-	};
-
-	struct FastCompressorDXT3 : public FixedBlockCompressor
-	{
-		virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
-		virtual uint blockSize() const { return 16; }
-	};
-
-	struct FastCompressorDXT5 : public FixedBlockCompressor
-	{
-		virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
-		virtual uint blockSize() const { return 16; }
-	};
-
-	struct FastCompressorDXT5n : public FixedBlockCompressor
-	{
-		virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
-		virtual uint blockSize() const { return 16; }
-	};
-
-	struct FastCompressorBC4 : public FixedBlockCompressor
-	{
-		virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
-		virtual uint blockSize() const { return 8; }
-	};
-
-	struct FastCompressorBC5 : public FixedBlockCompressor
-	{
-		virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
-		virtual uint blockSize() const { return 16; }
-	};
-
-
-	// Normal CPU compressors.
-	struct NormalCompressorDXT1 : public FixedBlockCompressor
-	{
-		virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
-		virtual uint blockSize() const { return 8; }
-	};
-
-	struct NormalCompressorDXT1a : public FixedBlockCompressor
-	{
-		virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
-		virtual uint blockSize() const { return 8; }
-	};
-
-	struct NormalCompressorDXT3 : public FixedBlockCompressor
-	{
-		virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
-		virtual uint blockSize() const { return 16; }
-	};
-
-	struct NormalCompressorDXT5 : public FixedBlockCompressor
-	{
-		virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
-		virtual uint blockSize() const { return 16; }
-	};
-
-	struct NormalCompressorDXT5n : public FixedBlockCompressor
-	{
-		virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
-		virtual uint blockSize() const { return 16; }
-	};
-
-
-	// Production CPU compressors.
-	struct ProductionCompressorBC4 : public FixedBlockCompressor
-	{
-		virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
-		virtual uint blockSize() const { return 8; }
-	};
-
-	struct ProductionCompressorBC5 : public FixedBlockCompressor
-	{
-		virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
-		virtual uint blockSize() const { return 16; }
-	};
-
-
-	// External compressors.
-	// Each one is compiled only when the matching HAVE_* macro is defined.
-	// NOTE(review): the whole-image compress() declarations below take `void * data`,
-	// whereas FixedBlockCompressor::compress above takes `const void * data`. If the
-	// virtual in CompressorInterface is also declared with `const void *`, these
-	// declarations would not override it - confirm against Compressor.h.
-#if defined(HAVE_S3QUANT)
-	struct S3CompressorDXT1 : public CompressorInterface
-	{
-		virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
-	};
-#endif
-	
-#if defined(HAVE_ATITC)
-	struct AtiCompressorDXT1 : public CompressorInterface
-	{
-		virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
-	};
-
-	struct AtiCompressorDXT5 : public CompressorInterface
-	{
-		virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
-	};
-#endif
-
-#if defined(HAVE_SQUISH)
-	struct SquishCompressorDXT1 : public CompressorInterface
-	{
-		virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
-	};
-#endif
-
-#if defined(HAVE_D3DX)
-	struct D3DXCompressorDXT1 : public CompressorInterface
-	{
-		virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
-	};
-#endif
-
-#if defined(HAVE_STB)
-	struct StbCompressorDXT1 : public FixedBlockCompressor
-	{
-		virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
-		virtual uint blockSize() const { return 8; }
-	};
-#endif
-
-} // nv namespace
-
-
-#endif // NV_TT_COMPRESSORDXT_H
--- a/src/nvtt/CompressorRGB.cpp
+++ b/src/nvtt/CompressorRGB.cpp
@ -1,230 +0,0 @@
-// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#include "CompressorRGB.h"
-#include "CompressionOptions.h"
-#include "OutputOptions.h"
-
-#include <nvimage/Image.h>
-#include <nvimage/FloatImage.h>
-#include <nvimage/PixelFormat.h>
-
-#include <nvmath/Color.h>
-#include <nvmath/Half.h>
-
-#include <nvcore/Debug.h>
-
-using namespace nv;
-using namespace nvtt;
-
-namespace 
-{
-
-	inline uint computePitch(uint w, uint bitsize)
-	{
-		uint p = w * ((bitsize + 7) / 8);
-
-		// Align to 32 bits.
-		return ((p + 3) / 4) * 4;
-	}
-
-	inline void convert_to_a8r8g8b8(const void * src, void * dst, uint w)
-	{
-		memcpy(dst, src, 4 * w);
-	}
-
-	inline void convert_to_x8r8g8b8(const void * src, void * dst, uint w)
-	{
-		memcpy(dst, src, 4 * w);
-	}
-
-    static uint16 to_half(float f)
-    {
-	    union { float f; uint32 u; } c;
-        c.f = f;
-        return half_from_float(c.u);
-    }
-
-} // namespace
-
-
-
-void PixelFormatConverter::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
-{
-	uint bitCount;
-	uint rmask, rshift, rsize;
-	uint gmask, gshift, gsize;
-	uint bmask, bshift, bsize;
-	uint amask, ashift, asize;
-
-    if (compressionOptions.pixelType == nvtt::PixelType_Float)
-    {
-	    rsize = compressionOptions.rsize;
-	    gsize = compressionOptions.gsize;
-	    bsize = compressionOptions.bsize;
-	    asize = compressionOptions.asize;
-
-	    nvCheck(rsize == 0 || rsize == 16 || rsize == 32);
-	    nvCheck(gsize == 0 || gsize == 16 || gsize == 32);
-	    nvCheck(bsize == 0 || bsize == 16 || bsize == 32);
-	    nvCheck(asize == 0 || asize == 16 || asize == 32);
-
-	    bitCount = rsize + gsize + bsize + asize;
-    }
-    else
-    {
-	    if (compressionOptions.bitcount != 0)
-	    {
-		    bitCount = compressionOptions.bitcount;
-		    nvCheck(bitCount == 8 || bitCount == 16 || bitCount == 24 || bitCount == 32);
-
-		    rmask = compressionOptions.rmask;
-		    gmask = compressionOptions.gmask;
-		    bmask = compressionOptions.bmask;
-		    amask = compressionOptions.amask;
-
-		    PixelFormat::maskShiftAndSize(rmask, &rshift, &rsize);
-		    PixelFormat::maskShiftAndSize(gmask, &gshift, &gsize);
-		    PixelFormat::maskShiftAndSize(bmask, &bshift, &bsize);
-		    PixelFormat::maskShiftAndSize(amask, &ashift, &asize);
-	    }
-	    else
-	    {
-		    rsize = compressionOptions.rsize;
-		    gsize = compressionOptions.gsize;
-		    bsize = compressionOptions.bsize;
-		    asize = compressionOptions.asize;
-
-		    bitCount = rsize + gsize + bsize + asize;
-		    nvCheck(bitCount <= 32);
-
-		    ashift = 0;
-		    bshift = ashift + asize;
-		    gshift = bshift + bsize;
-		    rshift = gshift + gsize;
-
-		    rmask = ((1 << rsize) - 1) << rshift;
-		    gmask = ((1 << gsize) - 1) << gshift;
-		    bmask = ((1 << bsize) - 1) << bshift;
-		    amask = ((1 << asize) - 1) << ashift;
-	    }
-    }
-
-	uint byteCount = (bitCount + 7) / 8;
-    uint pitch = computePitch(w, bitCount);
-
-    uint srcPitch = w;
-    uint srcPlane = w * h;
-
-
-    // Allocate output scanline.
-	uint8 * dst = (uint8 *)mem::malloc(pitch + 4);
-
-	for (uint y = 0; y < h; y++)
-	{
-        const uint * src = (const uint *)data + y * srcPitch;
-        const float * fsrc = (const float *)data + y * srcPitch;
-
-        uint8 * ptr = dst;
-
-		for (uint x = 0; x < w; x++)
-		{
-            float r, g, b, a;
-
-            if (inputFormat == nvtt::InputFormat_BGRA_8UB) {
-                Color32 c = Color32(src[x]);
-                r = float(c.r) / 255.0f;
-                g = float(c.g) / 255.0f;
-                b = float(c.b) / 255.0f;
-                a = float(c.a) / 255.0f;
-            }
-            else {
-                nvDebugCheck (inputFormat == nvtt::InputFormat_RGBA_32F);
-
-			    //r = ((float *)src)[4 * x + 0]; // Color components not interleaved.
-			    //g = ((float *)src)[4 * x + 1];
-			    //b = ((float *)src)[4 * x + 2];
-			    //a = ((float *)src)[4 * x + 3];
-			    r = fsrc[x + 0 * srcPlane];
-			    g = fsrc[x + 1 * srcPlane];
-			    b = fsrc[x + 2 * srcPlane];
-			    a = fsrc[x + 3 * srcPlane];
-            }
-
-            if (compressionOptions.pixelType == nvtt::PixelType_Float)
-            {
-			    if (rsize == 32) *((float *)ptr) = r;
-			    else if (rsize == 16) *((uint16 *)ptr) = to_half(r);
-			    ptr += rsize / 8;
-
-			    if (gsize == 32) *((float *)ptr) = g;
-			    else if (gsize == 16) *((uint16 *)ptr) = to_half(g);
-			    ptr += gsize / 8;
-
-			    if (bsize == 32) *((float *)ptr) = b;
-			    else if (bsize == 16) *((uint16 *)ptr) = to_half(b);
-			    ptr += bsize / 8;
-
-			    if (asize == 32) *((float *)ptr) = a;
-			    else if (asize == 16) *((uint16 *)ptr) = to_half(a);
-			    ptr += asize / 8;
-            }
-            else
-            {
-                Color32 c;
-                if (compressionOptions.pixelType == nvtt::PixelType_UnsignedNorm) {
-                    c.r = uint8(clamp(r * 255, 0.0f, 255.0f));
-                    c.g = uint8(clamp(g * 255, 0.0f, 255.0f));
-                    c.b = uint8(clamp(b * 255, 0.0f, 255.0f));
-                    c.a = uint8(clamp(a * 255, 0.0f, 255.0f));
-                }
-                // @@ Add support for nvtt::PixelType_SignedInt, nvtt::PixelType_SignedNorm, nvtt::PixelType_UnsignedInt
-
-				uint p = 0;
-				p |= PixelFormat::convert(c.r, 8, rsize) << rshift;
-				p |= PixelFormat::convert(c.g, 8, gsize) << gshift;
-				p |= PixelFormat::convert(c.b, 8, bsize) << bshift;
-				p |= PixelFormat::convert(c.a, 8, asize) << ashift;
-				
-				// Output one byte at a time.
-				for (uint i = 0; i < byteCount; i++)
-				{
-					*(dst + x * byteCount + i) = (p >> (i * 8)) & 0xFF;
-				}
-            }
-        }
-
-		// Zero padding.
-		for (uint x = w * byteCount; x < pitch; x++)
-		{
-			*(dst + x) = 0;
-		}
-
-		if (outputOptions.outputHandler != NULL)
-		{
-			outputOptions.outputHandler->writeData(dst, pitch);
-		}
-    }
-
-	mem::free(dst);
-}
--- a/src/nvtt/CompressorRGBE.cpp
+++ b/src/nvtt/CompressorRGBE.cpp
@ -1,102 +0,0 @@
-// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#include "CompressorRGBE.h"
-#include "CompressionOptions.h"
-#include "OutputOptions.h"
-
-#include <nvimage/Image.h>
-#include <nvimage/FloatImage.h>
-
-#include <nvmath/Color.h>
-
-#include <nvcore/Debug.h>
-
-using namespace nv;
-using namespace nvtt;
-
-static Color32 toRgbe8(float r, float g, float b)
-{
-    Color32 c;
-    float v = max(max(r, g), b);
-    if (v < 1e-32) {
-        c.r = c.g = c.b = c.a = 0;
-    }
-    else {
-        int e;
-        v = frexp(v, &e) * 256.0f / v;
-        c.r = uint8(clamp(r * v, 0.0f, 255.0f));
-        c.g = uint8(clamp(g * v, 0.0f, 255.0f));
-        c.b = uint8(clamp(b * v, 0.0f, 255.0f));
-        c.a = e + 128;
-    }
-
-    return c;
-}
-
-
-void CompressorRGBE::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
-{
-    nvDebugCheck (compressionOptions.format == nvtt::Format_RGBE);
-
-    uint srcPitch = w;
-    uint srcPlane = w * h;
-
-    // Allocate output scanline.
-	Color32 * dst = (Color32 *)mem::malloc(w);
-
-	for (uint y = 0; y < h; y++)
-	{
-        const uint * src = (const uint *)data + y * srcPitch;
-        const float * fsrc = (const float *)data + y * srcPitch;
-
-		for (uint x = 0; x < w; x++)
-		{
-            float r, g, b;
-
-            if (inputFormat == nvtt::InputFormat_BGRA_8UB) {
-                Color32 c = Color32(src[x]);
-                r = float(c.r) / 255.0f;
-                g = float(c.g) / 255.0f;
-                b = float(c.b) / 255.0f;
-            }
-            else {
-                nvDebugCheck (inputFormat == nvtt::InputFormat_RGBA_32F);
-
-			    // Color components not interleaved.
-			    r = fsrc[x + 0 * srcPlane];
-			    g = fsrc[x + 1 * srcPlane];
-			    b = fsrc[x + 2 * srcPlane];
-            }
-            
-            dst[x] = toRgbe8(r, g, b);
-        }
-
-		if (outputOptions.outputHandler != NULL)
-		{
-			outputOptions.outputHandler->writeData(dst, w * 4);
-		}
-    }
-
-	mem::free(dst);
-}
--- a/src/nvtt/Context.cpp
+++ b/src/nvtt/Context.cpp
--- a/src/nvtt/Context.h
+++ b/src/nvtt/Context.h
@ -1,87 +0,0 @@
-// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#ifndef NV_TT_CONTEXT_H
-#define NV_TT_CONTEXT_H
-
-#include "nvcore/Ptr.h"
-
-#include "nvtt/Compressor.h"
-#include "nvtt/cuda/CudaCompressorDXT.h"
-#include "nvtt.h"
-
-namespace nv
-{
-	class Image;
-}
-
-namespace nvtt
-{
-	struct Mipmap;
-
-	struct Compressor::Private
-	{
-		Private() {}
-
-		bool compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
-
-        bool compress2D(InputFormat inputFormat, AlphaMode alphaMode, int w, int h, const void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
-
-		int estimateSize(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions) const;
-
-		bool outputHeader(const TexImage & tex, int mipmapCount, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions);
-
-	private:
-
-		bool outputHeader(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
-
-		nv::CompressorInterface * chooseCpuCompressor(const CompressionOptions::Private & compressionOptions) const;
-		nv::CompressorInterface * chooseGpuCompressor(const CompressionOptions::Private & compressionOptions) const;
-
-		bool compressMipmaps(uint f, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
-
-		bool initMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f, uint m) const;
-
-		int findExactMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const;
-		int findClosestMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const;
-
-		void downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
-		void scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const;
-		void premultiplyAlphaMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
-		void processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
-		void quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const;
-
-
-	public:
-
-		bool cudaSupported;
-		bool cudaEnabled;
-
-		nv::AutoPtr<nv::CudaContext> cuda;
-
-	};
-
-} // nvtt namespace
-
-
-#endif // NV_TT_CONTEXT_H
--- a/src/nvtt/InputOptions.cpp
+++ b/src/nvtt/InputOptions.cpp
@ -23,11 +23,8 @@

 #include <string.h> // memcpy

-#include <nvcore/Containers.h> // nextPowerOfTwo
 #include <nvcore/Memory.h>

-#include <nvmath/Color.h>
-
 #include "nvtt.h"
 #include "InputOptions.h"

@ -104,8 +101,6 @@ void InputOptions::reset()
 	
 	m.colorTransform = ColorTransform_None;
 	m.linearTransform = Matrix(identity);
-	for (int i = 0; i < 4; i++) m.colorOffsets[i] = 0;
-	for (int i = 0; i < 4; i++) m.swizzleTransform[i] = i;

 	m.generateMipmaps = true;
 	m.maxLevel = -1;
@ -123,8 +118,6 @@ void InputOptions::reset()
 	
 	m.maxExtent = 0;
 	m.roundMode = RoundMode_None;
-
-	m.premultiplyAlpha = false;
 }


@ -168,8 +161,7 @@ void InputOptions::setTextureLayout(TextureType type, int width, int height, int
 			img.mipLevel = mipLevel;
 			img.face = f;
 			
-			img.uint8data = NULL;
-			img.floatdata = NULL;
+			img.data = NULL;
 			
 			w = max(1U, w / 2);
 			h = max(1U, h / 2);
@ -207,116 +199,14 @@ bool InputOptions::setMipmapData(const void * data, int width, int height, int d
 		return false;
 	}
 	
-	switch(m.inputFormat)
-	{
-		case InputFormat_BGRA_8UB:
-			if (Image * image = new nv::Image())
-			{
-				image->allocate(width, height);
-				memcpy(image->pixels(), data, width * height * 4);
-				m.images[idx].uint8data = image;
-			}
-			else
-			{
-				// @@ Out of memory error.
-				return false;
-			}
-			break;
-		case InputFormat_RGBA_32F:
-			if (FloatImage * image = new nv::FloatImage())
-			{
-				const float * floatData = (const float *)data;
-				image->allocate(4, width, height);
-				
-				for (int c = 0; c < 4; c++)
-				{
-					float * channel = image->channel(c);
-					for (int i = 0; i < width * height; i++)
-					{
-						channel[i] = floatData[i*4 + c];
-					}
-				}
-				
-				m.images[idx].floatdata = image;
-			}
-			else
-			{
-				// @@ Out of memory error.
-				return false;
-			}
-			break;
-		default:
-			return false;
-	}
+	m.images[idx].data = new nv::Image();
+	m.images[idx].data->allocate(width, height);
+	memcpy(m.images[idx].data->pixels(), data, width * height * 4); 
 	
 	return true;
 }


-// Copies data 
-bool InputOptions::setMipmapChannelData(const void * data, int channel, int width, int height, int depth /*= 1*/, int face /*= 0*/, int mipLevel /*= 0*/)
-{
-	nvCheck(depth == 1);
-	nvCheck(channel >= 0 && channel < 4);
-	
-	const int idx = face * m.mipmapCount + mipLevel;
-	
-	if (m.images[idx].width != width || m.images[idx].height != height || m.images[idx].depth != depth || m.images[idx].mipLevel != mipLevel || m.images[idx].face != face)
-	{
-		// Invalid dimension or index.
-		return false;
-	}
-	
-	// Allocate image if not allocated already.
-	if (m.inputFormat == InputFormat_BGRA_8UB)
-	{
-		m.images[idx].floatdata = NULL;
-		if (m.images[idx].uint8data == NULL)
-		{
-			m.images[idx].uint8data = new Image();
-			m.images[idx].uint8data->allocate(width, height);
-			m.images[idx].uint8data->fill(Color32(0,0,0,0));
-		}
-	}
-	else if (m.inputFormat == InputFormat_RGBA_32F)
-	{
-		m.images[idx].uint8data = NULL;
-		if (m.images[idx].floatdata == NULL)
-		{
-			m.images[idx].floatdata = new FloatImage();
-			m.images[idx].floatdata->allocate(4, width, height);
-			m.images[idx].floatdata->clear();
-		}
-
-		
-	}
-	else
-	{
-		m.images[idx].floatdata = NULL;
-		m.images[idx].uint8data = NULL;
-		return false;
-	}
-
-	// Copy channel data to image.
-	if (m.inputFormat == InputFormat_BGRA_8UB)
-	{
-		// @@ TODO
-	}
-	else if (m.inputFormat == InputFormat_RGBA_32F)
-	{
-		const float * floatData = (const float *)data;
-		float * channelPtr = m.images[idx].floatdata->channel(channel);
-
-		for (int i = 0; i < width * height; i++)
-		{
-			channelPtr[i] = floatData[i];
-		}
-	}
-
-	return true;
-}
-
-
 /// Describe the format of the input.
 void InputOptions::setFormat(InputFormat format)
 {
@ -411,32 +301,8 @@ void InputOptions::setLinearTransform(int channel, float w0, float w1, float w2,
 {
 	nvCheck(channel >= 0 && channel < 4);

-	m.linearTransform(channel, 0) = w0;
-	m.linearTransform(channel, 1) = w1;
-	m.linearTransform(channel, 2) = w2;
-	m.linearTransform(channel, 3) = w3;
-}
-
-void InputOptions::setLinearTransform(int channel, float w0, float w1, float w2, float w3, float offset)
-{
-	nvCheck(channel >= 0 && channel < 4);
-
-	setLinearTransform(channel, w0, w1, w2, w3);
-
-	m.colorOffsets[channel] = offset;
-}
-
-void InputOptions::setSwizzleTransform(int x, int y, int z, int w)
-{
-	nvCheck(x >= 0 && x <= 6);
-	nvCheck(y >= 0 && y <= 6);
-	nvCheck(z >= 0 && z <= 6);
-	nvCheck(w >= 0 && w <= 6);
-	
-	m.swizzleTransform[0] = x;
-	m.swizzleTransform[1] = y;
-	m.swizzleTransform[2] = z;
-	m.swizzleTransform[3] = w;
+	Vector4 w(w0, w1, w2, w3);
+	//m.linearTransform.setRow(channel, w);
 }

 void InputOptions::setMaxExtents(int e)
@ -450,10 +316,6 @@ void InputOptions::setRoundMode(RoundMode mode)
 	m.roundMode = mode;
 }

-void InputOptions::setPremultiplyAlpha(bool b)
-{
-	m.premultiplyAlpha = b;
-}

 void InputOptions::Private::computeTargetExtents() const
 {
@ -533,7 +395,7 @@ const Image * InputOptions::Private::image(uint face, uint mipmap) const
 	nvDebugCheck(image.face == face);
 	nvDebugCheck(image.mipLevel == mipmap);

-	return image.uint8data.ptr();
+	return image.data.ptr();
 }

 const Image * InputOptions::Private::image(uint idx) const
@ -542,14 +404,5 @@ const Image * InputOptions::Private::image(uint idx) const

 	const InputImage & image = this->images[idx];

-	return image.uint8data.ptr();
-}
-
-const FloatImage * InputOptions::Private::floatImage(uint idx) const
-{
-	nvDebugCheck(idx < faceCount * mipmapCount);
-
-	const InputImage & image = this->images[idx];
-
-	return image.floatdata.ptr();
+	return image.data.ptr();
 }
--- a/src/nvtt/InputOptions.h
+++ b/src/nvtt/InputOptions.h
@ -28,7 +28,6 @@
 #include <nvmath/Vector.h>
 #include <nvmath/Matrix.h>
 #include <nvimage/Image.h>
-#include <nvimage/FloatImage.h>
 #include "nvtt.h"

 namespace nvtt
@ -57,8 +56,6 @@ namespace nvtt
 		// Color transform.
 		ColorTransform colorTransform;
 		nv::Matrix linearTransform;
-		float colorOffsets[4];
-		uint swizzleTransform[4];
 		
 		// Mipmap generation options.
 		bool generateMipmaps;
@ -81,8 +78,6 @@ namespace nvtt
 		uint maxExtent;
 		RoundMode roundMode;
 		
-		bool premultiplyAlpha;
-
 		// @@ These are computed in nvtt::compress, so they should be mutable or stored elsewhere...
 		mutable uint targetWidth;
 		mutable uint targetHeight;
@ -94,9 +89,7 @@ namespace nvtt
 		int realMipmapCount() const;
 		
 		const nv::Image * image(uint face, uint mipmap) const;
-		const nv::Image * image(uint idx) const;
-
-		const nv::FloatImage * floatImage(uint idx) const;
+		const nv::Image * image(uint idx) const;

 	};

@ -105,8 +98,6 @@ namespace nvtt
 	{
 		InputImage() {}
 		
-		bool hasValidData() const { return uint8data != NULL || floatdata != NULL; }
-		
 		int mipLevel;
 		int face;
 		
@ -114,8 +105,7 @@ namespace nvtt
 		int height;
 		int depth;
 		
-		nv::AutoPtr<nv::Image> uint8data;
-		nv::AutoPtr<nv::FloatImage> floatdata;
+		nv::AutoPtr<nv::Image> data;
 	};

 } // nvtt namespace
--- a/src/nvtt/OptimalCompressDXT.cpp
+++ b/src/nvtt/OptimalCompressDXT.cpp
@ -21,17 +21,16 @@
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 // OTHER DEALINGS IN THE SOFTWARE.

-#include "OptimalCompressDXT.h"
-#include "SingleColorLookup.h"
+#include <nvcore/Containers.h> // swap
+
+#include <nvmath/Color.h>

 #include <nvimage/ColorBlock.h>
 #include <nvimage/BlockDXT.h>

-#include <nvmath/Color.h>
+#include "OptimalCompressDXT.h"
+#include "SingleColorLookup.h"

-#include <nvcore/Containers.h> // swap
-
-#include <limits.h>

 using namespace nv;
 using namespace OptimalCompress;
@ -40,37 +39,10 @@ using namespace OptimalCompress;

 namespace
 {
-	static int greenDistance(int g0, int g1)
-	{
-		//return abs(g0 - g1);
-		int d = g0 - g1;
-		return d * d;
-	}
-
-	static int alphaDistance(int a0, int a1)
-	{
-		//return abs(a0 - a1);
-		int d = a0 - a1;
-		return d * d;
-	}
-
-	static uint nearestGreen4(uint green, uint maxGreen, uint minGreen)
-	{
-		uint bias = maxGreen + (maxGreen - minGreen) / 6;
-
-		uint index = 0;
-		if (maxGreen - minGreen != 0) index = clamp(3 * (bias - green) / (maxGreen - minGreen), 0U, 3U);
-
-		return (index * minGreen + (3 - index) * maxGreen) / 3;
-	}
-
-	static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block, int bestError = INT_MAX)
+	static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
 	{
 		nvDebugCheck(block != NULL);

-	//	uint g0 = (block->col0.g << 2) | (block->col0.g >> 4);
-	//	uint g1 = (block->col1.g << 2) | (block->col1.g >> 4);
-
 		int palette[4];
 		palette[0] = (block->col0.g << 2) | (block->col0.g >> 4);
 		palette[1] = (block->col1.g << 2) | (block->col1.g >> 4);
@ -78,24 +50,17 @@ namespace
 		palette[3] = (2 * palette[1] + palette[0]) / 3;

 		int totalError = 0;
+
 		for (int i = 0; i < 16; i++)
 		{
 			const int green = rgba.color(i).g;
 			
-			int error = greenDistance(green, palette[0]);
-			error = min(error, greenDistance(green, palette[1]));
-			error = min(error, greenDistance(green, palette[2]));
-			error = min(error, greenDistance(green, palette[3]));
-
+			int error = abs(green - palette[0]);
+			error = min(error, abs(green - palette[1]));
+			error = min(error, abs(green - palette[2]));
+			error = min(error, abs(green - palette[3]));
+			
 			totalError += error;
-
-		//	totalError += nearestGreen4(green, g0, g1);
-
-			if (totalError > bestError)
-			{
-				// early out
-				return totalError;
-			}
 		}

 		return totalError;
@ -113,10 +78,10 @@ namespace
 		{
 			const int color = rgba.color(i).g;
 			
-			uint d0 = greenDistance(color0, color);
-			uint d1 = greenDistance(color1, color);
-			uint d2 = greenDistance(color2, color);
-			uint d3 = greenDistance(color3, color);
+			uint d0 = abs(color0 - color);
+			uint d1 = abs(color1 - color);
+			uint d2 = abs(color2 - color);
+			uint d3 = abs(color3 - color);
 			
 			uint b0 = d0 > d3;
 			uint b1 = d1 > d2;
@ -137,78 +102,49 @@ namespace
 	// Choose quantized color that produces less error. Used by DXT3 compressor.
 	inline static uint quantize4(uint8 a)
 	{
-		int q0 = max(int(a >> 4) - 1, 0);
+		int q0 = (a >> 4) - 1;
 		int q1 = (a >> 4);
-		int q2 = min(int(a >> 4) + 1, 0xF);
+		int q2 = (a >> 4) + 1;
 		
 		q0 = (q0 << 4) | q0;
 		q1 = (q1 << 4) | q1;
 		q2 = (q2 << 4) | q2;
 		
-		int d0 = alphaDistance(q0, a);
-		int d1 = alphaDistance(q1, a);
-		int d2 = alphaDistance(q2, a);
+		int d0 = abs(q0 - a);
+		int d1 = abs(q1 - a);
+		int d2 = abs(q2 - a);

 		if (d0 < d1 && d0 < d2) return q0 >> 4;
 		if (d1 < d2) return q1 >> 4;
 		return q2 >> 4;
 	}
 	
-	static uint nearestAlpha8(uint alpha, uint maxAlpha, uint minAlpha)
-	{
-		float bias = maxAlpha + float(maxAlpha - minAlpha) / (2.0f * 7.0f);
-		float scale = 7.0f / float(maxAlpha - minAlpha);
-
-		uint index = (uint)clamp((bias - float(alpha)) * scale, 0.0f, 7.0f);
-
-		return (index * minAlpha + (7 - index) * maxAlpha) / 7;
-	}
-
-	static uint computeAlphaError8(const ColorBlock & rgba, const AlphaBlockDXT5 * block, int bestError = INT_MAX)
-	{
-		int totalError = 0;
-
-		for (uint i = 0; i < 16; i++)
-		{
-			uint8 alpha = rgba.color(i).a;
-
-			totalError += alphaDistance(alpha, nearestAlpha8(alpha, block->alpha0, block->alpha1));
-
-			if (totalError > bestError)
-			{
-				// early out
-				return totalError;
-			}
-		}
-
-		return totalError;
-	}
-
-	static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block, int bestError = INT_MAX)
+	static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block)
 	{
 		uint8 alphas[8];
 		block->evaluatePalette(alphas);

-		int totalError = 0;
+		uint totalError = 0;

 		for (uint i = 0; i < 16; i++)
 		{
 			uint8 alpha = rgba.color(i).a;

-			int minDist = INT_MAX;
+			uint besterror = 256*256;
+			uint best;
 			for (uint p = 0; p < 8; p++)
 			{
-				int dist = alphaDistance(alpha, alphas[p]);
-				minDist = min(dist, minDist);
+				int d = alphas[p] - alpha;
+				uint error = d * d;
+
+				if (error < besterror)
+				{
+					besterror = error;
+					best = p;
+				}
 			}

-			totalError += minDist;
-
-			if (totalError > bestError)
-			{
-				// early out
-				return totalError;
-			}
+			totalError += besterror;
 		}

 		return totalError;
@ -223,21 +159,22 @@ namespace
 		{
 			uint8 alpha = rgba.color(i).a;

-			int minDist = INT_MAX;
-			int bestIndex = 8;
-			for (uint p = 0; p < 8; p++)
+			uint besterror = 256*256;
+			uint best = 8;
+			for(uint p = 0; p < 8; p++)
 			{
-				int dist = alphaDistance(alpha, alphas[p]);
+				int d = alphas[p] - alpha;
+				uint error = d * d;

-				if (dist < minDist)
+				if (error < besterror)
 				{
-					minDist = dist;
-					bestIndex = p;
+					besterror = error;
+					best = p;
 				}
 			}
-			nvDebugCheck(bestIndex < 8);
+			nvDebugCheck(best < 8);

-			block->setIndex(i, bestIndex);
+			block->setIndex(i, best);
 		}
 	}

@ -280,23 +217,6 @@ void OptimalCompress::compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock)
 	}
 }

-void OptimalCompress::compressDXT1G(uint8 g, BlockDXT1 * dxtBlock)
-{
-	dxtBlock->col0.r = 31;
-	dxtBlock->col0.g = OMatch6[g][0];
-	dxtBlock->col0.b = 0;
-	dxtBlock->col1.r = 31;
-	dxtBlock->col1.g = OMatch6[g][1];
-	dxtBlock->col1.b = 0;
-	dxtBlock->indices = 0xaaaaaaaa;
-
-	if (dxtBlock->col0.u < dxtBlock->col1.u)
-	{
-		swap(dxtBlock->col0.u, dxtBlock->col1.u);
-		dxtBlock->indices ^= 0x55555555;
-	}
-}
-

 // Brute force green channel compressor
 void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
@ -306,23 +226,12 @@ void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
 	uint8 ming = 63;
 	uint8 maxg = 0;
 	
-	bool isSingleColor = true;
-	uint8 singleColor = rgba.color(0).g;
-
 	// Get min/max green.
 	for (uint i = 0; i < 16; i++)
 	{
-		uint8 green = (rgba.color(i).g + 1) >> 2;
+		uint8 green = rgba.color(i).g >> 2;
 		ming = min(ming, green);
 		maxg = max(maxg, green);
-
-		if (rgba.color(i).g != singleColor) isSingleColor = false;
-	}
-
-	if (isSingleColor)
-	{
-		compressDXT1G(singleColor, block);
-		return;
 	}

 	block->col0.r = 31;
@ -332,38 +241,36 @@ void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
 	block->col0.b = 0;
 	block->col1.b = 0;

-	int bestError = computeGreenError(rgba, block);
-	int bestg0 = maxg;
-	int bestg1 = ming;
-
-	// Expand search space a bit.
-	const int greenExpand = 4;
-	ming = (ming <= greenExpand) ? 0 : ming - greenExpand;
-	maxg = (maxg >= 63-greenExpand) ? 63 : maxg + greenExpand;
-
-	for (int g0 = ming+1; g0 <= maxg; g0++)
+	if (maxg - ming > 4)
 	{
-		for (int g1 = ming; g1 < g0; g1++)
+		int besterror = computeGreenError(rgba, block);
+		int bestg0 = maxg;
+		int bestg1 = ming;
+		
+		for (int g0 = ming+5; g0 < maxg; g0++)
 		{
-			block->col0.g = g0;
-			block->col1.g = g1;
-			int error = computeGreenError(rgba, block, bestError);
-			
-			if (error < bestError)
+			for (int g1 = ming; g1 < g0-4; g1++)
 			{
-				bestError = error;
-				bestg0 = g0;
-				bestg1 = g1;
+				if ((maxg-g0) + (g1-ming) > besterror)
+					continue;
+				
+				block->col0.g = g0;
+				block->col1.g = g1;
+				int error = computeGreenError(rgba, block);
+				
+				if (error < besterror)
+				{
+					besterror = error;
+					bestg0 = g0;
+					bestg1 = g1;
+				}
 			}
 		}
+		
+		block->col0.g = bestg0;
+		block->col1.g = bestg1;
 	}
 	
-	block->col0.g = bestg0;
-	block->col1.g = bestg1;
-
-	nvDebugCheck(bestg0 == bestg1 || block->isFourColorMode());
-
-
 	Color32 palette[4];
 	block->evaluatePalette(palette);
 	block->indices = computeGreenIndices(rgba, palette);
@ -406,26 +313,42 @@ void OptimalCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dx
 	dxtBlock->alpha0 = maxa;
 	dxtBlock->alpha1 = mina;

+	/*int centroidDist = 256;
+	int centroid;
+
+	// Get the closest to the centroid.
+	for (uint i = 0; i < 16; i++)
+	{
+		uint8 alpha = rgba.color(i).a;
+		int dist = abs(alpha - (maxa + mina) / 2);
+		if (dist < centroidDist)
+		{
+			centroidDist = dist;
+			centroid = alpha;
+		}
+	}*/
+
 	if (maxa - mina > 8)
 	{
 		int besterror = computeAlphaError(rgba, dxtBlock);
 		int besta0 = maxa;
 		int besta1 = mina;

-		// Expand search space a bit.
-		const int alphaExpand = 8;
-		mina = (mina <= alphaExpand) ? 0 : mina - alphaExpand;
-		maxa = (maxa >= 255-alphaExpand) ? 255 : maxa + alphaExpand;
-
 		for (int a0 = mina+9; a0 < maxa; a0++)
 		{
 			for (int a1 = mina; a1 < a0-8; a1++)
+			//for (int a1 = mina; a1 < maxa; a1++)
 			{
-				nvDebugCheck(a0 - a1 > 8);
+				//nvCheck(abs(a1-a0) > 8);
+
+				//if (abs(a0 - a1) < 8) continue;
+				//if ((maxa-a0) + (a1-mina) + min(abs(centroid-a0), abs(centroid-a1)) > besterror)
+				if ((maxa-a0) + (a1-mina) > besterror)
+					continue;

 				dxtBlock->alpha0 = a0;
 				dxtBlock->alpha1 = a1;
-				int error = computeAlphaError(rgba, dxtBlock, besterror);
+				int error = computeAlphaError(rgba, dxtBlock);

 				if (error < besterror)
 				{
--- a/src/nvtt/OptimalCompressDXT.h
+++ b/src/nvtt/OptimalCompressDXT.h
@ -26,8 +26,6 @@

 #include <nvimage/nvimage.h>

-#include <nvmath/Color.h>
-
 namespace nv
 {
 	struct ColorBlock;
@ -41,7 +39,6 @@ namespace nv
 	{
 		void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock);
 		void compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock);
-		void compressDXT1G(uint8 g, BlockDXT1 * dxtBlock);
 		
 		void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);
 		void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock);
--- a/src/nvtt/OutputOptions.cpp
+++ b/src/nvtt/OutputOptions.cpp
@ -33,9 +33,6 @@ OutputOptions::OutputOptions() : m(*new OutputOptions::Private())

+/// Destructor. Releases the private implementation object.
 OutputOptions::~OutputOptions()
 {
-	// Cleanup output handler.
-	setOutputHandler(NULL);
-
+	// m refers to the Private object created with new in the constructor's initializer list.
 	delete &m;
 }

@ -46,31 +43,20 @@ void OutputOptions::reset()
 	m.outputHandler = NULL;
 	m.errorHandler = NULL;
 	m.outputHeader = true;
-	m.container = Container_DDS;
 }


 /// Set output file name.
+/// Only the name is recorded here and the output handler pointer is cleared;
+/// the file handler itself is created later by Private::openFile().
 void OutputOptions::setFileName(const char * fileName)
 {
-	m.fileName = fileName; // @@ Do we need to record filename?
+	m.fileName = fileName;
 	m.outputHandler = NULL;
-
-	DefaultOutputHandler * oh = new DefaultOutputHandler(fileName);
-	if (!oh->stream.isError())
-	{
-		m.outputHandler = oh;
-	}
 }

 /// Set output handler.
+/// The file name is reset as well: Private::openFile() and Private::closeFile()
+/// only act when a file name is set, so they leave this handler untouched.
 void OutputOptions::setOutputHandler(OutputHandler * outputHandler)
 {
-	if (!m.fileName.isNull())
-	{
-		delete m.outputHandler;
-		m.fileName.reset();
-	}
+	m.fileName.reset();
 	m.outputHandler = outputHandler;
 }

@ -86,20 +72,31 @@ void OutputOptions::setOutputHeader(bool outputHeader)
 	m.outputHeader = outputHeader;
 }

-/// Set container.
-void OutputOptions::setContainer(Container container)
-{
-	m.container = container;
-}

-
-bool OutputOptions::Private::hasValidOutputHandler() const
+/// Open the output file, if a file name was set.
+/// Creates a DefaultOutputHandler for fileName and installs it as outputHandler.
+/// Returns false when the handler's stream reports an error; returns true
+/// otherwise, including when no file name is set and nothing has to be opened.
+bool OutputOptions::Private::openFile() const
 {
 	if (!fileName.isNull())
 	{
-		return outputHandler != NULL;
+		// With a file name set, no handler is expected to be installed yet.
+		nvCheck(outputHandler == NULL);
+		
+		DefaultOutputHandler * oh = new DefaultOutputHandler(fileName.str());
+		if (oh->stream.isError())
+		{
+			// Do not leak the handler when the stream could not be opened.
+			delete oh;
+			return false;
+		}
+		
+		outputHandler = oh;
 	}
 	
 	return true;
 }

+/// Delete the output handler and clear the pointer, but only when a file
+/// name is set. Without a file name the handler is left alone.
+void OutputOptions::Private::closeFile() const
+{
+	if (fileName.isNull())
+	{
+		return;
+	}
+	
+	delete outputHandler;
+	outputHandler = NULL;
+}
+
--- a/src/nvtt/OutputOptions.h
+++ b/src/nvtt/OutputOptions.h
@ -52,7 +52,7 @@ namespace nvtt
 			//return !stream.isError();
 			return true;
 		}
-
+		
 		nv::StdOutputStream stream;
 	};
 	
@ -61,12 +61,12 @@ namespace nvtt
 	{
 		nv::Path fileName;
 		
-		OutputHandler * outputHandler;
+		mutable OutputHandler * outputHandler;
 		ErrorHandler * errorHandler;
 		bool outputHeader;
-		Container container;
 		
-		bool hasValidOutputHandler() const;
+		bool openFile() const;
+		void closeFile() const;
 	};

 	
--- a/src/nvtt/QuickCompressDXT.cpp
+++ b/src/nvtt/QuickCompressDXT.cpp
@ -21,10 +21,7 @@
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 // OTHER DEALINGS IN THE SOFTWARE.

-#include <nvcore/Containers.h> // swap
-
 #include <nvmath/Color.h>
-#include <nvmath/Fitting.h>

 #include <nvimage/ColorBlock.h>
 #include <nvimage/BlockDXT.h>
@ -133,7 +130,7 @@ inline static float colorDistance(Vector3::Arg c0, Vector3::Arg c1)
 	return dot(c0-c1, c0-c1);
 }

-inline static uint computeIndices4(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
+inline static uint computeIndices4(Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
 {
 	Vector3 palette[4];
 	palette[0] = maxColor;
@ -165,28 +162,6 @@ inline static uint computeIndices4(const Vector3 block[16], Vector3::Arg maxColo
 	return indices;
 }

-inline static float evaluatePaletteError4(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
-{
-	Vector3 palette[4];
-	palette[0] = maxColor;
-	palette[1] = minColor;
-	palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f);
-	palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f);
-	
-	float total = 0.0f;
-	for (int i = 0; i < 16; i++)
-	{
-		float d0 = colorDistance(palette[0], block[i]);
-		float d1 = colorDistance(palette[1], block[i]);
-		float d2 = colorDistance(palette[2], block[i]);
-		float d3 = colorDistance(palette[3], block[i]);
-
-		total += min(min(d0, d1), min(d2, d3));
-	}
-
-	return total;
-}
-
 inline static uint computeIndices3(const ColorBlock & rgba, Vector3::Arg maxColor, Vector3::Arg minColor)
 {
 	Vector3 palette[4];
@ -475,8 +450,7 @@ void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
 		// read block
 		Vector3 block[16];
 		extractColorBlockRGB(rgba, block);
-
-#if 1
+		
 		// find min and max colors
 		Vector3 maxColor, minColor;
 		findMinMaxColorsBox(block, 16, &maxColor, &minColor);
@ -484,31 +458,7 @@ void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
 		selectDiagonal(block, 16, &maxColor, &minColor);
 		
 		insetBBox(&maxColor, &minColor);
-#else
-		float weights[16] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
-		Vector3 cluster[4];
-		int count = Compute4Means(16, block, weights, Vector3(1, 1, 1), cluster);
-
-		Vector3 maxColor, minColor;
-		float bestError = FLT_MAX;
-
-		for (int i = 1; i < 4; i++)
-		{
-			for (int j = 0; j < i; j++)
-			{
-		        uint16 color0 = roundAndExpand(&cluster[i]);
-		        uint16 color1 = roundAndExpand(&cluster[j]);
-
-				float error = evaluatePaletteError4(block, cluster[i], cluster[j]);
-				if (error < bestError) {
-					bestError = error;
-					maxColor = cluster[i];
-					minColor = cluster[j];
-				}
-			}
-		}
-#endif
-
+		
 		uint16 color0 = roundAndExpand(&maxColor);
 		uint16 color1 = roundAndExpand(&minColor);

--- a/src/nvtt/TexImage.cpp
+++ b/src/nvtt/TexImage.cpp
--- a/src/nvtt/TexImage.h
+++ b/src/nvtt/TexImage.h
@ -1,79 +0,0 @@
-// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#ifndef NV_TT_TEXIMAGE_H
-#define NV_TT_TEXIMAGE_H
-
-#include "nvtt.h"
-
-#include <nvcore/Containers.h>
-#include <nvcore/RefCounted.h>
-#include <nvcore/Ptr.h>
-
-#include <nvimage/Image.h>
-#include <nvimage/FloatImage.h>
-
-namespace nvtt
-{
-
-	struct TexImage::Private : public nv::RefCounted
-	{
-		Private()
-		{
-			type = TextureType_2D;
-			wrapMode = WrapMode_Mirror;
-			alphaMode = AlphaMode_None;
-			isNormalMap = false;
-
-			imageArray.resize(1, NULL);
-		}
-		Private(const Private & p) // Copy ctor. inits refcount to 0.
-		{
-			type = p.type;
-			wrapMode = p.wrapMode;
-			alphaMode = p.alphaMode;
-			isNormalMap = p.isNormalMap;
-
-			imageArray = p.imageArray;
-		}
-		~Private()
-		{
-			const uint count = imageArray.count();
-			for (uint i = 0; i < count; i++) {
-				delete imageArray[i];
-			}
-		}
-
-		TextureType type;
-		WrapMode wrapMode;
-		AlphaMode alphaMode;
-		bool isNormalMap;
-
-		nv::Array<nv::FloatImage *> imageArray;
-	};
-
-	
-} // nvtt namespace
-
-
-#endif // NV_TT_TEXIMAGE_H
--- a/src/nvtt/cuda/BitmapTable.h
+++ b/src/nvtt/cuda/BitmapTable.h
--- a/src/nvtt/cuda/Bitmaps.h
+++ b/src/nvtt/cuda/Bitmaps.h
--- a/src/nvtt/cuda/CompressKernel.cu
+++ b/src/nvtt/cuda/CompressKernel.cu
--- a/src/nvtt/cuda/CudaCompressDXT.cpp
+++ b/src/nvtt/cuda/CudaCompressDXT.cpp
@ -0,0 +1,380 @@
+// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
+// 
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+// 
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+
+#include <nvcore/Debug.h>
+#include <nvcore/Containers.h>
+#include <nvmath/Color.h>
+#include <nvimage/Image.h>
+#include <nvimage/ColorBlock.h>
+#include <nvimage/BlockDXT.h>
+#include <nvtt/CompressionOptions.h>
+#include <nvtt/OutputOptions.h>
+#include <nvtt/QuickCompressDXT.h>
+#include <nvtt/OptimalCompressDXT.h>
+
+#include "CudaCompressDXT.h"
+#include "CudaUtils.h"
+
+
+#if defined HAVE_CUDA
+#include <cuda_runtime_api.h>
+#endif
+
+#include <time.h>
+#include <stdio.h>
+
+using namespace nv;
+using namespace nvtt;
+
+#if defined HAVE_CUDA
+
+#define MAX_BLOCKS 8192U // 32768, 65535
+
+
+extern "C" void setupCompressKernel(const float weights[3]);
+extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
+extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
+extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
+
+#include "Bitmaps.h"	// @@ Rename to BitmapTable.h
+
+// Reorder a linear image into block-linear layout: the 16 pixels of every 4x4
+// block are stored consecutively, with blocks in row-major order. For blocks
+// that extend past the image edge, the coordinates wrap around inside the
+// block so that only valid pixels are read.
+static void convertToBlockLinear(const Image * image, uint * blockLinearImage)
+{
+	const uint blocksWide = (image->width() + 3) / 4;
+	const uint blocksHigh = (image->height() + 3) / 4;
+
+	// Output is written strictly sequentially, 16 values per block.
+	uint * dst = blockLinearImage;
+
+	for (uint by = 0; by < blocksHigh; by++)
+	{
+		// Rows of this block that lie inside the image.
+		const uint validRows = min(image->height() - by * 4, 4U);
+
+		for (uint bx = 0; bx < blocksWide; bx++)
+		{
+			// Columns of this block that lie inside the image.
+			const uint validCols = min(image->width() - bx * 4, 4U);
+
+			for (uint ty = 0; ty < 4; ty++)
+			{
+				for (uint tx = 0; tx < 4; tx++)
+				{
+					*dst++ = image->pixel(bx * 4 + tx % validCols, by * 4 + ty % validRows).u;
+				}
+			}
+		}
+	}
+}
+
+#endif
+
+
+/// Construct the compressor.
+/// All members start out in a defined state: the device pointers are NULL, no
+/// image is set and the alpha mode is AlphaMode_None. With HAVE_CUDA the
+/// bitmap table is allocated on the device and uploaded, and the scratch
+/// buffers for input blocks and results are allocated. Use isValid() to find
+/// out whether the allocations succeeded.
+CudaCompressor::CudaCompressor() :
+	m_bitmapTable(NULL),
+	m_data(NULL),
+	m_result(NULL),
+	m_image(NULL),
+	m_alphaMode(nvtt::AlphaMode_None)
+{
+#if defined HAVE_CUDA
+	// Allocate and upload bitmaps.
+	cudaMalloc((void**) &m_bitmapTable, 992 * sizeof(uint));
+	if (m_bitmapTable != NULL)
+	{
+		cudaMemcpy(m_bitmapTable, s_bitmapTable, 992 * sizeof(uint), cudaMemcpyHostToDevice);
+	}
+
+	// Allocate scratch buffers: 64 bytes of input and 8 bytes of output per block.
+	cudaMalloc((void**) &m_data, MAX_BLOCKS * 64U);
+	cudaMalloc((void**) &m_result, MAX_BLOCKS * 8U);
+#endif
+}
+
+/// Destructor. Frees the three device buffers owned by the compressor.
+CudaCompressor::~CudaCompressor()
+{
+#if defined HAVE_CUDA
+	// Free device mem allocations.
+	// Pointers whose allocation failed are still NULL from the constructor's initializer list.
+	cudaFree(m_data);
+	cudaFree(m_result);
+	cudaFree(m_bitmapTable);
+#endif
+}
+
+/// Tell whether the compressor can be used.
+/// With HAVE_CUDA, returns false when cudaGetLastError() reports anything
+/// other than cudaSuccess. Otherwise the result is true only if the input
+/// buffer, the result buffer and the bitmap table are all non-NULL.
+bool CudaCompressor::isValid() const
+{
+#if defined HAVE_CUDA
+	const cudaError_t status = cudaGetLastError();
+	if (status != cudaSuccess)
+	{
+		return false;
+	}
+#endif
+	const bool haveScratchBuffers = (m_data != NULL) && (m_result != NULL);
+	return haveScratchBuffers && (m_bitmapTable != NULL);
+}
+
+// @@ This code is very repetitive and needs to be cleaned up.
+
+/// Select the image and alpha mode used by the compressDXT* methods.
+/// Only the pointer is stored; the image is not copied, and compressDXT1/3/5
+/// read it through this pointer.
+void CudaCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
+{
+	m_image = image;
+	m_alphaMode = alphaMode;
+}
+
+/// Compress image using CUDA.
+/// Converts the image stored in m_image to block-linear order, runs the DXT1
+/// kernel on batches of at most MAX_BLOCKS blocks and hands each batch of
+/// results (8 bytes per block) to the output handler, if there is one.
+/// Without HAVE_CUDA only Error_CudaError is reported to the error handler, if any.
+void CudaCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
+{
+	nvDebugCheck(cuda::isHardwarePresent());
+#if defined HAVE_CUDA
+
+	// Image size in blocks.
+	const uint w = (m_image->width() + 3) / 4;
+	const uint h = (m_image->height() + 3) / 4;
+
+	// 16 pixels per block, one Color32 each.
+	// NOTE(review): the malloc result is not checked before it is written to.
+	uint imageSize = w * h * 16 * sizeof(Color32);
+    uint * blockLinearImage = (uint *) malloc(imageSize);
+	convertToBlockLinear(m_image, blockLinearImage);	// @@ Do this in parallel with the GPU, or in the GPU!
+
+	const uint blockNum = w * h;
+	// NOTE(review): compressedSize is not used in this function.
+	const uint compressedSize = blockNum * 8;
+
+	// NOTE(review): start/end are only referenced by the commented-out printf below.
+	clock_t start = clock();
+
+	setupCompressKernel(compressionOptions.colorWeight.ptr());
+	
+	// TODO: Add support for multiple GPUs.
+	// bn is the index of the first block of the current batch.
+	uint bn = 0;
+	while(bn != blockNum)
+	{
+		uint count = min(blockNum - bn, MAX_BLOCKS);
+
+		// Upload the batch: 16 uints (count * 64 bytes in total) per block.
+	    cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
+
+		// Launch kernel.
+		compressKernelDXT1(count, m_data, m_result, m_bitmapTable);
+
+		// Check for errors.
+		// NOTE(review): after an error is reported the loop still reads back and outputs this batch.
+		cudaError_t err = cudaGetLastError();
+		if (err != cudaSuccess)
+		{
+			nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
+
+			if (outputOptions.errorHandler != NULL)
+			{
+				outputOptions.errorHandler->error(Error_CudaError);
+			}
+		}
+
+		// Copy result to host, overwrite swizzled image.
+		// The results always go to the start of blockLinearImage, not to offset bn.
+		cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);
+
+		// Output result.
+		if (outputOptions.outputHandler != NULL)
+		{
+			outputOptions.outputHandler->writeData(blockLinearImage, count * 8);
+		}
+
+		bn += count;
+	}
+
+	clock_t end = clock();
+	//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
+
+	free(blockLinearImage);
+
+#else
+	// Built without CUDA support: nothing is compressed, only the error is reported.
+	if (outputOptions.errorHandler != NULL)
+	{
+		outputOptions.errorHandler->error(Error_CudaError);
+	}
+#endif
+}
+
+
+/// Compress image using CUDA.
+/// The color blocks are produced by a DXT1 kernel on the device, in batches of
+/// at most MAX_BLOCKS blocks; the alpha blocks are produced on the host with
+/// OptimalCompress::compressDXT3A. For every block the output handler, if
+/// there is one, receives the 8 byte alpha block followed by the 8 byte color block.
+/// Without HAVE_CUDA only Error_CudaError is reported to the error handler, if any.
+void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
+{
+	nvDebugCheck(cuda::isHardwarePresent());
+#if defined HAVE_CUDA
+
+	// Image size in blocks.
+	const uint w = (m_image->width() + 3) / 4;
+	const uint h = (m_image->height() + 3) / 4;
+
+	// 16 pixels per block, one Color32 each.
+	// NOTE(review): the malloc result is not checked before it is written to.
+	uint imageSize = w * h * 16 * sizeof(Color32);
+    uint * blockLinearImage = (uint *) malloc(imageSize);
+	convertToBlockLinear(m_image, blockLinearImage);
+
+	const uint blockNum = w * h;
+	const uint compressedSize = blockNum * 8;
+
+	// Host buffer for the alpha blocks of one batch: 8 bytes per block, at most MAX_BLOCKS blocks.
+	// NOTE(review): assumes sizeof(AlphaBlockDXT3) == 8 - confirm; the malloc result is not checked.
+	AlphaBlockDXT3 * alphaBlocks = NULL;
+	alphaBlocks = (AlphaBlockDXT3 *)malloc(min(compressedSize, MAX_BLOCKS * 8U));
+
+	setupCompressKernel(compressionOptions.colorWeight.ptr());
+	
+	// NOTE(review): start/end are only referenced by the commented-out printf below.
+	clock_t start = clock();
+
+	// bn is the index of the first block of the current batch.
+	uint bn = 0;
+	while(bn != blockNum)
+	{
+		uint count = min(blockNum - bn, MAX_BLOCKS);
+
+		// Upload the batch: 16 uints (count * 64 bytes in total) per block.
+	    cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
+
+		// Launch kernel.
+		// The weighted kernel is used when the alpha mode is AlphaMode_Transparency.
+		if (m_alphaMode == AlphaMode_Transparency)
+		{
+			compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
+		}
+		else
+		{
+			compressKernelDXT1_Level4(count, m_data, m_result, m_bitmapTable);
+		}
+
+		// Compress alpha in parallel with the GPU.
+		// This must read the source pixels before the read-back below overwrites the start of blockLinearImage.
+		for (uint i = 0; i < count; i++)
+		{
+			ColorBlock rgba(blockLinearImage + (bn + i) * 16);
+			OptimalCompress::compressDXT3A(rgba, alphaBlocks + i);
+		}
+
+		// Check for errors.
+		// NOTE(review): after an error is reported the loop still reads back and outputs this batch.
+		cudaError_t err = cudaGetLastError();
+		if (err != cudaSuccess)
+		{
+			nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
+
+			if (outputOptions.errorHandler != NULL)
+			{
+				outputOptions.errorHandler->error(Error_CudaError);
+			}
+		}
+
+		// Copy result to host, overwrite swizzled image.
+		cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);
+
+		// Output result.
+		if (outputOptions.outputHandler != NULL)
+		{
+			// Interleave: alpha block i, then color block i (2 uints = 8 bytes).
+			for (uint i = 0; i < count; i++)
+			{
+				outputOptions.outputHandler->writeData(alphaBlocks + i, 8);
+				outputOptions.outputHandler->writeData(blockLinearImage + i * 2, 8);
+			}
+		}
+
+		bn += count;
+	}
+
+	clock_t end = clock();
+	//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
+
+	free(alphaBlocks);
+	free(blockLinearImage);
+
+#else
+	// Built without CUDA support: nothing is compressed, only the error is reported.
+	if (outputOptions.errorHandler != NULL)
+	{
+		outputOptions.errorHandler->error(Error_CudaError);
+	}
+#endif
+}
+
+
+/// Compress image using CUDA.
+/// The color blocks are produced by a DXT1 kernel on the device, in batches of
+/// at most MAX_BLOCKS blocks; the alpha blocks are produced on the host with
+/// QuickCompress::compressDXT5A. For every block the output handler, if there
+/// is one, receives the 8 byte alpha block followed by the 8 byte color block.
+/// Without HAVE_CUDA only Error_CudaError is reported to the error handler, if any.
+void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
+{
+	nvDebugCheck(cuda::isHardwarePresent());
+#if defined HAVE_CUDA
+
+	// Image size in blocks.
+	const uint w = (m_image->width() + 3) / 4;
+	const uint h = (m_image->height() + 3) / 4;
+
+	// 16 pixels per block, one Color32 each.
+	// NOTE(review): the malloc result is not checked before it is written to.
+	uint imageSize = w * h * 16 * sizeof(Color32);
+    uint * blockLinearImage = (uint *) malloc(imageSize);
+	convertToBlockLinear(m_image, blockLinearImage);
+
+	const uint blockNum = w * h;
+	const uint compressedSize = blockNum * 8;
+
+	// Host buffer for the alpha blocks of one batch: 8 bytes per block, at most MAX_BLOCKS blocks.
+	// NOTE(review): assumes sizeof(AlphaBlockDXT5) == 8 - confirm; the malloc result is not checked.
+	AlphaBlockDXT5 * alphaBlocks = NULL;
+	alphaBlocks = (AlphaBlockDXT5 *)malloc(min(compressedSize, MAX_BLOCKS * 8U));
+
+	setupCompressKernel(compressionOptions.colorWeight.ptr());
+	
+	// NOTE(review): start/end are only referenced by the commented-out printf below.
+	clock_t start = clock();
+
+	// bn is the index of the first block of the current batch.
+	uint bn = 0;
+	while(bn != blockNum)
+	{
+		uint count = min(blockNum - bn, MAX_BLOCKS);
+
+		// Upload the batch: 16 uints (count * 64 bytes in total) per block.
+	    cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
+
+		// Launch kernel.
+		// The weighted kernel is used when the alpha mode is AlphaMode_Transparency.
+		if (m_alphaMode == AlphaMode_Transparency)
+		{
+			compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
+		}
+		else
+		{
+			compressKernelDXT1_Level4(count, m_data, m_result, m_bitmapTable);
+		}
+
+		// Compress alpha in parallel with the GPU.
+		// This must read the source pixels before the read-back below overwrites the start of blockLinearImage.
+		for (uint i = 0; i < count; i++)
+		{
+			ColorBlock rgba(blockLinearImage + (bn + i) * 16);
+			QuickCompress::compressDXT5A(rgba, alphaBlocks + i);
+		}
+
+		// Check for errors.
+		// NOTE(review): after an error is reported the loop still reads back and outputs this batch.
+		cudaError_t err = cudaGetLastError();
+		if (err != cudaSuccess)
+		{
+			nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
+
+			if (outputOptions.errorHandler != NULL)
+			{
+				outputOptions.errorHandler->error(Error_CudaError);
+			}
+		}
+
+		// Copy result to host, overwrite swizzled image.
+		cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);
+
+		// Output result.
+		if (outputOptions.outputHandler != NULL)
+		{
+			// Interleave: alpha block i, then color block i (2 uints = 8 bytes).
+			for (uint i = 0; i < count; i++)
+			{
+				outputOptions.outputHandler->writeData(alphaBlocks + i, 8);
+				outputOptions.outputHandler->writeData(blockLinearImage + i * 2, 8);
+			}
+		}
+
+		bn += count;
+	}
+
+	clock_t end = clock();
+	//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
+
+	free(alphaBlocks);
+	free(blockLinearImage);
+
+#else
+	// Built without CUDA support: nothing is compressed, only the error is reported.
+	if (outputOptions.errorHandler != NULL)
+	{
+		outputOptions.errorHandler->error(Error_CudaError);
+	}
+#endif
+}
+
+
--- a/src/nvtt/cuda/CudaCompressDXT.h
+++ b/src/nvtt/cuda/CudaCompressDXT.h
@ -1,40 +1,61 @@
-// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#ifndef NV_TT_COMPRESSORRGB_H
-#define NV_TT_COMPRESSORRGB_H
-
-#include "nvtt.h"
-#include "Compressor.h"
-
-namespace nv
-{
-    struct PixelFormatConverter : public CompressorInterface
-	{
-		virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
-	};
-
-} // nv namespace
-
-
-#endif // NV_TT_COMPRESSORRGB_H
+// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
+// 
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+// 
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+
+#ifndef NV_TT_CUDACOMPRESSDXT_H
+#define NV_TT_CUDACOMPRESSDXT_H
+
+#include <nvimage/nvimage.h>
+#include <nvtt/nvtt.h>
+
+namespace nv
+{
+	class Image;
+
+	/// DXT compressor that runs the color compression kernels through CUDA.
+	/// The object owns three device buffers that are released in the
+	/// destructor, so it must not be copied.
+	class CudaCompressor
+	{
+	public:
+		CudaCompressor();
+		~CudaCompressor();
+
+		/// Returns true when the device buffers were allocated and no CUDA error is pending.
+		bool isValid() const;
+
+		/// Select the image and alpha mode used by the compressDXT* methods. The image is not copied.
+		void setImage(const Image * image, nvtt::AlphaMode alphaMode);
+
+		/// Compress the current image and send the result to the output handler of outputOptions.
+		void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
+		void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
+		void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
+
+	private:
+
+		// Not copyable: a copy would free the same device buffers twice.
+		// Declared but intentionally not defined.
+		CudaCompressor(const CudaCompressor &);
+		CudaCompressor & operator=(const CudaCompressor &);
+
+		uint * m_bitmapTable;	// Device copy of the bitmap table.
+		uint * m_data;			// Device buffer for one batch of input blocks.
+		uint * m_result;		// Device buffer for one batch of compressed blocks.
+		
+		const Image * m_image;			// Image to compress, not owned.
+		nvtt::AlphaMode m_alphaMode;	// Selects the color kernel in compressDXT3/5.
+	};
+
+} // nv namespace
+
+
+#endif // NV_TT_CUDACOMPRESSDXT_H
--- a/src/nvtt/cuda/CudaCompressorDXT.cpp
+++ b/src/nvtt/cuda/CudaCompressorDXT.cpp
@ -1,649 +0,0 @@
-// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#include <nvcore/Debug.h>
-#include <nvcore/Containers.h>
-#include <nvmath/Color.h>
-#include <nvimage/Image.h>
-#include <nvimage/ColorBlock.h>
-#include <nvimage/BlockDXT.h>
-#include <nvtt/CompressionOptions.h>
-#include <nvtt/OutputOptions.h>
-#include <nvtt/QuickCompressDXT.h>
-#include <nvtt/OptimalCompressDXT.h>
-
-#include "CudaCompressorDXT.h"
-#include "CudaUtils.h"
-
-
-#if defined HAVE_CUDA
-#include <cuda_runtime_api.h>
-#endif
-
-#include <time.h>
-#include <stdio.h>
-
-using namespace nv;
-using namespace nvtt;
-
-#if defined HAVE_CUDA
-
-#define MAX_BLOCKS 8192U // 32768, 65535
-
-
-extern "C" void setupCompressKernel(const float weights[3]);
-extern "C" void bindTextureToArray(cudaArray * d_data);
-
-extern "C" void compressKernelDXT1(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps);
-extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
-extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
-extern "C" void compressKernelDXT3(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps);
-//extern "C" void compressNormalKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
-//extern "C" void compressKernelCTX1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
-
-
-#include "BitmapTable.h"
-
-/*
-// Convert linear image to block linear.
-static void convertToBlockLinear(const Image * image, uint * blockLinearImage)
-{
-	const uint w = (image->width() + 3) / 4;
-	const uint h = (image->height() + 3) / 4;
-
-	for(uint by = 0; by < h; by++) {
-		for(uint bx = 0; bx < w; bx++) {
-			const uint bw = min(image->width() - bx * 4, 4U);
-			const uint bh = min(image->height() - by * 4, 4U);
-
-			for (uint i = 0; i < 16; i++) {
-				const int x = (i % 4) % bw;
-				const int y = (i / 4) % bh;
-				blockLinearImage[(by * w + bx) * 16 + i] = image->pixel(bx * 4 + x, by * 4 + y).u;
-			}
-		}
-	}
-}
-*/
-
-#endif
-
-
-CudaContext::CudaContext() : 
-	bitmapTable(NULL), 
-	bitmapTableCTX(NULL), 
-	data(NULL), 
-	result(NULL)
-{
-#if defined HAVE_CUDA
-    // Allocate and upload bitmaps.
-    cudaMalloc((void**) &bitmapTable, 992 * sizeof(uint));
-	if (bitmapTable != NULL)
-	{
-		cudaMemcpy(bitmapTable, s_bitmapTable, 992 * sizeof(uint), cudaMemcpyHostToDevice);
-	}
-
-    cudaMalloc((void**) &bitmapTableCTX, 704 * sizeof(uint));
-	if (bitmapTableCTX != NULL)
-	{
-		cudaMemcpy(bitmapTableCTX, s_bitmapTableCTX, 704 * sizeof(uint), cudaMemcpyHostToDevice);
-	}
-
-	// Allocate scratch buffers.
-    cudaMalloc((void**) &data, MAX_BLOCKS * 64U);
-    cudaMalloc((void**) &result, MAX_BLOCKS * 8U);
-#endif
-}
-
-CudaContext::~CudaContext()
-{
-#if defined HAVE_CUDA
-	// Free device mem allocations.
-	cudaFree(bitmapTableCTX);
-	cudaFree(bitmapTable);
-	cudaFree(data);
-	cudaFree(result);
-#endif
-}
-
-bool CudaContext::isValid() const
-{
-#if defined HAVE_CUDA
-	cudaError_t err = cudaGetLastError();
-	if (err != cudaSuccess)
-   	{
-		nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(err));
-		return false;
-	}
-#endif
-	return bitmapTable != NULL && bitmapTableCTX != NULL && data != NULL && result != NULL;
-}
-
-
-#if defined HAVE_CUDA
-
-CudaCompressor::CudaCompressor(CudaContext & ctx) : m_ctx(ctx)
-{
-
-}
-
-void CudaCompressor::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
-{
-	nvDebugCheck(cuda::isHardwarePresent());
-
-#if defined HAVE_CUDA
-
-	// Allocate image as a cuda array.
-	cudaArray * d_image;
-	if (inputFormat == nvtt::InputFormat_BGRA_8UB)
-	{
-        cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned);
-	    cudaMallocArray(&d_image, &channelDesc, w, h);
-
-		const int imageSize = w * h * sizeof(uint);
-		cudaMemcpyToArray(d_image, 0, 0, data, imageSize, cudaMemcpyHostToDevice);
-	}
-	else
-	{
-#pragma message(NV_FILE_LINE "FIXME: Floating point textures not really supported by CUDA compressors.")
-		cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat);
-	    cudaMallocArray(&d_image, &channelDesc, w, h);
-
-		const int imageSize = w * h * sizeof(uint);
-		cudaMemcpyToArray(d_image, 0, 0, data, imageSize, cudaMemcpyHostToDevice);
-	}
-
-	// Image size in blocks.
-	const uint bw = (w + 3) / 4;
-	const uint bh = (h + 3) / 4;
-	const uint bs = blockSize();
-	const uint blockNum = bw * bh;
-	const uint compressedSize = blockNum * bs;
-
-	void * h_result = malloc(min(blockNum, MAX_BLOCKS) * bs);
-
-	setup(d_image, compressionOptions);
-
-	// Timer timer;
-	// timer.start();
-
-	uint bn = 0;
-	while(bn != blockNum)
-	{
-		uint count = min(blockNum - bn, MAX_BLOCKS);
-
-		compressBlocks(bn, count, w, h, alphaMode, compressionOptions, h_result);
-
-		// Check for errors.
-		cudaError_t err = cudaGetLastError();
-		if (err != cudaSuccess)
-		{
-			//nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
-			if (outputOptions.errorHandler != NULL)
-			{
-				outputOptions.errorHandler->error(Error_CudaError);
-			}
-		}
-
-		// Output result.
-		if (outputOptions.outputHandler != NULL)
-		{
-			outputOptions.outputHandler->writeData(h_result, count * bs);
-		}
-
-		bn += count;
-	}
-
-	//timer.stop();
-	//printf("\rCUDA time taken: %.3f seconds\n", timer.elapsed() / CLOCKS_PER_SEC);
-
-	free(h_result);
-	cudaFreeArray(d_image);
-
-#else
-	if (outputOptions.errorHandler != NULL)
-	{
-		outputOptions.errorHandler->error(Error_CudaError);
-	}
-#endif
-
-}
-
-
-void CudaCompressorDXT1::setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions)
-{
-	setupCompressKernel(compressionOptions.colorWeight.ptr());
-	bindTextureToArray(image);
-}
-
-void CudaCompressorDXT1::compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
-{
-	// Launch kernel.
-	compressKernelDXT1(first, count, w, m_ctx.result, m_ctx.bitmapTable);
-
-	// Copy result to host.
-	cudaMemcpy(output, m_ctx.result, count * 8, cudaMemcpyDeviceToHost);
-}
-
-
-void CudaCompressorDXT3::setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions)
-{
-	setupCompressKernel(compressionOptions.colorWeight.ptr());
-	bindTextureToArray(image);
-}
-
-void CudaCompressorDXT3::compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
-{
-	// Launch kernel.
-	compressKernelDXT3(first, count, w, m_ctx.result, m_ctx.bitmapTable);
-
-	// Copy result to host.
-	cudaMemcpy(output, m_ctx.result, count * 16, cudaMemcpyDeviceToHost);
-}
-
-
-void CudaCompressorDXT5::setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions)
-{
-	setupCompressKernel(compressionOptions.colorWeight.ptr());
-	bindTextureToArray(image);
-}
-
-void CudaCompressorDXT5::compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
-{
-	/*// Launch kernel.
-	compressKernelDXT5(first, count, w, m_ctx.result, m_ctx.bitmapTable);
-
-	// Copy result to host.
-	cudaMemcpy(output, m_ctx.result, count * 16, cudaMemcpyDeviceToHost);*/
-
-	// Launch kernel.
-	if (alphaMode == AlphaMode_Transparency)
-	{
-	//	compressWeightedKernelDXT1(first, count, w, m_ctx.result, m_ctx.bitmapTable);
-	}
-	else
-	{
-	//	compressKernelDXT1_Level4(first, count, w, m_ctx.result, m_ctx.bitmapTable);
-	}
-
-	// Compress alpha in parallel with the GPU.
-	for (uint i = 0; i < count; i++)
-	{
-		//ColorBlock rgba(blockLinearImage + (first + i) * 16);
-		//OptimalCompress::compressDXT3A(rgba, alphaBlocks + i);
-	}
-
-	// Copy result to host.
-	cudaMemcpy(output, m_ctx.result, count * 8, cudaMemcpyDeviceToHost);
-
-	// @@ Interleave color and alpha blocks.
-
-}
-
-
-
-
-
-
-// @@ This code is very repetitive and needs to be cleaned up.
-
-#if 0
-
-/// Compress image using CUDA.
-void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
-{
-	nvDebugCheck(cuda::isHardwarePresent());
-#if defined HAVE_CUDA
-
-	// Image size in blocks.
-	const uint w = (m_image->width() + 3) / 4;
-	const uint h = (m_image->height() + 3) / 4;
-
-	uint imageSize = w * h * 16 * sizeof(Color32);
-    uint * blockLinearImage = (uint *) malloc(imageSize);
-	convertToBlockLinear(m_image, blockLinearImage);
-
-	const uint blockNum = w * h;
-	const uint compressedSize = blockNum * 8;
-
-	AlphaBlockDXT3 * alphaBlocks = NULL;
-	alphaBlocks = (AlphaBlockDXT3 *)malloc(min(compressedSize, MAX_BLOCKS * 8U));
-
-	setupCompressKernel(compressionOptions.colorWeight.ptr());
-	
-	clock_t start = clock();
-
-	uint bn = 0;
-	while(bn != blockNum)
-	{
-		uint count = min(blockNum - bn, MAX_BLOCKS);
-
-	    cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
-
-		// Launch kernel.
-		if (m_alphaMode == AlphaMode_Transparency)
-		{
-			compressWeightedKernelDXT1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable);
-		}
-		else
-		{
-			compressKernelDXT1_Level4(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable);
-		}
-
-		// Compress alpha in parallel with the GPU.
-		for (uint i = 0; i < count; i++)
-		{
-			ColorBlock rgba(blockLinearImage + (bn + i) * 16);
-			OptimalCompress::compressDXT3A(rgba, alphaBlocks + i);
-		}
-
-		// Check for errors.
-		cudaError_t err = cudaGetLastError();
-		if (err != cudaSuccess)
-		{
-			nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
-
-			if (outputOptions.errorHandler != NULL)
-			{
-				outputOptions.errorHandler->error(Error_CudaError);
-			}
-		}
-
-		// Copy result to host, overwrite swizzled image.
-		cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost);
-
-		// Output result.
-		if (outputOptions.outputHandler != NULL)
-		{
-			for (uint i = 0; i < count; i++)
-			{
-				outputOptions.outputHandler->writeData(alphaBlocks + i, 8);
-				outputOptions.outputHandler->writeData(blockLinearImage + i * 2, 8);
-			}
-		}
-
-		bn += count;
-	}
-
-	clock_t end = clock();
-	//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
-
-	free(alphaBlocks);
-	free(blockLinearImage);
-
-#else
-	if (outputOptions.errorHandler != NULL)
-	{
-		outputOptions.errorHandler->error(Error_CudaError);
-	}
-#endif
-}
-
-
-/// Compress image using CUDA.
-void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
-{
-	nvDebugCheck(cuda::isHardwarePresent());
-#if defined HAVE_CUDA
-
-	// Image size in blocks.
-	const uint w = (m_image->width() + 3) / 4;
-	const uint h = (m_image->height() + 3) / 4;
-
-	uint imageSize = w * h * 16 * sizeof(Color32);
-    uint * blockLinearImage = (uint *) malloc(imageSize);
-	convertToBlockLinear(m_image, blockLinearImage);
-
-	const uint blockNum = w * h;
-	const uint compressedSize = blockNum * 8;
-
-	AlphaBlockDXT5 * alphaBlocks = NULL;
-	alphaBlocks = (AlphaBlockDXT5 *)malloc(min(compressedSize, MAX_BLOCKS * 8U));
-
-	setupCompressKernel(compressionOptions.colorWeight.ptr());
-	
-	clock_t start = clock();
-
-	uint bn = 0;
-	while(bn != blockNum)
-	{
-		uint count = min(blockNum - bn, MAX_BLOCKS);
-
-	    cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
-
-		// Launch kernel.
-		if (m_alphaMode == AlphaMode_Transparency)
-		{
-			compressWeightedKernelDXT1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable);
-		}
-		else
-		{
-			compressKernelDXT1_Level4(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable);
-		}
-
-		// Compress alpha in parallel with the GPU.
-		for (uint i = 0; i < count; i++)
-		{
-			ColorBlock rgba(blockLinearImage + (bn + i) * 16);
-			QuickCompress::compressDXT5A(rgba, alphaBlocks + i);
-		}
-
-		// Check for errors.
-		cudaError_t err = cudaGetLastError();
-		if (err != cudaSuccess)
-		{
-			nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
-
-			if (outputOptions.errorHandler != NULL)
-			{
-				outputOptions.errorHandler->error(Error_CudaError);
-			}
-		}
-
-		// Copy result to host, overwrite swizzled image.
-		cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost);
-
-		// Output result.
-		if (outputOptions.outputHandler != NULL)
-		{
-			for (uint i = 0; i < count; i++)
-			{
-				outputOptions.outputHandler->writeData(alphaBlocks + i, 8);
-				outputOptions.outputHandler->writeData(blockLinearImage + i * 2, 8);
-			}
-		}
-
-		bn += count;
-	}
-
-	clock_t end = clock();
-	//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
-
-	free(alphaBlocks);
-	free(blockLinearImage);
-
-#else
-	if (outputOptions.errorHandler != NULL)
-	{
-		outputOptions.errorHandler->error(Error_CudaError);
-	}
-#endif
-}
-
-
-void CudaCompressor::compressDXT1n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
-{
-	nvDebugCheck(cuda::isHardwarePresent());
-#if defined HAVE_CUDA
-
-	// Image size in blocks.
-	const uint w = (m_image->width() + 3) / 4;
-	const uint h = (m_image->height() + 3) / 4;
-
-	uint imageSize = w * h * 16 * sizeof(Color32);
-    uint * blockLinearImage = (uint *) malloc(imageSize);
-	convertToBlockLinear(m_image, blockLinearImage);	// @@ Do this in parallel with the GPU, or in the GPU!
-
-	const uint blockNum = w * h;
-	const uint compressedSize = blockNum * 8;
-
-	clock_t start = clock();
-
-	setupCompressKernel(compressionOptions.colorWeight.ptr());
-	
-	// TODO: Add support for multiple GPUs.
-	uint bn = 0;
-	while(bn != blockNum)
-	{
-		uint count = min(blockNum - bn, MAX_BLOCKS);
-
-	    cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
-
-		// Launch kernel.
-		compressNormalKernelDXT1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable);
-
-		// Check for errors.
-		cudaError_t err = cudaGetLastError();
-		if (err != cudaSuccess)
-		{
-			nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
-
-			if (outputOptions.errorHandler != NULL)
-			{
-				outputOptions.errorHandler->error(Error_CudaError);
-			}
-		}
-
-		// Copy result to host, overwrite swizzled image.
-		cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost);
-
-		// Output result.
-		if (outputOptions.outputHandler != NULL)
-		{
-			outputOptions.outputHandler->writeData(blockLinearImage, count * 8);
-		}
-
-		bn += count;
-	}
-
-	clock_t end = clock();
-	//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
-
-	free(blockLinearImage);
-
-#else
-	if (outputOptions.errorHandler != NULL)
-	{
-		outputOptions.errorHandler->error(Error_CudaError);
-	}
-#endif
-}
-
-
-void CudaCompressor::compressCTX1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
-{
-	nvDebugCheck(cuda::isHardwarePresent());
-#if defined HAVE_CUDA
-
-	// Image size in blocks.
-	const uint w = (m_image->width() + 3) / 4;
-	const uint h = (m_image->height() + 3) / 4;
-
-	uint imageSize = w * h * 16 * sizeof(Color32);
-    uint * blockLinearImage = (uint *) malloc(imageSize);
-	convertToBlockLinear(m_image, blockLinearImage);	// @@ Do this in parallel with the GPU, or in the GPU!
-
-	const uint blockNum = w * h;
-	const uint compressedSize = blockNum * 8;
-
-	clock_t start = clock();
-
-	setupCompressKernel(compressionOptions.colorWeight.ptr());
-	
-	// TODO: Add support for multiple GPUs.
-	uint bn = 0;
-	while(bn != blockNum)
-	{
-		uint count = min(blockNum - bn, MAX_BLOCKS);
-
-	    cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
-
-		// Launch kernel.
-		compressKernelCTX1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTableCTX);
-
-		// Check for errors.
-		cudaError_t err = cudaGetLastError();
-		if (err != cudaSuccess)
-		{
-			nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
-
-			if (outputOptions.errorHandler != NULL)
-			{
-				outputOptions.errorHandler->error(Error_CudaError);
-			}
-		}
-
-		// Copy result to host, overwrite swizzled image.
-		cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost);
-
-		// Output result.
-		if (outputOptions.outputHandler != NULL)
-		{
-			outputOptions.outputHandler->writeData(blockLinearImage, count * 8);
-		}
-
-		bn += count;
-	}
-
-	clock_t end = clock();
-	//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
-
-	free(blockLinearImage);
-
-#else
-	if (outputOptions.errorHandler != NULL)
-	{
-		outputOptions.errorHandler->error(Error_CudaError);
-	}
-#endif
-}
-
-
-void CudaCompressor::compressDXT5n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
-{
-	nvDebugCheck(cuda::isHardwarePresent());
-#if defined HAVE_CUDA
-
-	// @@ TODO
-
-#else
-	if (outputOptions.errorHandler != NULL)
-	{
-		outputOptions.errorHandler->error(Error_CudaError);
-	}
-#endif
-}
-
-#endif // 0
-
-#endif // defined HAVE_CUDA
--- a/src/nvtt/cuda/CudaCompressorDXT.h
+++ b/src/nvtt/cuda/CudaCompressorDXT.h
@ -1,112 +0,0 @@
-// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#ifndef NV_TT_CUDACOMPRESSORDXT_H
-#define NV_TT_CUDACOMPRESSORDXT_H
-
-#include "nvtt/nvtt.h"
-#include "../Compressor.h" // CompressorInterface
-
-struct cudaArray;
-
-namespace nv
-{
-	class CudaContext
-	{
-	public:
-		CudaContext();
-		~CudaContext();
-
-		bool isValid() const;
-
-	public:
-		// Device pointers.
-		uint * bitmapTable;
-		uint * bitmapTableCTX;
-		uint * data;
-		uint * result;
-	};
-
-#if defined HAVE_CUDA
-
-	struct CudaCompressor : public CompressorInterface
-	{
-		CudaCompressor(CudaContext & ctx);
-
-		virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
-
-		virtual void setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions) = 0;
-		virtual void compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0;
-		virtual uint blockSize() const = 0;
-
-	protected:
-		CudaContext & m_ctx;
-	};
-
-	struct CudaCompressorDXT1 : public CudaCompressor
-	{
-		CudaCompressorDXT1(CudaContext & ctx) : CudaCompressor(ctx) {}
-
-		virtual void setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions);
-		virtual void compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
-		virtual uint blockSize() const { return 8; };
-	};
-
-	/*struct CudaCompressorDXT1n : public CudaCompressor
-	{
-		virtual void setup(const CompressionOptions::Private & compressionOptions);
-		virtual void compressBlocks(uint blockCount, const void * input, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0;
-		virtual uint blockSize() const { return 8; };
-	};*/
-
-	struct CudaCompressorDXT3 : public CudaCompressor
-	{
-		CudaCompressorDXT3(CudaContext & ctx) : CudaCompressor(ctx) {}
-
-		virtual void setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions);
-		virtual void compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
-		virtual uint blockSize() const { return 16; };
-	};
-
-	struct CudaCompressorDXT5 : public CudaCompressor
-	{
-		CudaCompressorDXT5(CudaContext & ctx) : CudaCompressor(ctx) {}
-
-		virtual void setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions);
-		virtual void compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
-		virtual uint blockSize() const { return 16; };
-	};
-
-	/*struct CudaCompressorCXT1 : public CudaCompressor
-	{
-		virtual void setup(const CompressionOptions::Private & compressionOptions);
-		virtual void compressBlocks(uint blockCount, const void * input, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0;
-		virtual uint blockSize() const { return 8; };
-	};*/
-
-#endif // defined HAVE_CUDA
-
-} // nv namespace
-
-
-#endif // NV_TT_CUDAUTILS_H
--- a/src/nvtt/cuda/CudaMath.h
+++ b/src/nvtt/cuda/CudaMath.h
@ -26,6 +26,7 @@
 #ifndef CUDAMATH_H
 #define CUDAMATH_H

+#include <float.h>


 inline __device__ __host__ float3 operator *(float3 a, float3 b)
@ -86,69 +87,6 @@ inline __device__ __host__ bool operator ==(float3 a, float3 b)
 	return a.x == b.x && a.y == b.y && a.z == b.z;
 }

-
-// float2 operators
-inline __device__ __host__ float2 operator *(float2 a, float2 b)
-{
-    return make_float2(a.x*b.x, a.y*b.y);
-}
-
-inline __device__ __host__ float2 operator *(float f, float2 v)
-{
-    return make_float2(v.x*f, v.y*f);
-}
-
-inline __device__ __host__ float2 operator *(float2 v, float f)
-{
-    return make_float2(v.x*f, v.y*f);
-}
-
-inline __device__ __host__ float2 operator +(float2 a, float2 b)
-{
-    return make_float2(a.x+b.x, a.y+b.y);
-}
-
-inline __device__ __host__ void operator +=(float2 & b, float2 a)
-{
-    b.x += a.x;
-    b.y += a.y;
-}
-
-inline __device__ __host__ float2 operator -(float2 a, float2 b)
-{
-    return make_float2(a.x-b.x, a.y-b.y);
-}
-
-inline __device__ __host__ void operator -=(float2 & b, float2 a)
-{
-    b.x -= a.x;
-    b.y -= a.y;
-}
-
-inline __device__ __host__ float2 operator /(float2 v, float f)
-{
-    float inv = 1.0f / f;
-    return v * inv;
-}
-
-inline __device__ __host__ void operator /=(float2 & b, float f)
-{
-    float inv = 1.0f / f;
-    b.x *= inv;
-	b.y *= inv;
-}
-
-inline __device__ __host__ bool operator ==(float2 a, float2 b)
-{
-	return a.x == b.x && a.y == b.y;
-}
-
-
-inline __device__ __host__ float dot(float2 a, float2 b)
-{
-    return a.x * b.x + a.y * b.y;
-}
-
 inline __device__ __host__ float dot(float3 a, float3 b)
 {
    return a.x * b.x + a.y * b.y + a.z * b.z;
@ -181,16 +119,6 @@ inline __device__ __host__ float3 normalize(float3 v)
    return make_float3(v.x * len, v.y * len, v.z * len);
 }

-inline __device__ __host__ float3 lerp(float3 a, float3 b, float t)
-{
-    const float s = 1.0f - t;
-    return make_float3(s * a.x + t * b.x, s * a.y + t * b.y, s * a.z + t * b.z);
-}
-
-inline __device__ __host__ float lengthSquared(float3 a)
-{
-    return dot(a, a);
-}



@ -199,8 +127,21 @@ inline __device__ __host__ float lengthSquared(float3 a)
 inline __device__ __host__ float3 firstEigenVector( float matrix[6] )
 {
 	// 8 iterations seems to be more than enough.
+
+	float3 row0 = make_float3(matrix[0], matrix[1], matrix[2]);
+	float3 row1 = make_float3(matrix[1], matrix[3], matrix[4]);
+	float3 row2 = make_float3(matrix[2], matrix[4], matrix[5]);

-	float3 v = make_float3(1.0f, 1.0f, 1.0f);
+	float r0 = dot(row0, row0);
+	float r1 = dot(row1, row1);
+	float r2 = dot(row2, row2);
+
+	float3 v;
+	if (r0 > r1 && r0 > r2) v = row0;
+	else if (r1 > r2) v = row1;
+	else v = row2;
+
+	//float3 v = make_float3(1.0f, 1.0f, 1.0f);
 	for(int i = 0; i < 8; i++) {
 		float x = v.x * matrix[0] + v.y * matrix[1] + v.z * matrix[2];
 		float y = v.x * matrix[1] + v.y * matrix[3] + v.z * matrix[4];
@ -315,89 +256,5 @@ inline __device__ float3 bestFitLine(const float3 * colors, float3 color_sum, fl
 	return firstEigenVector(covariance);
 }

-// @@ For 2D this may not be the most efficient method. It's a quadratic equation, right?
-inline __device__ __host__ float2 firstEigenVector2D( float matrix[3] )
-{
-	// @@ 8 iterations is probably more than enough.
-
-	float2 v = make_float2(1.0f, 1.0f);
-	for(int i = 0; i < 8; i++) {
-		float x = v.x * matrix[0] + v.y * matrix[1];
-		float y = v.x * matrix[1] + v.y * matrix[2];
-		float m = max(x, y);        
-		float iv = 1.0f / m;
-		if (m == 0.0f) iv = 0.0f;
-		v = make_float2(x*iv, y*iv);
-	}
-
-	return v;
-}
-
-inline __device__ void colorSums(const float2 * colors, float2 * sums)
-{
-#if __DEVICE_EMULATION__
-	float2 color_sum = make_float2(0.0f, 0.0f);
-	for (int i = 0; i < 16; i++)
-	{
-		color_sum += colors[i];
-	}
-
-	for (int i = 0; i < 16; i++)
-	{
-		sums[i] = color_sum;
-	}
-#else
-
-	const int idx = threadIdx.x;
-
-	sums[idx] = colors[idx];
-	sums[idx] += sums[idx^8];
-	sums[idx] += sums[idx^4];
-	sums[idx] += sums[idx^2];
-	sums[idx] += sums[idx^1];
-
-#endif
-}
-
-inline __device__ float2 bestFitLine(const float2 * colors, float2 color_sum)
-{
-	// Compute covariance matrix of the given colors.
-#if __DEVICE_EMULATION__
-	float covariance[3] = {0, 0, 0};
-	for (int i = 0; i < 16; i++)
-	{
-		float2 a = (colors[i] - color_sum * (1.0f / 16.0f));
-		covariance[0] += a.x * a.x;
-		covariance[1] += a.x * a.y;
-		covariance[2] += a.y * a.y;
-	}
-#else
-
-	const int idx = threadIdx.x;
-
-	float2 diff = (colors[idx] - color_sum * (1.0f / 16.0f));
-
-	__shared__ float covariance[16*3];
-
-	covariance[3 * idx + 0] = diff.x * diff.x;
-	covariance[3 * idx + 1] = diff.x * diff.y;
-	covariance[3 * idx + 2] = diff.y * diff.y;
-
-	for(int d = 8; d > 0; d >>= 1)
-	{
-		if (idx < d)
-		{
-			covariance[3 * idx + 0] += covariance[3 * (idx+d) + 0];
-			covariance[3 * idx + 1] += covariance[3 * (idx+d) + 1];
-			covariance[3 * idx + 2] += covariance[3 * (idx+d) + 2];
-		}
-	}
-
-#endif
-
-	// Compute first eigen vector.
-	return firstEigenVector2D(covariance);
-}
-

 #endif // CUDAMATH_H
--- a/src/nvtt/cuda/CudaUtils.cpp
+++ b/src/nvtt/cuda/CudaUtils.cpp
@ -1,239 +1,300 @@
-// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#include <nvcore/Debug.h>
-#include <nvcore/Library.h>
-#include "CudaUtils.h"
-
-#if defined HAVE_CUDA
-#include <cuda.h>
-#include <cuda_runtime_api.h>
-#endif
-
-using namespace nv;
-using namespace cuda;
-
-/* @@ Move this to win32 utils or somewhere else.
-#if NV_OS_WIN32
-
-#define WINDOWS_LEAN_AND_MEAN
-#include <windows.h>
-
-static bool isWindowsVista()
-{
-	OSVERSIONINFO osvi;
-	osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
-
-	::GetVersionEx(&osvi);
-	return osvi.dwMajorVersion >= 6;
-}
-
-
-typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
-
-static bool isWow32()
-{
-	LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
-
-	BOOL bIsWow64 = FALSE;
- 
-	if (NULL != fnIsWow64Process)
-	{
-		if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
-		{
-			// Assume 32 bits.
-			return true;
-		}
-	}
-
-	return !bIsWow64;
-}
-
-#endif
-*/
-
-
-static bool isCudaDriverAvailable(int version)
-{
-#if defined HAVE_CUDA
-#if NV_OS_WIN32
-	Library nvcuda("nvcuda.dll");
-#else
-	Library nvcuda(NV_LIBRARY_NAME(cuda));
-#endif
-	
-	if (!nvcuda.isValid())
-	{
-		nvDebug("*** CUDA driver not found.\n");
-		return false;
-	}
-	
-	if (version >= 2000)
-	{
-		void * address = nvcuda.bindSymbol("cuStreamCreate");
-		if (address == NULL) {
-			nvDebug("*** CUDA driver version < 2.0.\n");
-			return false;
-		}
-	}
-
-	if (version >= 2010)
-	{
-		void * address = nvcuda.bindSymbol("cuModuleLoadDataEx");
-		if (address == NULL) {
-			nvDebug("*** CUDA driver version < 2.1.\n");
-			return false;
-		}
-	}
-	
-	if (version >= 2020)
-	{
-		typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version);
-
-		PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion");
-		if (driverGetVersion == NULL) {
-			nvDebug("*** CUDA driver version < 2.2.\n");
-			return false;
-		}
-
-		int driverVersion;
-		CUresult err = driverGetVersion(&driverVersion);
-		if (err != CUDA_SUCCESS) {
-			nvDebug("*** Error querying driver version: '%s'.\n", cudaGetErrorString((cudaError_t)err));
-			return false;
-		}
-
-		return driverVersion >= version;
-	}
-#endif // HAVE_CUDA
-
-	return true;
-}
-
-
-/// Determine if CUDA is available.
-bool nv::cuda::isHardwarePresent()
-{
-#if defined HAVE_CUDA
-	// Make sure that CUDA driver matches CUDA runtime.
-	if (!isCudaDriverAvailable(CUDART_VERSION))
-	{
-		nvDebug("CUDA driver not available for CUDA runtime %d\n", CUDART_VERSION);
-		return false;
-	}
-
-	int count = deviceCount();
-	if (count == 1)
-	{
-		// Make sure it's not an emulation device.
-		cudaDeviceProp deviceProp;
-		cudaGetDeviceProperties(&deviceProp, 0);
-
-		// deviceProp.name != Device Emulation (CPU)
-		if (deviceProp.major == -1 || deviceProp.minor == -1)
-		{
-			return false;
-		}
-	}
-
-	// @@ Make sure that warp size == 32
-
-	return count > 0;
-#else
-	return false;
-#endif
-}
-
-/// Get number of CUDA enabled devices.
-int nv::cuda::deviceCount()
-{
-#if defined HAVE_CUDA
-	int gpuCount = 0;
-
-	cudaError_t result = cudaGetDeviceCount(&gpuCount);
-
-	if (result == cudaSuccess)
-	{
-		return gpuCount;
-	}
-#endif
-	return 0;
-}
-
-int nv::cuda::getFastestDevice()
-{
-	int max_gflops_device = 0;
-#if defined HAVE_CUDA
-	int max_gflops = 0;
-
-	const int device_count = deviceCount();
-	int current_device = 0;
-	while (current_device < device_count)
-	{
-		cudaDeviceProp device_properties;
-		cudaGetDeviceProperties(&device_properties, current_device);
-		int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
-
-		if (device_properties.major != -1 && device_properties.minor != -1)
-		{
-			if( gflops > max_gflops )
-			{
-				max_gflops = gflops;
-				max_gflops_device = current_device;
-			}
-		}
-		
-		current_device++;
-	}
-#endif
-	return max_gflops_device;
-}
-
-
-/// Activate the given devices.
-bool nv::cuda::setDevice(int i)
-{
-	nvCheck(i < deviceCount());
-#if defined HAVE_CUDA
-	cudaError_t result = cudaSetDevice(i);
-
-	if (result != cudaSuccess) {
-		nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
-	}
-
-	return result == cudaSuccess;
-#else
-	return false;
-#endif
-}
-
-void nv::cuda::exit()
-{
-#if defined HAVE_CUDA
-	cudaError_t result = cudaThreadExit();
-
-	if (result != cudaSuccess) {
-		nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
-	}
-#endif
-}
+// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
+// 
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+// 
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+
+#include <nvcore/Debug.h>
+#include <nvcore/Library.h>
+#include "CudaUtils.h"
+
+#if defined HAVE_CUDA
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#endif
+
+using namespace nv;
+using namespace cuda;
+
+/* @@ Move this to win32 utils or somewhere else.
+#if NV_OS_WIN32
+
+#define WINDOWS_LEAN_AND_MEAN
+#include <windows.h>
+
+static bool isWindowsVista()
+{
+OSVERSIONINFO osvi;
+osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
+
+::GetVersionEx(&osvi);
+return osvi.dwMajorVersion >= 6;
+}
+
+
+typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
+
+static bool isWow32()
+{
+LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
+
+BOOL bIsWow64 = FALSE;
+
+if (NULL != fnIsWow64Process)
+{
+if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
+{
+// Assume 32 bits.
+return true;
+}
+}
+
+return !bIsWow64;
+}
+
+#endif
+*/
+
+
+// Check that a CUDA driver library can be loaded and that it is recent enough
+// for the requested version. The only caller in this file passes
+// CUDART_VERSION, so 'version' uses that encoding (2000 = 2.0, 2010 = 2.1,
+// 2020 = 2.2, as reflected by the debug messages below).
+//
+// Returns false when the library cannot be loaded or when a version probe
+// fails. Returns true when every applicable probe passes, and also when the
+// code is built without HAVE_CUDA (no probing is done in that case).
+static bool isCudaDriverAvailable(int version)
+{
+#if defined HAVE_CUDA
+	// Load the driver library by its platform-specific name.
+#if NV_OS_WIN32
+	Library nvcuda("nvcuda.dll");
+#else
+	Library nvcuda(NV_LIBRARY_NAME(cuda));
+#endif
+
+	if (!nvcuda.isValid())
+	{
+		nvDebug("*** CUDA driver not found.\n");
+		return false;
+	}
+
+	// Presence of cuStreamCreate is used as the marker for a 2.0 driver.
+	if (version >= 2000)
+	{
+		void * address = nvcuda.bindSymbol("cuStreamCreate");
+		if (address == NULL) {
+			nvDebug("*** CUDA driver version < 2.0.\n");
+			return false;
+		}
+	}
+
+	// Presence of cuModuleLoadDataEx is used as the marker for a 2.1 driver.
+	if (version >= 2010)
+	{
+		void * address = nvcuda.bindSymbol("cuModuleLoadDataEx");
+		if (address == NULL) {
+			nvDebug("*** CUDA driver version < 2.1.\n");
+			return false;
+		}
+	}
+
+	// From 2.2 on the driver is asked for its version directly and the answer
+	// is compared against the requested version.
+	if (version >= 2020)
+	{
+		typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version);
+
+		PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion");
+		if (driverGetVersion == NULL) {
+			nvDebug("*** CUDA driver version < 2.2.\n");
+			return false;
+		}
+
+		int driverVersion;
+		CUresult err = driverGetVersion(&driverVersion);
+		if (err != CUDA_SUCCESS) {
+			// NOTE(review): 'err' is a driver-API CUresult cast to the runtime
+			// cudaError_t; the two enums presumably do not share values, so the
+			// printed string may not describe the real error -- confirm.
+			nvDebug("*** Error querying driver version: '%s'.\n", cudaGetErrorString((cudaError_t)err));
+			return false;
+		}
+
+		return driverVersion >= version;
+	}
+#endif // HAVE_CUDA
+
+	return true;
+}
+
+
+/// Determine if CUDA is available.
+///
+/// Returns true when the installed driver is recent enough for the CUDA
+/// runtime this code was built against and at least one usable device is
+/// reported. Always returns false when built without HAVE_CUDA.
+bool nv::cuda::isHardwarePresent()
+{
+#if defined HAVE_CUDA
+	// Make sure that CUDA driver matches CUDA runtime.
+	if (!isCudaDriverAvailable(CUDART_VERSION))
+	{
+		nvDebug("CUDA driver not available for CUDA runtime %d\n", CUDART_VERSION);
+		return false;
+	}
+
+	const int count = deviceCount();
+	if (count == 1)
+	{
+		// A single reported device may be the emulation device; make sure it's not.
+		cudaDeviceProp deviceProp;
+		const cudaError_t result = cudaGetDeviceProperties(&deviceProp, 0);
+
+		// Do not inspect deviceProp when the query failed: its fields would
+		// be indeterminate. A device that cannot be queried is not usable.
+		if (result != cudaSuccess)
+		{
+			nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
+			return false;
+		}
+
+		// deviceProp.name != Device Emulation (CPU)
+		if (deviceProp.major == -1 || deviceProp.minor == -1)
+		{
+			return false;
+		}
+	}
+
+	// @@ Make sure that warp size == 32
+
+	// @@ Make sure available GPU is faster than the CPU.
+
+	return count > 0;
+#else
+	return false;
+#endif
+}
+
+/// Get number of CUDA enabled devices.
+///
+/// Returns the count reported by cudaGetDeviceCount, or 0 when that call
+/// fails or when the code is built without HAVE_CUDA.
+int nv::cuda::deviceCount()
+{
+#if defined HAVE_CUDA
+	int gpuCount = 0;
+
+	cudaError_t result = cudaGetDeviceCount(&gpuCount);
+
+	// Only trust the count when the runtime reported success.
+	if (result == cudaSuccess)
+	{
+		return gpuCount;
+	}
+#endif
+	return 0;
+}
+
+
+// Make sure device meets requirements:
+// - Its properties can be queried (rejects failed queries, e.g. a bad index).
+// - Not an emulation device.
+// - Not an integrated device? (check currently disabled below)
+// - Faster than CPU. (@@ not implemented)
+//
+// Returns true when device 'i' passes the checks above; always returns false
+// when built without HAVE_CUDA.
+bool nv::cuda::isValidDevice(int i)
+{
+#if defined HAVE_CUDA
+
+	cudaDeviceProp device_properties;
+	const cudaError_t result = cudaGetDeviceProperties(&device_properties, i);
+
+	if (result != cudaSuccess) {
+		// The properties struct is indeterminate after a failed query, so it
+		// must not be inspected; treat the device as unusable.
+		nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
+		return false;
+	}
+
+	if (device_properties.major == -1 || device_properties.minor == -1) {
+		// Emulation device.
+		return false;
+	}
+
+#if CUDART_VERSION >= 2030 // 2.3
+	/*if (device_properties.integrated)
+	{
+		// Integrated devices.
+		return false;
+	}*/
+#endif
+
+	return true;
+#else
+	return false;
+#endif
+}
+
+// Pick the valid device with the highest estimated throughput, scored as
+// multiprocessor count times clock rate.
+//
+// Returns the index of that device, or -1 when no device qualifies (no
+// devices, none valid, or built without HAVE_CUDA).
+int nv::cuda::getFastestDevice()
+{
+	int max_gflops_device = -1;
+#if defined HAVE_CUDA
+	int max_gflops = 0;
+
+	const int device_count = deviceCount();
+	for (int i = 0; i < device_count; i++)
+	{
+		if (!isValidDevice(i))
+		{
+			continue;
+		}
+
+		cudaDeviceProp device_properties;
+		if (cudaGetDeviceProperties(&device_properties, i) != cudaSuccess)
+		{
+			// Never score a device from an unfilled properties struct.
+			continue;
+		}
+
+		const int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
+
+		if (gflops > max_gflops)
+		{
+			max_gflops = gflops;
+			max_gflops_device = i;
+		}
+	}
+#endif
+	return max_gflops_device;
+}
+
+
+/// Select and activate the fastest valid CUDA device.
+///
+/// @param device_ptr  Must not be NULL. Receives the index of the device that
+///                    this call activated, or -1 when there is nothing for the
+///                    caller to clean up (no suitable device, an error, or a
+///                    device was already active in this process).
+/// @return true when a valid device is active after the call, false otherwise.
+///         Always false when built without HAVE_CUDA.
+bool nv::cuda::initDevice(int * device_ptr)
+{
+	nvDebugCheck(device_ptr != NULL);
+#if defined HAVE_CUDA
+
+#if CUDART_VERSION >= 2030 // 2.3
+
+	// Set device flags to yield in order to play nice with other threads.
+	// The result is deliberately ignored: an already active runtime is
+	// detected below through the cudaSetDevice return value.
+	(void)cudaSetDeviceFlags(cudaDeviceScheduleYield);
+
+#endif
+
+	const int device = getFastestDevice();
+
+	if (device == -1)
+	{
+		// No device is fast enough.
+		*device_ptr = -1;
+		return false;
+	}
+
+	// Select CUDA device.
+	cudaError_t result = cudaSetDevice(device);
+
+	if (result == cudaErrorSetOnActiveProcess)
+	{
+		// A device was already selected for this process; use it if it is valid.
+		*device_ptr = -1;  // No device to cleanup.
+
+		int activeDevice = -1;
+		result = cudaGetDevice(&activeDevice);
+		if (result != cudaSuccess)
+		{
+			// Without a successful query there is no device index to validate.
+			nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
+			return false;
+		}
+
+		return isValidDevice(activeDevice); // Return true if device is valid.
+	}
+	else if (result != cudaSuccess)
+	{
+		nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
+		*device_ptr = -1;
+		return false;
+	}
+
+	*device_ptr = device;
+	return true;
+#else
+	// Keep the out-parameter defined in builds without CUDA support as well.
+	if (device_ptr != NULL)
+	{
+		*device_ptr = -1;
+	}
+	return false;
+#endif
+}
+
+// Shut down CUDA for the calling thread by calling cudaThreadExit(). A failure
+// is only reported through nvDebug; nothing is returned to the caller.
+// Does nothing when built without HAVE_CUDA.
+void nv::cuda::exitDevice()
+{
+#if defined HAVE_CUDA
+	cudaError_t result = cudaThreadExit();
+
+	if (result != cudaSuccess) {
+		nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
+	}
+#endif
+}
--- a/src/nvtt/cuda/CudaUtils.h
+++ b/src/nvtt/cuda/CudaUtils.h
@ -32,8 +32,10 @@ namespace nv
 		bool isHardwarePresent();
 		int deviceCount();
 		int getFastestDevice();
-		bool setDevice(int i);
-		void exit();
+		bool isValidDevice(int i);
+
+		bool initDevice(int * device_ptr);
+		void exitDevice();
 	};
 	
 } // nv namespace
--- a/src/nvtt/experimental/nvtt_experimental.cpp
+++ b/src/nvtt/experimental/nvtt_experimental.cpp
@ -1,60 +0,0 @@
-
-#include "nvtt_experimental.h"
-
-struct NvttTexture
-{
-	NvttTexture() :
-		m_constant(false),
-		m_image(NULL),
-		m_floatImage(NULL)
-	{
-	}
-	
-	~NvttTexture()
-	{
-		if (m_constant && m_image) m_image->unwrap();
-		delete m_image;
-		delete m_floatImage;
-	}
-	
-	bool m_constant;
-	Image * m_image;
-	FloatImage * m_floatImage;
-};
-
-NvttTexture * nvttCreateTexture() 
-{
-	return new NvttTexture();
-}
-	
-void nvttDestroyTexture(NvttTexture * tex)
-{
-	delete tex;
-}
-
-void nvttSetImageData(NvttImage * img, NvttInputFormat format, uint w, uint h, void * data)
-{
-	nvCheck(img != NULL);
-	
-	if (format == NVTT_InputFormat_BGRA_8UB)
-	{
-		img->m_constant = false;
-		img->m_image->allocate(w, h);
-		memcpy(img->m_image->pixels(), data, w * h * 4);
-	}
-	else
-	{
-		nvCheck(false);
-	}
-}
-
-void nvttCompressImage(NvttImage * img, NvttFormat format)
-{
-	nvCheck(img != NULL);
-
-	// @@ Invoke appropriate compressor.
-}
-
-
-
-#endif // NVTT_EXPERIMENTAL_H
--- a/src/nvtt/experimental/nvtt_experimental.h
+++ b/src/nvtt/experimental/nvtt_experimental.h
@ -1,103 +0,0 @@
-
-#ifndef NVTT_EXPERIMENTAL_H
-#define NVTT_EXPERIMENTAL_H
-
-#include <nvtt/nvtt.h>
-
-typedef struct NvttTexture NvttTexture;
-typedef struct NvttOutputOptions NvttOutputOptions;
-
-
-// Global functions
-void nvttInitialize(...);
-unsigned int nvttGetVersion();
-const char * nvttGetErrorString(unsigned int error);
-
-
-// Texture functions
-NvttTexture * nvttCreateTexture();
-void nvttDestroyTexture(NvttTexture * tex);
-
-void nvttSetTexture2D(NvttTexture * tex, NvttInputFormat format, uint w, uint h, uint idx, void * data);
-
-void nvttResize(NvttTexture * img, uint w, uint h);
-unsigned int nvttDownsample(NvttTexture * img);
-
-void nvttOutputCompressed(NvttTexture * img, NvttOutputFormat format);
-void nvttOutputPixelFormat(NvttTexture * img, NvttOutputFormat format);
-
-
-
-
-// How to control the compression parameters?
-
-// Using many arguments:
-// void nvttCompressImage(img, format, quality, r, g, b, a, ...);
-
-// Using existing compression option class:
-// compressionOptions = nvttCreateCompressionOptions();
-// nvttSetCompressionOptionsFormat(compressionOptions, format);
-// nvttSetCompressionOptionsQuality(compressionOptions, quality);
-// nvttSetCompressionOptionsQuality(compressionOptions, quality);
-// nvttSetCompressionOptionsColorWeights(compressionOptions, r, g, b, a);
-// ...
-// nvttCompressImage(img, compressionOptions);
-
-// Using thread local context state:
-// void nvttSetCompressionFormat(format);
-// void nvttSetCompressionQuality(quality);
-// void nvttSetCompressionColorWeights(r, g, b, a);
-// ...
-// nvttCompressImage(img);
-
-// Using thread local context state, but with GL style function arguments:
-// nvttCompressorParameteri(NVTT_FORMAT, format);
-// nvttCompressorParameteri(NVTT_QUALITY, quality);
-// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_RED, r);
-// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_GREEN, g);
-// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_BLUE, b);
-// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_ALPHA, a);
-// or nvttCompressorParameter4f(NVTT_COLOR_WEIGHTS, r, g, b, a);
-// ...
-// nvttCompressImage(img);
-
-// How do we get the compressed output?
-// - Using callbacks. (via new entrypoints, or through outputOptions)
-// - Return it explicitely from nvttCompressImage.
-// - Store it along the image, retrieve later explicitely with 'nvttGetCompressedData(img, ...)'
-
-/*
-
-// Global functions
-void nvttInitialize(...);
-unsigned int nvttGetVersion();
-const char * nvttGetErrorString(unsigned int error);
-
-// Context object
-void nvttCreateContext();
-void nvttDestroyContext();
-
-void nvttSetParameter1i(unsigned int name, int value);
-
-void nvttSetParameter1f(unsigned int name, float value);
-void nvttSetParameter2f(unsigned int name, float v0, float v1);
-void nvttSetParameter3f(unsigned int name, float v0, float v1, float v2);
-void nvttSetParameter4f(unsigned int name, float v0, float v1, float v2, float v3);
-
-// Image object
-NvttImage * nvttCreateImage();
-void nvttDestroyImage(NvttImage * img);
-
-void nvttSetImageData(NvttImage * image, NvttInputFormat format, unsigned int w, unsigned int h, void * data);
-
-void nvttSetImageParameter1i(NvttImage * image, unsigned int name, int value);
-void nvttSetImageParameter1f(NvttImage * image, unsigned int name, float value);
-
-void nvttResizeImage(NvttImage * image, unsigned int w, unsigned int h);
-void nvttQuantizeImage(NvttImage * image, bool dither, unsigned int rbits, unsigned int gbits, unsigned int bbits, unsigned int abits);
-void nvttCompressImage(NvttImage * image, void * buffer, int size);
-
-*/
-
-
-#endif // NVTT_EXPERIMENTAL_H
--- a/src/nvtt/experimental/test.cpp
+++ b/src/nvtt/experimental/test.cpp
@ -1,61 +0,0 @@
-
-#include "nvtt_experimental.h"
-
-/*
-Errors in the original API:
- Too many memory copies.
- Implementation too complicated.
- Error output should not be in output options.
- Data driven interface. Follows the dialog model. Provide all the data upfront.
-*/
-
-
-// Output texture with mipmaps
-void example0()
-{
-	CompressionOptions compressionOptions;
-	OutputOptions outputOptions;
-	
-	Texture img;
-	img.setTexture2D(format, w, h, 0, data);
-
-	Compressor context;
-	context.outputHeader(outputOptions);
-	context.outputCompressed(img, compressionOptions, outputOptions);
-
-	img.toLinear(2.2);	
-	while (img.downsample(NVTT_FILTER_BOX))
-	{
-		img.toGamma(2.2);	
-		outputCompressed(img, compressionOptions, outputOptions);		
-	}
-}
-
-
-// Output texture with colored mipmaps
-void example1()
-{
-	CompressionOptions compressionOptions;
-	OutputOptions outputOptions;
-	
-	Texture img;
-	img.setTexture2D(format, w, h, 0, data);
-
-	Compressor context;
-	context.outputHeader(outputOptions);
-	context.outputCompressed(img, compressionOptions, outputOptions);
-
-	img.toLinear(2.2);	
-	while (img.downsample(NVTT_FILTER_BOX))
-	{
-		img.toGamma(2.2);
-		
-		Texture mipmap = img;
-		mipmap.blend(color[i].r, color[i].g, color[i].b, 0.5f);
-		
-		context.outputCompressed(mipmap, compressionOptions, outputOptions);		
-	}
-}
-
-
-
--- a/src/nvtt/nvtt.cpp
+++ b/src/nvtt/nvtt.cpp
@ -42,8 +42,6 @@ const char * nvtt::errorString(Error e)
 			return "Error opening file";
 		case Error_FileWrite:
 			return "Error writing through output handler";
-        case Error_UnsupportedOutputFormat:
-            return "The container file does not support the selected output format";
 	}
 	
 	return "Invalid error";
--- a/src/nvtt/nvtt.h
+++ b/src/nvtt/nvtt.h
@ -21,7 +21,6 @@
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 // OTHER DEALINGS IN THE SOFTWARE.

-#pragma once
 #ifndef NV_TT_H
 #define NV_TT_H

@ -48,15 +47,12 @@
 #	define NVTT_API
 #endif

-#define NVTT_VERSION 201
+#define NVTT_VERSION 200

-#define NVTT_FORBID_COPY(Class) \
+#define NVTT_DECLARE_PIMPL(Class) \
 	private: \
 		Class(const Class &); \
 		void operator=(const Class &); \
-	public:
-
-#define NVTT_DECLARE_PIMPL(Class) \
 	public: \
 		struct Private; \
 		Private & m
@ -65,9 +61,6 @@
 // Public interface.
 namespace nvtt
 {
-	// Forward declarations.
-	struct TexImage;
-	
 	/// Supported compression formats.
 	enum Format
 	{
@ -90,25 +83,6 @@ namespace nvtt
 		Format_BC3n = Format_DXT5n,
 		Format_BC4,     // ATI1
 		Format_BC5,     // 3DC, ATI2
-
-		Format_DXT1n,// Not supported on CPU yet.
-		Format_CTX1, // Not supported on CPU yet.
-		Format_YCoCg_DXT5, // Not supported yet.
-
-		Format_BC6, // Not supported yet.
-		Format_BC7, // Not supported yet.
-
-        Format_RGBE,
-	};
-
-	/// Pixel types.
-	enum PixelType
-	{
-		PixelType_UnsignedNorm,
-		PixelType_SignedNorm, // Not supported yet.
-		PixelType_UnsignedInt, // Not supported yet.
-		PixelType_SignedInt, // Not supported yet.
-		PixelType_Float,
 	};
 	
 	/// Quality modes.
@ -123,7 +97,6 @@ namespace nvtt
 	/// Compression options. This class describes the desired compression format and other compression settings.
 	struct CompressionOptions
 	{
-		NVTT_FORBID_COPY(CompressionOptions);
 		NVTT_DECLARE_PIMPL(CompressionOptions);

 		NVTT_API CompressionOptions();
@ -139,23 +112,10 @@ namespace nvtt

 		// Set color mask to describe the RGB/RGBA format.
 		NVTT_API void setPixelFormat(unsigned int bitcount, unsigned int rmask, unsigned int gmask, unsigned int bmask, unsigned int amask);
-		NVTT_API void setPixelFormat(unsigned char rsize, unsigned char gsize, unsigned char bsize, unsigned char asize);
-		
-		NVTT_API void setPixelType(PixelType pixelType);

 		NVTT_API void setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold = 127);
 	};

-	/* 
-	// DXGI_FORMAT_R16G16_FLOAT
-	compressionOptions.setPixelType(PixelType_Float);
-	compressionOptions.setPixelFormat2(16, 16, 0, 0);
-	
-	// DXGI_FORMAT_R32G32B32A32_FLOAT
-	compressionOptions.setPixelType(PixelType_Float);
-	compressionOptions.setPixelFormat2(32, 32, 32, 32);
-	*/
-	

 	/// Wrap modes.
 	enum WrapMode
@ -177,7 +137,8 @@ namespace nvtt
 	enum InputFormat
 	{
 		InputFormat_BGRA_8UB,
-		InputFormat_RGBA_32F,
+	//	InputFormat_RGBE_8UB,
+	//	InputFormat_BGRA_32F,
 	};
 	
 	/// Mipmap downsampling filters.
@ -188,23 +149,11 @@ namespace nvtt
 		MipmapFilter_Kaiser,    ///< Kaiser-windowed Sinc filter is the best downsampling filter.
 	};
 	
-	/// Texture resize filters.
-	enum ResizeFilter
-	{
-		ResizeFilter_Box,
-		ResizeFilter_Triangle,
-		ResizeFilter_Kaiser,
-		ResizeFilter_Mitchell,
-	};
-	
 	/// Color transformation.
 	enum ColorTransform
 	{
 		ColorTransform_None,
-		ColorTransform_Linear,      ///< Not implemented.
-		ColorTransform_Swizzle,     ///< Not implemented.
-		ColorTransform_YCoCg,       ///< Transform into r=Co, g=Cg, b=0, a=Y
-		ColorTransform_ScaledYCoCg, ///< Not implemented.
+		ColorTransform_Linear,
 	};
 	
 	/// Extents rounding mode.
@ -227,7 +176,6 @@ namespace nvtt
 	/// Input options. Specify format and layout of the input texture.
 	struct InputOptions
 	{
-		NVTT_FORBID_COPY(InputOptions);
 		NVTT_DECLARE_PIMPL(InputOptions);

 		NVTT_API InputOptions();
@ -242,18 +190,17 @@ namespace nvtt
 		
 		// Set mipmap data. Copies the data.
 		NVTT_API bool setMipmapData(const void * data, int w, int h, int d = 1, int face = 0, int mipmap = 0);
-		NVTT_API bool setMipmapChannelData(const void * data, int channel, int w, int h, int d = 1, int face = 0, int mipmap = 0);
 		
 		// Describe the format of the input.
 		NVTT_API void setFormat(InputFormat format);
 		
-		// Set the way the input alpha channel is interpreted. @@ Not implemented!
+		// Set the way the input alpha channel is interpreted.
 		NVTT_API void setAlphaMode(AlphaMode alphaMode);
 		
 		// Set gamma settings.
 		NVTT_API void setGamma(float inputGamma, float outputGamma);
 		
-		// Set texture wrapping mode.
+		// Set texture wrapping mode.
 		NVTT_API void setWrapMode(WrapMode mode);
 		
 		// Set mipmapping options.
@ -268,18 +215,13 @@ namespace nvtt
 		NVTT_API void setNormalFilter(float sm, float medium, float big, float large);
 		NVTT_API void setNormalizeMipmaps(bool b);
 		
-		// Set color transforms.
+		// Set color transforms. @@ Not implemented!
 		NVTT_API void setColorTransform(ColorTransform t);
 		NVTT_API void setLinearTransform(int channel, float w0, float w1, float w2, float w3);
-		NVTT_API void setLinearTransform(int channel, float w0, float w1, float w2, float w3, float offset);
-		NVTT_API void setSwizzleTransform(int x, int y, int z, int w);
 		
 		// Set resizing options.
 		NVTT_API void setMaxExtents(int d);
 		NVTT_API void setRoundMode(RoundMode mode);
-
-		// Set whether or not to premultiply color by alpha
-		NVTT_API void setPremultiplyAlpha(bool b);
 	};
 	
 	
@ -304,7 +246,6 @@ namespace nvtt
 		Error_CudaError,
  		Error_FileOpen,
  		Error_FileWrite,
-		Error_UnsupportedOutputFormat,
 	};
 	
 	/// Error handler.
@ -316,19 +257,11 @@ namespace nvtt
 		virtual void error(Error e) = 0;
 	};

-	/// Container.
-	enum Container
-	{
-		Container_DDS,
-		Container_DDS10,
-	};
-	

 	/// Output Options. This class holds pointers to the interfaces that are used to report the output of 
 	/// the compressor to the user.
 	struct OutputOptions
 	{
-		NVTT_FORBID_COPY(OutputOptions);
 		NVTT_DECLARE_PIMPL(OutputOptions);

 		NVTT_API OutputOptions();
@ -342,132 +275,34 @@ namespace nvtt
 		NVTT_API void setOutputHandler(OutputHandler * outputHandler);
 		NVTT_API void setErrorHandler(ErrorHandler * errorHandler);
 		NVTT_API void setOutputHeader(bool outputHeader);
-		NVTT_API void setContainer(Container container);
 	};


-	/// Context.
+	/// Texture compressor.
 	struct Compressor
 	{
-		NVTT_FORBID_COPY(Compressor);
 		NVTT_DECLARE_PIMPL(Compressor);

 		NVTT_API Compressor();
 		NVTT_API ~Compressor();

-		// Context settings.
 		NVTT_API void enableCudaAcceleration(bool enable);
 		NVTT_API bool isCudaAccelerationEnabled() const;

-		// InputOptions api.
+		// Main entrypoint of the compression library.
 		NVTT_API bool process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const;
+		
+		// Estimate the size of compressing the input with the given options.
 		NVTT_API int estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const;
-
-		// RAW api.
-		NVTT_API bool compress2D(InputFormat format, int w, int h, void * data, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const;
-		//ßNVTT_API bool compress3D(InputFormat format, int w, int h, int d, void * data, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const;
-		NVTT_API int estimateSize(int w, int h, int d, const CompressionOptions & compressionOptions) const;
-
-		// TexImage api.
-		NVTT_API TexImage createTexImage() const;
-		NVTT_API bool outputHeader(const TexImage & tex, int mipmapCount, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const;
-		NVTT_API bool compress(const TexImage & tex, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const;
-		NVTT_API int estimateSize(const TexImage & tex, const CompressionOptions & compressionOptions) const;
 	};
-
-	// "Compressor" is deprecated. This should have been called "Context"
-	typedef Compressor Context;
-
 	
-	/// DXT decoder.
-	enum Decoder
-	{
-		Decoder_Reference,
-		Decoder_NV5x,
-	};
-
-	/// A texture mipmap.
-	struct TexImage
-	{
-		NVTT_API TexImage();
-		NVTT_API TexImage(const TexImage & tex);
-		NVTT_API ~TexImage();
-
-		NVTT_API void operator=(const TexImage & tex);
-
-		// Texture parameters.
-		NVTT_API void setTextureType(TextureType type);
-		NVTT_API void setWrapMode(WrapMode mode);
-		NVTT_API void setAlphaMode(AlphaMode alphaMode);
-		NVTT_API void setNormalMap(bool isNormalMap);
-
-		// Accessors.
-		NVTT_API int width() const;
-		NVTT_API int height() const;
-		NVTT_API int depth() const;
-		NVTT_API int faceCount() const;
-		NVTT_API TextureType textureType() const;
-		NVTT_API WrapMode wrapMode() const;
-		NVTT_API AlphaMode alphaMode() const;
-		NVTT_API bool isNormalMap() const;
-		NVTT_API int countMipmaps() const;
-
-		// Texture data.
-		NVTT_API bool load(const char * fileName);
-		NVTT_API bool save(const char * fileName) const;
-		NVTT_API bool setImage2D(InputFormat format, int w, int h, int idx, const void * data);
-		NVTT_API bool setImage2D(InputFormat format, int w, int h, int idx, const void * r, const void * g, const void * b, const void * a);
-		NVTT_API bool setImage2D(Format format, Decoder decoder, int w, int h, int idx, const void * data);
-
-		// Resizing methods.
-		NVTT_API void resize(int w, int h, ResizeFilter filter);
-		NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter);
-		NVTT_API bool buildNextMipmap(MipmapFilter filter);
-
-		// Color transforms.
-		NVTT_API void toLinear(float gamma);
-		NVTT_API void toGamma(float gamma);
-		NVTT_API void transform(const float w0[4], const float w1[4], const float w2[4], const float w3[4], const float offset[4]);
-		NVTT_API void swizzle(int r, int g, int b, int a);
-		NVTT_API void scaleBias(int channel, float scale, float bias);
-		NVTT_API void packNormal();
-		NVTT_API void expandNormal();
-		NVTT_API void blend(float r, float g, float b, float a, float t);
-		NVTT_API void premultiplyAlpha();
-		NVTT_API void toGreyScale(float redScale, float greenScale, float blueScale, float alphaScale);
-		NVTT_API void setBorder(float r, float g, float b, float a);
-		NVTT_API void fill(float r, float g, float b, float a);
-
-		// Set normal map options.
-		NVTT_API void toNormalMap(float sm, float medium, float big, float large);
-		NVTT_API void toHeightMap();
-		NVTT_API void normalizeNormalMap();
-
-		// Error compare.
-		NVTT_API float rootMeanSquaredError_rgb(const TexImage & reference) const;
-		NVTT_API float rootMeanSquaredError_alpha(const TexImage & reference) const;
-
-        // Geometric transforms.
-        NVTT_API void flipVertically();
-
-	private:
-		void detach();
-
-		struct Private;
-		Private * m;
-	};
-
-
+	
 	// Return string for the given error code.
 	NVTT_API const char * errorString(Error e);

 	// Return NVTT version.
 	NVTT_API unsigned int version();

-	// Set callbacks.
-	//NVTT_API void setErrorCallback(ErrorCallback callback);
-	//NVTT_API void setMemoryCallbacks(...);	
-	
 } // nvtt namespace

 #endif // NV_TT_H
--- a/src/nvtt/nvtt_wrapper.cpp
+++ b/src/nvtt/nvtt_wrapper.cpp
@ -185,16 +185,6 @@ void nvttSetOutputOptionsOutputHandler(NvttOutputOptions * outputOptions, nvttOu


 // Compressor class.
-NvttCompressor * nvttCreateCompressor()
-{
-	return new nvtt::Compressor();
-}
-
-void nvttDestroyCompressor(NvttCompressor * compressor)
-{
-	delete compressor;
-}
-
 NvttBoolean nvttCompress(const NvttCompressor * compressor, const NvttInputOptions * inputOptions, const NvttCompressionOptions * compressionOptions, const NvttOutputOptions * outputOptions)
 {
 	return (NvttBoolean)compressor->process(*inputOptions, *compressionOptions, *outputOptions);
--- a/src/nvtt/nvtt_wrapper.h
+++ b/src/nvtt/nvtt_wrapper.h
@ -47,7 +47,7 @@
 #	define NVTT_API
 #endif

-#define NVTT_VERSION 201
+#define NVTT_VERSION 200

 #ifdef __cplusplus
 typedef struct nvtt::InputOptions NvttInputOptions;
@ -156,7 +156,6 @@ typedef enum
 	NVTT_Error_Unknown,
 	NVTT_Error_FileOpen,
 	NVTT_Error_FileWrite,
-    NVTT_Error_UnsupportedOutputFormat,
 } NvttError;

 typedef enum
--- a/src/nvtt/squish/Makefile
+++ b/src/nvtt/squish/Makefile
@ -0,0 +1,31 @@
+
+# Standalone build of the squish static library.
+# The included 'config' file presumably supplies the settings the rules below
+# rely on (e.g. INSTALL_DIR and compiler flags) -- TODO confirm against config.
+include config
+
+SRC = alpha.cpp clusterfit.cpp colourblock.cpp colourfit.cpp colourset.cpp maths.cpp rangefit.cpp singlecolourfit.cpp squish.cpp
+
+# One object file per source file.
+OBJ = $(SRC:%.cpp=%.o)
+
+LIB = libsquish.a
+
+# These targets do not name files. Declaring them phony ensures that a stray
+# file called e.g. 'clean' or 'install' cannot make the rule look up to date.
+.PHONY : all install uninstall clean
+
+all : $(LIB)
+
+# Copy the public header and the archive below INSTALL_DIR.
+install : $(LIB)
+	install squish.h $(INSTALL_DIR)/include
+	install libsquish.a $(INSTALL_DIR)/lib
+
+uninstall:
+	$(RM) $(INSTALL_DIR)/include/squish.h
+	$(RM) $(INSTALL_DIR)/lib/libsquish.a
+
+# $? lists only the objects newer than the archive, so just those are replaced.
+$(LIB) : $(OBJ)
+	$(AR) cr $@ $?
+	ranlib $@
+
+%.o : %.cpp
+	$(CXX) $(CPPFLAGS) -I. $(CXXFLAGS) -o$@ -c $<
+
+clean :
+	$(RM) $(OBJ) $(LIB)
+
+
+
--- a/src/nvtt/squish/clusterfit.cpp
+++ b/src/nvtt/squish/clusterfit.cpp
@ -28,7 +28,7 @@
 #include "colourblock.h"
 #include <cfloat>

-namespace nvsquish {
+namespace squish {

 ClusterFit::ClusterFit()
 {
@ -109,7 +109,7 @@ void ClusterFit::SetMetric(float r, float g, float b)
 float ClusterFit::GetBestError() const
 {
 #if SQUISH_USE_SIMD
-	return m_besterror.GetX();
+	return m_besterror.GetVec3().X();
 #else
 	return m_besterror;
 #endif
@ -280,6 +280,15 @@ void ClusterFit::Compress4( void* block )
 					m_beta[k] = m_weights[k];
 				}

+				/*unsigned int permutation = 0;
+				for(int p = 0; p < 16; p++) {
+					permutation |= indices[p] << (p * 2);
+				}
+				if (debug) printf("%X:\t", permutation);
+
+				if (debug && permutation == 0x55FFFFAA) __debugbreak();
+				*/
+
 				// solve a least squares problem to place the endpoints
 #if SQUISH_USE_SIMD
 				Vec4 start, end;
--- a/src/nvtt/squish/clusterfit.h
+++ b/src/nvtt/squish/clusterfit.h
@ -23,15 +23,15 @@
 	
   -------------------------------------------------------------------------- */
   
-#ifndef NV_SQUISH_CLUSTERFIT_H
-#define NV_SQUISH_CLUSTERFIT_H
+#ifndef SQUISH_CLUSTERFIT_H
+#define SQUISH_CLUSTERFIT_H

 #include "squish.h"
 #include "maths.h"
 #include "simd.h"
 #include "colourfit.h"

-namespace nvsquish {
+namespace squish {

 class ClusterFit : public ColourFit
 {
--- a/src/nvtt/squish/colourblock.cpp
+++ b/src/nvtt/squish/colourblock.cpp
@ -25,7 +25,7 @@
   
 #include "colourblock.h"

-namespace nvsquish {
+namespace squish {

 static int FloatToInt( float a, int limit )
 {
--- a/src/nvtt/squish/colourblock.h
+++ b/src/nvtt/squish/colourblock.h
@ -23,13 +23,13 @@
 	
   -------------------------------------------------------------------------- */
   
-#ifndef NV_SQUISH_COLOURBLOCK_H
-#define NV_SQUISH_COLOURBLOCK_H
+#ifndef SQUISH_COLOURBLOCK_H
+#define SQUISH_COLOURBLOCK_H

 #include "squish.h"
 #include "maths.h"

-namespace nvsquish {
+namespace squish {

 void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
 void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
--- a/src/nvtt/squish/colourfit.cpp
+++ b/src/nvtt/squish/colourfit.cpp
@ -26,7 +26,7 @@
 #include "colourfit.h"
 #include "colourset.h"

-namespace nvsquish {
+namespace squish {

 ColourFit::ColourFit()
 {
--- a/src/nvtt/squish/colourfit.h
+++ b/src/nvtt/squish/colourfit.h
@ -23,13 +23,13 @@
 	
   -------------------------------------------------------------------------- */
   
-#ifndef NV_SQUISH_COLOURFIT_H
-#define NV_SQUISH_COLOURFIT_H
+#ifndef SQUISH_COLOURFIT_H
+#define SQUISH_COLOURFIT_H

 #include "squish.h"
 #include "maths.h"

-namespace nvsquish {
+namespace squish {

 class ColourSet;

--- a/src/nvtt/squish/colourset.cpp
+++ b/src/nvtt/squish/colourset.cpp
@ -25,7 +25,7 @@
   
 #include "colourset.h"

-namespace nvsquish {
+namespace squish {

 // @@ Add flags:
 // - MatchTransparent
--- a/src/nvtt/squish/colourset.h
+++ b/src/nvtt/squish/colourset.h
@ -23,21 +23,21 @@
 	
   -------------------------------------------------------------------------- */
   
-#ifndef NV_SQUISH_COLOURSET_H
-#define NV_SQUISH_COLOURSET_H
+#ifndef SQUISH_COLOURSET_H
+#define SQUISH_COLOURSET_H

 #include "squish.h"
 #include "maths.h"
 #include "simd.h"

-namespace nvsquish {
+namespace squish {

 /*! @brief Represents a set of block colours
 */
 class ColourSet
 {
 public:
-	ColourSet( u8 const* rgba, int flags, bool createMinimalSet = true );
+	ColourSet( u8 const* rgba, int flags, bool createMinimalSet = false );

 	int GetCount() const { return m_count; }
 	Vec3 const* GetPoints() const { return m_points; }
--- a/src/nvtt/squish/extra/squishgen2.cpp
+++ b/src/nvtt/squish/extra/squishgen2.cpp
@ -1,113 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-	Copyright (c) 2008 Ignacio Castano                      castano@gmail.com
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-
-#include <stdio.h>
-#include <float.h>
-#include <math.h>
-
-struct Precomp {
-	float alpha2_sum;
-	float beta2_sum;
-	float alphabeta_sum;
-	float factor;
-};
-
-
-int main()
-{
-	int i = 0;
-	
-	printf("struct Precomp {\n");
-	printf("\tfloat alpha2_sum;\n");
-	printf("\tfloat beta2_sum;\n");
-	printf("\tfloat alphabeta_sum;\n");
-	printf("\tfloat factor;\n");
-	printf("};\n\n");
-
-	printf("static const SQUISH_ALIGN_16 Precomp s_threeElement[153] = {\n");
-	
-	// Three element clusters:
-	for( int c0 = 0; c0 <= 16; c0++)	// At least two clusters.
-	{
-		for( int c1 = 0; c1 <=  16-c0; c1++)
-		{
-			int c2 = 16 - c0 - c1;
-
-			Precomp p;
-			p.alpha2_sum = c0 + c1 * 0.25f;
-			p.beta2_sum = c2 + c1 * 0.25f;
-			p.alphabeta_sum = c1 * 0.25f;
-			p.factor = 1.0f / (p.alpha2_sum * p.beta2_sum - p.alphabeta_sum * p.alphabeta_sum);
-
-			if (isfinite(p.factor))
-			{
-				printf("\t{ %ff, %ff, %ff, %ff }, // %d (%d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, p.factor, i, c0, c1, c2);
-			}
-			else
-			{
-				printf("\t{ %ff, %ff, %ff, FLT_MAX }, // %d (%d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, i, c0, c1, c2);
-			}
-			
-			i++;
-		}
-	}
-	printf("}; // %d three cluster elements\n\n", i);
-	
-	printf("static const SQUISH_ALIGN_16 Precomp s_fourElement[969] = {\n");
-
-	// Four element clusters:
-	i = 0;
-	for( int c0 = 0; c0 <= 16; c0++)
-	{
-		for( int c1 = 0; c1 <=  16-c0; c1++)
-		{
-			for( int c2 = 0; c2 <=  16-c0-c1; c2++)
-			{
-				int c3 = 16 - c0 - c1 - c2;
-
-				Precomp p;			
-				p.alpha2_sum = c0 + c1 * (4.0f/9.0f) + c2 * (1.0f/9.0f);
-				p.beta2_sum = c3 + c2 * (4.0f/9.0f) + c1 * (1.0f/9.0f);
-				p.alphabeta_sum = (c1 + c2) * (2.0f/9.0f);
-				p.factor = 1.0f / (p.alpha2_sum * p.beta2_sum - p.alphabeta_sum * p.alphabeta_sum);
-
-				if (isfinite(p.factor))
-				{
-					printf("\t{ %ff, %ff, %ff, %ff }, // %d (%d %d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, p.factor, i, c0, c1, c2, c3);
-				}
-				else
-				{
-					printf("\t{ %ff, %ff, %ff, FLT_MAX }, // %d (%d %d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, i, c0, c1, c2, c3);
-				}
-
-				i++;
-			}
-		}
-	}
-	printf("}; // %d four cluster elements\n\n", i);
-
-	return 0;
-}
--- a/src/nvtt/squish/fastclusterfit.cpp
+++ b/src/nvtt/squish/fastclusterfit.cpp
@ -31,7 +31,7 @@

 #include "fastclusterlookup.inl"

-namespace nvsquish {
+namespace squish {

 FastClusterFit::FastClusterFit()
 {
@ -129,8 +129,6 @@ void FastClusterFit::Compress3( void* block )
 	Vec4 const zero = VEC4_CONST(0.0f);
 	Vec4 const half = VEC4_CONST(0.5f);
 	Vec4 const two = VEC4_CONST(2.0);
-	Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
-	Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
 	 
 	// declare variables
 	Vec4 beststart = VEC4_CONST( 0.0f );
@ -162,22 +160,25 @@ void FastClusterFit::Compress3( void* block )
 			Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
 			Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
 			
-			// clamp to the grid
+			// clamp the output to [0, 1]
 			a = Min( one, Max( zero, a ) );
 			b = Min( one, Max( zero, b ) );
+			
+			// clamp to the grid
+			Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
+			Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
 			a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
 			b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
 			
-			// compute the error (we skip the constant xxsum)
-			Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
-			Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
-			Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
-			Vec4 e4 = MultiplyAdd( two, e3, e1 );
-
+			// compute the error
+			Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
+			Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
+			Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
+			
 			// apply the metric to the error term
-			Vec4 e5 = e4 * m_metricSqr;
-			Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
-
+			Vec4 e4 = e3 * m_metricSqr;
+			Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
+			
 			// keep the solution if it wins
 			if( CompareAnyLessThan( error, besterror ) )
 			{
@ -273,7 +274,7 @@ void FastClusterFit::Compress4( void* block )
 				Vec4 const factor = constants.SplatW();
 				i++;
 				
-				Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0));
+				Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird);
 				Vec4 const betax_sum = m_xsum - alphax_sum;
 				
 				Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
@ -285,19 +286,18 @@ void FastClusterFit::Compress4( void* block )
 				
 				// clamp to the grid
 				Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
-				Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
+				Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
 				a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
 				b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
 				
-				// compute the error (we skip the constant xxsum)
-				Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
-				Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
-				Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
-				Vec4 e4 = MultiplyAdd( two, e3, e1 );
-
+				// compute the error
+				Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
+				Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
+				Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
+				
 				// apply the metric to the error term
-				Vec4 e5 = e4 * m_metricSqr;
-				Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
+				Vec4 e4 = e3 * m_metricSqr;
+				Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
 				
 				// keep the solution if it wins
 				if( CompareAnyLessThan( error, besterror ) )
@ -370,12 +370,6 @@ void FastClusterFit::Compress4( void* block )

 void FastClusterFit::Compress3( void* block )
 {
-	Vec3 const one( 1.0f );
-	Vec3 const zero( 0.0f );
-	Vec3 const half( 0.5f );
-	Vec3 const grid( 31.0f, 63.0f, 31.0f );
-	Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
-
 	// declare variables
 	Vec3 beststart( 0.0f );
 	Vec3 bestend( 0.0f );
@ -405,9 +399,16 @@ void FastClusterFit::Compress3( void* block )
 			Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor;
 			Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor;
 			
-			// clamp to the grid
+			// clamp the output to [0, 1]
+			Vec3 const one( 1.0f );
+			Vec3 const zero( 0.0f );
 			a = Min( one, Max( zero, a ) );
 			b = Min( one, Max( zero, b ) );
+			
+			// clamp to the grid
+			Vec3 const grid( 31.0f, 63.0f, 31.0f );
+			Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
+			Vec3 const half( 0.5f );
 			a = Floor( grid*a + half )*gridrcp;
 			b = Floor( grid*b + half )*gridrcp;
 			
@ -476,12 +477,6 @@ void FastClusterFit::Compress3( void* block )

 void FastClusterFit::Compress4( void* block )
 {
-	Vec3 const one( 1.0f );
-	Vec3 const zero( 0.0f );
-	Vec3 const half( 0.5f );
-	Vec3 const grid( 31.0f, 63.0f, 31.0f );
-	Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
-
 	// declare variables
 	Vec3 beststart( 0.0f );
 	Vec3 bestend( 0.0f );
@ -516,9 +511,16 @@ void FastClusterFit::Compress4( void* block )
 				Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
 				Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
 				
-				// clamp to the grid
+				// clamp the output to [0, 1]
+				Vec3 const one( 1.0f );
+				Vec3 const zero( 0.0f );
 				a = Min( one, Max( zero, a ) );
 				b = Min( one, Max( zero, b ) );
+				
+				// clamp to the grid
+				Vec3 const grid( 31.0f, 63.0f, 31.0f );
+				Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
+				Vec3 const half( 0.5f );
 				a = Floor( grid*a + half )*gridrcp;
 				b = Floor( grid*b + half )*gridrcp;
 				
--- a/src/nvtt/squish/fastclusterfit.h
+++ b/src/nvtt/squish/fastclusterfit.h
@ -24,15 +24,15 @@
 	
   -------------------------------------------------------------------------- */
   
-#ifndef NV_SQUISH_FASTCLUSTERFIT_H
-#define NV_SQUISH_FASTCLUSTERFIT_H
+#ifndef SQUISH_FASTCLUSTERFIT_H
+#define SQUISH_FASTCLUSTERFIT_H

 #include "squish.h"
 #include "maths.h"
 #include "simd.h"
 #include "colourfit.h"

-namespace nvsquish {
+namespace squish {

 class FastClusterFit : public ColourFit
 {
--- a/src/nvtt/squish/maths.cpp
+++ b/src/nvtt/squish/maths.cpp
@ -27,7 +27,7 @@
 #include "simd.h"
 #include <cfloat>

-namespace nvsquish {
+namespace squish {

 Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights, Vec3::Arg metric )
 {
@ -134,4 +134,4 @@ Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )

 #endif

-} // namespace nvsquish
+} // namespace squish
--- a/src/nvtt/squish/maths.h
+++ b/src/nvtt/squish/maths.h
@ -23,14 +23,14 @@
 	
   -------------------------------------------------------------------------- */
   
-#ifndef NV_SQUISH_MATHS_H
-#define NV_SQUISH_MATHS_H
+#ifndef SQUISH_MATHS_H
+#define SQUISH_MATHS_H

 #include <cmath>
 #include <algorithm>
 #include "config.h"

-namespace nvsquish {
+namespace squish {

 class Vec3
 {
--- a/src/nvtt/squish/simd.h
+++ b/src/nvtt/squish/simd.h
@ -23,8 +23,8 @@
 	
   -------------------------------------------------------------------------- */
   
-#ifndef NV_SQUISH_SIMD_H
-#define NV_SQUISH_SIMD_H
+#ifndef SQUISH_SIMD_H
+#define SQUISH_SIMD_H

 #include "maths.h"

--- a/src/nvtt/squish/simd_sse.h
+++ b/src/nvtt/squish/simd_sse.h
@ -23,8 +23,8 @@
 	
   -------------------------------------------------------------------------- */
   
-#ifndef NV_SQUISH_SIMD_SSE_H
-#define NV_SQUISH_SIMD_SSE_H
+#ifndef SQUISH_SIMD_SSE_H
+#define SQUISH_SIMD_SSE_H

 #include <xmmintrin.h>
 #if ( SQUISH_USE_SSE > 1 )
@ -35,7 +35,7 @@
 #define SQUISH_SSE_SPLAT( a )										\
 	( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) )

-namespace nvsquish {
+namespace squish {

 #define VEC4_CONST( X ) Vec4( _mm_set1_ps( X ) )

@ -72,13 +72,6 @@ public:
 		_mm_store_ps( c, m_v );
 		return Vec3( c[0], c[1], c[2] );
 	}
-
-    float GetX() const 
-    {
-        SQUISH_ALIGN_16 float f;
-        _mm_store_ss(&f, m_v);
-        return f;
-    }
 	
 	Vec4 SplatX() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 0 ) ) ); }
 	Vec4 SplatY() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 1 ) ) ); }
--- a/src/nvtt/squish/simd_ve.h
+++ b/src/nvtt/squish/simd_ve.h
@ -26,12 +26,10 @@
 #ifndef SQUISH_SIMD_VE_H
 #define SQUISH_SIMD_VE_H

-#ifndef __APPLE_ALTIVEC__
 #include <altivec.h>
 #undef bool
-#endif

-namespace nvsquish {
+namespace squish {

 #define VEC4_CONST( X ) Vec4( ( vector float )( X ) )

@ -78,14 +76,7 @@ public:
 		u.v = m_v;
 		return Vec3( u.c[0], u.c[1], u.c[2] );
 	}
-
-	float GetX() const
-	{
-		union { vector float v; float c[4]; } u;
-		u.v = m_v;
-		return u.c[0];
-	}
-
+	
 	Vec4 SplatX() const { return Vec4( vec_splat( m_v, 0 ) ); }
 	Vec4 SplatY() const { return Vec4( vec_splat( m_v, 1 ) ); }
 	Vec4 SplatZ() const { return Vec4( vec_splat( m_v, 2 ) ); }
--- a/src/nvtt/squish/squish.h
+++ b/src/nvtt/squish/squish.h
@ -23,11 +23,11 @@
 	
   -------------------------------------------------------------------------- */
   
-#ifndef NV_SQUISH_H
-#define NV_SQUISH_H
+#ifndef SQUISH_H
+#define SQUISH_H

 //! All squish API functions live in this namespace.
-namespace nvsquish {
+namespace squish {

 // -----------------------------------------------------------------------------

--- a/src/nvtt/squish/squish.xcodeproj/project.pbxproj
+++ b/src/nvtt/squish/squish.xcodeproj/project.pbxproj
@ -0,0 +1,531 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 42;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		133FA0DC096A7B8E0050752E /* alpha.h in Headers */ = {isa = PBXBuildFile; fileRef = 133FA0DA096A7B8E0050752E /* alpha.h */; };
+		133FA0DD096A7B8E0050752E /* alpha.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 133FA0DB096A7B8E0050752E /* alpha.cpp */; };
+		1342B4160999DF1900152915 /* libsquish.a in Frameworks */ = {isa = PBXBuildFile; fileRef = D2AAC046055464E500DB518D /* libsquish.a */; };
+		1342B41A0999DF7000152915 /* squishpng.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1342B4190999DF7000152915 /* squishpng.cpp */; };
+		1342B43F0999E0CC00152915 /* squishtest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1342B43E0999E0CC00152915 /* squishtest.cpp */; };
+		1342B4420999E0EC00152915 /* libsquish.a in Frameworks */ = {isa = PBXBuildFile; fileRef = D2AAC046055464E500DB518D /* libsquish.a */; };
+		1350D71A092AA858005EE038 /* clusterfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D70B092AA857005EE038 /* clusterfit.cpp */; };
+		1350D71B092AA858005EE038 /* clusterfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D70C092AA858005EE038 /* clusterfit.h */; };
+		1350D71E092AA858005EE038 /* colourblock.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D70F092AA858005EE038 /* colourblock.cpp */; };
+		1350D71F092AA858005EE038 /* colourblock.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D710092AA858005EE038 /* colourblock.h */; };
+		1350D720092AA858005EE038 /* config.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D711092AA858005EE038 /* config.h */; };
+		1350D721092AA858005EE038 /* maths.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D712092AA858005EE038 /* maths.cpp */; };
+		1350D722092AA858005EE038 /* maths.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D713092AA858005EE038 /* maths.h */; };
+		1350D725092AA858005EE038 /* rangefit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D716092AA858005EE038 /* rangefit.cpp */; };
+		1350D726092AA858005EE038 /* rangefit.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D717092AA858005EE038 /* rangefit.h */; };
+		1350D727092AA858005EE038 /* squish.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D718092AA858005EE038 /* squish.cpp */; };
+		1350D728092AA858005EE038 /* squish.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D719092AA858005EE038 /* squish.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		139C21CF09ADAB0800A2500D /* squishgen.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 139C21CE09ADAB0800A2500D /* squishgen.cpp */; };
+		139C234F09B0602700A2500D /* singlecolourfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 139C234D09B0602700A2500D /* singlecolourfit.h */; };
+		139C235009B0602700A2500D /* singlecolourfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 139C234E09B0602700A2500D /* singlecolourfit.cpp */; };
+		13A7CCA40952BE63001C963A /* colourfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 13A7CCA20952BE63001C963A /* colourfit.h */; };
+		13A7CCA50952BE63001C963A /* colourfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 13A7CCA30952BE63001C963A /* colourfit.cpp */; };
+		13C4C7AD0941C18000AC5B89 /* colourset.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 13C4C7AB0941C18000AC5B89 /* colourset.cpp */; };
+		13C4C7AE0941C18000AC5B89 /* colourset.h in Headers */ = {isa = PBXBuildFile; fileRef = 13C4C7AC0941C18000AC5B89 /* colourset.h */; };
+		13CD64C2092BCF8A00488C97 /* simd.h in Headers */ = {isa = PBXBuildFile; fileRef = 13CD64C0092BCF8A00488C97 /* simd.h */; };
+		13D0DC910931F93A00909807 /* simd_ve.h in Headers */ = {isa = PBXBuildFile; fileRef = 13D0DC900931F93A00909807 /* simd_ve.h */; };
+		13D0DC970931F9D600909807 /* simd_sse.h in Headers */ = {isa = PBXBuildFile; fileRef = 13D0DC960931F9D600909807 /* simd_sse.h */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXContainerItemProxy section */
+		1342B52B099BF72F00152915 /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = D2AAC045055464E500DB518D;
+			remoteInfo = squish;
+		};
+		1342B58E099BF93D00152915 /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = D2AAC045055464E500DB518D;
+			remoteInfo = squish;
+		};
+/* End PBXContainerItemProxy section */
+
+/* Begin PBXFileReference section */
+		133FA0DA096A7B8E0050752E /* alpha.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alpha.h; sourceTree = "<group>"; };
+		133FA0DB096A7B8E0050752E /* alpha.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alpha.cpp; sourceTree = "<group>"; };
+		1342B4110999DE7F00152915 /* squishpng */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squishpng; sourceTree = BUILT_PRODUCTS_DIR; };
+		1342B4190999DF7000152915 /* squishpng.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = squishpng.cpp; path = extra/squishpng.cpp; sourceTree = "<group>"; };
+		1342B4370999E07C00152915 /* squishtest */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squishtest; sourceTree = BUILT_PRODUCTS_DIR; };
+		1342B43E0999E0CC00152915 /* squishtest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = squishtest.cpp; path = extra/squishtest.cpp; sourceTree = "<group>"; };
+		1350D70B092AA857005EE038 /* clusterfit.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = clusterfit.cpp; sourceTree = "<group>"; };
+		1350D70C092AA858005EE038 /* clusterfit.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = clusterfit.h; sourceTree = "<group>"; };
+		1350D70F092AA858005EE038 /* colourblock.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = colourblock.cpp; sourceTree = "<group>"; };
+		1350D710092AA858005EE038 /* colourblock.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = colourblock.h; sourceTree = "<group>"; };
+		1350D711092AA858005EE038 /* config.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = config.h; sourceTree = "<group>"; };
+		1350D712092AA858005EE038 /* maths.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = maths.cpp; sourceTree = "<group>"; };
+		1350D713092AA858005EE038 /* maths.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = maths.h; sourceTree = "<group>"; };
+		1350D716092AA858005EE038 /* rangefit.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = rangefit.cpp; sourceTree = "<group>"; };
+		1350D717092AA858005EE038 /* rangefit.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = rangefit.h; sourceTree = "<group>"; };
+		1350D718092AA858005EE038 /* squish.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = squish.cpp; sourceTree = "<group>"; };
+		1350D719092AA858005EE038 /* squish.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = squish.h; sourceTree = "<group>"; };
+		13906CE3096938880000A6A7 /* texture_compression_s3tc.txt */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = texture_compression_s3tc.txt; sourceTree = "<group>"; };
+		139C21C409ADAA7000A2500D /* squishgen */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squishgen; sourceTree = BUILT_PRODUCTS_DIR; };
+		139C21CE09ADAB0800A2500D /* squishgen.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = squishgen.cpp; path = extra/squishgen.cpp; sourceTree = "<group>"; };
+		139C234D09B0602700A2500D /* singlecolourfit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = singlecolourfit.h; sourceTree = "<group>"; };
+		139C234E09B0602700A2500D /* singlecolourfit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = singlecolourfit.cpp; sourceTree = "<group>"; };
+		139C236D09B060A900A2500D /* singlecolourlookup.inl */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = singlecolourlookup.inl; sourceTree = "<group>"; };
+		13A7CCA20952BE63001C963A /* colourfit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colourfit.h; sourceTree = "<group>"; };
+		13A7CCA30952BE63001C963A /* colourfit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colourfit.cpp; sourceTree = "<group>"; };
+		13C4C7AB0941C18000AC5B89 /* colourset.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = colourset.cpp; sourceTree = "<group>"; };
+		13C4C7AC0941C18000AC5B89 /* colourset.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = colourset.h; sourceTree = "<group>"; };
+		13CD64C0092BCF8A00488C97 /* simd.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simd.h; sourceTree = "<group>"; };
+		13D0DC900931F93A00909807 /* simd_ve.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simd_ve.h; sourceTree = "<group>"; };
+		13D0DC960931F9D600909807 /* simd_sse.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simd_sse.h; sourceTree = "<group>"; };
+		D2AAC046055464E500DB518D /* libsquish.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libsquish.a; sourceTree = BUILT_PRODUCTS_DIR; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		1342B40F0999DE7F00152915 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				1342B4160999DF1900152915 /* libsquish.a in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		1342B4350999E07C00152915 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				1342B4420999E0EC00152915 /* libsquish.a in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		139C21C209ADAA7000A2500D /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		D289987405E68DCB004EDB86 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		08FB7794FE84155DC02AAC07 /* squish */ = {
+			isa = PBXGroup;
+			children = (
+				08FB7795FE84155DC02AAC07 /* Source */,
+				C6A0FF2B0290797F04C91782 /* Documentation */,
+				1AB674ADFE9D54B511CA2CBB /* Products */,
+			);
+			name = squish;
+			sourceTree = "<group>";
+		};
+		08FB7795FE84155DC02AAC07 /* Source */ = {
+			isa = PBXGroup;
+			children = (
+				133FA0DB096A7B8E0050752E /* alpha.cpp */,
+				133FA0DA096A7B8E0050752E /* alpha.h */,
+				1350D70B092AA857005EE038 /* clusterfit.cpp */,
+				1350D70C092AA858005EE038 /* clusterfit.h */,
+				13A7CCA30952BE63001C963A /* colourfit.cpp */,
+				13A7CCA20952BE63001C963A /* colourfit.h */,
+				13C4C7AB0941C18000AC5B89 /* colourset.cpp */,
+				13C4C7AC0941C18000AC5B89 /* colourset.h */,
+				1350D70F092AA858005EE038 /* colourblock.cpp */,
+				1350D710092AA858005EE038 /* colourblock.h */,
+				13906CE3096938880000A6A7 /* texture_compression_s3tc.txt */,
+				1350D711092AA858005EE038 /* config.h */,
+				1350D712092AA858005EE038 /* maths.cpp */,
+				1350D713092AA858005EE038 /* maths.h */,
+				1350D716092AA858005EE038 /* rangefit.cpp */,
+				1350D717092AA858005EE038 /* rangefit.h */,
+				13CD64C0092BCF8A00488C97 /* simd.h */,
+				13D0DC960931F9D600909807 /* simd_sse.h */,
+				13D0DC900931F93A00909807 /* simd_ve.h */,
+				139C234E09B0602700A2500D /* singlecolourfit.cpp */,
+				139C234D09B0602700A2500D /* singlecolourfit.h */,
+				139C236D09B060A900A2500D /* singlecolourlookup.inl */,
+				1350D718092AA858005EE038 /* squish.cpp */,
+				1350D719092AA858005EE038 /* squish.h */,
+				139C21CE09ADAB0800A2500D /* squishgen.cpp */,
+				1342B4190999DF7000152915 /* squishpng.cpp */,
+				1342B43E0999E0CC00152915 /* squishtest.cpp */,
+			);
+			name = Source;
+			sourceTree = "<group>";
+		};
+		1AB674ADFE9D54B511CA2CBB /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				D2AAC046055464E500DB518D /* libsquish.a */,
+				1342B4110999DE7F00152915 /* squishpng */,
+				1342B4370999E07C00152915 /* squishtest */,
+				139C21C409ADAA7000A2500D /* squishgen */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		C6A0FF2B0290797F04C91782 /* Documentation */ = {
+			isa = PBXGroup;
+			children = (
+			);
+			name = Documentation;
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+		D2AAC043055464E500DB518D /* Headers */ = {
+			isa = PBXHeadersBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				1350D71B092AA858005EE038 /* clusterfit.h in Headers */,
+				1350D71F092AA858005EE038 /* colourblock.h in Headers */,
+				1350D720092AA858005EE038 /* config.h in Headers */,
+				1350D722092AA858005EE038 /* maths.h in Headers */,
+				1350D726092AA858005EE038 /* rangefit.h in Headers */,
+				1350D728092AA858005EE038 /* squish.h in Headers */,
+				13CD64C2092BCF8A00488C97 /* simd.h in Headers */,
+				13D0DC910931F93A00909807 /* simd_ve.h in Headers */,
+				13D0DC970931F9D600909807 /* simd_sse.h in Headers */,
+				13C4C7AE0941C18000AC5B89 /* colourset.h in Headers */,
+				13A7CCA40952BE63001C963A /* colourfit.h in Headers */,
+				133FA0DC096A7B8E0050752E /* alpha.h in Headers */,
+				139C234F09B0602700A2500D /* singlecolourfit.h in Headers */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+		1342B4100999DE7F00152915 /* squishpng */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 1342B4130999DE9F00152915 /* Build configuration list for PBXNativeTarget "squishpng" */;
+			buildPhases = (
+				1342B40E0999DE7F00152915 /* Sources */,
+				1342B40F0999DE7F00152915 /* Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+				1342B58F099BF93D00152915 /* PBXTargetDependency */,
+			);
+			name = squishpng;
+			productName = squishpng;
+			productReference = 1342B4110999DE7F00152915 /* squishpng */;
+			productType = "com.apple.product-type.tool";
+		};
+		1342B4360999E07C00152915 /* squishtest */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 1342B43B0999E0C000152915 /* Build configuration list for PBXNativeTarget "squishtest" */;
+			buildPhases = (
+				1342B4340999E07C00152915 /* Sources */,
+				1342B4350999E07C00152915 /* Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+				1342B52C099BF72F00152915 /* PBXTargetDependency */,
+			);
+			name = squishtest;
+			productName = squishtest;
+			productReference = 1342B4370999E07C00152915 /* squishtest */;
+			productType = "com.apple.product-type.tool";
+		};
+		139C21C309ADAA7000A2500D /* squishgen */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 139C21CB09ADAB0300A2500D /* Build configuration list for PBXNativeTarget "squishgen" */;
+			buildPhases = (
+				139C21C109ADAA7000A2500D /* Sources */,
+				139C21C209ADAA7000A2500D /* Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = squishgen;
+			productName = squishgen;
+			productReference = 139C21C409ADAA7000A2500D /* squishgen */;
+			productType = "com.apple.product-type.tool";
+		};
+		D2AAC045055464E500DB518D /* squish */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "squish" */;
+			buildPhases = (
+				D2AAC043055464E500DB518D /* Headers */,
+				D2AAC044055464E500DB518D /* Sources */,
+				D289987405E68DCB004EDB86 /* Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = squish;
+			productName = squish;
+			productReference = D2AAC046055464E500DB518D /* libsquish.a */;
+			productType = "com.apple.product-type.library.static";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		08FB7793FE84155DC02AAC07 /* Project object */ = {
+			isa = PBXProject;
+			buildConfigurationList = 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "squish" */;
+			hasScannedForEncodings = 1;
+			mainGroup = 08FB7794FE84155DC02AAC07 /* squish */;
+			projectDirPath = "";
+			targets = (
+				D2AAC045055464E500DB518D /* squish */,
+				1342B4100999DE7F00152915 /* squishpng */,
+				1342B4360999E07C00152915 /* squishtest */,
+				139C21C309ADAA7000A2500D /* squishgen */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXSourcesBuildPhase section */
+		1342B40E0999DE7F00152915 /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				1342B41A0999DF7000152915 /* squishpng.cpp in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		1342B4340999E07C00152915 /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				1342B43F0999E0CC00152915 /* squishtest.cpp in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		139C21C109ADAA7000A2500D /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				139C21CF09ADAB0800A2500D /* squishgen.cpp in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		D2AAC044055464E500DB518D /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				1350D71A092AA858005EE038 /* clusterfit.cpp in Sources */,
+				1350D71E092AA858005EE038 /* colourblock.cpp in Sources */,
+				1350D721092AA858005EE038 /* maths.cpp in Sources */,
+				1350D725092AA858005EE038 /* rangefit.cpp in Sources */,
+				1350D727092AA858005EE038 /* squish.cpp in Sources */,
+				13C4C7AD0941C18000AC5B89 /* colourset.cpp in Sources */,
+				13A7CCA50952BE63001C963A /* colourfit.cpp in Sources */,
+				133FA0DD096A7B8E0050752E /* alpha.cpp in Sources */,
+				139C235009B0602700A2500D /* singlecolourfit.cpp in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin PBXTargetDependency section */
+		1342B52C099BF72F00152915 /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = D2AAC045055464E500DB518D /* squish */;
+			targetProxy = 1342B52B099BF72F00152915 /* PBXContainerItemProxy */;
+		};
+		1342B58F099BF93D00152915 /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = D2AAC045055464E500DB518D /* squish */;
+			targetProxy = 1342B58E099BF93D00152915 /* PBXContainerItemProxy */;
+		};
+/* End PBXTargetDependency section */
+
+/* Begin XCBuildConfiguration section */
+		1342B4140999DE9F00152915 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				HEADER_SEARCH_PATHS = (
+					..,
+					/sw/include,
+				);
+				INSTALL_PATH = "$(HOME)/bin";
+				LIBRARY_SEARCH_PATHS = /sw/lib;
+				OTHER_LDFLAGS = "-lpng";
+				PRODUCT_NAME = squishpng;
+			};
+			name = Debug;
+		};
+		1342B4150999DE9F00152915 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				HEADER_SEARCH_PATHS = (
+					..,
+					/sw/include,
+				);
+				INSTALL_PATH = "$(HOME)/bin";
+				LIBRARY_SEARCH_PATHS = /sw/lib;
+				OTHER_LDFLAGS = "-lpng";
+				PRODUCT_NAME = squishpng;
+			};
+			name = Release;
+		};
+		1342B43C0999E0C000152915 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				HEADER_SEARCH_PATHS = ..;
+				INSTALL_PATH = "$(HOME)/bin";
+				PRODUCT_NAME = squishtest;
+			};
+			name = Debug;
+		};
+		1342B43D0999E0C000152915 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				HEADER_SEARCH_PATHS = ..;
+				INSTALL_PATH = "$(HOME)/bin";
+				PRODUCT_NAME = squishtest;
+			};
+			name = Release;
+		};
+		139C21CC09ADAB0300A2500D /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				HEADER_SEARCH_PATHS = ..;
+				INSTALL_PATH = "$(HOME)/bin";
+				PRODUCT_NAME = squishgen;
+			};
+			name = Debug;
+		};
+		139C21CD09ADAB0300A2500D /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				HEADER_SEARCH_PATHS = ..;
+				INSTALL_PATH = "$(HOME)/bin";
+				PRODUCT_NAME = squishgen;
+			};
+			name = Release;
+		};
+		1DEB91EC08733DB70010E9CD /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				COPY_PHASE_STRIP = NO;
+				GCC_PREPROCESSOR_DEFINITIONS = "SQUISH_USE_ALTIVEC=1";
+				INSTALL_PATH = /usr/local/lib;
+				OTHER_CFLAGS = "-maltivec";
+				PRODUCT_NAME = squish;
+				STRIP_INSTALLED_PRODUCT = NO;
+			};
+			name = Debug;
+		};
+		1DEB91ED08733DB70010E9CD /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_PREPROCESSOR_DEFINITIONS = "SQUISH_USE_ALTIVEC=1";
+				INSTALL_PATH = /usr/local/lib;
+				OTHER_CFLAGS = "-maltivec";
+				PRODUCT_NAME = squish;
+				STRIP_INSTALLED_PRODUCT = YES;
+			};
+			name = Release;
+		};
+		1DEB91F008733DB70010E9CD /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_DYNAMIC_NO_PIC = YES;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				GCC_TREAT_WARNINGS_AS_ERRORS = YES;
+				GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES;
+				GCC_WARN_PEDANTIC = YES;
+				GCC_WARN_SHADOW = YES;
+				GCC_WARN_SIGN_COMPARE = YES;
+				GCC_WARN_UNUSED_PARAMETER = YES;
+				GCC_WARN_UNUSED_VALUE = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				PREBINDING = NO;
+				SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+			};
+			name = Debug;
+		};
+		1DEB91F108733DB70010E9CD /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_DYNAMIC_NO_PIC = YES;
+				GCC_OPTIMIZATION_LEVEL = 3;
+				GCC_TREAT_WARNINGS_AS_ERRORS = YES;
+				GCC_UNROLL_LOOPS = YES;
+				GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES;
+				GCC_WARN_PEDANTIC = YES;
+				GCC_WARN_SHADOW = YES;
+				GCC_WARN_SIGN_COMPARE = YES;
+				GCC_WARN_UNUSED_PARAMETER = YES;
+				GCC_WARN_UNUSED_VALUE = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				PREBINDING = NO;
+				SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		1342B4130999DE9F00152915 /* Build configuration list for PBXNativeTarget "squishpng" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				1342B4140999DE9F00152915 /* Debug */,
+				1342B4150999DE9F00152915 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		1342B43B0999E0C000152915 /* Build configuration list for PBXNativeTarget "squishtest" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				1342B43C0999E0C000152915 /* Debug */,
+				1342B43D0999E0C000152915 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		139C21CB09ADAB0300A2500D /* Build configuration list for PBXNativeTarget "squishgen" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				139C21CC09ADAB0300A2500D /* Debug */,
+				139C21CD09ADAB0300A2500D /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "squish" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				1DEB91EC08733DB70010E9CD /* Debug */,
+				1DEB91ED08733DB70010E9CD /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "squish" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				1DEB91F008733DB70010E9CD /* Debug */,
+				1DEB91F108733DB70010E9CD /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
+}
--- a/src/nvtt/squish/texture_compression_s3tc.txt
+++ b/src/nvtt/squish/texture_compression_s3tc.txt
@ -0,0 +1,508 @@
+Name
+
+    EXT_texture_compression_s3tc
+
+Name Strings
+
+    GL_EXT_texture_compression_s3tc
+
+Contact
+
+    Pat Brown, NVIDIA Corporation (pbrown 'at' nvidia.com)
+
+Status
+
+    FINAL
+
+Version
+
+    1.1, 16 November 2001 (containing only clarifications relative to
+                           version 1.0, dated 7 July 2000)
+
+Number
+
+    198
+
+Dependencies
+
+    OpenGL 1.1 is required.
+
+    GL_ARB_texture_compression is required.
+
+    This extension is written against the OpenGL 1.2.1 Specification.
+
+Overview
+
+    This extension provides additional texture compression functionality
+    specific to S3's S3TC format (called DXTC in Microsoft's DirectX API),
+    subject to all the requirements and limitations described by the extension
+    GL_ARB_texture_compression.
+
+    This extension supports DXT1, DXT3, and DXT5 texture compression formats.
+    For the DXT1 image format, this specification supports an RGB-only mode
+    and a special RGBA mode with single-bit "transparent" alpha.
+
+IP Status
+
+    Contact S3 Incorporated (http://www.s3.com) regarding any intellectual
+    property issues associated with implementing this extension.
+
+    WARNING:  Vendors able to support S3TC texture compression in Direct3D
+    drivers do not necessarily have the right to use the same functionality in
+    OpenGL.
+
+Issues
+
+    (1) Should DXT2 and DXT4 (premultiplied alpha) formats be supported?
+
+        RESOLVED:  No -- insufficient interest.  Supporting DXT2 and DXT4
+        would require some rework to the TexEnv definition (maybe add a new
+        base internal format RGBA_PREMULTIPLIED_ALPHA) for these formats.
+        Note that the EXT_texture_env_combine extension (which extends normal
+        TexEnv modes) can be used to support textures with premultiplied alpha.
+
+    (2) Should generic "RGB_S3TC_EXT" and "RGBA_S3TC_EXT" enums be supported
+        or should we use only the DXT<n> enums?  
+
+        RESOLVED:  No.  A generic RGBA_S3TC_EXT is problematic because DXT3
+        and DXT5 are both nominally RGBA (and DXT1 with the 1-bit alpha is
+        also) yet one format must be chosen up front.
+
+    (3) Should TexSubImage support all block-aligned edits or just the minimal
+        functionality required by the ARB_texture_compression extension?
+
+        RESOLVED:  Allow all valid block-aligned edits.
+
+    (4) A pre-compressed image with a DXT1 format can be used as either an
+        RGB_S3TC_DXT1 or an RGBA_S3TC_DXT1 image.  If the image has
+        transparent texels, how are they treated in each format?
+
+        RESOLVED:  The renderer has to make sure that an RGB_S3TC_DXT1 format
+        is decoded as RGB (where alpha is effectively one for all texels),
+        while RGBA_S3TC_DXT1 is decoded as RGBA (where alpha is zero for all
+        texels with "transparent" encodings).  Otherwise, the formats are
+        identical.
+
+    (5) Is the encoding of the RGB components for DXT1 formats correct in this
+        spec?  MSDN documentation does not specify an RGB color for the
+        "transparent" encoding.  Is it really black?
+
+        RESOLVED:  Yes.  The specification for the DXT1 format initially
+        required black, but later changed that requirement to a
+        recommendation.  All vendors involved in the definition of this
+        specification support black.  In addition, specifying black has a
+        useful behavior.
+
+        When blending multiple texels (GL_LINEAR filtering), mixing opaque and
+        transparent samples is problematic.  Defining a black color on
+        transparent texels achieves a sensible result that works like a
+        texture with premultiplied alpha.  For example, if three opaque white
+        samples and one transparent sample are being averaged, the result would be a
+        75% intensity gray (with an alpha of 75%).  This is the same result on
+        the color channels as would be obtained using a white color, 75%
+        alpha, and a SRC_ALPHA blend factor.
+
+    (6) Is the encoding of the RGB components for DXT3 and DXT5 formats
+        correct in this spec?  MSDN documentation suggests that the RGB blocks
+        for DXT3 and DXT5 are decoded as described by the DXT1 format.
+
+        RESOLVED:  Yes -- this appears to be a bug in the MSDN documentation.
+        The specification for the DXT2-DXT5 formats requires decoding using the
+        opaque block encoding, regardless of the relative values of "color0"
+        and "color1".
+
+New Procedures and Functions
+
+    None.
+
+New Tokens
+
+    Accepted by the <internalformat> parameter of TexImage2D, CopyTexImage2D,
+    and CompressedTexImage2DARB and the <format> parameter of
+    CompressedTexSubImage2DARB:
+
+        COMPRESSED_RGB_S3TC_DXT1_EXT                   0x83F0
+        COMPRESSED_RGBA_S3TC_DXT1_EXT                  0x83F1
+        COMPRESSED_RGBA_S3TC_DXT3_EXT                  0x83F2
+        COMPRESSED_RGBA_S3TC_DXT5_EXT                  0x83F3
+
+Additions to Chapter 2 of the OpenGL 1.2.1 Specification (OpenGL Operation)
+
+    None.
+
+Additions to Chapter 3 of the OpenGL 1.2.1 Specification (Rasterization)
+
+    Add to Table 3.16.1:  Specific Compressed Internal Formats
+
+        Compressed Internal Format         Base Internal Format
+        ==========================         ====================
+        COMPRESSED_RGB_S3TC_DXT1_EXT       RGB
+        COMPRESSED_RGBA_S3TC_DXT1_EXT      RGBA
+        COMPRESSED_RGBA_S3TC_DXT3_EXT      RGBA
+        COMPRESSED_RGBA_S3TC_DXT5_EXT      RGBA
+
+    
+    Modify Section 3.8.2, Alternate Image Specification
+
+    (add to end of TexSubImage discussion, p.123 -- after edit from the
+    ARB_texture_compression spec)
+
+    If the internal format of the texture image being modified is
+    COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
+    COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the
+    texture is stored using one of the several S3TC compressed texture image
+    formats.  Such images are easily edited along 4x4 texel boundaries, so the
+    limitations on TexSubImage2D or CopyTexSubImage2D parameters are relaxed.
+    TexSubImage2D and CopyTexSubImage2D will result in an INVALID_OPERATION
+    error only if one of the following conditions occurs:
+
+        * <width> is not a multiple of four or equal to TEXTURE_WIDTH, 
+          unless <xoffset> and <yoffset> are both zero.
+        * <height> is not a multiple of four or equal to TEXTURE_HEIGHT,
+          unless <xoffset> and <yoffset> are both zero.
+        * <xoffset> or <yoffset> is not a multiple of four.
+
+    The contents of any 4x4 block of texels of an S3TC compressed texture
+    image that does not intersect the area being modified are preserved during
+    valid TexSubImage2D and CopyTexSubImage2D calls.
+
+
+    Add to Section 3.8.2, Alternate Image Specification (adding to the end of
+    the CompressedTexImage section introduced by the ARB_texture_compression
+    spec)
+
+    If <internalformat> is COMPRESSED_RGB_S3TC_DXT1_EXT,
+    COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
+    COMPRESSED_RGBA_S3TC_DXT5_EXT, the compressed texture is stored using one
+    of several S3TC compressed texture image formats.  The S3TC texture
+    compression algorithm supports only 2D images without borders.
+    CompressedTexImage1DARB and CompressedTexImage3DARB produce an
+    INVALID_ENUM error if <internalformat> is an S3TC format.
+    CompressedTexImage2DARB will produce an INVALID_OPERATION error if
+    <border> is non-zero.
+
+
+    Add to Section 3.8.2, Alternate Image Specification (adding to the end of
+    the CompressedTexSubImage section introduced by the
+    ARB_texture_compression spec)
+
+    If the internal format of the texture image being modified is
+    COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
+    COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the
+    texture is stored using one of the several S3TC compressed texture image
+    formats.  Since the S3TC texture compression algorithm supports only 2D
+    images, CompressedTexSubImage1DARB and CompressedTexSubImage3DARB produce
+    an INVALID_ENUM error if <format> is an S3TC format.  Since S3TC images
+    are easily edited along 4x4 texel boundaries, the limitations on
+    CompressedTexSubImage2D are relaxed.  CompressedTexSubImage2D will result
+    in an INVALID_OPERATION error only if one of the following conditions
+    occurs:
+
+        * <width> is not a multiple of four or equal to TEXTURE_WIDTH.
+        * <height> is not a multiple of four or equal to TEXTURE_HEIGHT.
+        * <xoffset> or <yoffset> is not a multiple of four.
+
+    The contents of any 4x4 block of texels of an S3TC compressed texture
+    image that does not intersect the area being modified are preserved during
+    valid TexSubImage2D and CopyTexSubImage2D calls.
+
+Additions to Chapter 4 of the OpenGL 1.2.1 Specification (Per-Fragment
+Operations and the Frame Buffer)
+
+    None.
+
+Additions to Chapter 5 of the OpenGL 1.2.1 Specification (Special Functions)
+
+    None.
+
+Additions to Chapter 6 of the OpenGL 1.2.1 Specification (State and
+State Requests)
+
+    None.
+
+Additions to Appendix A of the OpenGL 1.2.1 Specification (Invariance)
+
+    None.
+
+Additions to the AGL/GLX/WGL Specifications
+
+    None.
+
+GLX Protocol
+
+    None.
+
+Errors
+
+    INVALID_ENUM is generated by CompressedTexImage1DARB or
+    CompressedTexImage3DARB if <internalformat> is
+    COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
+    COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT.
+
+    INVALID_OPERATION is generated by CompressedTexImage2DARB if
+    <internalformat> is COMPRESSED_RGB_S3TC_DXT1_EXT,
+    COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
+    COMPRESSED_RGBA_S3TC_DXT5_EXT and <border> is not equal to zero.
+
+    INVALID_ENUM is generated by CompressedTexSubImage1DARB or
+    CompressedTexSubImage3DARB if <format> is COMPRESSED_RGB_S3TC_DXT1_EXT,
+    COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
+    COMPRESSED_RGBA_S3TC_DXT5_EXT.
+
+    INVALID_OPERATION is generated by TexSubImage2D, CopyTexSubImage2D, or
+    CompressedTexSubImage2D if TEXTURE_INTERNAL_FORMAT is
+    COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
+    COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT and any of
+    the following apply: <width> is not a multiple of four or equal to
+    TEXTURE_WIDTH; <height> is not a multiple of four or equal to
+    TEXTURE_HEIGHT; <xoffset> or <yoffset> is not a multiple of four.
+
+
+    The following restrictions from the ARB_texture_compression specification
+    do not apply to S3TC texture formats, since subimage modification is
+    straightforward as long as the subimage is properly aligned.
+
+    DELETE: INVALID_OPERATION is generated by TexSubImage1D, TexSubImage2D,
+    DELETE: TexSubImage3D, CopyTexSubImage1D, CopyTexSubImage2D, or
+    DELETE: CopyTexSubImage3D if the internal format of the texture image is
+    DELETE: compressed and <xoffset>, <yoffset>, or <zoffset> does not equal
+    DELETE: -b, where b is the value of TEXTURE_BORDER.
+
+    DELETE: INVALID_VALUE is generated by CompressedTexSubImage1DARB,
+    DELETE: CompressedTexSubImage2DARB, or CompressedTexSubImage3DARB if the
+    DELETE: entire texture image is not being edited:  if <xoffset>,
+    DELETE: <yoffset>, or <zoffset> is greater than -b, <xoffset> + <width> is
+    DELETE: less than w+b, <yoffset> + <height> is less than h+b, or <zoffset>
+    DELETE: + <depth> is less than d+b, where b is the value of
+    DELETE: TEXTURE_BORDER, w is the value of TEXTURE_WIDTH, h is the value of
+    DELETE: TEXTURE_HEIGHT, and d is the value of TEXTURE_DEPTH.
+
+    See also errors in the GL_ARB_texture_compression specification.
+
+New State
+
+    In the "Textures" state table, increment the TEXTURE_INTERNAL_FORMAT
+    subscript for Z by 4 in the "Type" row.
+
+New Implementation Dependent State
+
+    None
+
+Appendix
+
+    S3TC Compressed Texture Image Formats
+
+    Compressed texture images stored using the S3TC compressed image formats
+    are represented as a collection of 4x4 texel blocks, where each block
+    contains 64 or 128 bits of texel data.  The image is encoded as a normal
+    2D raster image in which each 4x4 block is treated as a single pixel.  If
+    an S3TC image has a width or height less than four, the data corresponding
+    to texels outside the image are irrelevant and undefined.
+
+    When an S3TC image with a width of <w>, height of <h>, and block size of
+    <blocksize> (8 or 16 bytes) is decoded, the corresponding image size (in
+    bytes) is:
+    
+        ceil(<w>/4) * ceil(<h>/4) * blocksize.
+
+    When decoding an S3TC image, the block containing the texel at offset
+    (<x>, <y>) begins at an offset (in bytes) relative to the base of the
+    image of:
+
+        blocksize * (ceil(<w>/4) * floor(<y>/4) + floor(<x>/4)).
+
+    The data corresponding to a specific texel (<x>, <y>) are extracted from a
+    4x4 texel block using a relative (x,y) value of
+    
+        (<x> modulo 4, <y> modulo 4).
+
+    There are four distinct S3TC image formats:
+
+    COMPRESSED_RGB_S3TC_DXT1_EXT:  Each 4x4 block of texels consists of 64
+    bits of RGB image data.  
+
+    Each RGB image data block is encoded as a sequence of 8 bytes, called (in
+    order of increasing address):
+
+            c0_lo, c0_hi, c1_lo, c1_hi, bits_0, bits_1, bits_2, bits_3
+
+        The 8 bytes of the block are decoded into three quantities:
+
+            color0 = c0_lo + c0_hi * 256
+            color1 = c1_lo + c1_hi * 256
+            bits   = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * bits_3))
+        
+        color0 and color1 are 16-bit unsigned integers that are unpacked to
+        RGB colors RGB0 and RGB1 as though they were 16-bit packed pixels with
+        a <format> of RGB and a type of UNSIGNED_SHORT_5_6_5.
+
+        bits is a 32-bit unsigned integer, from which a two-bit control code
+        is extracted for a texel at location (x,y) in the block using:
+
+            code(x,y) = bits[2*(4*y+x)+1..2*(4*y+x)+0]
+        
+        where bit 31 is the most significant and bit 0 is the least
+        significant bit.
+
+        The RGB color for a texel at location (x,y) in the block is given by:
+
+            RGB0,              if color0 > color1 and code(x,y) == 0
+            RGB1,              if color0 > color1 and code(x,y) == 1
+            (2*RGB0+RGB1)/3,   if color0 > color1 and code(x,y) == 2
+            (RGB0+2*RGB1)/3,   if color0 > color1 and code(x,y) == 3
+
+            RGB0,              if color0 <= color1 and code(x,y) == 0
+            RGB1,              if color0 <= color1 and code(x,y) == 1
+            (RGB0+RGB1)/2,     if color0 <= color1 and code(x,y) == 2
+            BLACK,             if color0 <= color1 and code(x,y) == 3
+
+        Arithmetic operations are done per component, and BLACK refers to an
+        RGB color where red, green, and blue are all zero.
+
+    Since this image has an RGB format, there is no alpha component and the
+    image is considered fully opaque.
+
+
+    COMPRESSED_RGBA_S3TC_DXT1_EXT:  Each 4x4 block of texels consists of 64
+    bits of RGB image data and minimal alpha information.  The RGB components
+    of a texel are extracted in the same way as COMPRESSED_RGB_S3TC_DXT1_EXT.
+ 
+        The alpha component for a texel at location (x,y) in the block is
+        given by:
+
+            0.0,               if color0 <= color1 and code(x,y) == 3
+            1.0,               otherwise
+
+        IMPORTANT:  When encoding an RGBA image into a format using 1-bit
+        alpha, any texels with an alpha component less than 0.5 end up with an
+        alpha of 0.0 and any texels with an alpha component greater than or
+        equal to 0.5 end up with an alpha of 1.0.  When encoding an RGBA image
+        into the COMPRESSED_RGBA_S3TC_DXT1_EXT format, the resulting red,
+        green, and blue components of any texels with a final alpha of 0.0
+        will automatically be zero (black).  If this behavior is not desired
+        by an application, it should not use COMPRESSED_RGBA_S3TC_DXT1_EXT.
+        This format will never be used when a generic compressed internal
+        format (Table 3.16.2) is specified, although the nearly identical
+        format COMPRESSED_RGB_S3TC_DXT1_EXT (above) may be.
+
+
+    COMPRESSED_RGBA_S3TC_DXT3_EXT:  Each 4x4 block of texels consists of 64
+    bits of uncompressed alpha image data followed by 64 bits of RGB image
+    data.  
+
+    Each RGB image data block is encoded according to the
+    COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code
+    bits always use the non-transparent encodings.  In other words, they are
+    treated as though color0 > color1, regardless of the actual values of
+    color0 and color1.
+
+    Each alpha image data block is encoded as a sequence of 8 bytes, called
+    (in order of increasing address):
+
+            a0, a1, a2, a3, a4, a5, a6, a7
+
+        The 8 bytes of the block are decoded into one 64-bit integer:
+
+            alpha = a0 + 256 * (a1 + 256 * (a2 + 256 * (a3 + 256 * (a4 +
+                         256 * (a5 + 256 * (a6 + 256 * a7))))))
+
+        alpha is a 64-bit unsigned integer, from which a four-bit alpha value
+        is extracted for a texel at location (x,y) in the block using:
+
+            alpha(x,y) = alpha[4*(4*y+x)+3..4*(4*y+x)+0]
+
+        where bit 63 is the most significant and bit 0 is the least
+        significant bit.
+
+        The alpha component for a texel at location (x,y) in the block is
+        given by alpha(x,y) / 15.
+
+ 
+    COMPRESSED_RGBA_S3TC_DXT5_EXT:  Each 4x4 block of texels consists of 64
+    bits of compressed alpha image data followed by 64 bits of RGB image data.
+
+    Each RGB image data block is encoded according to the
+    COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code
+    bits always use the non-transparent encodings.  In other words, they are
+    treated as though color0 > color1, regardless of the actual values of
+    color0 and color1.
+
+    Each alpha image data block is encoded as a sequence of 8 bytes, called
+    (in order of increasing address):
+
+        alpha0, alpha1, bits_0, bits_1, bits_2, bits_3, bits_4, bits_5
+
+        The alpha0 and alpha1 are 8-bit unsigned bytes converted to alpha
+        components by multiplying by 1/255.
+
+        The 6 "bits" bytes of the block are decoded into one 48-bit integer:
+
+          bits = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * (bits_3 + 
+                          256 * (bits_4 + 256 * bits_5))))
+
+        bits is a 48-bit unsigned integer, from which a three-bit control code
+        is extracted for a texel at location (x,y) in the block using:
+
+            code(x,y) = bits[3*(4*y+x)+2..3*(4*y+x)+0]
+
+        where bit 47 is the most significant and bit 0 is the least
+        significant bit.
+
+        The alpha component for a texel at location (x,y) in the block is
+        given by:
+
+              alpha0,                   code(x,y) == 0
+              alpha1,                   code(x,y) == 1
+
+              (6*alpha0 + 1*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 2
+              (5*alpha0 + 2*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 3
+              (4*alpha0 + 3*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 4
+              (3*alpha0 + 4*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 5
+              (2*alpha0 + 5*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 6
+              (1*alpha0 + 6*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 7
+
+              (4*alpha0 + 1*alpha1)/5,  alpha0 <= alpha1 and code(x,y) == 2
+              (3*alpha0 + 2*alpha1)/5,  alpha0 <= alpha1 and code(x,y) == 3
+              (2*alpha0 + 3*alpha1)/5,  alpha0 <= alpha1 and code(x,y) == 4
+              (1*alpha0 + 4*alpha1)/5,  alpha0 <= alpha1 and code(x,y) == 5
+              0.0,                      alpha0 <= alpha1 and code(x,y) == 6
+              1.0,                      alpha0 <= alpha1 and code(x,y) == 7
+
+
+Revision History
+
+    1.1,  11/16/01 pbrown:    Updated contact info, clarified where texels
+                              fall within a single block.
+
+    1.0,  07/07/00 prbrown1:  Published final version agreed to by working
+                              group members.
+
+    0.9,  06/24/00 prbrown1:  Documented that block-aligned TexSubImage calls
+                              do not modify existing texels outside the
+                              modified blocks.  Added caveat to allow for a
+                              (0,0)-anchored TexSubImage operation of
+                              arbitrary size.
+
+    0.7,  04/11/00 prbrown1:  Added issues on DXT1, DXT3, and DXT5 encodings
+                              where the MSDN documentation doesn't match what
+                              is really done.  Added enum values from the
+                              extension registry.
+
+    0.4,  03/28/00 prbrown1:  Updated to reflect final version of the
+                              ARB_texture_compression extension.  Allowed
+                              block-aligned TexSubImage calls.
+
+    0.3,  03/07/00 prbrown1:  Resolved issues pertaining to the format of RGB
+                              blocks in the DXT3 and DXT5 formats (they don't
+                              ever use the "transparent" encoding).  Fixed
+                              decoding of DXT1 blocks.  Pointed out issue of
+                              "transparent" texels in DXT1 encodings having
+                              different behaviors for RGB and RGBA internal
+                              formats.
+
+    0.2,  02/23/00 prbrown1:  Minor revisions; added several issues.
+
+    0.11, 02/17/00 prbrown1:  Slight modification to error semantics
+                              (INVALID_ENUM instead of INVALID_OPERATION).
+
+    0.1,  02/15/00 prbrown1:  Initial revision.
--- a/src/nvtt/squish/vs7/squish.sln
+++ b/src/nvtt/squish/vs7/squish.sln
@ -0,0 +1,39 @@
+Microsoft Visual Studio Solution File, Format Version 8.00
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squish", "squish\squish.vcproj", "{6A8518C3-D81A-4428-BD7F-C37933088AC1}"
+	ProjectSection(ProjectDependencies) = postProject
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squishpng", "squishpng\squishpng.vcproj", "{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}"
+	ProjectSection(ProjectDependencies) = postProject
+		{6A8518C3-D81A-4428-BD7F-C37933088AC1} = {6A8518C3-D81A-4428-BD7F-C37933088AC1}
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squishtest", "squishtest\squishtest.vcproj", "{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}"
+	ProjectSection(ProjectDependencies) = postProject
+		{6A8518C3-D81A-4428-BD7F-C37933088AC1} = {6A8518C3-D81A-4428-BD7F-C37933088AC1}
+	EndProjectSection
+EndProject
+Global
+	GlobalSection(SolutionConfiguration) = preSolution
+		Debug = Debug
+		Release = Release
+	EndGlobalSection
+	GlobalSection(ProjectConfiguration) = postSolution
+		{6A8518C3-D81A-4428-BD7F-C37933088AC1}.Debug.ActiveCfg = Debug|Win32
+		{6A8518C3-D81A-4428-BD7F-C37933088AC1}.Debug.Build.0 = Debug|Win32
+		{6A8518C3-D81A-4428-BD7F-C37933088AC1}.Release.ActiveCfg = Release|Win32
+		{6A8518C3-D81A-4428-BD7F-C37933088AC1}.Release.Build.0 = Release|Win32
+		{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Debug.ActiveCfg = Debug|Win32
+		{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Debug.Build.0 = Debug|Win32
+		{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Release.ActiveCfg = Release|Win32
+		{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Release.Build.0 = Release|Win32
+		{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Debug.ActiveCfg = Debug|Win32
+		{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Debug.Build.0 = Debug|Win32
+		{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Release.ActiveCfg = Release|Win32
+		{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Release.Build.0 = Release|Win32
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+	EndGlobalSection
+	GlobalSection(ExtensibilityAddIns) = postSolution
+	EndGlobalSection
+EndGlobal
--- a/src/nvtt/squish/vs7/squish/squish.vcproj
+++ b/src/nvtt/squish/vs7/squish/squish.vcproj
@ -0,0 +1,198 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="7.10"
+	Name="squish"
+	ProjectGUID="{6A8518C3-D81A-4428-BD7F-C37933088AC1}"
+	Keyword="Win32Proj">
+	<Platforms>
+		<Platform
+			Name="Win32"/>
+	</Platforms>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory="Debug"
+			IntermediateDirectory="Debug"
+			ConfigurationType="4"
+			CharacterSet="2">
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				AdditionalIncludeDirectories="..\.."
+				PreprocessorDefinitions="WIN32;_DEBUG;_LIB;SQUISH_USE_SSE=1"
+				MinimalRebuild="TRUE"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="3"
+				EnableEnhancedInstructionSet="1"
+				ForceConformanceInForLoopScope="TRUE"
+				UsePrecompiledHeader="0"
+				WarningLevel="4"
+				WarnAsError="TRUE"
+				Detect64BitPortabilityProblems="TRUE"
+				DebugInformationFormat="3"/>
+			<Tool
+				Name="VCCustomBuildTool"/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="$(OutDir)/squish.lib"/>
+			<Tool
+				Name="VCMIDLTool"/>
+			<Tool
+				Name="VCPostBuildEventTool"/>
+			<Tool
+				Name="VCPreBuildEventTool"/>
+			<Tool
+				Name="VCPreLinkEventTool"/>
+			<Tool
+				Name="VCResourceCompilerTool"/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"/>
+			<Tool
+				Name="VCManagedWrapperGeneratorTool"/>
+			<Tool
+				Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory="Release"
+			IntermediateDirectory="Release"
+			ConfigurationType="4"
+			CharacterSet="2"
+			WholeProgramOptimization="TRUE">
+			<Tool
+				Name="VCCLCompilerTool"
+				GlobalOptimizations="TRUE"
+				InlineFunctionExpansion="2"
+				FavorSizeOrSpeed="1"
+				OmitFramePointers="TRUE"
+				AdditionalIncludeDirectories="..\.."
+				PreprocessorDefinitions="WIN32;NDEBUG;_LIB;SQUISH_USE_SSE=1"
+				RuntimeLibrary="2"
+				ForceConformanceInForLoopScope="TRUE"
+				UsePrecompiledHeader="0"
+				WarningLevel="4"
+				WarnAsError="TRUE"
+				Detect64BitPortabilityProblems="TRUE"
+				DebugInformationFormat="3"/>
+			<Tool
+				Name="VCCustomBuildTool"/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="$(OutDir)/squish.lib"/>
+			<Tool
+				Name="VCMIDLTool"/>
+			<Tool
+				Name="VCPostBuildEventTool"/>
+			<Tool
+				Name="VCPreBuildEventTool"/>
+			<Tool
+				Name="VCPreLinkEventTool"/>
+			<Tool
+				Name="VCResourceCompilerTool"/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"/>
+			<Tool
+				Name="VCManagedWrapperGeneratorTool"/>
+			<Tool
+				Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
+			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
+			<File
+				RelativePath="..\..\alpha.cpp">
+			</File>
+			<File
+				RelativePath="..\..\clusterfit.cpp">
+			</File>
+			<File
+				RelativePath="..\..\colourblock.cpp">
+			</File>
+			<File
+				RelativePath="..\..\colourfit.cpp">
+			</File>
+			<File
+				RelativePath="..\..\colourset.cpp">
+			</File>
+			<File
+				RelativePath="..\..\maths.cpp">
+			</File>
+			<File
+				RelativePath="..\..\rangefit.cpp">
+			</File>
+			<File
+				RelativePath="..\..\singlecolourfit.cpp">
+			</File>
+			<File
+				RelativePath="..\..\squish.cpp">
+			</File>
+		</Filter>
+		<Filter
+			Name="Header Files"
+			Filter="h;hpp;hxx;hm;inl;inc;xsd"
+			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}">
+			<File
+				RelativePath="..\..\alpha.h">
+			</File>
+			<File
+				RelativePath="..\..\clusterfit.h">
+			</File>
+			<File
+				RelativePath="..\..\colourblock.h">
+			</File>
+			<File
+				RelativePath="..\..\colourfit.h">
+			</File>
+			<File
+				RelativePath="..\..\colourset.h">
+			</File>
+			<File
+				RelativePath="..\..\config.h">
+			</File>
+			<File
+				RelativePath="..\..\maths.h">
+			</File>
+			<File
+				RelativePath="..\..\rangefit.h">
+			</File>
+			<File
+				RelativePath="..\..\simd.h">
+			</File>
+			<File
+				RelativePath="..\..\simd_sse.h">
+			</File>
+			<File
+				RelativePath="..\..\simd_ve.h">
+			</File>
+			<File
+				RelativePath="..\..\singlecolourfit.h">
+			</File>
+			<File
+				RelativePath="..\..\singlecolourlookup.inl">
+			</File>
+			<File
+				RelativePath="..\..\squish.h">
+			</File>
+		</Filter>
+		<Filter
+			Name="Resource Files"
+			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
+			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}">
+			<File
+				RelativePath="..\..\texture_compression_s3tc.txt">
+			</File>
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
--- a/src/nvtt/squish/vs7/squishpng/squishpng.vcproj
+++ b/src/nvtt/squish/vs7/squishpng/squishpng.vcproj
@ -0,0 +1,140 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="7.10"
+	Name="squishpng"
+	ProjectGUID="{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}"
+	Keyword="Win32Proj">
+	<Platforms>
+		<Platform
+			Name="Win32"/>
+	</Platforms>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory="Debug"
+			IntermediateDirectory="Debug"
+			ConfigurationType="1"
+			CharacterSet="2">
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				AdditionalIncludeDirectories="..\.."
+				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
+				MinimalRebuild="TRUE"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="3"
+				ForceConformanceInForLoopScope="TRUE"
+				UsePrecompiledHeader="0"
+				WarningLevel="4"
+				WarnAsError="TRUE"
+				Detect64BitPortabilityProblems="TRUE"
+				DebugInformationFormat="3"/>
+			<Tool
+				Name="VCCustomBuildTool"/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="libpng13d.lib"
+				OutputFile="$(OutDir)/squishpng.exe"
+				LinkIncremental="2"
+				GenerateDebugInformation="TRUE"
+				ProgramDatabaseFile="$(OutDir)/squishpng.pdb"
+				SubSystem="1"
+				TargetMachine="1"/>
+			<Tool
+				Name="VCMIDLTool"/>
+			<Tool
+				Name="VCPostBuildEventTool"/>
+			<Tool
+				Name="VCPreBuildEventTool"/>
+			<Tool
+				Name="VCPreLinkEventTool"/>
+			<Tool
+				Name="VCResourceCompilerTool"/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"/>
+			<Tool
+				Name="VCWebDeploymentTool"/>
+			<Tool
+				Name="VCManagedWrapperGeneratorTool"/>
+			<Tool
+				Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory="Release"
+			IntermediateDirectory="Release"
+			ConfigurationType="1"
+			CharacterSet="2">
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalIncludeDirectories="..\.."
+				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
+				RuntimeLibrary="2"
+				ForceConformanceInForLoopScope="TRUE"
+				UsePrecompiledHeader="0"
+				WarningLevel="4"
+				WarnAsError="TRUE"
+				Detect64BitPortabilityProblems="TRUE"
+				DebugInformationFormat="3"/>
+			<Tool
+				Name="VCCustomBuildTool"/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="libpng13.lib"
+				OutputFile="$(OutDir)/squishpng.exe"
+				LinkIncremental="1"
+				GenerateDebugInformation="TRUE"
+				SubSystem="1"
+				OptimizeReferences="2"
+				EnableCOMDATFolding="2"
+				TargetMachine="1"/>
+			<Tool
+				Name="VCMIDLTool"/>
+			<Tool
+				Name="VCPostBuildEventTool"/>
+			<Tool
+				Name="VCPreBuildEventTool"/>
+			<Tool
+				Name="VCPreLinkEventTool"/>
+			<Tool
+				Name="VCResourceCompilerTool"/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"/>
+			<Tool
+				Name="VCWebDeploymentTool"/>
+			<Tool
+				Name="VCManagedWrapperGeneratorTool"/>
+			<Tool
+				Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
+			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
+			<File
+				RelativePath="..\..\extra\squishpng.cpp">
+			</File>
+		</Filter>
+		<Filter
+			Name="Header Files"
+			Filter="h;hpp;hxx;hm;inl;inc;xsd"
+			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}">
+		</Filter>
+		<Filter
+			Name="Resource Files"
+			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
+			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}">
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
--- a/src/nvtt/squish/vs7/squishtest/squishtest.vcproj
+++ b/src/nvtt/squish/vs7/squishtest/squishtest.vcproj
@ -0,0 +1,138 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="7.10"
+	Name="squishtest"
+	ProjectGUID="{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}"
+	Keyword="Win32Proj">
+	<Platforms>
+		<Platform
+			Name="Win32"/>
+	</Platforms>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory="Debug"
+			IntermediateDirectory="Debug"
+			ConfigurationType="1"
+			CharacterSet="2">
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				AdditionalIncludeDirectories="..\.."
+				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
+				MinimalRebuild="TRUE"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="3"
+				ForceConformanceInForLoopScope="TRUE"
+				UsePrecompiledHeader="0"
+				WarningLevel="4"
+				WarnAsError="TRUE"
+				Detect64BitPortabilityProblems="TRUE"
+				DebugInformationFormat="3"/>
+			<Tool
+				Name="VCCustomBuildTool"/>
+			<Tool
+				Name="VCLinkerTool"
+				OutputFile="$(OutDir)/squishtest.exe"
+				LinkIncremental="2"
+				GenerateDebugInformation="TRUE"
+				ProgramDatabaseFile="$(OutDir)/squishtest.pdb"
+				SubSystem="1"
+				TargetMachine="1"/>
+			<Tool
+				Name="VCMIDLTool"/>
+			<Tool
+				Name="VCPostBuildEventTool"/>
+			<Tool
+				Name="VCPreBuildEventTool"/>
+			<Tool
+				Name="VCPreLinkEventTool"/>
+			<Tool
+				Name="VCResourceCompilerTool"/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"/>
+			<Tool
+				Name="VCWebDeploymentTool"/>
+			<Tool
+				Name="VCManagedWrapperGeneratorTool"/>
+			<Tool
+				Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory="Release"
+			IntermediateDirectory="Release"
+			ConfigurationType="1"
+			CharacterSet="2">
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalIncludeDirectories="..\.."
+				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
+				RuntimeLibrary="2"
+				ForceConformanceInForLoopScope="TRUE"
+				UsePrecompiledHeader="0"
+				WarningLevel="4"
+				WarnAsError="TRUE"
+				Detect64BitPortabilityProblems="TRUE"
+				DebugInformationFormat="3"/>
+			<Tool
+				Name="VCCustomBuildTool"/>
+			<Tool
+				Name="VCLinkerTool"
+				OutputFile="$(OutDir)/squishtest.exe"
+				LinkIncremental="1"
+				GenerateDebugInformation="TRUE"
+				SubSystem="1"
+				OptimizeReferences="2"
+				EnableCOMDATFolding="2"
+				TargetMachine="1"/>
+			<Tool
+				Name="VCMIDLTool"/>
+			<Tool
+				Name="VCPostBuildEventTool"/>
+			<Tool
+				Name="VCPreBuildEventTool"/>
+			<Tool
+				Name="VCPreLinkEventTool"/>
+			<Tool
+				Name="VCResourceCompilerTool"/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"/>
+			<Tool
+				Name="VCWebDeploymentTool"/>
+			<Tool
+				Name="VCManagedWrapperGeneratorTool"/>
+			<Tool
+				Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
+			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
+			<File
+				RelativePath="..\..\extra\squishtest.cpp">
+			</File>
+		</Filter>
+		<Filter
+			Name="Header Files"
+			Filter="h;hpp;hxx;hm;inl;inc;xsd"
+			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}">
+		</Filter>
+		<Filter
+			Name="Resource Files"
+			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
+			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}">
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
--- a/src/nvtt/squish/weightedclusterfit.cpp
+++ b/src/nvtt/squish/weightedclusterfit.cpp
--- a/src/nvtt/squish/weightedclusterfit.h
+++ b/src/nvtt/squish/weightedclusterfit.h
@ -24,15 +24,15 @@
 	
   -------------------------------------------------------------------------- */
   
-#ifndef NV_SQUISH_WEIGHTEDCLUSTERFIT_H
-#define NV_SQUISH_WEIGHTEDCLUSTERFIT_H
+#ifndef SQUISH_WEIGHTEDCLUSTERFIT_H
+#define SQUISH_WEIGHTEDCLUSTERFIT_H

 #include "squish.h"
 #include "maths.h"
 #include "simd.h"
 #include "colourfit.h"

-namespace nvsquish {
+namespace squish {

 class WeightedClusterFit : public ColourFit
 {
--- a/src/nvtt/tests/CMakeLists.txt
+++ b/src/nvtt/tests/CMakeLists.txt
@ -1,28 +0,0 @@
-
-ADD_EXECUTABLE(filtertest filtertest.cpp ../tools/cmdline.h)
-TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage)
-
-ADD_EXECUTABLE(nvtestsuite testsuite.cpp)
-TARGET_LINK_LIBRARIES(nvtestsuite nvcore nvmath nvimage nvtt)
-ADD_TEST(NVTT.TestSuite.Kodak.cuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 0 -out output-cuda-kodak)
-ADD_TEST(NVTT.TestSuite.Waterloo.cuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 1 -out output-cuda-waterloo)
-ADD_TEST(NVTT.TestSuite.Epic.cuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 2 -out output-cuda-epic)
-ADD_TEST(NVTT.TestSuite.Kodak.nocuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 0 -nocuda -out output-nocuda-kodak)
-ADD_TEST(NVTT.TestSuite.Waterloo.nocuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 1 -nocuda -out output-nocuda-waterloo)
-ADD_TEST(NVTT.TestSuite.Epic.nocuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 2 -nocuda -out output-nocuda-epic)
-
-IF (CUDA_FOUND)
-	ADD_EXECUTABLE(driverapitest driverapi.cpp)
-	TARGET_LINK_LIBRARIES(driverapitest nvcore nvmath nvimage)
-ENDIF (CUDA_FOUND)
-
-ADD_EXECUTABLE(imperativeapi imperativeapi.cpp)
-TARGET_LINK_LIBRARIES(imperativeapi nvcore nvmath nvimage nvtt)
-
-INSTALL(TARGETS nvtestsuite DESTINATION bin)
- 
-#include_directories("/usr/include/ffmpeg/")
-#ADD_EXECUTABLE(nvmpegenc tools/mpegenc.cpp tools/cmdline.h)
-#TARGET_LINK_LIBRARIES(nvmpegenc nvcore nvmath nvimage avcodec z)
-#INSTALL(TARGETS nvmpegenc DESTINATION bin)
-
--- a/src/nvtt/tests/driverapi.cpp
+++ b/src/nvtt/tests/driverapi.cpp
@ -1,144 +0,0 @@
-
-#include <nvcore/Library.h>
-
-#include <cuda.h>
-#include <stdio.h>
-
-// Typedefs
-typedef CUresult (CUDAAPI * cuInitPtr)( unsigned int Flags );	
-
-typedef CUresult  (CUDAAPI * cuDeviceGetPtr)(CUdevice *device, int ordinal);
-typedef CUresult  (CUDAAPI * cuDeviceGetCountPtr)(int *count);
-typedef CUresult  (CUDAAPI * cuDeviceGetNamePtr)(char *name, int len, CUdevice dev);
-typedef CUresult  (CUDAAPI * cuDeviceComputeCapabilityPtr)(int *major, int *minor, CUdevice dev);
-typedef CUresult  (CUDAAPI * cuDeviceTotalMemPtr)(unsigned int *bytes, CUdevice dev);
-typedef CUresult  (CUDAAPI * cuDeviceGetPropertiesPtr)(CUdevprop *prop, CUdevice dev);
-typedef CUresult  (CUDAAPI * cuDeviceGetAttributePtr)(int *pi, CUdevice_attribute attrib, CUdevice dev);
-
-typedef CUresult  (CUDAAPI * cuCtxCreatePtr)(CUcontext *pctx, unsigned int flags, CUdevice dev );
-typedef CUresult  (CUDAAPI * cuCtxDestroyPtr)( CUcontext ctx );
-typedef CUresult  (CUDAAPI * cuCtxAttachPtr)(CUcontext *pctx, unsigned int flags);
-typedef CUresult  (CUDAAPI * cuCtxDetachPtr)(CUcontext ctx);
-typedef CUresult  (CUDAAPI * cuCtxPushCurrentPtr)( CUcontext ctx );
-typedef CUresult  (CUDAAPI * cuCtxPopCurrentPtr)( CUcontext *pctx );
-typedef CUresult  (CUDAAPI * cuCtxGetDevicePtr)(CUdevice *device);
-typedef CUresult  (CUDAAPI * cuCtxSynchronizePtr)(void);
-
-
-// A compressor inits CUDA and creates a context for each device.
-// 
-
-struct CudaDevice
-{
-	CUdevice device;
-	CUcontext context;
-};
-
-struct CudaContext
-{
-	CudaContext()
-	{
-		printf("CudaContext()\n");
-
-	#if NV_OS_WIN32
-		Library nvcuda("nvcuda.dll");
-	#else
-		Library nvcuda(NV_LIBRARY_NAME(cuda));
-	#endif
-
-		cuInit = (cuInitPtr)nvcuda.bindSymbol("cuInit");
-
-		cuDeviceGet = (cuDeviceGetPtr)nvcuda.bindSymbol("cuDeviceGet");
-		cuDeviceGetCount = (cuDeviceGetCountPtr)nvcuda.bindSymbol("cuDeviceGetCount");
-		cuDeviceGetName = (cuDeviceGetNamePtr)nvcuda.bindSymbol("cuDeviceGetName");
-		cuDeviceComputeCapability = (cuDeviceComputeCapabilityPtr)nvcuda.bindSymbol("cuDeviceComputeCapability");
-		cuDeviceTotalMem = (cuDeviceTotalMemPtr)nvcuda.bindSymbol("cuDeviceTotalMem");
-		cuDeviceGetProperties = (cuDeviceGetPropertiesPtr)nvcuda.bindSymbol("cuDeviceGetProperties");
-		cuDeviceGetAttribute = (cuDeviceGetAttributePtr)nvcuda.bindSymbol("cuDeviceGetAttribute");
-		
-		cuCtxCreate = (cuCtxCreatePtr)nvcuda.bindSymbol("cuCtxCreate");
-		cuCtxDestroy = (cuCtxDestroyPtr)nvcuda.bindSymbol("cuCtxDestroy");
-		cuCtxAttach = (cuCtxAttachPtr)nvcuda.bindSymbol("cuCtxAttach");
-		cuCtxDetach = (cuCtxDetachPtr)nvcuda.bindSymbol("cuCtxDetach");
-		cuCtxPushCurrent = (cuCtxPushCurrentPtr)nvcuda.bindSymbol("cuCtxPushCurrent");
-		cuCtxPopCurrent = (cuCtxPopCurrentPtr)nvcuda.bindSymbol("cuCtxPopCurrent");
-		cuCtxGetDevice = (cuCtxGetDevicePtr)nvcuda.bindSymbol("cuCtxGetDevice");
-		cuCtxSynchronize = (cuCtxSynchronizePtr)nvcuda.bindSymbol("cuCtxSynchronize");
-	
-		CUresult status = cuInit(0);
-	
-		if (status == CUDA_SUCCESS)
-		{
-			printf("cuInit succeeded.\n");
-		}
-		
-		m_deviceCount = 0;
-		cuDeviceGetCount(&m_deviceCount);
-		
-		printf("%d devices found.\n", m_deviceCount);
-		
-		if (m_deviceCount > 0)
-		{
-			m_devices = new CudaDevice[m_deviceCount];
-			
-			uint flags = CU_CTX_SCHED_AUTO;
-			if (m_deviceCount > 1) flags = CU_CTX_SCHED_YIELD;
-			
-			for (int i = 0; i < m_deviceCount; i++)
-			{
-				cuDeviceGet(&m_devices[i].device, i);
-				cuCtxCreate(&m_devices[i].context, flags, m_devices[i].device);
-			
-				cuCtxDestroy(m_devices[i].context);
-			}
-			
-		}
-	}
-	
-	~CudaContext()
-	{
-		printf("~CudaContext()\n");
-
-		if (m_deviceCount > 0)
-		{
-			for (int i = 0; i < m_deviceCount; i++)
-			{
-				cuCtxDestroy(m_devices[i].context);
-			}		
-
-			delete [] m_devices;
-		}
-	}
-	
-
-public:	
-	cuInitPtr cuInit;
-
-	cuDeviceGetPtr cuDeviceGet;
-	cuDeviceGetCountPtr cuDeviceGetCount;
-	cuDeviceGetNamePtr cuDeviceGetName;
-	cuDeviceComputeCapabilityPtr cuDeviceComputeCapability;
-	cuDeviceTotalMemPtr cuDeviceTotalMem;
-	cuDeviceGetPropertiesPtr cuDeviceGetProperties;
-	cuDeviceGetAttributePtr cuDeviceGetAttribute;
-
-	cuCtxCreatePtr cuCtxCreate;
-	cuCtxDestroyPtr cuCtxDestroy;
-	cuCtxAttachPtr cuCtxAttach;
-	cuCtxDetachPtr cuCtxDetach;
-	cuCtxPushCurrentPtr cuCtxPushCurrent;
-	cuCtxPopCurrentPtr  cuCtxPopCurrent;
-	cuCtxGetDevicePtr cuCtxGetDevice;
-	cuCtxSynchronizePtr cuCtxSynchronize;
-
-	int m_deviceCount;
-	CudaDevice * m_devices;
-};
-
-int main(void)
-{
-	CudaContext ctx;
-//	cuInit(0);
-
-	return 0;
-}
--- a/src/nvtt/tests/imperativeapi.cpp
+++ b/src/nvtt/tests/imperativeapi.cpp
@ -1,58 +0,0 @@
-// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#include <nvtt/nvtt.h>
-
-#include <stdlib.h>
-
-
-int main(int argc, char *argv[])
-{
-	nvtt::CompressionOptions compressionOptions;
-	compressionOptions.setFormat(nvtt::Format_BC1);
-
-	nvtt::OutputOptions outputOptions;
-	outputOptions.setFileName("output.dds");
-
-	nvtt::Context context;
-	nvtt::TexImage image = context.createTexImage();
-
-	image.load("kodim01.png");
-
-	context.outputHeader(image, image.countMipmaps(), compressionOptions, outputOptions);
-
-	float gamma = 2.2;
-	image.toLinear(gamma);
-
-	while (image.buildNextMipmap(nvtt::MipmapFilter_Box))
-	{
-		nvtt::TexImage tmpImage = image;
-		tmpImage.toGamma(gamma);
-
-		context.compress(tmpImage, compressionOptions, outputOptions);
-	//	tmpImage.compress(compressionOptions, outputOptions);
-	}
-
-	return EXIT_SUCCESS;
-}
-
--- a/src/nvtt/tests/mpegenc.cpp
+++ b/src/nvtt/tests/mpegenc.cpp
@ -1,344 +0,0 @@
-// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#include <nvcore/StrLib.h>
-#include <nvcore/StdStream.h>
-
-#include <nvmath/Color.h>
-
-#include <nvimage/Image.h>
-#include <nvimage/DirectDrawSurface.h>
-
-#include <nvtt/nvtt.h>
-
-#include "cmdline.h"
-
-extern "C" {
-#include <libavcodec/avcodec.h>
-//#include <libavformat/avformat.h>
-}
-
-// http://ffmpeg.mplayerhq.hu/general.html
-// http://cekirdek.pardus.org.tr/~ismail/ffmpeg-docs/apiexample_8c-source.html
-
-
-using namespace nv;
-
-static float s_quality = 0.5f;
-
-static AVFrame * createPicture(const Image & image)
-{
-	const uint w = image.width();
-	const uint h = image.height();
-	const uint size = w * h;
-	
-	AVFrame * picture = avcodec_alloc_frame();
-	
-	uint8_t * buffer = (uint8_t *)malloc((size * 3) / 2);
-	
-	picture->data[0] = buffer;
-	picture->data[1] = buffer + size;
-	picture->data[2] = buffer + size + size / 4;
-	picture->linesize[0] = w;
-	picture->linesize[1] = w / 2;
-	picture->linesize[2] = w / 2;
-
-	memset(buffer, 0, (size * 3) / 2);
-	
-	// Convert image to YCbCr 4:2:0
-	
-	// Y
-	for (uint y=0;y<h;y++)
-	{
-		for (uint x=0;x<w;x++)
-		{
-			Color32 c = image.pixel(x, y);
-			
-			float R = (1 / 255.0f) * c.r;
-			float G = (1 / 255.0f) * c.g;
-			float B = (1 / 255.0f) * c.b;
-			
-			//float Y = 0.299f * R + 0.587f * G + 0.114f * B;
-			float Y = 16  + (65.481f  * R + 128.553f * G +  24.966f * B);
-			
-			picture->data[0][y * picture->linesize[0] + x] = (uint8)clamp(Y, 0.0f, 255.0f);
-		}
-	}
-
-	// Cb and Cr
-	for (uint y=0;y<h/2;y++)
-	{
-		for (uint x=0;x<w/2;x++)
-		{
-			Color32 c0 = image.pixel(2*x+0, 2*y+0);
-			Color32 c1 = image.pixel(2*x+1, 2*y+0);
-			Color32 c2 = image.pixel(2*x+0, 2*y+1);
-			Color32 c3 = image.pixel(2*x+1, 2*y+1);
-
-			float R = (1 / 255.0f) * 0.25f * (c0.r + c1.r + c2.r + c3.r);
-			float G = (1 / 255.0f) * 0.25f * (c0.g + c1.g + c2.g + c3.g);
-			float B = (1 / 255.0f) * 0.25f * (c0.b + c1.b + c2.b + c3.b);
-			
-			//float Pb = - 0.168736f * R - 0.331264f * G + 0.5f * B;
-			//float Pr = + 0.5f * R - 0.418688f * G - 0.081312f * B;
-			float Cb = 128 + (-37.797f * R - 74.203f * G + 112.0f * B);
-			float Cr = 128 + (112.0f * R - 93.786 * G - 18.214f * B);
-			
-			picture->data[1][y * picture->linesize[1] + x] = (uint8)clamp(Cb, 0.0f, 255.0f);;
-			picture->data[2][y * picture->linesize[2] + x] = (uint8)clamp(Cr, 0.0f, 255.0f);;
-		}
-	}
-	
-	return picture;
-}
-
-static void pgm_save(unsigned char *buf, int wrap, int xsize, int ysize, const char * filename)
-{
-	FILE * f = fopen(filename, "w");
-	fprintf(f,"P5\n%d %d\n%d\n",xsize, ysize, 255);
-	
-	for (int i = 0; i < ysize; i++)
-		fwrite(buf + i * wrap,1,xsize,f);
-	
-	fclose(f);
-}
-
-static void savePicture(const AVFrame * picture, int w, int h)
-{
-	// @@ Combine planes.
-	pgm_save(picture->data[0], picture->linesize[0], w, h, "test_y.pgm");
-	pgm_save(picture->data[1], picture->linesize[1], w/2, h/2, "test_u.pgm");
-	pgm_save(picture->data[2], picture->linesize[2], w/2, h/2, "test_v.pgm");
-}
-
-static double psnr(double d) {
-	return -10.0*log(d)/log(10.0);
-}
-
-
-static void encodeFrame(const Image & image, CodecID format, Array<uint8> & frame)
-{
-	AVFrame * picture = createPicture(image);
-	
-	AVCodec * encoder = avcodec_find_encoder(format);
-
-	if (encoder == NULL)
-	{
-		printf("MPEG encoder not found.\n");
-		exit(1);
-	}
-
-	AVCodecContext * encoder_context = avcodec_alloc_context();
-
-	//encoder_context->me_method = 0;
-	encoder_context->width = image.width();
-	encoder_context->height = image.height();
-	encoder_context->pix_fmt = PIX_FMT_YUV420P;
-	//encoder_context->pix_fmt = PIX_FMT_YUV422P;
-	//encoder_context->pix_fmt = PIX_FMT_YUVJ420P;
-	
-	encoder_context->time_base = (AVRational){1,25};   // required parameter. 25 fps?
-	encoder_context->bit_rate = 400000;   // Quality?
-	//encoder_context->bit_rate = 200000;   // Default
-	//encoder_context->bit_rate_tolerance = 20000;
-	//encoder_context->qmin = ?;
-	//encoder_context->qmax = ?;
-	//encoder_context->qcompress = ?;
-	//encoder_context->qblur = ?;
-	
-	encoder_context->flags |= CODEC_FLAG_PSNR;
-	encoder_context->qcompress = s_quality;
-	//encoder_context->qblur = 1.0f;
-	//encoder_context->global_quality = FF_QP2LAMBDA * 0;
-	//encoder_context->max_qdiff = 3;
-	
-
-
-	
-	// Intra frames only
-	encoder_context->gop_size = 0;
-
-	if (avcodec_open(encoder_context, encoder) < 0)
-	{
-		printf("MPEG encoder initialization failed.\n");
-		exit(1);
-	}
-
-	frame.resize(1024 * 1024, 0);	// resize and initialize to 0.
-	
-	int out_size = avcodec_encode_video(encoder_context, frame.mutableBuffer(), frame.size(), picture);
-	frame.resize(out_size);
-	
-	// Append sequence end code.
-	frame.append(0x00);
-	frame.append(0x00);
-	frame.append(0x01);
-	frame.append(0xb7);
-	
-	int in_size = image.width() * image.height() * 3;
-	printf("Image size %d -> %d (1:%d)\n", in_size, out_size, in_size/out_size);
-	printf("PSNR = %4.2f\n", psnr(encoder_context->coded_frame->error[0]/(encoder_context->width*encoder_context->height*255.0*255.0)));
-
-	
-	avcodec_close(encoder_context);
-	av_free(encoder_context);
-	av_free(picture);
-}
-
-static void decodeFrame(const Array<uint8> & frame, CodecID format)
-{
-	AVCodec * decoder = avcodec_find_decoder(format);
-	if (decoder == NULL) {
-		printf("MPEG decoder not found.\n");
-		exit(1);
-	}
-
-	AVCodecContext * decoder_context = avcodec_alloc_context();
-	AVFrame * picture = avcodec_alloc_frame();
-	
-	if (decoder->capabilities & CODEC_CAP_TRUNCATED)
-		decoder_context->flags |= CODEC_FLAG_TRUNCATED; /* we do not send complete frames */
-	
-	
-	if (avcodec_open(decoder_context, decoder) < 0) {
-		printf("MPEG decoder initialization failed.\n");
-		exit(1);
-	}
-	
-	//memset(picture->data[0], 0, in_size / 2);
-	
-	int got_picture = 0;
-	int len = avcodec_decode_video(decoder_context, picture, &got_picture, frame.buffer(), frame.size());
-	
-	printf("decoded %d bytes\n", len);
-	
-	if (len < 0) {
-		printf("Error while decoding frame.\n");
-		exit(1);
-	}
-	
-	if (!got_picture) {
-		printf("Did not get any picture.\n");
-		exit(1);
-	}
-	
-	//nvDebugCheck(outbuf_size == len);
-	//nvDebugCheck(got_picture == true);
-
-	savePicture(picture, decoder_context->width, decoder_context->height);
-	
-	avcodec_close(decoder_context);
-	av_free(decoder_context);
-	av_free(picture);
-}
-
-
-
-int main(int argc, char *argv[])
-{
-	MyAssertHandler assertHandler;
-	MyMessageHandler messageHandler;
-
-	nv::Path input;
-	nv::Path output;
-
-	// Parse arguments.
-	for (int i = 1; i < argc; i++)
-	{
-		if (strcmp(argv[i], "-q") == 0 || strcmp(argv[i], "--quality") == 0)
-		{
-			if (i+1 < argc && argv[i+1][0] != '-')
-			{
-				s_quality = atof(argv[i+1]);
-				i++;
-			}
-		}
-			
-		else if (argv[i][0] != '-')
-		{
-			input = argv[i];
-
-			if (i+1 < argc && argv[i+1][0] != '-')
-			{
-				output = argv[i+1];
-				i++;
-			}
-			else
-			{
-				output.copy(input.str());
-				output.stripExtension();
-				output.append(".mpeg");
-			}
-
-			break;
-		}
-	}
-
-	printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007-2008\n\n");
-
-	if (input.isNull())
-	{
-		printf("usage: nvmpegcompress [options] infile [outfile]\n\n");
-		
-		return 1;
-	}
-
-	// Load image.
-	Image image;
-	if (!image.load(input))
-	{
-		fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
-		return 1;
-	}
-	
-	// Initialize codecs.
-	avcodec_init();
-	avcodec_register_all();
-
-	//CodecID format = CODEC_ID_MPEG1VIDEO;
-	CodecID format = CODEC_ID_MPEG2VIDEO;
-	//CodecID format = CODEC_ID_MJPEG;
-	//CodecID format = CODEC_ID_THEORA;
-	//CodecID format = CODEC_ID_H264;
-	
-	// Encode frame.
-	Array<uint8> frame;
-	encodeFrame(image, format, frame);
-
-	// Save resulting I-frame.
-	StdOutputStream outputStream(output.str());
-	if (outputStream.isError())
-	{
-		printf("Error opening '%s' for writing.\n", output.str());
-		return 1;
-	}
-
-	outputStream.serialize(frame.mutableBuffer(), frame.size());
-
-	//decodeFrame(frame, format);
-	
-	// @@ Compare image against original, and compute RMS.
-	
-	return 0;
-}
-
--- a/src/nvtt/tests/testsuite.cpp
+++ b/src/nvtt/tests/testsuite.cpp
@ -1,529 +0,0 @@
-// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#include <nvtt/nvtt.h>
-#include <nvimage/Image.h>
-#include <nvimage/ImageIO.h>
-#include <nvimage/BlockDXT.h>
-#include <nvimage/ColorBlock.h>
-#include <nvcore/Ptr.h>
-#include <nvcore/Debug.h>
-#include <nvcore/StrLib.h>
-#include <nvcore/StdStream.h>
-#include <nvcore/TextWriter.h>
-#include <nvcore/FileSystem.h>
-#include <nvcore/Timer.h>
-
-#include <stdlib.h> // free
-#include <string.h> // memcpy
-
-
-using namespace nv;
-
-// Kodak image set
-static const char * s_kodakImageSet[] = {
-	"kodim01.png",
-	"kodim02.png",
-	"kodim03.png",
-	"kodim04.png",
-	"kodim05.png",
-	"kodim06.png",
-	"kodim07.png",
-	"kodim08.png",
-	"kodim09.png",
-	"kodim10.png",
-	"kodim11.png",
-	"kodim12.png",
-	"kodim13.png",
-	"kodim14.png",
-	"kodim15.png",
-	"kodim16.png",
-	"kodim17.png",
-	"kodim18.png",
-	"kodim19.png",
-	"kodim20.png",
-	"kodim21.png",
-	"kodim22.png",
-	"kodim23.png",
-	"kodim24.png",
-};
-
-// Waterloo image set
-static const char * s_waterlooImageSet[] = {
-	"clegg.png",
-	"frymire.png",
-	"lena.png",
-	"monarch.png",
-	"peppers.png",
-	"sail.png",
-	"serrano.png",
-	"tulips.png",
-};
-
-// Epic image set
-static const char * s_epicImageSet[] = {
-	"Bradley1.png",
-	"Gradient.png",
-	"MoreRocks.png",
-	"Wall.png",
-	"Rainbow.png",
-	"Text.png",
-};
-
-// Farbrausch
-static const char * s_farbrauschImageSet[] = {
-	"t.2d.pn02.bmp",
-	"t.aircondition.01.bmp",
-	"t.bricks.02.bmp",
-	"t.bricks.05.bmp",
-	"t.concrete.cracked.01.bmp",
-	"t.envi.colored02.bmp",
-	"t.envi.colored03.bmp",
-	"t.font.01.bmp",
-	"t.sewers.01.bmp",
-	"t.train.03.bmp",
-	"t.yello.01.bmp",
-};
-
-// Lugaru
-static const char * s_lugaruImageSet[] = {
-	"lugaru-blood.png",
-	"lugaru-bush.png",
-	"lugaru-cursor.png",
-	"lugaru-hawk.png",
-};
-
-// Quake3
-static const char * s_quake3ImageSet[] = {
-	"q3-blocks15cgeomtrn.tga",
-	"q3-blocks17bloody.tga",
-	"q3-dark_tin2.tga",
-	"q3-fan_grate.tga",
-	"q3-fan.tga",
-	"q3-metal2_2.tga",
-	"q3-panel_glo.tga",
-	"q3-proto_fence.tga",
-	"q3-wires02.tga",
-};
-
-
-struct ImageSet
-{
-	const char ** fileNames;
-	int fileCount;
-	nvtt::Format format;
-};
-
-static ImageSet s_imageSets[] = {
-	{s_kodakImageSet, sizeof(s_kodakImageSet)/sizeof(s_kodakImageSet[0]), nvtt::Format_DXT1},
-	{s_waterlooImageSet, sizeof(s_waterlooImageSet)/sizeof(s_waterlooImageSet[0]), nvtt::Format_DXT1},
-	{s_epicImageSet, sizeof(s_epicImageSet)/sizeof(s_epicImageSet[0]), nvtt::Format_DXT1},
-	{s_farbrauschImageSet, sizeof(s_farbrauschImageSet)/sizeof(s_farbrauschImageSet[0]), nvtt::Format_DXT1},
-	{s_lugaruImageSet, sizeof(s_lugaruImageSet)/sizeof(s_lugaruImageSet[0]), nvtt::Format_DXT5},
-	{s_quake3ImageSet, sizeof(s_quake3ImageSet)/sizeof(s_quake3ImageSet[0]), nvtt::Format_DXT5},
-};
-const int s_imageSetCount = sizeof(s_imageSets)/sizeof(s_imageSets[0]);
-
-enum Decoder
-{
-	Decoder_Reference,
-	Decoder_NVIDIA,
-};
-
-struct MyOutputHandler : public nvtt::OutputHandler
-{
-	MyOutputHandler() : m_data(NULL), m_ptr(NULL) {}
-	~MyOutputHandler()
-	{
-		free(m_data);
-	}
-
-	virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel)
-	{
-		m_size = size;
-		m_width = width;
-		m_height = height;
-		free(m_data);
-		m_data = (unsigned char *)malloc(size);
-		m_ptr = m_data;
-	}
-	
-	virtual bool writeData(const void * data, int size)
-	{
-		memcpy(m_ptr, data, size);
-		m_ptr += size;
-		return true;
-	}
-
-	Image * decompress(nvtt::Format format, Decoder decoder)
-	{
-		int bw = (m_width + 3) / 4;
-		int bh = (m_height + 3) / 4;
-
-		AutoPtr<Image> img( new Image() );
-		img->allocate(m_width, m_height);
-
-		if (format == nvtt::Format_BC1)
-		{
-			BlockDXT1 * block = (BlockDXT1 *)m_data;
-
-			for (int y = 0; y < bh; y++)
-			{
-				for (int x = 0; x < bw; x++)
-				{
-					ColorBlock colors;
-					if (decoder == Decoder_Reference) {
-						block->decodeBlock(&colors);
-					}
-					else if (decoder == Decoder_NVIDIA) {
-						block->decodeBlockNV5x(&colors);
-					}
-
-					for (int yy = 0; yy < 4; yy++)
-					{
-						for (int xx = 0; xx < 4; xx++)
-						{
-							Color32 c = colors.color(xx, yy);
-
-							if (x * 4 + xx < m_width && y * 4 + yy < m_height)
-							{
-								img->pixel(x * 4 + xx, y * 4 + yy) = c;
-							}
-						}
-					}
-
-					block++;
-				}
-			}
-		}
-		else if (format == nvtt::Format_BC3)
-		{
-			BlockDXT5 * block = (BlockDXT5 *)m_data;
-
-			for (int y = 0; y < bh; y++)
-			{
-				for (int x = 0; x < bw; x++)
-				{
-					ColorBlock colors;
-					if (decoder == Decoder_Reference) {
-						block->decodeBlock(&colors);
-					}
-					else if (decoder == Decoder_NVIDIA) {
-						block->decodeBlockNV5x(&colors);
-					}
-
-					for (int yy = 0; yy < 4; yy++)
-					{
-						for (int xx = 0; xx < 4; xx++)
-						{
-							Color32 c = colors.color(xx, yy);
-
-							if (x * 4 + xx < m_width && y * 4 + yy < m_height)
-							{
-								img->pixel(x * 4 + xx, y * 4 + yy) = c;
-							}
-						}
-					}
-
-					block++;
-				}
-			}
-		}
-
-
-		return img.release();
-	}
-
-	int m_size;
-	int m_width;
-	int m_height;
-	unsigned char * m_data;
-	unsigned char * m_ptr;
-};
-
-
-float rmsError(const Image * a, const Image * b)
-{
-	nvCheck(a != NULL);
-	nvCheck(b != NULL);
-	nvCheck(a->width() == b->width());
-	nvCheck(a->height() == b->height());
-
-	double mse = 0;
-
-	const uint count = a->width() * a->height();
-
-	for (uint i = 0; i < count; i++)
-	{
-		Color32 c0 = a->pixel(i);
-		Color32 c1 = b->pixel(i);
-
-		int r = c0.r - c1.r;
-		int g = c0.g - c1.g;
-		int b = c0.b - c1.b;
-		int a = c0.a - c1.a;
-
-		mse += double(r * r * c0.a) / 255;
-		mse += double(g * g * c0.a) / 255;
-		mse += double(b * b * c0.a) / 255;
-	}
-
-	return float(sqrt(mse / count));
-}
-
-
-int main(int argc, char *argv[])
-{
-	const uint version = nvtt::version();
-	const uint major = version / 100;
-	const uint minor = version % 100;
-	
-	printf("NVIDIA Texture Tools %u.%u - Copyright NVIDIA Corporation 2007 - 2008\n\n", major, minor);
-	
-	int set = 0;
-	bool fast = false;
-	bool nocuda = false;
-	bool showHelp = false;
-	Decoder decoder = Decoder_Reference;
-	const char * basePath = "";
-	const char * outPath = "output";
-	const char * regressPath = NULL;
-	
-	// Parse arguments.
-	for (int i = 1; i < argc; i++)
-	{
-		if (strcmp("-set", argv[i]) == 0)
-		{
-			if (i+1 < argc && argv[i+1][0] != '-') {
-				set = atoi(argv[i+1]);
-				i++;
-			}
-		}
-		else if (strcmp("-dec", argv[i]) == 0)
-		{
-			if (i+1 < argc && argv[i+1][0] != '-') {
-				decoder = (Decoder)atoi(argv[i+1]);
-				i++;
-			}
-		}
-		else if (strcmp("-fast", argv[i]) == 0)
-		{
-			fast = true;
-		}
-		else if (strcmp("-nocuda", argv[i]) == 0)
-		{
-			nocuda = true;
-		}
-		else if (strcmp("-help", argv[i]) == 0)
-		{
-			showHelp = true;
-		}
-		else if (strcmp("-path", argv[i]) == 0)
-		{
-			if (i+1 < argc && argv[i+1][0] != '-') {
-				basePath = argv[i+1];
-				i++;
-			}
-		}
-		else if (strcmp("-out", argv[i]) == 0)
-		{
-			if (i+1 < argc && argv[i+1][0] != '-') {
-				outPath = argv[i+1];
-				i++;
-			}
-		}
-		else if (strcmp("-regress", argv[i]) == 0)
-		{
-			if (i+1 < argc && argv[i+1][0] != '-') {
-				regressPath = argv[i+1];
-				i++;
-			}
-		}
-	}
-
-	if (showHelp)
-	{
-		printf("usage: nvtestsuite [options]\n\n");
-		
-		printf("Input options:\n");
-		printf("  -path <path>   \tInput image path.\n");
-		printf("  -regress <path>\tRegression directory.\n");
-		printf("  -set [0:2]     \tImage set.\n");
-		printf("    0:           \tKodak.\n");
-		printf("    1:           \tWaterloo.\n");
-		printf("    2:           \tEpic.\n");
-		printf("    3:           \tFarbrausch.\n");
-		printf("  -dec x         \tDecompressor.\n");
-		printf("    0:           \tReference.\n");
-		printf("    1:           \tNVIDIA.\n");
-
-		printf("Compression options:\n");
-		printf("  -fast          \tFast compression.\n");
-		printf("  -nocuda        \tDo not use cuda compressor.\n");
-		
-		printf("Output options:\n");
-		printf("  -out <path>    \tOutput directory.\n");
-
-		return 1;
-	}
-	
-	nvtt::InputOptions inputOptions;
-	inputOptions.setMipmapGeneration(false);
-	inputOptions.setAlphaMode(nvtt::AlphaMode_Transparency);
-
-	nvtt::CompressionOptions compressionOptions;
-	compressionOptions.setFormat(nvtt::Format_BC1);
-	if (fast)
-	{
-		compressionOptions.setQuality(nvtt::Quality_Fastest);
-	}
-	else
-	{
-		compressionOptions.setQuality(nvtt::Quality_Production);
-	}
-	//compressionOptions.setExternalCompressor("ati");
-	//compressionOptions.setExternalCompressor("squish");
-	//compressionOptions.setExternalCompressor("d3dx");
-	//compressionOptions.setExternalCompressor("stb");
-
-	compressionOptions.setFormat(s_imageSets[set].format);
-
-
-	nvtt::OutputOptions outputOptions;
-	outputOptions.setOutputHeader(false);
-
-	MyOutputHandler outputHandler;
-	outputOptions.setOutputHandler(&outputHandler);
-
-	nvtt::Context context;
-	context.enableCudaAcceleration(!nocuda);
-
-	FileSystem::changeDirectory(basePath);
-	FileSystem::createDirectory(outPath);
-
-	Path csvFileName;
-	csvFileName.format("%s/result.csv", outPath);
-	StdOutputStream csvStream(csvFileName);
-	TextWriter csvWriter(&csvStream);
-
-	float totalTime = 0;
-	float totalRMSE = 0;
-	int failedTests = 0;
-	float totalDiff = 0;
-
-	const char ** fileNames = s_imageSets[set].fileNames;
-	int fileCount = s_imageSets[set].fileCount;
-
-	Timer timer;
-
-	for (int i = 0; i < fileCount; i++)
-	{
-		AutoPtr<Image> img( new Image() );
-		
-		if (!img->load(fileNames[i]))
-		{
-			printf("Input image '%s' not found.\n", fileNames[i]);
-			return EXIT_FAILURE;
-		}
-
-		inputOptions.setTextureLayout(nvtt::TextureType_2D, img->width(), img->height());
-		inputOptions.setMipmapData(img->pixels(), img->width(), img->height());
-
-		printf("Compressing: \t'%s'\n", fileNames[i]);
-
-		timer.start();
-
-		context.process(inputOptions, compressionOptions, outputOptions);
-
-		timer.stop();
-		printf("  Time: \t%.3f sec\n", float(timer.elapsed()) / 1000);
-		totalTime += float(timer.elapsed()) / 1000;
-
-		AutoPtr<Image> img_out( outputHandler.decompress(s_imageSets[set].format, decoder) );
-
-		Path outputFileName;
-		outputFileName.format("%s/%s", outPath, fileNames[i]);
-		outputFileName.stripExtension();
-		outputFileName.append(".png");
-		if (!ImageIO::save(outputFileName, img_out.ptr()))
-		{
-			printf("Error saving file '%s'.\n", outputFileName.str());
-		}
-
-		float rmse = rmsError(img.ptr(), img_out.ptr());
-		totalRMSE += rmse;
-
-		printf("  RMSE:  \t%.4f\n", rmse);
-
-		// Output csv file
-		csvWriter << "\"" << fileNames[i] << "\"," << rmse << "\n";
-
-		if (regressPath != NULL)
-		{
-			Path regressFileName;
-			regressFileName.format("%s/%s", regressPath, fileNames[i]);
-			regressFileName.stripExtension();
-			regressFileName.append(".png");
-
-			AutoPtr<Image> img_reg( new Image() );
-			if (!img_reg->load(regressFileName.str()))
-			{
-				printf("Regression image '%s' not found.\n", regressFileName.str());
-				return EXIT_FAILURE;
-			}
-
-			float rmse_reg = rmsError(img.ptr(), img_reg.ptr());
-
-			float diff = rmse_reg - rmse;
-			totalDiff += diff;
-
-			const char * text = "PASSED";
-			if (equal(diff, 0)) text = "PASSED";
-			else if (diff < 0) {
-				text = "FAILED";
-				failedTests++;
-			}
-
-			printf("  Diff: \t%.4f (%s)\n", diff, text);
-		}
-
-		fflush(stdout);
-	}
-
-	totalRMSE /= fileCount;
-	totalDiff /= fileCount;
-
-	printf("Total Results:\n");
-	printf("  Total Time: \t%.3f sec\n", totalTime);
-	printf("  Average RMSE:\t%.4f\n", totalRMSE);
-
-	if (regressPath != NULL)
-	{
-		printf("Regression Results:\n");
-		printf("  Diff: %.4f\n", totalDiff);
-		printf("  %d/%d tests failed.\n", failedTests, fileCount);
-	}
-
-	return EXIT_SUCCESS;
-}
-
--- a/src/nvtt/tools/CMakeLists.txt
+++ b/src/nvtt/tools/CMakeLists.txt
@ -1,63 +0,0 @@
-
-
-ADD_EXECUTABLE(nvcompress compress.cpp cmdline.h)
-TARGET_LINK_LIBRARIES(nvcompress nvcore nvmath nvimage nvtt)
-
-ADD_EXECUTABLE(nvdecompress decompress.cpp cmdline.h)
-TARGET_LINK_LIBRARIES(nvdecompress nvcore nvmath nvimage)
-
-ADD_EXECUTABLE(nvddsinfo ddsinfo.cpp cmdline.h)
-TARGET_LINK_LIBRARIES(nvddsinfo nvcore nvmath nvimage)
-
-ADD_EXECUTABLE(nvimgdiff imgdiff.cpp cmdline.h)
-TARGET_LINK_LIBRARIES(nvimgdiff nvcore nvmath nvimage)
-
-ADD_EXECUTABLE(nvassemble assemble.cpp cmdline.h)
-TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage)
-
-ADD_EXECUTABLE(nvzoom resize.cpp cmdline.h)
-TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage)
-
-ADD_EXECUTABLE(nv-gnome-thumbnailer thumbnailer.cpp cmdline.h)
-TARGET_LINK_LIBRARIES(nv-gnome-thumbnailer nvcore nvmath nvimage)
-  
-INSTALL(TARGETS nvcompress nvdecompress nvddsinfo nvimgdiff nvassemble nvzoom nv-gnome-thumbnailer DESTINATION bin)
- 
- # Use gconftool-2 to install gnome thumbnailer
-FIND_PROGRAM(GCONFTOOL2 gconftool-2)
-
-IF(GCONFTOOL2)
-	CONFIGURE_FILE(nvtt-thumbnailer.schema.in ${CMAKE_CURRENT_BINARY_DIR}/nvtt-thumbnailer.schema)
-
- 	INSTALL(CODE "MESSAGE(STATUS \"Installing thumbnailer schema\")")
- 	#gconftool-2 --get-default-source
- 	INSTALL(CODE "EXECUTE_PROCESS(COMMAND ${GCONFTOOL2} --get-default-source OUTPUT_VARIABLE GCONF_CONFIG_SOURCE OUTPUT_STRIP_TRAILING_WHITESPACE)")
- 	INSTALL(CODE "set(ENV{GCONF_CONFIG_SOURCE} \"\${GCONF_CONFIG_SOURCE}\")")
- 	INSTALL(CODE "EXECUTE_PROCESS(COMMAND ${GCONFTOOL2} --makefile-install-rule ${CMAKE_CURRENT_BINARY_DIR}/nvtt-thumbnailer.schema)")
-ENDIF(GCONFTOOL2)
-
-# UI tools
-IF(QT4_FOUND) # AND NOT MSVC)
-	SET(QT_USE_QTOPENGL TRUE)
-	INCLUDE_DIRECTORIES(${QT_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
-	
-	SET(SRCS
-		ui/main.cpp
-		ui/configdialog.h
-		ui/configdialog.cpp)
-
-	SET(LIBS
-		nvtt
-		${QT_QTCORE_LIBRARY}
-		${QT_QTGUI_LIBRARY}
-		${QT_QTOPENGL_LIBRARY})
-
-	QT4_WRAP_UI(UICS ui/configdialog.ui)
-	QT4_WRAP_CPP(MOCS ui/configdialog.h)
-	#QT4_ADD_RESOURCES(RCCS ui/configdialog.rc)
-
-	ADD_EXECUTABLE(nvcompressui MACOSX_BUNDLE ${SRCS} ${UICS} ${MOCS})
-	TARGET_LINK_LIBRARIES(nvcompressui ${LIBS})
-
-ENDIF(QT4_FOUND) # AND NOT MSVC)
-
--- a/src/nvtt/tools/compress.cpp
+++ b/src/nvtt/tools/compress.cpp
@ -21,20 +21,20 @@
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 // OTHER DEALINGS IN THE SOFTWARE.

-#include "cmdline.h"
+#include <nvcore/StrLib.h>
+#include <nvcore/StdStream.h>
+
+#include <nvimage/Image.h>
+#include <nvimage/DirectDrawSurface.h>

 #include <nvtt/nvtt.h>

-#include <nvimage/Image.h>    // @@ It might be a good idea to use FreeImage directly instead of ImageIO.
-#include <nvimage/ImageIO.h>
-#include <nvimage/FloatImage.h>
-#include <nvimage/DirectDrawSurface.h>
+#include "cmdline.h"

-#include <nvcore/Ptr.h>
-#include <nvcore/StrLib.h>
-#include <nvcore/StdStream.h>
-#include <nvcore/FileSystem.h>
-#include <nvcore/Timer.h>
+#include <time.h> // clock
+
+//#define WINDOWS_LEAN_AND_MEAN
+//#include <windows.h> // TIMER


 struct MyOutputHandler : public nvtt::OutputHandler
@ -141,17 +141,12 @@ int main(int argc, char *argv[])
 	bool noMipmaps = false;
 	bool fast = false;
 	bool nocuda = false;
+	bool silent = false;
 	bool bc1n = false;
 	nvtt::Format format = nvtt::Format_BC1;
-	bool premultiplyAlpha = false;
-	nvtt::MipmapFilter mipmapFilter = nvtt::MipmapFilter_Box;
-	bool loadAsFloat = false;

 	const char * externalCompressor = NULL;

-	bool silent = false;
-	bool dds10 = false;
-
 	nv::Path input;
 	nv::Path output;

@ -186,23 +181,6 @@ int main(int argc, char *argv[])
 		{
 			noMipmaps = true;
 		}
-		else if (strcmp("-premula", argv[i]) == 0)
-		{
-			premultiplyAlpha = true;
-		}
-		else if (strcmp("-mipfilter", argv[i]) == 0)
-		{
-			if (i+1 == argc) break;
-			i++;
-
-			if (strcmp("box", argv[i]) == 0) mipmapFilter = nvtt::MipmapFilter_Box;
-			else if (strcmp("triangle", argv[i]) == 0) mipmapFilter = nvtt::MipmapFilter_Triangle;
-			else if (strcmp("kaiser", argv[i]) == 0) mipmapFilter = nvtt::MipmapFilter_Kaiser;
-		}
-		else if (strcmp("-float", argv[i]) == 0)
-		{
-			loadAsFloat = true;
-		}

 		// Compression options.
 		else if (strcmp("-fast", argv[i]) == 0)
@ -260,15 +238,11 @@ int main(int argc, char *argv[])
 			}
 		}

-		// Output options
+		// Misc options
 		else if (strcmp("-silent", argv[i]) == 0)
 		{
 			silent = true;
 		}
-		else if (strcmp("-dds10", argv[i]) == 0)
-		{
-			dds10 = true;
-		}

 		else if (argv[i][0] != '-')
 		{
@ -300,16 +274,13 @@ int main(int argc, char *argv[])
 		printf("usage: nvcompress [options] infile [outfile]\n\n");
 		
 		printf("Input options:\n");
-		printf("  -color     \tThe input image is a color map (default).\n");
-		printf("  -alpha     \tThe input image has an alpha channel used for transparency.\n");
-		printf("  -normal    \tThe input image is a normal map.\n");
-		printf("  -tonormal  \tConvert input to normal map.\n");
-		printf("  -clamp     \tClamp wrapping mode (default).\n");
-		printf("  -repeat    \tRepeat wrapping mode.\n");
-		printf("  -nomips    \tDisable mipmap generation.\n");
-		printf("  -premula   \tPremultiply alpha into color channel.\n");
-		printf("  -mipfilter \tMipmap filter. One of the following: box, triangle, kaiser.\n");
-		printf("  -float     \tLoad as floating point image.\n\n");
+		printf("  -color   \tThe input image is a color map (default).\n");
+		printf("  -alpha     \tThe input image has an alpha channel used for transparency.\n");		
+		printf("  -normal  \tThe input image is a normal map.\n");
+		printf("  -tonormal\tConvert input to normal map.\n");
+		printf("  -clamp   \tClamp wrapping mode (default).\n");
+		printf("  -repeat  \tRepeat wrapping mode.\n");
+		printf("  -nomips  \tDisable mipmap generation.\n\n");

 		printf("Compression options:\n");
 		printf("  -fast    \tFast compression.\n");
@ -324,19 +295,10 @@ int main(int argc, char *argv[])
 		printf("  -bc4     \tBC4 format (ATI1)\n");
 		printf("  -bc5     \tBC5 format (3Dc/ATI2)\n\n");
 		
-		printf("Output options:\n");
-		printf("  -silent  \tDo not output progress messages\n");
-		printf("  -dds10   \tUse DirectX 10 DDS format\n\n");
-
 		return EXIT_FAILURE;
 	}

-	// Make sure input file exists.
-	if (!nv::FileSystem::exists(input.str()))
-	{
-		fprintf(stderr, "The file '%s' does not exist.\n", input.str());
-		return 1;
-	}
+	// @@ Make sure input file exists.
 	
 	// Set input options.
 	nvtt::InputOptions inputOptions;
@ -378,7 +340,7 @@ int main(int argc, char *argv[])
 		{
 			for (uint m = 0; m < mipmapCount; m++)
 			{
-				dds.mipmap(&mipmap, f, m);	// @@ Load as float.
+				dds.mipmap(&mipmap, f, m);
 				
 				inputOptions.setMipmapData(mipmap.pixels(), mipmap.width(), mipmap.height(), 1, f, m);
 			}
@ -386,42 +348,16 @@ int main(int argc, char *argv[])
 	}
 	else
 	{
-		if (nv::strCaseCmp(input.extension(), ".exr") == 0)
+		// Regular image.
+		nv::Image image;
+		if (!image.load(input))
 		{
-			loadAsFloat = true;
-		}
-
-		if (loadAsFloat)
-		{
-			nv::AutoPtr<nv::FloatImage> image(nv::ImageIO::loadFloat(input));
-
-			if (image == NULL)
-			{
-				fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
-				return EXIT_FAILURE;
-			}
-			
-			inputOptions.setFormat(nvtt::InputFormat_RGBA_32F);
-			inputOptions.setTextureLayout(nvtt::TextureType_2D, image->width(), image->height());
-
-			for (uint i = 0; i < image->componentNum(); i++)
-			{
-				inputOptions.setMipmapChannelData(image->channel(i), i, image->width(), image->height());
-			}
-		}
-		else
-		{
-			// Regular image.
-			nv::Image image;
-			if (!image.load(input))
-			{
-				fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
-				return 1;
-			}
-			
-			inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height());
-			inputOptions.setMipmapData(image.pixels(), image.width(), image.height());
+			fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
+			return EXIT_FAILURE;
 		}
+		
+		inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height());
+		inputOptions.setMipmapData(image.pixels(), image.width(), image.height());
 	}

 	if (wrapRepeat)
@ -460,31 +396,8 @@ int main(int argc, char *argv[])
 		inputOptions.setMipmapGeneration(false);
 	}

-	if (premultiplyAlpha)
-	{
-		inputOptions.setPremultiplyAlpha(true);
-		inputOptions.setAlphaMode(nvtt::AlphaMode_Premultiplied);
-	}
-	
-	inputOptions.setMipmapFilter(mipmapFilter);
-
 	nvtt::CompressionOptions compressionOptions;
 	compressionOptions.setFormat(format);
-	
-	if (format == nvtt::Format_BC2) {
-		// Dither alpha when using BC2.
-		compressionOptions.setQuantization(false, true, false);
-	}
-
-	if (format == nvtt::Format_RGBA)
-	{
-		// @@ Edit this to choose the desired pixel format:
-	//	compressionOptions.setPixelType(nvtt::PixelType_Float);
-	//	compressionOptions.setPixelFormat(16, 16, 16, 16);
-	//	compressionOptions.setPixelType(nvtt::PixelType_UnsignedNorm);
-	//	compressionOptions.setPixelFormat(16, 0, 0, 0);
-	}
-
 	if (fast)
 	{
 		compressionOptions.setQuality(nvtt::Quality_Fastest);
@ -515,11 +428,11 @@ int main(int argc, char *argv[])
 		return EXIT_FAILURE;
 	}

-	nvtt::Context context;
-	context.enableCudaAcceleration(!nocuda);
+	nvtt::Compressor compressor;
+	compressor.enableCudaAcceleration(!nocuda);

 	printf("CUDA acceleration ");
-	if (context.isCudaAccelerationEnabled())
+	if (compressor.isCudaAccelerationEnabled())
 	{
 		printf("ENABLED\n\n");
 	}
@ -528,7 +441,7 @@ int main(int argc, char *argv[])
 		printf("DISABLED\n\n");
 	}
 	
-	outputHandler.setTotal(context.estimateSize(inputOptions, compressionOptions));
+	outputHandler.setTotal(compressor.estimateSize(inputOptions, compressionOptions));
 	outputHandler.setDisplayProgress(!silent);

 	nvtt::OutputOptions outputOptions;
@ -536,25 +449,19 @@ int main(int argc, char *argv[])
 	outputOptions.setOutputHandler(&outputHandler);
 	outputOptions.setErrorHandler(&errorHandler);
 	
-	if (dds10)
-	{
-		outputOptions.setContainer(nvtt::Container_DDS10);
-	}
-
 //	printf("Press ENTER.\n");
 //	fflush(stdout);
 //	getchar();

-	Timer timer;
-	timer.start();
+	clock_t start = clock();
 	
-	if (!context.process(inputOptions, compressionOptions, outputOptions))
+	if (!compressor.process(inputOptions, compressionOptions, outputOptions))
 	{
 		return EXIT_FAILURE;
 	}
-	timer.stop();

-	printf("\rtime taken: %.3f seconds\n", timer.elapsed());
+	clock_t end = clock();
+	printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
 	
 	return EXIT_SUCCESS;
 }
--- a/src/nvtt/tools/ui/configdialog.cpp
+++ b/src/nvtt/tools/ui/configdialog.cpp
--- a/src/nvtt/tools/ui/configdialog.h
+++ b/src/nvtt/tools/ui/configdialog.h
--- a/src/nvtt/tools/ui/configdialog.ui
+++ b/src/nvtt/tools/ui/configdialog.ui
@ -12,6 +12,9 @@
  <property name="windowTitle" >
   <string>NVIDIA Texture Tools</string>
  </property>
+  <property name="windowIcon" >
+   <iconset/>
+  </property>
  <property name="sizeGripEnabled" >
   <bool>true</bool>
  </property>
--- a/src/nvtt/tools/decompress.cpp
+++ b/src/nvtt/tools/decompress.cpp
@ -31,161 +31,41 @@

 #include "cmdline.h"

-#include <time.h> // clock
-
 int main(int argc, char *argv[])
 {
 	MyAssertHandler assertHandler;
 	MyMessageHandler messageHandler;

-	bool forcenormal = false;
-	bool mipmaps = false;
-	bool faces = false;
-	bool savePNG = false;
-
-	nv::Path input;
-	nv::Path output;
-
-	// Parse arguments.
-	for (int i = 1; i < argc; i++)
- 	{
-		if (strcmp("-forcenormal", argv[i]) == 0)
-		{
-			forcenormal = true;
-		}
-		else if (strcmp("-mipmaps", argv[i]) == 0)
-		{
-			mipmaps = true;
-		}
-		else if (strcmp("-faces", argv[i]) == 0)
-		{
-			faces = true;
-		}
-		else if (strcmp("-format", argv[i]) == 0)
-		{
-			if (i+1 == argc) break;
-			i++;
-
-#ifdef HAVE_PNG
-			if (strcmp("png", argv[i]) == 0) savePNG = true;
-			else 
-#endif
-			if (strcmp("tga", argv[i]) == 0) savePNG = false;
-			else
-			{
-				fprintf(stderr, "Unsupported output format '%s', defaulting to 'tga'.\n", argv[i]);
-				savePNG = false;
-			}
-		}
-		else if (argv[i][0] != '-')
-		{
-			input = argv[i];
-
-			if (i+1 < argc && argv[i+1][0] != '-')
-			{
-				output = argv[i+1];
-			}
-			else
-			{
-				output.copy(input.str());
-			}
-
-			break;
-		}
-	}
-	
-	printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
-
-	if (input.isNull())
+	if (argc != 2)
 	{
-		printf("usage: nvdecompress [options] infile [outfile]\n\n");
-
-		printf("Note: the .tga or .png extension is forced on outfile\n\n");
-
-		printf("Input options:\n");
-		printf("  -forcenormal    \tThe input image is a normal map.\n");
-		printf("  -mipmaps        \tDecompress all mipmaps.\n");
-		printf("  -faces          \tDecompress all faces.\n");
-		printf("  -format <format>\tOutput format ('tga' or 'png').\n");
-
- 		return 1;
- 	}
+		printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
+		printf("usage: nvdecompress 'ddsfile'\n\n");
+		return 1;
+	}

 	// Load surface.
-	nv::DirectDrawSurface dds(input);
+	nv::DirectDrawSurface dds(argv[1]);
 	if (!dds.isValid())
 	{
-		fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str());
-		return 1;
-	}
-
-	if (!dds.isSupported() || dds.isTexture3D())
-	{
-		fprintf(stderr, "The file '%s' is not a supported DDS file.\n", input.str());
+		printf("The file '%s' is not a valid DDS file.\n", argv[1]);
 		return 1;
 	}
 	
-	uint faceCount;
-	if (dds.isTexture2D())
-	{
-		faceCount = 1;
-	}
-	else
-	{
-		nvCheck(dds.isTextureCube());
-		faceCount = 6;
+	nv::Path name(argv[1]);
+	name.stripExtension();
+	name.append(".tga");
+	
+	nv::StdOutputStream stream(name.str());
+	if (stream.isError()) {
+		printf("Error opening '%s' for writting\n", name.str());
+		return 1;
 	}
 	
-	uint mipmapCount = dds.mipmapCount();
-	
-	clock_t start = clock();
- 
-	// apply arguments
-	if (forcenormal)
-	{
-		dds.setNormalFlag(true);
-	}
-	if (!faces)
-	{
-		faceCount = 1;
-	}
-	if (!mipmaps)
-	{
-		mipmapCount = 1;
-	}
+	// @@ TODO: Add command line options to output mipmaps, cubemap faces, etc.
+	nv::Image img;
+	dds.mipmap(&img, 0, 0); // get first image
+	nv::ImageIO::saveTGA(stream, &img);

-	nv::Image mipmap;	
-	nv::Path name;
-
-	// strip extension, we force the tga extension
-	output.stripExtension();
-
-	// extract faces and mipmaps
-	for (uint f = 0; f < faceCount; f++)
-	{
-		for (uint m = 0; m < mipmapCount; m++)
-		{
-			dds.mipmap(&mipmap, f, m);
-	
-			// set output filename, if we are doing faces and/or mipmaps
-			name.copy(output);
-			if (faces) name.appendFormat("_face%d", f);
-			if (mipmaps) name.appendFormat("_mipmap%d", m);
-			name.append(savePNG ? ".png" : ".tga");
-			
-			nv::StdOutputStream stream(name.str());
-			if (stream.isError()) {
-				fprintf(stderr, "Error opening '%s' for writting\n", name.str());
-				return 1;
-			}
-			
-			nv::ImageIO::save(name, stream, &mipmap);
-		}
-	}
-
-	clock_t end = clock();
-	printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
-	
 	return 0;
 }

--- a/src/nvtt/tools/ui/main.cpp
+++ b/src/nvtt/tools/ui/main.cpp
--- a/src/nvtt/tools/nvtt-thumbnailer.schema.in
+++ b/src/nvtt/tools/nvtt-thumbnailer.schema.in
@ -1,26 +0,0 @@
-<gconfschemafile>
-  <schemalist>
-<schema>
-<key>/schemas/desktop/gnome/thumbnailers/image@x-dds/enable</key>
-<applyto>/desktop/gnome/thumbnailers/image@x-dds/enable</applyto>
-<owner>nvtt-thumbnailer</owner>
-<type>bool</type>
-<default>true</default>
-<locale name="C">
-<short></short>
-<long></long>
-</locale>
-</schema>
-<schema>
-<key>/schemas/desktop/gnome/thumbnailers/image@x-dds/command</key>
-<applyto>/desktop/gnome/thumbnailers/image@x-dds/command</applyto>
-<owner>nvtt-thumbnailer</owner>
-<type>string</type>
-<default>@CMAKE_INSTALL_PREFIX@/bin/nv-gnome-thumbnailer -s %s %i %o</default>
-<locale name="C">
-<short></short>
-<long></long>
-</locale>
-</schema>
-  </schemalist>
-</gconfschemafile>
--- a/src/nvtt/tools/resize.cpp
+++ b/src/nvtt/tools/resize.cpp
@ -176,7 +176,7 @@ int main(int argc, char *argv[])
 	result->setFormat(nv::Image::Format_ARGB);

 	nv::StdOutputStream stream(output);
-	nv::ImageIO::save(output, stream, result.ptr());
+	nv::ImageIO::saveTGA(stream, result.ptr());	// @@ Add generic save function. Add support for png too.
 	
 	return 0;
 }
--- a/src/nvtt/tools/thumbnailer.cpp
+++ b/src/nvtt/tools/thumbnailer.cpp
@ -1,158 +0,0 @@
-// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#include <nvcore/Ptr.h>
-#include <nvcore/StrLib.h>
-#include <nvcore/StdStream.h>
-#include <nvcore/Containers.h>
-
-#include <nvimage/Image.h>
-#include <nvimage/ImageIO.h>
-#include <nvimage/FloatImage.h>
-#include <nvimage/Filter.h>
-#include <nvimage/DirectDrawSurface.h>
-
-#include <nvmath/Color.h>
-#include <nvmath/Vector.h>
-
-#include <math.h>
-
-#include "cmdline.h"
-
-static bool loadImage(nv::Image & image, const char * fileName)
-{
-	if (nv::strCaseCmp(nv::Path::extension(fileName), ".dds") == 0)
-	{
-		nv::DirectDrawSurface dds(fileName);
-		if (!dds.isValid())
-		{
-			fprintf(stderr, "The file '%s' is not a valid DDS file.\n", fileName);
-			return false;
-		}
-		
-		dds.mipmap(&image, 0, 0); // get first image
-	}
-	else
-	{
-		// Regular image.
-		if (!image.load(fileName))
-		{
-			fprintf(stderr, "The file '%s' is not a supported image type.\n", fileName);
-			return false;
-		}
-	}
-
-	return true;
-}
-
-
-int main(int argc, char *argv[])
-{
-	//MyAssertHandler assertHandler;
-	MyMessageHandler messageHandler;
-
-	float gamma = 2.2f;
-	nv::Path input;
-	nv::Path output;
-	int size = 128;
-	
-	// Parse arguments.
-	for (int i = 1; i < argc; i++)
-	{
-		// Input options.
-		if (strcmp("-s", argv[i]) == 0)
-		{
-			if (i+1 < argc && argv[i+1][0] != '-') {
-				size = (int)atoi(argv[i+1]);
-				i++;
-			}
-		}
-		else if (argv[i][0] != '-')
-		{
-			input = argv[i];
-
-			if (i+1 < argc && argv[i+1][0] != '-') {
-				output = argv[i+1];
-			}
-			else
-			{
-				fprintf(stderr, "No output filename.\n");
-				return 1;
-			}
-
-			break;
-		}
-	}
-
-	if (input.isNull() || output.isNull())
-	{
-		printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");	
-		
-		printf("usage: nv-gnome-thumbnailer [options] input output\n\n");
-		
-		printf("Options:\n");
-		printf("  -s size\tThumbnail size (default = 128)\n");
-
-		return 1;
-	}
-	
-	nv::Image image;
-	if (!loadImage(image, input)) return 1;
-
-	nv::ImageIO::ImageMetaData metaData;
-	metaData.tagMap.add("Thumb::Image::Width", nv::StringBuilder().number (image.width()));
-	metaData.tagMap.add("Thumb::Image::Height", nv::StringBuilder().number (image.height()));
-
-	if ((image.width() > size) || (image.height() > size))
-	{
-		nv::FloatImage fimage(&image);
-		fimage.toLinear(0, 3, gamma);
-
-		uint thumbW, thumbH;
-		if (image.width() > image.height())
-		{
-			thumbW = size;
-			thumbH = uint ((float (image.height()) / float (image.width())) * size);
-		}
-		else
-		{
-			thumbW = uint ((float (image.width()) / float (image.height())) * size);
-			thumbH = size;
-		}
-		nv::AutoPtr<nv::FloatImage> fresult(fimage.resize(nv::BoxFilter(), thumbW, thumbH, nv::FloatImage::WrapMode_Clamp));
-		
-		nv::AutoPtr<nv::Image> result(fresult->createImageGammaCorrect(gamma));
-		result->setFormat(nv::Image::Format_ARGB);
-
-		nv::StdOutputStream stream(output);
-		nv::ImageIO::save(output, stream, result.ptr(), &metaData);
-	}
-	else
-	{
-		nv::StdOutputStream stream(output);
-		nv::ImageIO::save(output, stream, &image, &metaData);
-	}
-	
-	return 0;
-}
-