Work in progress.

Merging squish into nvtt. Using squish only to find endpoints, do discrete refinement afterwards.
2010-11-09 03:38:03 +00:00 · 2010-11-09 03:38:03 +00:00 · 49482d1441
commit 49482d1441
parent c532ffb34e
25 changed files with 1726 additions and 1008 deletions
--- a/project/vc9/nvconfig.h
+++ b/project/vc9/nvconfig.h
@ -12,10 +12,10 @@
 #endif
 #if !defined(_M_X64)
-#define HAVE_FREEIMAGE
+//#define HAVE_FREEIMAGE
-//#define HAVE_PNG
+#define HAVE_PNG
-//#define HAVE_JPEG
+#define HAVE_JPEG
-//#define HAVE_TIFF
+#define HAVE_TIFF
 #endif
 #endif // NV_CONFIG
--- a/project/vc9/nvcore/nvcore.vcproj
+++ b/project/vc9/nvcore/nvcore.vcproj
@ -288,18 +288,10 @@
 	<References>
 	</References>
 	<Files>
 		<File
 			RelativePath="..\..\..\src\nvcore\Algorithms.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvcore\Array.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvcore\Containers.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvcore\Debug.cpp"
 			>
--- a/project/vc9/nvimage/nvimage.vcproj
+++ b/project/vc9/nvimage/nvimage.vcproj
@ -287,118 +287,102 @@
 	<References>
 	</References>
 	<Files>
-		<Filter
+		<File
-			Name="Source Files"
+			RelativePath="..\..\..\src\nvimage\BlockDXT.cpp"
 			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
 			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
 			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvimage\BlockDXT.cpp"
+		<File
-				>
+			RelativePath="..\..\..\src\nvimage\BlockDXT.h"
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvimage\ColorBlock.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvimage\ColorSpace.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvimage\DirectDrawSurface.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvimage\Filter.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvimage\FloatImage.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvmath\Half.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvimage\Image.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvimage\ImageIO.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvimage\NormalMap.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvimage\Quantize.cpp"
 				>
 			</File>
 		</Filter>
 		<Filter
 			Name="Header Files"
 			Filter="h;hpp;hxx;hm;inl;inc;xsd"
 			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
 			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvimage\BlockDXT.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvimage\ColorBlock.cpp"
-			</File>
+			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvimage\ColorBlock.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvimage\ColorBlock.h"
-			</File>
+			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvimage\ColorSpace.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvimage\DirectDrawSurface.cpp"
-			</File>
+			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvimage\DirectDrawSurface.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvimage\DirectDrawSurface.h"
-			</File>
+			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvimage\Filter.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvimage\Filter.cpp"
-			</File>
+			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvimage\FloatImage.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvimage\Filter.h"
-			</File>
+			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvmath\Half.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvimage\FloatImage.cpp"
-			</File>
+			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvimage\Image.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvimage\FloatImage.h"
-			</File>
+			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvimage\ImageIO.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvmath\Half.cpp"
-			</File>
+			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvimage\NormalMap.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvmath\Half.h"
-			</File>
+			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvimage\nvimage.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvimage\Image.cpp"
-			</File>
+			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvimage\PsdFile.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvimage\Image.h"
-			</File>
+			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvimage\Quantize.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvimage\ImageIO.cpp"
-			</File>
+			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvimage\TgaFile.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvimage\ImageIO.h"
-			</File>
+			>
-		</Filter>
+		</File>
 		<File
 			RelativePath="..\..\..\src\nvimage\NormalMap.cpp"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvimage\NormalMap.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvimage\nvimage.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvimage\PixelFormat.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvimage\PsdFile.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvimage\Quantize.cpp"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvimage\Quantize.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvimage\TgaFile.h"
 			>
 		</File>
 	</Files>
 	<Globals>
 	</Globals>
--- a/project/vc9/nvmath/nvmath.vcproj
+++ b/project/vc9/nvmath/nvmath.vcproj
@ -288,62 +288,62 @@
 	<References>
 	</References>
 	<Files>
-		<Filter
+		<File
-			Name="Source Files"
+			RelativePath="..\..\..\src\nvmath\Box.h"
 			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
 			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
 			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvmath\Fitting.cpp"
+		<File
-				>
+			RelativePath="..\..\..\src\nvmath\Color.h"
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvmath\Half.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvmath\Plane.cpp"
 				>
 			</File>
 		</Filter>
 		<Filter
 			Name="Header Files"
 			Filter="h;hpp;hxx;hm;inl;inc;xsd"
 			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
 			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvmath\Box.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvmath\Fitting.cpp"
-			</File>
+			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvmath\Color.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvmath\Fitting.h"
-			</File>
+			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvmath\Fitting.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvmath\Half.cpp"
-			</File>
+			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvmath\Half.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvmath\Half.h"
-			</File>
+			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvmath\Matrix.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvmath\Matrix.h"
-			</File>
+			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvmath\nvmath.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvmath\nvmath.h"
-			</File>
+			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvmath\Plane.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvmath\Plane.cpp"
-			</File>
+			>
-			<File
+		</File>
-				RelativePath="..\..\..\src\nvmath\Vector.h"
+		<File
-				>
+			RelativePath="..\..\..\src\nvmath\Plane.h"
-			</File>
+			>
-		</Filter>
+		</File>
 		<File
 			RelativePath="..\..\..\src\nvmath\SimdVector.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvmath\SimdVector_SSE.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvmath\SimdVector_VE.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvmath\Vector.h"
 			>
 		</File>
 	</Files>
 	<Globals>
 	</Globals>
--- a/project/vc9/nvtt.sln
+++ b/project/vc9/nvtt.sln
@ -393,7 +393,8 @@ Global
 		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Mixed Platforms.Build.0 = Debug|x64
 		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.ActiveCfg = Debug|Win32
 		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.Build.0 = Debug|Win32
-		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|x64.ActiveCfg = Debug|Win32
+		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|x64.ActiveCfg = Debug|x64
 		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|x64.Build.0 = Debug|x64
 		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64
 		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64
 		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
--- a/project/vc9/nvtt/nvtt.vcproj
+++ b/project/vc9/nvtt/nvtt.vcproj
@ -45,7 +45,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="..;..\..\..\src;..\..\..\extern\gnuwin32\include;$(CUDA_INC_PATH)"
+				AdditionalIncludeDirectories="..;..\..\..\src;..\..\..\extern\gnuwin32\include;$(CUDA_PATH)\include"
 				PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS;_USRDLL;NVTT_EXPORTS;NVTT_SHARED;HAVE_CUDA;__SSE2__;__SSE__;__MMX__"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@ -68,7 +68,7 @@
 				AdditionalDependencies="libpng.lib jpeg.lib tiff.lib FreeImage.lib cudart.lib"
 				OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).dll"
 				LinkIncremental="2"
-				AdditionalLibraryDirectories="$(GnuWinDir)\lib; $(FreeImageDir); &quot;$(CUDA_LIB_PATH)\..\lib&quot;"
+				AdditionalLibraryDirectories="$(GnuWinDir)\lib; $(FreeImageDir); &quot;$(CUDA_PATH)\lib\Win32&quot;"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				RandomizedBaseAddress="1"
@ -127,7 +127,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="..;..\..\..\src;..\..\..\extern\gnuwin32\include;$(CUDA_INC_PATH)"
+				AdditionalIncludeDirectories="..;..\..\..\src;..\..\..\extern\gnuwin32\include;$(CUDA_PATH)\include"
 				PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS;_USRDLL;NVTT_EXPORTS;NVTT_SHARED;HAVE_CUDA;__SSE2__;__SSE__;__MMX__"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@ -150,7 +150,7 @@
 				AdditionalDependencies="cudart.lib"
 				OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).dll"
 				LinkIncremental="2"
-				AdditionalLibraryDirectories="&quot;$(CUDA_LIB_PATH)\..\lib64&quot;"
+				AdditionalLibraryDirectories="&quot;$(CUDA_PATH)\lib\x64&quot;"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				RandomizedBaseAddress="1"
@ -212,7 +212,7 @@
 				FavorSizeOrSpeed="0"
 				OmitFramePointers="true"
 				EnableFiberSafeOptimizations="true"
-				AdditionalIncludeDirectories="..;..\..\..\src;..\..\..\extern\gnuwin32\include;$(CUDA_INC_PATH)"
+				AdditionalIncludeDirectories="..;..\..\..\src;..\..\..\extern\gnuwin32\include;$(CUDA_PATH)\include"
 				PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;NVTT_EXPORTS;NVTT_SHARED;HAVE_CUDA;__SSE2__;__SSE__;__MMX__"
 				StringPooling="true"
 				RuntimeLibrary="2"
@ -220,7 +220,7 @@
 				EnableEnhancedInstructionSet="2"
 				UsePrecompiledHeader="0"
 				WarningLevel="3"
-				DebugInformationFormat="0"
+				DebugInformationFormat="3"
 			/>
 			<Tool
 				Name="VCManagedResourceCompilerTool"
@ -236,7 +236,7 @@
 				AdditionalDependencies="libpng.lib jpeg.lib tiff.lib FreeImage.lib cudart.lib"
 				OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).dll"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(GnuWinDir)\lib; $(FreeImageDir); &quot;$(CUDA_LIB_PATH)\..\lib&quot;"
+				AdditionalLibraryDirectories="$(GnuWinDir)\lib; $(FreeImageDir); &quot;$(CUDA_PATH)\lib\Win32&quot;"
 				SubSystem="2"
 				OptimizeReferences="2"
 				EnableCOMDATFolding="2"
@ -300,14 +300,14 @@
 				EnableIntrinsicFunctions="true"
 				OmitFramePointers="true"
 				WholeProgramOptimization="true"
-				AdditionalIncludeDirectories="..;..\..\..\src;..\..\..\extern\gnuwin32\include;$(CUDA_INC_PATH)"
+				AdditionalIncludeDirectories="..;..\..\..\src;..\..\..\extern\gnuwin32\include;$(CUDA_PATH)\include"
 				PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;NVTT_EXPORTS;NVTT_SHARED;HAVE_CUDA;__SSE2__;__SSE__;__MMX__"
 				StringPooling="true"
 				RuntimeLibrary="2"
 				EnableFunctionLevelLinking="true"
 				UsePrecompiledHeader="0"
 				WarningLevel="3"
-				DebugInformationFormat="0"
+				DebugInformationFormat="3"
 			/>
 			<Tool
 				Name="VCManagedResourceCompilerTool"
@ -323,7 +323,7 @@
 				AdditionalDependencies="cudart.lib"
 				OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).dll"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="&quot;$(CUDA_LIB_PATH)\..\lib64&quot;"
+				AdditionalLibraryDirectories="&quot;$(CUDA_PATH)\lib\x64&quot;"
 				SubSystem="2"
 				OptimizeReferences="2"
 				EnableCOMDATFolding="2"
@ -677,14 +677,22 @@
 	</References>
 	<Files>
 		<Filter
-			Name="Source Files"
+			Name="Resource Files"
-			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
+			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
-			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
+			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
 			>
 			<File
-				RelativePath="..\..\..\src\nvtt\CompressionOptions.cpp"
+				RelativePath=".\nvtt.rc"
 				>
 			</File>
 			<File
 				RelativePath=".\resource.h"
 				>
 			</File>
 		</Filter>
 		<Filter
 			Name="cuda"
 			>
 			<File
 				RelativePath="..\..\..\src\nvtt\cuda\CompressKernel.cu"
 				>
@ -767,34 +775,6 @@
 					/>
 				</FileConfiguration>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\CompressorDX10.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\CompressorDX11.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\CompressorDX9.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\CompressorDXT.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\CompressorRGB.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\CompressorRGBE.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\Context.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\cuda\ConvolveKernel.cu"
 				>
@ -867,84 +847,6 @@
 				RelativePath="..\..\..\src\nvtt\cuda\CudaCompressorDXT.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\InputOptions.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\nvtt.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\nvtt_wrapper.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\OptimalCompressDXT.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\OutputOptions.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\QuickCompressDXT.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\SingleColorLookup.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\TexImage.cpp"
 				>
 			</File>
 		</Filter>
 		<Filter
 			Name="Header Files"
 			Filter="h;hpp;hxx;hm;inl;inc;xsd"
 			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
 			>
 			<File
 				RelativePath="..\..\..\src\nvtt\CompressionOptions.h"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\Compressor.h"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\CompressorDX10.h"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\CompressorDX11.h"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\CompressorDX9.h"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\CompressorDXT.h"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\CompressorRGB.h"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\CompressorRGBE.h"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\Context.h"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\cuda\CudaCompressorDXT.h"
 				>
@ -953,53 +855,143 @@
 				RelativePath="..\..\..\src\nvtt\cuda\CudaMath.h"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.cpp"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.h"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\InputOptions.h"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\nvtt.h"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\nvtt_wrapper.h"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\OptimalCompressDXT.h"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\OutputOptions.h"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\QuickCompressDXT.h"
 				>
 			</File>
 			<File
 				RelativePath=".\resource.h"
 				>
 			</File>
 			<File
 				RelativePath="..\..\..\src\nvtt\TexImage.h"
 				>
 			</File>
 		</Filter>
-		<Filter
+		<File
-			Name="Resource Files"
+			RelativePath="..\..\..\src\nvtt\CompressionOptions.cpp"
 			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
 			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
 			>
-			<File
+		</File>
-				RelativePath=".\nvtt.rc"
+		<File
-				>
+			RelativePath="..\..\..\src\nvtt\CompressionOptions.h"
-			</File>
+			>
-		</Filter>
+		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\Compressor.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\CompressorDX10.cpp"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\CompressorDX10.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\CompressorDX11.cpp"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\CompressorDX11.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\CompressorDX9.cpp"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\CompressorDX9.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\CompressorDXT.cpp"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\CompressorDXT.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\CompressorRGB.cpp"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\CompressorRGB.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\CompressorRGBE.cpp"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\CompressorRGBE.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\Context.cpp"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\Context.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\InputOptions.cpp"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\InputOptions.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\nvtt.cpp"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\nvtt.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\nvtt_wrapper.cpp"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\nvtt_wrapper.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\OptimalCompressDXT.cpp"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\OptimalCompressDXT.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\OutputOptions.cpp"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\OutputOptions.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\QuickCompressDXT.cpp"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\QuickCompressDXT.h"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\SingleColorLookup.cpp"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\TexImage.cpp"
 			>
 		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\TexImage.h"
 			>
 		</File>
 	</Files>
 	<Globals>
 	</Globals>
--- a/src/nvcore/Memory.h
+++ b/src/nvcore/Memory.h
@ -11,6 +11,14 @@
 #include <new>	// new and delete
 // NV_ALIGN_16 is a declaration prefix that requests 16-byte alignment.
 // When NV_CC_GNUC is set it expands to the GCC-style __aligned__ attribute;
 // every other compiler gets the __declspec(align(16)) spelling.
 #if NV_CC_GNUC
 #   define NV_ALIGN_16 __attribute__ ((__aligned__ (16)))
 #else
 #   define NV_ALIGN_16 __declspec(align(16))
 #endif
 #define NV_OVERRIDE_ALLOC 0
 #if NV_OVERRIDE_ALLOC
--- a/src/nvimage/ColorBlock.cpp
+++ b/src/nvimage/ColorBlock.cpp
@ -458,7 +458,8 @@ float ColorBlock::volume() const
 }*/
-void FloatColorBlock::init(const Image * img, uint x, uint y)
+
 void ColorSet::init(const Image * img, uint x, uint y)
 {
    w = min(4U, img->width() - x);
    h = min(4U, img->height() - y);
@ -485,15 +486,15 @@ void FloatColorBlock::init(const Image * img, uint x, uint y)
    }
 }
-void FloatColorBlock::init(const FloatImage * img, uint x, uint y)
+void ColorSet::init(const FloatImage * img, uint x, uint y)
 {
 }
-void FloatColorBlock::init(const uint * data, uint w, uint h, uint x, uint y)
+void ColorSet::init(const uint * data, uint w, uint h, uint x, uint y)
 {
 }
-void FloatColorBlock::init(const float * data, uint w, uint h, uint x, uint y)
+void ColorSet::init(const float * data, uint w, uint h, uint x, uint y)
 {
 }
--- a/src/nvimage/ColorBlock.h
+++ b/src/nvimage/ColorBlock.h
@ -26,21 +26,8 @@ namespace nv
        void swizzle(uint x, uint y, uint z, uint w); // 0=r, 1=g, 2=b, 3=a, 4=0xFF, 5=0
        bool isSingleColor(Color32 mask = Color32(0xFF, 0xFF, 0xFF, 0x00)) const;
        //uint countUniqueColors() const;
        //Color32 averageColor() const;
        bool hasAlpha() const;
        //void diameterRange(Color32 * start, Color32 * end) const;
        //void luminanceRange(Color32 * start, Color32 * end) const;
        //void boundsRange(Color32 * start, Color32 * end) const;
        //void boundsRangeAlpha(Color32 * start, Color32 * end) const;
        //void sortColorsByAbsoluteValue();
        //void computeRange(const Vector3 & axis, Color32 * start, Color32 * end) const;
        //void sortColors(const Vector3 & axis);
        //float volume() const;
        // Accessors
        const Color32 * colors() const;
@ -93,19 +80,21 @@ namespace nv
    }
-    struct FloatColorBlock
+    struct ColorSet
    {
-        FloatColorBlock() : w(4), h(4) {}
+        ColorSet() : w(4), h(4) {}
-        FloatColorBlock(uint w, uint h) : w(w), h(h) {}
+        ColorSet(uint w, uint h) : w(w), h(h) {}
        void init(const Image * img, uint x, uint y);
        void init(const FloatImage * img, uint x, uint y);
        void init(const uint * data, uint w, uint h, uint x, uint y);
        void init(const float * data, uint w, uint h, uint x, uint y);
-        Vector4 color(uint x, uint y) const { 	nvDebugCheck(x < w && y < h); return colors[y * 4 + x]; }
+        Vector4 color(uint x, uint y) const { nvDebugCheck(x < w && y < h); return colors[y * 4 + x]; }
        Vector4 & color(uint x, uint y) { nvDebugCheck(x < w && y < h); return colors[y * 4 + x]; }
        Vector4 color(uint i) const { nvDebugCheck(i < 16); return colors[i]; }
        Vector4 & color(uint i) { nvDebugCheck(i < 16); return colors[i]; }
        Vector4 colors[16];
        uint w, h;
--- a/src/nvimage/FloatImage.h
+++ b/src/nvimage/FloatImage.h
@ -111,6 +111,9 @@ namespace nv
        float pixel(uint x, uint y, uint c) const;
        float & pixel(uint x, uint y, uint c);
        float pixel(uint idx, uint c) const;
        float & pixel(uint idx, uint c);
        float pixel(uint idx) const;
        float & pixel(uint idx);
@ -197,6 +200,24 @@ namespace nv
        return m_mem[(c * m_height + y) * m_width + x];
    }
    /// Get pixel component.
    /// Returns component @a c of the pixel at linear index @a idx. Each
    /// component occupies its own m_width * m_height run of m_mem, so the
    /// element is found at c * m_height * m_width + idx. The nvDebugCheck
    /// calls assert that the buffer exists and that both indices are in range.
    inline float FloatImage::pixel(uint idx, uint c) const
    {
        nvDebugCheck(m_mem != NULL);
        nvDebugCheck(idx < uint(m_width*m_height));
        nvDebugCheck(c < m_componentNum);
        return m_mem[c * m_height * m_width + idx];
    }
    /// Get pixel component.
    /// Mutable overload: returns a reference to component @a c of the pixel at
    /// linear index @a idx, i.e. the element m_mem[c * m_height * m_width + idx],
    /// so the caller can write through it. The nvDebugCheck calls assert that
    /// the buffer exists and that both indices are in range.
    inline float & FloatImage::pixel(uint idx, uint c)
    {
        nvDebugCheck(m_mem != NULL);
        nvDebugCheck(idx < uint(m_width*m_height));
        nvDebugCheck(c < m_componentNum);
        return m_mem[c * m_height * m_width + idx];
    }
    /// Get pixel component.
    inline float FloatImage::pixel(uint idx) const
    {
--- a/src/nvimage/ImageIO.cpp
+++ b/src/nvimage/ImageIO.cpp
@ -97,6 +97,7 @@ namespace nv
 	#endif // defined(HAVE_FREEIMAGE)
        static FloatImage * loadFloatDDS(Stream & s);
        static bool saveFloatDDS(const char * fileName, Stream & s, const FloatImage * img, uint base_component, uint num_components);
 	} // ImageIO namespace
 } // nv namespace
@ -264,6 +265,12 @@ bool nv::ImageIO::saveFloat(const char * fileName, Stream & s, const FloatImage
 		return false;
 	}
    const char * extension = Path::extension(fileName);
 	if (strCaseCmp(extension, ".dds") == 0) {
 		return saveFloatDDS(fileName, s, fimage, baseComponent, componentCount);
 	}
 #if defined(HAVE_FREEIMAGE)
 	FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName);
 	if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) {
@ -1792,3 +1799,42 @@ FloatImage * nv::ImageIO::loadFloatDDS(Stream & s)
    return NULL;
 }
 bool nv::ImageIO::saveFloatDDS(const char * fileName, Stream & s, const FloatImage * img, uint base_component, uint num_components)
 {
    nvCheck(s.isSaving());
 	nvCheck(!s.isError());
    if (num_components != 4) return false;
    static const uint D3DFMT_A16B16G16R16F = 113;
    DDSHeader header;
    header.setTexture2D();
    header.setWidth(img->width());
    header.setHeight(img->height());
    header.setFormatCode(D3DFMT_A16B16G16R16F);
    // ...
    s << header;
    uint32 * r = (uint32 *)img->channel(base_component + 0);
    uint32 * g = (uint32 *)img->channel(base_component + 1);
    uint32 * b = (uint32 *)img->channel(base_component + 2);
    uint32 * a = (uint32 *)img->channel(base_component + 3);
    const uint size = img->width() * img->height();
    for (uint i = 0; i < size; i++) {
        uint16 R = half_from_float( *r++ );
        uint16 G = half_from_float( *g++ );
        uint16 B = half_from_float( *b++ );
        uint16 A = half_from_float( *a++ );
        s.serialize(&R, sizeof(uint16));
        s.serialize(&G, sizeof(uint16));
        s.serialize(&B, sizeof(uint16));
        s.serialize(&A, sizeof(uint16));
    }
    return true;
 }
--- a/src/nvimage/PixelFormat.h
+++ b/src/nvimage/PixelFormat.h
@ -85,7 +85,7 @@ namespace nv
            float result;
            int offset = 0;
            do {
-                uint i = offset + f * (float(1 << inbits) - 1);
+                uint i = offset + uint(f * (float(1 << inbits) - 1));
                i = convert(i, inbits, outbits);
                result = float(i) / (float(1 << outbits) - 1);
                offset++;
--- a/src/nvmath/SimdVector.h
+++ b/src/nvmath/SimdVector.h
@ -0,0 +1,24 @@
 // This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
 #include "Vector.h" // Vector3, Vector4
 // Choose a SIMD back end unless the build has already selected one.
 #if !defined(NV_USE_ALTIVEC)
 #   define NV_USE_ALTIVEC POSH_CPU_PPC
 #endif
 // On x86 / x86-64 default to the SSE2 code path (level 2).
 #if !defined(NV_USE_SSE) && (NV_CPU_X86 || NV_CPU_X86_64)
 #   define NV_USE_SSE 2
 #endif
 // Pull in the implementation matching each enabled instruction set.
 #if NV_USE_ALTIVEC
 #   include "SimdVector_VE.h"
 #endif
 #if NV_USE_SSE
 #   include "SimdVector_SSE.h"
 #endif
--- a/src/nvmath/SimdVector_SSE.h
+++ b/src/nvmath/SimdVector_SSE.h
@ -0,0 +1,201 @@
 /* -----------------------------------------------------------------------------
 	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
 	Permission is hereby granted, free of charge, to any person obtaining
 	a copy of this software and associated documentation files (the 
 	"Software"), to	deal in the Software without restriction, including
 	without limitation the rights to use, copy, modify, merge, publish,
 	distribute, sublicense, and/or sell copies of the Software, and to 
 	permit persons to whom the Software is furnished to do so, subject to 
 	the following conditions:
 	The above copyright notice and this permission notice shall be included
 	in all copies or substantial portions of the Software.
 	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
 	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
 	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
 	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
 	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #ifndef NV_SIMD_VECTOR_SSE_H
 #define NV_SIMD_VECTOR_SSE_H
 #include <xmmintrin.h>
 #if (NV_USE_SSE > 1)
 #include <emmintrin.h>
 #endif
 namespace nv {
    // Wrapper around one SSE register holding four packed floats.
    // Everything is public: the free functions that follow this class read
    // `vec` directly and take their parameters as SimdVector::Arg.
    class SimdVector
    {
    public:
        __m128 vec;
        // Parameter-passing type used by the operators and helpers.
        typedef SimdVector const& Arg;
        // Leaves vec uninitialized.
        SimdVector() {}
        // Broadcasts f into all four lanes.
        explicit SimdVector(float f) : vec(_mm_set1_ps(f)) {}
        explicit SimdVector(__m128 v) : vec(v) {}
        SimdVector(const SimdVector & arg) : vec(arg.vec) {}
        SimdVector & operator=(const SimdVector & arg)
        {
            vec = arg.vec;
            return *this;
        }
        // Loads four floats from v using the aligned load _mm_load_ps,
        // so v must point to 16-byte aligned storage.
        SimdVector(const float * v)
        {
            vec = _mm_load_ps( v );
        }
        // Lane order is x, y, z, w (x in the lowest lane).
        SimdVector(float x, float y, float z, float w)
        {
            vec = _mm_setr_ps( x, y, z, w );
        }
        // Returns the lowest lane.
        float toFloat() const
        {
            NV_ALIGN_16 float f;
            _mm_store_ss(&f, vec);
            return f;
        }
        // Returns the first three lanes.
        Vector3 toVector3() const
        {
            NV_ALIGN_16 float c[4];
            _mm_store_ps( c, vec );
            return Vector3( c[0], c[1], c[2] );
        }
        // Returns all four lanes.
        Vector4 toVector4() const
        {
            NV_ALIGN_16 float c[4];
            // Store into the aligned local buffer that is read back below.
            _mm_store_ps( c, vec );
            return Vector4( c[0], c[1], c[2], c[3] );
        }
        // SSE_SPLAT(a) builds the shuffle immediate that selects lane `a`
        // for every output lane.
 #define SSE_SPLAT( a ) ((a) | ((a) << 2) | ((a) << 4) | ((a) << 6))
        SimdVector splatX() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 0 ) ) ); }
        SimdVector splatY() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 1 ) ) ); }
        SimdVector splatZ() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 2 ) ) ); }
        SimdVector splatW() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 3 ) ) ); }
 #undef SSE_SPLAT
        // Lane-wise compound assignment operators.
        SimdVector& operator+=( Arg v )
        {
            vec = _mm_add_ps( vec, v.vec );
            return *this;
        }
        SimdVector& operator-=( Arg v )
        {
            vec = _mm_sub_ps( vec, v.vec );
            return *this;
        }
        SimdVector& operator*=( Arg v )
        {
            vec = _mm_mul_ps( vec, v.vec );
            return *this;
        }
    };
    // Lane-wise sum. Marked inline because it is defined in a header; without
    // it every including translation unit would emit its own definition.
    inline SimdVector operator+( SimdVector::Arg left, SimdVector::Arg right  )
    {
        return SimdVector( _mm_add_ps( left.vec, right.vec ) );
    }
    // Lane-wise difference (left - right). Marked inline because it is
    // defined in a header that may be included from several translation units.
    inline SimdVector operator-( SimdVector::Arg left, SimdVector::Arg right  )
    {
        return SimdVector( _mm_sub_ps( left.vec, right.vec ) );
    }
    // Lane-wise product. Marked inline because it is defined in a header
    // that may be included from several translation units.
    inline SimdVector operator*( SimdVector::Arg left, SimdVector::Arg right  )
    {
        return SimdVector( _mm_mul_ps( left.vec, right.vec ) );
    }
    // Returns a*b + c, computed per lane as a separate multiply and add.
    // Marked inline because it is defined in a header.
    inline SimdVector multiplyAdd( SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c )
    {
        return SimdVector( _mm_add_ps( _mm_mul_ps( a.vec, b.vec ), c.vec ) );
    }
    // Returns -( a*b - c ), i.e. c - a*b, per lane.
    // Marked inline because it is defined in a header.
    inline SimdVector negativeMultiplySubtract( SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c )
    {
        return SimdVector( _mm_sub_ps( c.vec, _mm_mul_ps( a.vec, b.vec ) ) );
    }
    // Per-lane approximate reciprocal: hardware estimate refined once.
    // Marked inline because it is defined in a header.
    inline SimdVector reciprocal( SimdVector::Arg v )
    {
        // get the reciprocal estimate
        __m128 estimate = _mm_rcp_ps( v.vec );
        // one round of Newton-Raphson refinement: e + e*(1 - e*v)
        __m128 diff = _mm_sub_ps( _mm_set1_ps( 1.0f ), _mm_mul_ps( estimate, v.vec ) );
        return SimdVector( _mm_add_ps( _mm_mul_ps( diff, estimate ), estimate ) );
    }
    // Lane-wise minimum. Marked inline because it is defined in a header
    // that may be included from several translation units.
    inline SimdVector min( SimdVector::Arg left, SimdVector::Arg right )
    {
        return SimdVector( _mm_min_ps( left.vec, right.vec ) );
    }
    // Lane-wise maximum. Marked inline because it is defined in a header
    // that may be included from several translation units.
    inline SimdVector max( SimdVector::Arg left, SimdVector::Arg right )
    {
        return SimdVector( _mm_max_ps( left.vec, right.vec ) );
    }
    // Truncates each lane by converting to a 32-bit integer (with truncation)
    // and back to float. Marked inline because it is defined in a header.
    inline SimdVector truncate( SimdVector::Arg v )
    {
 #if (NV_USE_SSE == 1)
        // SSE1 only: convert two lanes at a time through MMX registers.
        // convert to ints
        __m128 input = v.vec;
        __m64 lo = _mm_cvttps_pi32( input );
        __m64 hi = _mm_cvttps_pi32( _mm_movehl_ps( input, input ) );
        // convert to floats
        __m128 part = _mm_movelh_ps( input, _mm_cvtpi32_ps( input, hi ) );
        __m128 truncated = _mm_cvtpi32_ps( part, lo );
        // clear out the MMX multimedia state to allow FP calls later
        _mm_empty();
        return SimdVector( truncated );
 #else
        // use SSE2 instructions
        return SimdVector( _mm_cvtepi32_ps( _mm_cvttps_epi32( v.vec ) ) );
 #endif
    }
    // Lane-wise equality test; the result holds the per-lane comparison masks
    // produced by _mm_cmpeq_ps. Marked inline because it is defined in a header.
    inline SimdVector compareEqual( SimdVector::Arg left, SimdVector::Arg right )
    {
        return SimdVector( _mm_cmpeq_ps( left.vec, right.vec ) );
    }
    // Bitwise select: takes bits from `on` where `bits` is set and from `off`
    // where it is clear. Marked inline because it is defined in a header.
    inline SimdVector select( SimdVector::Arg off, SimdVector::Arg on, SimdVector::Arg bits )
    {
        __m128 a = _mm_andnot_ps( bits.vec, off.vec );
        __m128 b = _mm_and_ps( bits.vec, on.vec );
        return SimdVector( _mm_or_ps( a, b ) );
    }
    // Returns true when at least one lane of left is less than the matching
    // lane of right. Marked inline because it is defined in a header.
    inline bool compareAnyLessThan( SimdVector::Arg left, SimdVector::Arg right )
    {
        __m128 bits = _mm_cmplt_ps( left.vec, right.vec );
        // Collapse the per-lane masks into one integer; non-zero means a hit.
        int value = _mm_movemask_ps( bits );
        return value != 0;
    }
 } // namespace nv
 #endif // NV_SIMD_VECTOR_SSE_H
--- a/src/nvmath/SimdVector_VE.h
+++ b/src/nvmath/SimdVector_VE.h
@ -0,0 +1,187 @@
 /* -----------------------------------------------------------------------------
 	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
 	Permission is hereby granted, free of charge, to any person obtaining
 	a copy of this software and associated documentation files (the 
 	"Software"), to	deal in the Software without restriction, including
 	without limitation the rights to use, copy, modify, merge, publish,
 	distribute, sublicense, and/or sell copies of the Software, and to 
 	permit persons to whom the Software is furnished to do so, subject to 
 	the following conditions:
 	The above copyright notice and this permission notice shall be included
 	in all copies or substantial portions of the Software.
 	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
 	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
 	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
 	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
 	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #ifndef NV_SIMD_VECTOR_VE_H
 #define NV_SIMD_VECTOR_VE_H
 #ifndef __APPLE_ALTIVEC__
 #include <altivec.h>
 #undef bool
 #endif
 namespace nv {
    class SimdVector
    {
        vector float vec;
        typedef SimdVector Arg;
        SimdVector() {}
        explicit SimdVector(float v) : vec((vector float)(X)) {}	
        explicit SimdVector(vector float v) : vec(v) {}
        SimdVector(const SimdVector & arg) : vec(arg.vec) {}
        SimdVector& operator=(const SimdVector & arg)
        {
            vec = arg.vec;
            return *this;
        }
        SimdVector(const float * v)
        {
            union { vector float v; float c[4]; } u;
            u.c[0] = v[0];
            u.c[1] = v[1];
            u.c[2] = v[2];
            u.c[3] = v[3];
            vec = u.v;
        }
        SimdVector(float x, float y, float z, float w)
        {
            union { vector float v; float c[4]; } u;
            u.c[0] = x;
            u.c[1] = y;
            u.c[2] = z;
            u.c[3] = w;
            vec = u.v;
        }
        float toFloat() const
        {
            union { vector float v; float c[4]; } u;
            u.v = vec;
            return u.c[0];
        }
        Vector3 toVector3() const
        {
            union { vector float v; float c[4]; } u;
            u.v = vec;
            return Vector3( u.c[0], u.c[1], u.c[2] );
        }
        Vector4 toVector4() const
        {
            union { vector float v; float c[4]; } u;
            u.v = vec;
            return Vector4( u.c[0], u.c[1], u.c[2], u.c[3] );
        }
        SimdVector splatX() const { return SimdVector( vec_splat( vec, 0 ) ); }
        SimdVector splatY() const { return SimdVector( vec_splat( vec, 1 ) ); }
        SimdVector splatZ() const { return SimdVector( vec_splat( vec, 2 ) ); }
        SimdVector splatW() const { return SimdVector( vec_splat( vec, 3 ) ); }
        SimdVector& operator+=( Arg v )
        {
            vec = vec_add( vec, v.vec );
            return *this;
        }
        SimdVector& operator-=( Arg v )
        {
            vec = vec_sub( vec, v.vec );
            return *this;
        }
        SimdVector& operator*=( Arg v )
        {
            vec = vec_madd( vec, v.vec, ( vector float )( -0.0f ) );
            return *this;
        }
    };
    // Component-wise sum.
    // Declared inline because this free function is defined in a header;
    // a non-inline definition would be duplicated in every including
    // translation unit.
    inline SimdVector operator+( SimdVector::Arg left, SimdVector::Arg right  )
    {
        return SimdVector( vec_add( left.vec, right.vec ) );
    }
    // Component-wise difference (left - right).
    // Declared inline because this free function is defined in a header.
    inline SimdVector operator-( SimdVector::Arg left, SimdVector::Arg right  )
    {
        return SimdVector( vec_sub( left.vec, right.vec ) );
    }
    // Component-wise product, computed as a multiply-add with a -0.0f addend
    // so the product is returned unchanged.
    // Declared inline because this free function is defined in a header.
    inline SimdVector operator*( SimdVector::Arg left, SimdVector::Arg right  )
    {
        return SimdVector( vec_madd( left.vec, right.vec, ( vector float )( -0.0f ) ) );
    }
    // Returns a*b + c, evaluated per component.
    // Declared inline because this free function is defined in a header.
    inline SimdVector multiplyAdd( SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c )
    {
        return SimdVector( vec_madd( a.vec, b.vec, c.vec ) );
    }
    // Returns -( a*b - c ), i.e. c - a*b, evaluated per component.
    // Declared inline because this free function is defined in a header.
    inline SimdVector negativeMultiplySubtract( SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c )
    {
        return SimdVector( vec_nmsub( a.vec, b.vec, c.vec ) );
    }
    // Approximates 1/v per component: hardware estimate followed by a single
    // Newton-Raphson step, e' = e + e*(1 - e*v).
    // Declared inline because this free function is defined in a header.
    inline SimdVector reciprocal( SimdVector::Arg v )
    {
        // get the reciprocal estimate
        vector float estimate = vec_re( v.vec );
        // one round of Newton-Raphson refinement
        // diff = -(estimate*v - 1) = 1 - estimate*v
        vector float diff = vec_nmsub( estimate, v.vec, ( vector float )( 1.0f ) );
        return SimdVector( vec_madd( diff, estimate, estimate ) );
    }
    // Returns the per-component minimum of 'left' and 'right'.
    // Declared inline because this free function is defined in a header.
    inline SimdVector min( SimdVector::Arg left, SimdVector::Arg right )
    {
        return SimdVector( vec_min( left.vec, right.vec ) );
    }
    // Returns the per-component maximum of 'left' and 'right'.
    // Declared inline because this free function is defined in a header.
    inline SimdVector max( SimdVector::Arg left, SimdVector::Arg right )
    {
        return SimdVector( vec_max( left.vec, right.vec ) );
    }
    // Rounds every component of 'v' toward zero.
    // Declared inline because this free function is defined in a header.
    inline SimdVector truncate( SimdVector::Arg v )
    {
        return SimdVector( vec_trunc( v.vec ) );
    }
    // Per-component equality test. The comparison mask produced by vec_cmpeq
    // is reinterpreted as a float vector so it can be passed to select().
    // Declared inline because this free function is defined in a header.
    inline SimdVector compareEqual( SimdVector::Arg left, SimdVector::Arg right )
    {
        return SimdVector( ( vector float )vec_cmpeq( left.vec, right.vec ) );
    }
    // Bitwise blend: bits set in 'bits' pick the corresponding bit from 'on',
    // clear bits pick it from 'off'.
    // Declared inline because this free function is defined in a header.
    inline SimdVector select( SimdVector::Arg off, SimdVector::Arg on, SimdVector::Arg bits )
    {
        return SimdVector( vec_sel( off.vec, on.vec, ( vector unsigned int )bits.vec ) );
    }
    // Returns true when at least one component of 'left' is strictly less
    // than the matching component of 'right'.
    // Declared inline because this free function is defined in a header.
    inline bool compareAnyLessThan( SimdVector::Arg left, SimdVector::Arg right ) 
    {
        return vec_any_lt( left.vec, right.vec ) != 0;
    }
 } // namespace nv
 #endif // NV_SIMD_VECTOR_VE_H
--- a/src/nvtt/CompressorDX9.cpp
+++ b/src/nvtt/CompressorDX9.cpp
--- a/src/nvtt/QuickCompressDXT.cpp
+++ b/src/nvtt/QuickCompressDXT.cpp
@ -217,6 +217,33 @@ inline static uint computeIndices3(const ColorBlock & rgba, Vector3::Arg maxColo
 	return indices;
 }
 inline static uint computeIndices3(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
 {
 	// Three-entry palette: entry 0 is minColor, entry 1 is maxColor and
 	// entry 2 is their midpoint.
 	Vector3 entry0 = minColor;
 	Vector3 entry1 = maxColor;
 	Vector3 entry2 = (entry0 + entry1) * 0.5f;

 	// Two selector bits per texel, texel 0 in the least significant bits.
 	uint packed = 0;
 	for (int texel = 0; texel < 16; texel++)
 	{
 		const float dist0 = colorDistance(entry0, block[texel]);
 		const float dist1 = colorDistance(entry1, block[texel]);
 		const float dist2 = colorDistance(entry2, block[texel]);

 		// Entry 0 wins only when strictly closer than both others; entry 1
 		// wins only when strictly closer than the midpoint; the midpoint
 		// takes every remaining case, including ties.
 		uint selector = 2;
 		if (dist0 < dist1 && dist0 < dist2)
 		{
 			selector = 0;
 		}
 		else if (dist1 < dist2)
 		{
 			selector = 1;
 		}

 		packed |= selector << (2 * texel);
 	}
 	return packed;
 }
 static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock)
 {
@ -266,7 +293,7 @@ static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock)
 	dxtBlock->indices = computeIndices4(block, a, b);
 }
-/*static void optimizeEndPoints3(Vector3 block[16], BlockDXT1 * dxtBlock)
+static void optimizeEndPoints3(Vector3 block[16], BlockDXT1 * dxtBlock)
 {
 	float alpha2_sum = 0.0f;
 	float beta2_sum = 0.0f;
@ -278,7 +305,7 @@ static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock)
 	{
 		const uint bits = dxtBlock->indices >> (2 * i);
-		float beta = (bits & 1);
+		float beta = float(bits & 1);
 		if (bits & 2) beta = 0.5f;
 		float alpha = 1.0f - beta;
@ -312,7 +339,7 @@ static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock)
 	dxtBlock->col0 = Color16(color1);
 	dxtBlock->col1 = Color16(color0);
 	dxtBlock->indices = computeIndices3(block, a, b);
-}*/
+}
 namespace
 {
@ -571,7 +598,7 @@ void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
 		dxtBlock->col0 = Color16(color1);
 		dxtBlock->col1 = Color16(color0);
-		dxtBlock->indices = computeIndices3(rgba, maxColor, minColor);
+		dxtBlock->indices = computeIndices3(block, maxColor, minColor);
 		//	optimizeEndPoints(block, dxtBlock);
 	}
@ -634,3 +661,51 @@ void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock,
 	compressDXT1(rgba, &dxtBlock->color);
 	compressDXT5A(rgba, &dxtBlock->alpha, iterationCount);
 }
 void QuickCompress::outputBlock4(const ColorBlock & rgba, const Vector3 & start, const Vector3 & end, BlockDXT1 * dxtBlock)
 {
 	Vector3 block[16];
 	extractColorBlockRGB(rgba, block);
    Vector3 maxColor = start * 255;
    Vector3 minColor = end * 255;
 	uint16 color0 = roundAndExpand(&maxColor);
 	uint16 color1 = roundAndExpand(&minColor);
 	if (color0 < color1)
 	{
 		swap(maxColor, minColor);
 		swap(color0, color1);
 	}
 	dxtBlock->col0 = Color16(color0);
 	dxtBlock->col1 = Color16(color1);
 	dxtBlock->indices = computeIndices4(block, maxColor, minColor);
 	optimizeEndPoints4(block, dxtBlock);
 }
 // Writes a three-colour DXT1 block from the endpoint pair (start, end): the
 // endpoints are scaled by 255, quantized, ordered so that col0 <= col1,
 // indices are computed with computeIndices3, and the result is refined with
 // optimizeEndPoints3.
 void QuickCompress::outputBlock3(const ColorBlock & rgba, const Vector3 & start, const Vector3 & end, BlockDXT1 * dxtBlock)
 {
 	Vector3 block[16];
 	extractColorBlockRGB(rgba, block);

 	// minColor always tracks the colour stored in col0 and maxColor the one
 	// stored in col1.
 	Vector3 minColor = start * 255;
 	Vector3 maxColor = end * 255;
 	uint16 color0 = roundAndExpand(&minColor);
 	uint16 color1 = roundAndExpand(&maxColor);
 	if (color0 > color1)
 	{
 		swap(maxColor, minColor);
 		swap(color0, color1);
 	}
 	dxtBlock->col0 = Color16(color0);
 	dxtBlock->col1 = Color16(color1);

 	// computeIndices3 builds palette entry 0 from its last argument and
 	// entry 1 from its middle argument. Index 0 decodes to col0, so the
 	// colour stored in col0 (minColor) must be passed last. The previous
 	// code passed the colour stored in col1 there, which inverted indices
 	// 0 and 1 relative to the stored endpoints.
 	dxtBlock->indices = computeIndices3(block, maxColor, minColor);
 	optimizeEndPoints3(block, dxtBlock);
 }
--- a/src/nvtt/QuickCompressDXT.h
+++ b/src/nvtt/QuickCompressDXT.h
@ -35,6 +35,7 @@ namespace nv
 	struct BlockDXT5;
 	struct AlphaBlockDXT3;
 	struct AlphaBlockDXT5;
    class Vector3;
 	namespace QuickCompress
 	{
@ -45,6 +46,9 @@ namespace nv
 		void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount=8);
 		void compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount=8);
        void outputBlock4(const ColorBlock & rgba, const Vector3 & start, const Vector3 & end, BlockDXT1 * block);
        void outputBlock3(const ColorBlock & rgba, const Vector3 & start, const Vector3 & end, BlockDXT1 * block);
 	}
 } // nv namespace
--- a/src/nvtt/TexImage.cpp
+++ b/src/nvtt/TexImage.cpp
@ -299,6 +299,46 @@ const float * TexImage::data() const
    return m->image->channel(0);
 }
 // Accumulates a histogram of one channel into binPtr[0 .. binCount-1].
 //   channel   - index of the channel to sample.
 //   rangeMin  - value mapped to the lower edge of bin 0.
 //   rangeMax  - value mapped to the upper edge of the last bin.
 //   binCount  - number of bins in binPtr.
 //   binPtr    - bin counters; they are incremented, never cleared, so several
 //               histograms can be accumulated into the same array.
 // Values outside [rangeMin, rangeMax) are counted in the first or last bin.
 // Nothing is written when there is no image, no bins, or an empty range.
 void TexImage::histogram(int channel, float rangeMin, float rangeMax, int binCount, int * binPtr) const
 {
    if (m->image == NULL) return;
    // With no bins the clamp below would index binPtr[-1].
    if (binPtr == NULL || binCount <= 0) return;
    // An empty (or NaN) range has no meaningful bin width.
    if (!(rangeMax > rangeMin)) return;
    const float * c = m->image->channel(channel);
    // Map [rangeMin, rangeMax] onto [0, binCount]. The width of the range,
    // not rangeMax alone, determines the scale; the two only coincide when
    // rangeMin is zero.
    const float scale = float(binCount) / (rangeMax - rangeMin);
    const float bias = - scale * rangeMin;
    const uint count = m->image->width() * m->image->height();
    for (uint i = 0; i < count; i++) {
        float f = c[i] * scale + bias;
        int idx = ifloor(f);
        if (idx < 0) idx = 0;
        if (idx > binCount-1) idx = binCount-1;
        binPtr[idx]++;
    }
 }
 // Computes the smallest and largest value stored in one channel.
 //   channel  - index of the channel to scan.
 //   rangeMin - receives the minimum value found.
 //   rangeMax - receives the maximum value found.
 // When there is no image (or it has no pixels) the outputs are FLT_MAX and
 // -FLT_MAX respectively, i.e. an empty range.
 void TexImage::range(int channel, float * rangeMin, float * rangeMax)
 {
    float lo = FLT_MAX;
    float hi = -FLT_MAX;
    FloatImage * img = m->image;
    // Guard against a missing image, as histogram() does, instead of
    // dereferencing a null pointer.
    if (img != NULL) {
        float * c = img->channel(channel);
        const uint count = img->width() * img->height();
        for (uint p = 0; p < count; p++) {
            float f = c[p];
            if (f < lo) lo = f;
            if (f > hi) hi = f;
        }
    }
    *rangeMin = lo;
    *rangeMax = hi;
 }
 bool TexImage::load(const char * fileName)
 {
@ -320,8 +360,6 @@ bool TexImage::load(const char * fileName)
 bool TexImage::save(const char * fileName) const
 {
 #pragma NV_MESSAGE("TODO: Add support for DDS textures in TexImage::save")
    if (m->image != NULL)
    {
        return ImageIO::saveFloat(fileName, m->image, 0, 4);
@ -989,33 +1027,19 @@ void TexImage::scaleAlphaToCoverage(float coverage, float alphaRef/*= 0.5f*/)
    m->image->scaleAlphaToCoverage(coverage, alphaRef, 3);
 }
-bool TexImage::normalizeRange(float * rangeMin, float * rangeMax)
+/*bool TexImage::normalizeRange(float * rangeMin, float * rangeMax)
 {
    if (m->image == NULL) return false;
-    Vector2 range(FLT_MAX, -FLT_MAX);
+    range(0, rangeMin, rangeMax);
-    // Compute range.
+    if (*rangeMin == *rangeMax) {
    FloatImage * img = m->image;
    const uint count = img->count();
    for (uint p = 0; p < count; p++) {
        float c = img->pixel(p);
        if (c < range.x) range.x = c;
        if (c > range.y) range.y = c;
    }
    if (range.x == range.y) {
        // Single color image.
        return false;
    }
-    *rangeMin = range.x;
+    const float scale = 1.0f / (*rangeMax - *rangeMin);
-    *rangeMax = range.y;
+    const float bias = *rangeMin * scale;
    const float scale = 1.0f / (range.y - range.x);
    const float bias = range.x * scale;
    if (range.x == 0.0f && range.y == 1.0f) {
        // Already normalized.
@ -1029,7 +1053,7 @@ bool TexImage::normalizeRange(float * rangeMin, float * rangeMax)
    //img->clamp(0, 4, 0.0f, 1.0f);
    return true;
-}
+}*/
 // Ideally you should compress/quantize the RGB and M portions independently.
 // Once you have M quantized, you would compute the corresponding RGB and quantize that.
@ -1054,7 +1078,6 @@ void TexImage::toRGBM(float range/*= 1*/, float threshold/*= 0.25*/)
        float B = nv::clamp(b[i] * irange, 0.0f, 1.0f);
        float M = max(max(R, G), max(B, 1e-6f)); // Avoid division by zero.
        //m = quantizeCeil(m, 8);
        r[i] = R / M;
        g[i] = G / M;
@ -1233,20 +1256,19 @@ void TexImage::toLUVW(float range/*= 1.0f*/)
        float G = nv::clamp(g[i] * irange, 0.0f, 1.0f);
        float B = nv::clamp(b[i] * irange, 0.0f, 1.0f);
-        float L = max(sqrtf(R*R + G*G + B*B), 1e-6f)); // Avoid division by zero.
+        float L = max(sqrtf(R*R + G*G + B*B), 1e-6f); // Avoid division by zero.
        //m = quantizeCeil(m, 8);
        r[i] = R / L;
        g[i] = G / L;
        b[i] = B / L;
-        a[i] = L;
+        a[i] = L / sqrtf(3);
    }
 }
 void TexImage::fromLUVW(float range/*= 1.0f*/)
 {
    // Decompression is the same as in RGBM.
-    fromRGBM(range);
+    fromRGBM(range * sqrtf(3));
 }
@ -1435,10 +1457,52 @@ float nvtt::rmsAlphaError(const TexImage & reference, const TexImage & image)
    return float(sqrt(mse / count));
 }
-TexImage nvtt::diff(const TexImage & reference, const TexImage & image)
+TexImage nvtt::diff(const TexImage & reference, const TexImage & image, float scale)
 {
-    // @@ TODO.
+    const FloatImage * ref = reference.m->image;
-    return TexImage();
+    const FloatImage * img = image.m->image;
    if (img == NULL || ref == NULL || img->width() != ref->width() || img->height() != ref->height()) {
        return TexImage();
    }
    nvDebugCheck(img->componentNum() == 4);
    nvDebugCheck(ref->componentNum() == 4);
    nvtt::TexImage diffImage;
    FloatImage * diff = diffImage.m->image = new FloatImage;
    diff->allocate(4, img->width(), img->height());
    const uint count = img->width() * img->height();
    for (uint i = 0; i < count; i++)
    {
        float r0 = img->pixel(i, 0);
        float g0 = img->pixel(i, 1);
        float b0 = img->pixel(i, 2);
        //float a0 = img->pixel(i, 3);
        float r1 = ref->pixel(i, 0);
        float g1 = ref->pixel(i, 1);
        float b1 = ref->pixel(i, 2);
        float a1 = ref->pixel(i, 3);
        float dr = r0 - r1;
        float dg = g0 - g1;
        float db = b0 - b1;
        //float da = a0 - a1;
        if (reference.alphaMode() == nvtt::AlphaMode_Transparency)
        {
            dr *= a1;
            dg *= a1;
            db *= a1;
        }
        diff->pixel(i, 0) = dr * scale;
        diff->pixel(i, 1) = dg * scale;
        diff->pixel(i, 2) = db * scale;
        diff->pixel(i, 3) = a1;
    }
    return diffImage;
 }
--- a/src/nvtt/nvtt.h
+++ b/src/nvtt/nvtt.h
@ -395,6 +395,8 @@ namespace nvtt
        NVTT_API float alphaTestCoverage(float alphaRef = 0.5) const;
        NVTT_API float average(int channel) const;
        NVTT_API const float * data() const;
        NVTT_API void histogram(int channel, float rangeMin, float rangeMax, int binCount, int * binPtr) const;
        NVTT_API void range(int channel, float * rangeMin, float * rangeMax);
        // Texture data.
        NVTT_API bool load(const char * fileName);
@ -426,7 +428,7 @@ namespace nvtt
        NVTT_API void setBorder(float r, float g, float b, float a);
        NVTT_API void fill(float r, float g, float b, float a);
        NVTT_API void scaleAlphaToCoverage(float coverage, float alphaRef = 0.5f);
-        NVTT_API bool normalizeRange(float * rangeMin, float * rangeMax);
+        //NVTT_API bool normalizeRange(float * rangeMin, float * rangeMax);
        NVTT_API void toRGBM(float range = 1.0f, float threshold = 0.0f);
        NVTT_API void fromRGBM(float range = 1.0f);
        NVTT_API void toYCoCg();
@ -451,9 +453,9 @@ namespace nvtt
        NVTT_API bool copyChannel(const TexImage & srcImage, int srcChannel, int dstChannel);
        // Error compare.
-        friend float rmsError(const TexImage & reference, const TexImage & img);
+        NVTT_API friend float rmsError(const TexImage & reference, const TexImage & img);
-        friend float rmsAlphaError(const TexImage & reference, const TexImage & img);
+        NVTT_API friend float rmsAlphaError(const TexImage & reference, const TexImage & img);
-        friend TexImage diff(const TexImage & reference, const TexImage & img);
+        NVTT_API friend TexImage diff(const TexImage & reference, const TexImage & img, float scale);
    private:
        void detach();
@ -471,7 +473,7 @@ namespace nvtt
    NVTT_API float rmsError(const TexImage & reference, const TexImage & img);
    NVTT_API float rmsAlphaError(const TexImage & reference, const TexImage & img);
-    NVTT_API TexImage diff(const TexImage & reference, const TexImage & img);
+    NVTT_API TexImage diff(const TexImage & reference, const TexImage & img, float scale);
 } // nvtt namespace
--- a/src/nvtt/squish/colourfit.cpp
+++ b/src/nvtt/squish/colourfit.cpp
@ -38,21 +38,21 @@ void ColourFit::SetColourSet( ColourSet const* colours, int flags )
 	m_flags = flags;
 }
-void ColourFit::Compress( void* block )
+void ColourFit::Compress( Vec3 * start, Vec3 * end )
 {
 	bool isDxt1 = ( ( m_flags & kDxt1 ) != 0 );
 	if( isDxt1 )
 	{
-		Compress3( block );
+		Compress3( start, end );
 		if( !m_colours->IsTransparent() )
 		{		
-			Compress4( block );
+			Compress4( start, end );
 		}
 	}
 	else
 	{
-		Compress4( block );
+		Compress4( start, end );
 	}
 }
--- a/src/nvtt/squish/colourfit.h
+++ b/src/nvtt/squish/colourfit.h
@ -40,11 +40,11 @@ public:
 	void SetColourSet( ColourSet const* colours, int flags );
-	void Compress( void* block );
+	void Compress( Vec3 * start, Vec3 * end );
 protected:
-	virtual void Compress3( void* block ) = 0;
+	virtual bool Compress3( Vec3 * start, Vec3 * end ) = 0;
-	virtual void Compress4( void* block ) = 0;
+	virtual bool Compress4( Vec3 * start, Vec3 * end ) = 0;
 	ColourSet const* m_colours;
 	int m_flags;
--- a/src/nvtt/squish/weightedclusterfit.cpp
+++ b/src/nvtt/squish/weightedclusterfit.cpp
@ -129,7 +129,7 @@ float WeightedClusterFit::GetBestError() const
 #if SQUISH_USE_SIMD
-void WeightedClusterFit::Compress3( void* block )
+bool WeightedClusterFit::Compress3( Vec3 * start, Vec3 * end )
 {
    int const count = m_colours->GetCount();
 	Vec4 const one = VEC4_CONST(1.0f);
@ -212,7 +212,7 @@ void WeightedClusterFit::Compress3( void* block )
 	if( CompareAnyLessThan( besterror, m_besterror ) )
 	{
 		// compute indices from cluster sizes.
-		u8 bestindices[16];
+		/*u8 bestindices[16];
 		{
 			int i = 0;
 			for(; i < b0; i++) {
@ -233,16 +233,22 @@ void WeightedClusterFit::Compress3( void* block )
 		m_colours->RemapIndices( ordered, bestindices );
 		// save the block
-		WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
+		WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );*/
-		
+
        *start = beststart.GetVec3();
        *end = bestend.GetVec3();
 		// save the error
 		m_besterror = besterror;
        return true;
 	}
    return false;
 }
-void WeightedClusterFit::Compress4( void* block )
+bool WeightedClusterFit::Compress4( Vec3 * start, Vec3 * end )
 {
    int const count = m_colours->GetCount();
 	Vec4 const one = VEC4_CONST(1.0f);
@ -334,7 +340,7 @@ void WeightedClusterFit::Compress4( void* block )
 	// save the block if necessary
 	if( CompareAnyLessThan( besterror, m_besterror ) )
 	{
-		// compute indices from cluster sizes.
+		/*// compute indices from cluster sizes.
 		u8 bestindices[16];
 		{
 			int i = 0;
@ -360,11 +366,18 @@ void WeightedClusterFit::Compress4( void* block )
        m_colours->RemapIndices( ordered, bestindices );
 		// save the block
-		WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
+		WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );*/
        *start = beststart.GetVec3();
        *end = bestend.GetVec3();
 		// save the error
 		m_besterror = besterror;
        return true;
 	}
    return false;
 }
 #else
--- a/src/nvtt/squish/weightedclusterfit.h
+++ b/src/nvtt/squish/weightedclusterfit.h
@ -45,8 +45,8 @@ public:
 	float GetBestError() const;
 	// Make them public
-	virtual void Compress3( void* block );
+	bool Compress3( Vec3 * start, Vec3 * end );
-	virtual void Compress4( void* block );
+	bool Compress4( Vec3 * start, Vec3 * end );
 private:
--- a/src/nvtt/tests/testsuite.cpp
+++ b/src/nvtt/tests/testsuite.cpp
@ -145,6 +145,11 @@ static const char * s_witnessImageSet[] = {
    "specRuin-puzzle.tga"
 };
 // File names of the lightmap image set; referenced by the "Lightmap" entry
 // of s_imageSets.
 static const char * s_witnessLmapImageSet[] = {
    "specruin.dds",
 };
 enum Mode {
    Mode_BC1,
    Mode_BC1_Alpha,
@ -152,9 +157,12 @@ enum Mode {
    Mode_BC3_Alpha,
    Mode_BC3_YCoCg,
    Mode_BC3_RGBM,
    Mode_BC3_LUVW,
    Mode_BC1_Normal,
    Mode_BC3_Normal,
    Mode_BC5_Normal,
    Mode_BC3_Lightmap_1,
    Mode_BC3_Lightmap_2,
 };
 static const char * s_modeNames[] = {
    "BC1",
@ -167,6 +175,8 @@ static const char * s_modeNames[] = {
    "BC1-Normal",
    "BC3-Normal",
    "BC5-Normal",
    "BC3-RGBM",
    "BC3-LUVW",
 };
 struct Test {
@ -175,26 +185,29 @@ struct Test {
    Mode modes[4];
 };
 static Test s_imageTests[] = {
-    {"DXT Color", 3, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM}},
+    {"DXT Color", 1, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, Mode_BC3_LUVW}},
    {"DXT Alpha", 3, {Mode_BC1_Alpha, Mode_BC2_Alpha, Mode_BC3_Alpha}},
    {"DXT Normal", 3, {Mode_BC1_Normal, Mode_BC3_Normal, Mode_BC5_Normal}},
    {"DXT Lightmap", 2, {Mode_BC3_Lightmap_1, Mode_BC3_Lightmap_2}},
 };
-const int s_testCount = ARRAY_SIZE(s_imageTests);
+const int s_imageTestCount = ARRAY_SIZE(s_imageTests);
 // Describes one collection of test images. Instances are created with
 // positional aggregate initialisers (see s_imageSets), so the field order
 // must not change.
 struct ImageSet
 {
    const char * name;          // Display name; also matched case-insensitively against the -set argument.
    const char * basePath;      // Directory appended to the input base path before changing into it.
    const char ** fileNames;    // Array of image file names in this set.
    int fileCount;              // Number of entries in fileNames.
 };
 static ImageSet s_imageSets[] = {
-    {"Kodak",       s_kodakImageSet,        ARRAY_SIZE(s_kodakImageSet)},       // 0
+    {"Kodak",       "kodak",        s_kodakImageSet,        ARRAY_SIZE(s_kodakImageSet)},       // 0
-    {"Waterloo",    s_waterlooImageSet,     ARRAY_SIZE(s_waterlooImageSet)},    // 1
+    {"Waterloo",    "waterloo",     s_waterlooImageSet,     ARRAY_SIZE(s_waterlooImageSet)},    // 1
-    {"Epic",        s_epicImageSet,         ARRAY_SIZE(s_epicImageSet)},        // 2
+    {"Epic",        "epic",         s_epicImageSet,         ARRAY_SIZE(s_epicImageSet)},        // 2
-    {"Farbraush",   s_farbrauschImageSet,   ARRAY_SIZE(s_farbrauschImageSet)},  // 3
+    {"Farbraush",   "farbrausch",   s_farbrauschImageSet,   ARRAY_SIZE(s_farbrauschImageSet)},  // 3
-    {"Lugaru",      s_lugaruImageSet,       ARRAY_SIZE(s_lugaruImageSet)},      // 4
+    {"Lugaru",      "lugaru",       s_lugaruImageSet,       ARRAY_SIZE(s_lugaruImageSet)},      // 4
-    {"Quake3",      s_quake3ImageSet,       ARRAY_SIZE(s_quake3ImageSet)},      // 5
+    {"Quake3",      "quake3",       s_quake3ImageSet,       ARRAY_SIZE(s_quake3ImageSet)},      // 5
-    {"Witness",     s_witnessImageSet,       ARRAY_SIZE(s_witnessImageSet)}     // 6
+    {"Witness",     "witness",      s_witnessImageSet,      ARRAY_SIZE(s_witnessImageSet)},     // 6
    {"Lightmap",    "lightmap",     s_witnessLmapImageSet,  ARRAY_SIZE(s_witnessLmapImageSet)}, // 7
 };
 const int s_imageSetCount = sizeof(s_imageSets)/sizeof(s_imageSets[0]);
@ -227,9 +240,10 @@ struct MyOutputHandler : public nvtt::OutputHandler
    nvtt::TexImage decompress(Mode mode, nvtt::Decoder decoder)
    {
        nvtt::Format format; 
-        if (mode == Mode_BC1) format = nvtt::Format_BC1;
+        if (mode == Mode_BC1 || mode == Mode_BC1_Alpha || mode == Mode_BC1_Normal) format = nvtt::Format_BC1;
        else if (mode == Mode_BC2_Alpha) format = nvtt::Format_BC2;
        else if (mode == Mode_BC5_Normal) format = nvtt::Format_BC5;
-        else  format = nvtt::Format_BC3;
+        else format = nvtt::Format_BC3;
        nvtt::TexImage img;
        img.setImage2D(format, decoder, m_width, m_height, m_data);
@ -263,7 +277,7 @@ int main(int argc, char *argv[])
    bool nocuda = false;
    bool showHelp = false;
    nvtt::Decoder decoder = nvtt::Decoder_Reference;
-    const char * basePath = "";
+    Path basePath = "";
    const char * outPath = "output";
    const char * regressPath = NULL;
@ -274,6 +288,14 @@ int main(int argc, char *argv[])
        {
            if (i+1 < argc && argv[i+1][0] != '-') {
                setIndex = atoi(argv[i+1]);
                for (int j = 0; j < s_imageSetCount; j++) {
                    if (strCaseCmp(s_imageSets[j].name, argv[i+1]) == 0) {
                        setIndex = j;
                        break;
                    }
                }
                i++;
            }
        }
@ -327,7 +349,7 @@ int main(int argc, char *argv[])
    }
    // Validate inputs.
-    if (testIndex >= s_testCount) {
+    if (testIndex >= s_imageTestCount) {
        printf("Invalid test %d\n", testIndex);
        return 0;
    }
@ -343,17 +365,14 @@ int main(int argc, char *argv[])
        printf("Input options:\n");
        printf("  -path <path>   \tInput image path.\n");
        printf("  -regress <path>\tRegression directory.\n");
-        printf("  -set [0:5]     \tImage set.\n");
+        printf("  -set [0:%d]     \tImage set.\n", s_imageSetCount-1);
-        printf("    0:           \tKodak.\n");
+        for (int i = 0; i < s_imageSetCount; i++) {
-        printf("    1:           \tWaterloo.\n");
+            printf("    %i:           \t%s.\n", i, s_imageSets[i].name);
-        printf("    2:           \tEpic.\n");
+        }
-        printf("    3:           \tFarbrausch.\n");
+        printf("  -test [0:%d]    \tCompression tests to run.", s_imageTestCount);
-        printf("    4:           \tLugaru.\n");
+        for (int i = 0; i < s_imageTestCount; i++) {
-        printf("    5:           \tQuake 3.\n");
+            printf("    %i:           \t%s.\n", i, s_imageTests[i].name);
-        printf("  -test [0:2]    \tCompression tests to run.");
+        }
        printf("    0:           \tDXT Color.\n");
        printf("    1:           \tDXT Alpha.\n");
        printf("    2:           \tDXT Normal.\n");
        printf("  -dec x         \tDecompressor.\n");
        printf("    0:           \tReference.\n");
        printf("    1:           \tNVIDIA.\n");
@ -397,7 +416,9 @@ int main(int argc, char *argv[])
    nvtt::Context context;
    context.enableCudaAcceleration(!nocuda);
-    FileSystem::changeDirectory(basePath);
+    basePath.append(set.basePath);
    FileSystem::changeDirectory(basePath.str());
    FileSystem::createDirectory(outPath);
    //Path csvFileName;
@ -406,7 +427,7 @@ int main(int argc, char *argv[])
    //TextWriter csvWriter(&csvStream);
    Path graphFileName;
-    graphFileName.format("%s/result-%d.txt", outPath, setIndex);
+    graphFileName.format("%s/chart.txt", outPath/*, test.name*/);
    StdOutputStream graphStream(graphFileName.str());
    TextWriter graphWriter(&graphStream);
@ -434,7 +455,7 @@ int main(int argc, char *argv[])
    {
        const char * colors[] = {
            "3D7930", "952826", "3D1FC1",
-            "3D7930", "952826", "3D1FC1", // pick other colors...
+            "FF9900", "999999", "999999", // pick other colors...
        };
        graphWriter << colors[t];
        if (t != test.count-1) graphWriter << ",";
@ -484,10 +505,10 @@ int main(int argc, char *argv[])
    for (int t = 0; t < test.count; t++)
    {
        Mode mode = test.modes[t];
-        if (mode == Mode_BC1) {
+        if (mode == Mode_BC1 || mode == Mode_BC1_Alpha || mode == Mode_BC1_Normal) {
            compressionOptions.setFormat(nvtt::Format_BC1);
        }
-        else if (mode == Mode_BC3_Alpha || mode == Mode_BC3_YCoCg || mode == Mode_BC3_RGBM) {
+        else if (mode == Mode_BC3_Alpha || mode == Mode_BC3_YCoCg || mode == Mode_BC3_RGBM || mode == Mode_BC3_LUVW || mode == Mode_BC3_Lightmap_1 || mode == Mode_BC3_Lightmap_2) {
            compressionOptions.setFormat(nvtt::Format_BC3);
        }
        else if (mode == Mode_BC3_Normal) {
@ -497,10 +518,10 @@ int main(int argc, char *argv[])
            compressionOptions.setFormat(nvtt::Format_BC5);
        }
-        if (mode == Mode_BC3_Alpha) {
+        if (mode == Mode_BC3_Alpha || mode == Mode_BC3_Lightmap_1 || mode == Mode_BC3_Lightmap_2) { // Lightmap's alpha channel is coverage.
            img.setAlphaMode(nvtt::AlphaMode_Transparency);
        }
-        if (mode == Mode_BC3_Normal || mode == Mode_BC5_Normal) {
+        if (mode == Mode_BC1_Normal || mode == Mode_BC3_Normal || mode == Mode_BC5_Normal) {
            img.setNormalMap(true);
        }
@ -528,6 +549,56 @@ int main(int argc, char *argv[])
            else if (mode == Mode_BC3_RGBM) {
                tmp.toRGBM();
            }
            else if (mode == Mode_BC3_LUVW) {
                tmp.toLUVW();
            }
            else if (mode == Mode_BC3_Lightmap_1) {
                tmp.toRGBM(4);
                /*float rmin, rmax;
                tmp.range(0, &rmin, &rmax);
                float gmin, gmax;
                tmp.range(1, &gmin, &gmax);
                float bmin, bmax;
                tmp.range(2, &bmin, &bmax);
                float lmin, lmax;
                tmp.range(3, &lmin, &lmax);
                printf("rmin: %.3f   rmax: %.3f\n", rmin, rmax);
                printf("gmin: %.3f   gmax: %.3f\n", gmin, gmax);
                printf("bmin: %.3f   bmax: %.3f\n", bmin, bmax);
                printf("lmin: %.3f   lmax: %.3f\n", lmin, lmax);
                const int N = 32;
                int chistogram[N];
                int lhistogram[N];
                memset(chistogram, 0, sizeof(chistogram)); 
                memset(lhistogram, 0, sizeof(lhistogram));
                tmp.histogram(0, 0, 1, N, chistogram);
                tmp.histogram(1, 0, 1, N, chistogram);
                tmp.histogram(2, 0, 1, N, chistogram);
                tmp.histogram(3, 0, 1, N, lhistogram);
                printf("Color histogram:\n");
                for (int i = 0; i < N; i++) {
                    printf("%d, ", chistogram[i]);
                }
                printf("\n");
                printf("Luminance histogram:\n");
                for (int i = 0; i < N; i++) {
                    printf("%d, ", lhistogram[i]);
                }
                printf("\n");*/
            }
            else if (mode == Mode_BC3_Lightmap_2) {
                tmp.toLUVW(4);
            }
            printf("Compressing: \t'%s'\n", set.fileNames[i]);
@ -540,12 +611,8 @@ int main(int argc, char *argv[])
            totalTime += timer.elapsed();
            nvtt::TexImage img_out = outputHandler.decompress(mode, decoder);
-            if (mode == Mode_BC3_Alpha) {
+            img_out.setAlphaMode(img.alphaMode());
-                img_out.setAlphaMode(nvtt::AlphaMode_Transparency);
+            img_out.setNormalMap(img.isNormalMap());
            }
            if (mode == Mode_BC3_Normal || mode == Mode_BC5_Normal) {
                img_out.setNormalMap(true);
            }
            if (mode == Mode_BC3_YCoCg) {
                img_out.scaleBias(0, 1.0, -0.5);
@ -555,11 +622,30 @@ int main(int argc, char *argv[])
            else if (mode == Mode_BC3_RGBM) {
                img_out.fromRGBM();
            }
            else if (mode == Mode_BC3_LUVW) {
                img_out.fromLUVW();
            }
            else if (mode == Mode_BC3_Lightmap_1) {
                img_out.fromRGBM(4);
            }
            else if (mode == Mode_BC3_Lightmap_2) {
                img_out.fromLUVW(4);
            }
            Path outputFilePath;
            outputFilePath.format("%s/%s", outPath, s_modeNames[test.modes[t]]);
            FileSystem::createDirectory(outputFilePath.str());
            Path outputFileName;
-            outputFileName.format("%s/%s", outPath, set.fileNames[i]);
+            outputFileName.format("%s/%s", outputFilePath.str(), set.fileNames[i]);
            outputFileName.stripExtension();
-            outputFileName.append(".png");
+            if (mode == Mode_BC3_Lightmap_1 || mode == Mode_BC3_Lightmap_2) {
                outputFileName.append(".dds");
            }
            else {
                outputFileName.append(".png");
            }
            if (!img_out.save(outputFileName.str()))
            {
                printf("Error saving file '%s'.\n", outputFileName.str());
@ -573,6 +659,12 @@ int main(int argc, char *argv[])
            graphWriter << rmse;
            if (i != set.fileCount-1) graphWriter << ",";
            outputFileName.stripExtension();
            outputFileName.append("_diff.png");
            nvtt::diff(img, img_out, 4.0f).save(outputFileName.str());
            // Output csv file
            //csvWriter << "\"" << fileNames[i] << "\"," << rmse << "\n";
@ -615,7 +707,7 @@ int main(int argc, char *argv[])
        printf("  Total Time: \t%.3f sec\n", totalTime);
        printf("  Average RMSE:\t%.4f\n", totalRMSE);
-        if (t != s_testCount-1) graphWriter << "|";
+        if (t != test.count-1) graphWriter << "|";
    }
    /*if (regressPath != NULL)