From f2d90ee844e89e6f5e6b4c75a6ea18c65f8c60e4 Mon Sep 17 00:00:00 2001
From: castano <castano@95f4ed2b-212e-0410-8b90-d31948207fce>
Date: Mon, 2 Jan 2012 08:49:13 +0000
Subject: [PATCH] Fix errors in new cluster fit compressor.

---
 project/vc9/nvcompress/nvcompress.vcproj |  16 +-
 project/vc9/nvtt.sln                     | 414 ++++++++++++-----------
 project/vc9/nvtt/nvtt.vcproj             |  88 ++---
 src/nvimage/ColorBlock.cpp               | 140 +++++---
 src/nvimage/ColorBlock.h                 |  22 +-
 src/nvmath/Box.h                         |   2 +-
 src/nvmath/Box.inl                       |   2 +-
 src/nvmath/Matrix.h                      |  30 +-
 src/nvmath/Matrix.inl                    | 190 +++++------
 src/nvmath/Plane.h                       |   4 +-
 src/nvmath/Plane.inl                     |   4 +-
 src/nvmath/Vector.h                      |  54 ++-
 src/nvmath/Vector.inl                    |  94 ++---
 src/nvtt/ClusterFit.cpp                  |  58 ++--
 src/nvtt/QuickCompressDXT.cpp            |  17 +-
 src/nvtt/tools/imgdiff.cpp               |  34 +-
 16 files changed, 607 insertions(+), 562 deletions(-)
diff --git a/project/vc9/nvcompress/nvcompress.vcproj b/project/vc9/nvcompress/nvcompress.vcproj
index db204ba..44c07b2 100644
--- a/project/vc9/nvcompress/nvcompress.vcproj
+++ b/project/vc9/nvcompress/nvcompress.vcproj
@@ -20,7 +20,7 @@
 	</ToolFiles>
 	<Configurations>
 		<Configuration
-			Name="Debug|Win32"
+			Name="Debug-CUDA|Win32"
 			OutputDirectory="$(ConfigurationName)\$(PlatformName)"
 			IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
 			ConfigurationType="1"
@@ -98,7 +98,7 @@
 			/>
 		</Configuration>
 		<Configuration
-			Name="Debug|x64"
+			Name="Debug-CUDA|x64"
 			OutputDirectory="$(ConfigurationName)\$(PlatformName)"
 			IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
 			ConfigurationType="1"
@@ -176,7 +176,7 @@
 			/>
 		</Configuration>
 		<Configuration
-			Name="Release|Win32"
+			Name="Release-CUDA|Win32"
 			OutputDirectory="$(ConfigurationName)\$(PlatformName)"
 			IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
 			ConfigurationType="1"
@@ -262,7 +262,7 @@
 			/>
 		</Configuration>
 		<Configuration
-			Name="Release|x64"
+			Name="Release-CUDA|x64"
 			OutputDirectory="$(ConfigurationName)\$(PlatformName)"
 			IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
 			ConfigurationType="1"
@@ -344,7 +344,7 @@
 			/>
 		</Configuration>
 		<Configuration
-			Name="Debug (no cuda)|Win32"
+			Name="Debug|Win32"
 			OutputDirectory="$(ConfigurationName)\$(PlatformName)"
 			IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
 			ConfigurationType="1"
@@ -422,7 +422,7 @@
 			/>
 		</Configuration>
 		<Configuration
-			Name="Debug (no cuda)|x64"
+			Name="Debug|x64"
 			OutputDirectory="$(ConfigurationName)\$(PlatformName)"
 			IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
 			ConfigurationType="1"
@@ -497,7 +497,7 @@
 			/>
 		</Configuration>
 		<Configuration
-			Name="Release (no cuda)|Win32"
+			Name="Release|Win32"
 			OutputDirectory="$(ConfigurationName)\$(PlatformName)"
 			IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
 			ConfigurationType="1"
@@ -576,7 +576,7 @@
 			/>
 		</Configuration>
 		<Configuration
-			Name="Release (no cuda)|x64"
+			Name="Release|x64"
 			OutputDirectory="$(ConfigurationName)\$(PlatformName)"
 			IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
 			ConfigurationType="1"
diff --git a/project/vc9/nvtt.sln b/project/vc9/nvtt.sln
index 015fd8d..04e494c 100644
--- a/project/vc9/nvtt.sln
+++ b/project/vc9/nvtt.sln
@@ -99,417 +99,421 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cubemaptest", "cubemaptest\
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
-		Debug (no cuda)|Mixed Platforms = Debug (no cuda)|Mixed Platforms
-		Debug (no cuda)|Win32 = Debug (no cuda)|Win32
-		Debug (no cuda)|x64 = Debug (no cuda)|x64
 		Debug|Mixed Platforms = Debug|Mixed Platforms
 		Debug|Win32 = Debug|Win32
 		Debug|x64 = Debug|x64
-		Release (no cuda)|Mixed Platforms = Release (no cuda)|Mixed Platforms
-		Release (no cuda)|Win32 = Release (no cuda)|Win32
-		Release (no cuda)|x64 = Release (no cuda)|x64
+		Debug-CUDA|Mixed Platforms = Debug-CUDA|Mixed Platforms
+		Debug-CUDA|Win32 = Debug-CUDA|Win32
+		Debug-CUDA|x64 = Debug-CUDA|x64
 		Release|Mixed Platforms = Release|Mixed Platforms
 		Release|Win32 = Release|Win32
 		Release|x64 = Release|x64
+		Release-CUDA|Mixed Platforms = Release-CUDA|Mixed Platforms
+		Release-CUDA|Win32 = Release-CUDA|Win32
+		Release-CUDA|x64 = Release-CUDA|x64
 	EndGlobalSection
 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug (no cuda)|x64
-		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug (no cuda)|x64
-		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug (no cuda)|Win32.ActiveCfg = Debug (no cuda)|Win32
-		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug (no cuda)|Win32.Build.0 = Debug (no cuda)|Win32
-		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug (no cuda)|x64.ActiveCfg = Debug (no cuda)|x64
-		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug (no cuda)|x64.Build.0 = Debug (no cuda)|x64
 		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug|Mixed Platforms.ActiveCfg = Debug|x64
 		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug|Mixed Platforms.Build.0 = Debug|x64
 		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug|Win32.ActiveCfg = Debug|Win32
 		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug|Win32.Build.0 = Debug|Win32
 		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug|x64.ActiveCfg = Debug|x64
 		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug|x64.Build.0 = Debug|x64
-		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release (no cuda)|x64
-		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release (no cuda)|Mixed Platforms.Build.0 = Release (no cuda)|x64
-		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release (no cuda)|Win32.ActiveCfg = Release (no cuda)|Win32
-		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release (no cuda)|Win32.Build.0 = Release (no cuda)|Win32
-		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release (no cuda)|x64.ActiveCfg = Release (no cuda)|x64
-		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release (no cuda)|x64.Build.0 = Release (no cuda)|x64
+		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug-CUDA|Mixed Platforms.ActiveCfg = Debug-CUDA|x64
+		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug-CUDA|Mixed Platforms.Build.0 = Debug-CUDA|x64
+		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug-CUDA|Win32.ActiveCfg = Debug-CUDA|Win32
+		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug-CUDA|Win32.Build.0 = Debug-CUDA|Win32
+		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug-CUDA|x64.ActiveCfg = Debug-CUDA|x64
+		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug-CUDA|x64.Build.0 = Debug-CUDA|x64
 		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release|Mixed Platforms.ActiveCfg = Release|x64
 		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release|Mixed Platforms.Build.0 = Release|x64
 		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release|Win32.ActiveCfg = Release|Win32
 		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release|Win32.Build.0 = Release|Win32
 		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release|x64.ActiveCfg = Release|x64
 		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release|x64.Build.0 = Release|x64
-		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug (no cuda)|x64
-		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug (no cuda)|x64
-		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug (no cuda)|Win32.ActiveCfg = Debug (no cuda)|Win32
-		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug (no cuda)|Win32.Build.0 = Debug (no cuda)|Win32
-		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug (no cuda)|x64.ActiveCfg = Debug (no cuda)|x64
-		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug (no cuda)|x64.Build.0 = Debug (no cuda)|x64
+		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release-CUDA|Mixed Platforms.ActiveCfg = Release-CUDA|x64
+		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release-CUDA|Mixed Platforms.Build.0 = Release-CUDA|x64
+		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release-CUDA|Win32.ActiveCfg = Release-CUDA|Win32
+		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release-CUDA|Win32.Build.0 = Release-CUDA|Win32
+		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release-CUDA|x64.ActiveCfg = Release-CUDA|x64
+		{1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release-CUDA|x64.Build.0 = Release-CUDA|x64
 		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug|Mixed Platforms.ActiveCfg = Debug|x64
 		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug|Mixed Platforms.Build.0 = Debug|x64
 		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug|Win32.ActiveCfg = Debug|Win32
 		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug|Win32.Build.0 = Debug|Win32
 		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug|x64.ActiveCfg = Debug|x64
 		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug|x64.Build.0 = Debug|x64
-		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release (no cuda)|x64
-		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release (no cuda)|Mixed Platforms.Build.0 = Release (no cuda)|x64
-		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release (no cuda)|Win32.ActiveCfg = Release (no cuda)|Win32
-		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release (no cuda)|Win32.Build.0 = Release (no cuda)|Win32
-		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release (no cuda)|x64.ActiveCfg = Release (no cuda)|x64
-		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release (no cuda)|x64.Build.0 = Release (no cuda)|x64
+		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug-CUDA|Mixed Platforms.ActiveCfg = Debug-CUDA|x64
+		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug-CUDA|Mixed Platforms.Build.0 = Debug-CUDA|x64
+		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug-CUDA|Win32.ActiveCfg = Debug-CUDA|Win32
+		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug-CUDA|Win32.Build.0 = Debug-CUDA|Win32
+		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug-CUDA|x64.ActiveCfg = Debug-CUDA|x64
+		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug-CUDA|x64.Build.0 = Debug-CUDA|x64
 		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release|Mixed Platforms.ActiveCfg = Release|x64
 		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release|Mixed Platforms.Build.0 = Release|x64
 		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release|Win32.ActiveCfg = Release|Win32
 		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release|Win32.Build.0 = Release|Win32
 		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release|x64.ActiveCfg = Release|x64
 		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release|x64.Build.0 = Release|x64
-		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64
-		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64
-		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32
-		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug (no cuda)|Win32.Build.0 = Debug|Win32
-		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug (no cuda)|x64.ActiveCfg = Debug|x64
-		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug (no cuda)|x64.Build.0 = Debug|x64
+		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release-CUDA|Mixed Platforms.ActiveCfg = Release-CUDA|x64
+		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release-CUDA|Mixed Platforms.Build.0 = Release-CUDA|x64
+		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release-CUDA|Win32.ActiveCfg = Release-CUDA|Win32
+		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release-CUDA|Win32.Build.0 = Release-CUDA|Win32
+		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release-CUDA|x64.ActiveCfg = Release-CUDA|x64
+		{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release-CUDA|x64.Build.0 = Release-CUDA|x64
 		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug|Mixed Platforms.ActiveCfg = Debug|x64
 		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug|Mixed Platforms.Build.0 = Debug|x64
 		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug|Win32.ActiveCfg = Debug|Win32
 		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug|Win32.Build.0 = Debug|Win32
 		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug|x64.ActiveCfg = Debug|x64
 		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug|x64.Build.0 = Debug|x64
-		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64
-		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64
-		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
-		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Release (no cuda)|Win32.Build.0 = Release|Win32
-		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Release (no cuda)|x64.ActiveCfg = Release|x64
-		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Release (no cuda)|x64.Build.0 = Release|x64
+		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug-CUDA|Mixed Platforms.ActiveCfg = Debug|x64
+		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug-CUDA|Mixed Platforms.Build.0 = Debug|x64
+		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug-CUDA|Win32.ActiveCfg = Debug|Win32
+		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug-CUDA|Win32.Build.0 = Debug|Win32
+		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug-CUDA|x64.ActiveCfg = Debug|x64
+		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug-CUDA|x64.Build.0 = Debug|x64
 		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Release|Mixed Platforms.ActiveCfg = Release|x64
 		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Release|Mixed Platforms.Build.0 = Release|x64
 		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Release|Win32.ActiveCfg = Release|Win32
 		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Release|Win32.Build.0 = Release|Win32
 		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Release|x64.ActiveCfg = Release|x64
 		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Release|x64.Build.0 = Release|x64
-		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64
-		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64
-		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32
-		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug (no cuda)|Win32.Build.0 = Debug|Win32
-		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug (no cuda)|x64.ActiveCfg = Debug|x64
-		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug (no cuda)|x64.Build.0 = Debug|x64
+		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Release-CUDA|Mixed Platforms.ActiveCfg = Release|x64
+		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Release-CUDA|Mixed Platforms.Build.0 = Release|x64
+		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Release-CUDA|Win32.ActiveCfg = Release|Win32
+		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Release-CUDA|Win32.Build.0 = Release|Win32
+		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Release-CUDA|x64.ActiveCfg = Release|x64
+		{4046F392-A18B-4C66-9639-3EABFFF5D531}.Release-CUDA|x64.Build.0 = Release|x64
 		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug|Mixed Platforms.ActiveCfg = Debug|x64
 		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug|Mixed Platforms.Build.0 = Debug|x64
 		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug|Win32.ActiveCfg = Debug|Win32
 		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug|Win32.Build.0 = Debug|Win32
 		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug|x64.ActiveCfg = Debug|x64
 		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug|x64.Build.0 = Debug|x64
-		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64
-		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64
-		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
-		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release (no cuda)|Win32.Build.0 = Release|Win32
-		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release (no cuda)|x64.ActiveCfg = Release|x64
-		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release (no cuda)|x64.Build.0 = Release|x64
+		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug-CUDA|Mixed Platforms.ActiveCfg = Debug|x64
+		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug-CUDA|Mixed Platforms.Build.0 = Debug|x64
+		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug-CUDA|Win32.ActiveCfg = Debug|Win32
+		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug-CUDA|Win32.Build.0 = Debug|Win32
+		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug-CUDA|x64.ActiveCfg = Debug|x64
+		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug-CUDA|x64.Build.0 = Debug|x64
 		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release|Mixed Platforms.ActiveCfg = Release|x64
 		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release|Mixed Platforms.Build.0 = Release|x64
 		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release|Win32.ActiveCfg = Release|Win32
 		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release|Win32.Build.0 = Release|Win32
 		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release|x64.ActiveCfg = Release|x64
 		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release|x64.Build.0 = Release|x64
-		{50C465FE-B308-42BC-894D-89484482AF06}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64
-		{50C465FE-B308-42BC-894D-89484482AF06}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64
-		{50C465FE-B308-42BC-894D-89484482AF06}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32
-		{50C465FE-B308-42BC-894D-89484482AF06}.Debug (no cuda)|Win32.Build.0 = Debug|Win32
-		{50C465FE-B308-42BC-894D-89484482AF06}.Debug (no cuda)|x64.ActiveCfg = Debug|x64
-		{50C465FE-B308-42BC-894D-89484482AF06}.Debug (no cuda)|x64.Build.0 = Debug|x64
+		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release-CUDA|Mixed Platforms.ActiveCfg = Release|x64
+		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release-CUDA|Mixed Platforms.Build.0 = Release|x64
+		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release-CUDA|Win32.ActiveCfg = Release|Win32
+		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release-CUDA|Win32.Build.0 = Release|Win32
+		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release-CUDA|x64.ActiveCfg = Release|x64
+		{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release-CUDA|x64.Build.0 = Release|x64
 		{50C465FE-B308-42BC-894D-89484482AF06}.Debug|Mixed Platforms.ActiveCfg = Debug|x64
 		{50C465FE-B308-42BC-894D-89484482AF06}.Debug|Mixed Platforms.Build.0 = Debug|x64
 		{50C465FE-B308-42BC-894D-89484482AF06}.Debug|Win32.ActiveCfg = Debug|Win32
 		{50C465FE-B308-42BC-894D-89484482AF06}.Debug|Win32.Build.0 = Debug|Win32
 		{50C465FE-B308-42BC-894D-89484482AF06}.Debug|x64.ActiveCfg = Debug|x64
 		{50C465FE-B308-42BC-894D-89484482AF06}.Debug|x64.Build.0 = Debug|x64
-		{50C465FE-B308-42BC-894D-89484482AF06}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64
-		{50C465FE-B308-42BC-894D-89484482AF06}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64
-		{50C465FE-B308-42BC-894D-89484482AF06}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
-		{50C465FE-B308-42BC-894D-89484482AF06}.Release (no cuda)|Win32.Build.0 = Release|Win32
-		{50C465FE-B308-42BC-894D-89484482AF06}.Release (no cuda)|x64.ActiveCfg = Release|x64
-		{50C465FE-B308-42BC-894D-89484482AF06}.Release (no cuda)|x64.Build.0 = Release|x64
+		{50C465FE-B308-42BC-894D-89484482AF06}.Debug-CUDA|Mixed Platforms.ActiveCfg = Debug|x64
+		{50C465FE-B308-42BC-894D-89484482AF06}.Debug-CUDA|Mixed Platforms.Build.0 = Debug|x64
+		{50C465FE-B308-42BC-894D-89484482AF06}.Debug-CUDA|Win32.ActiveCfg = Debug|Win32
+		{50C465FE-B308-42BC-894D-89484482AF06}.Debug-CUDA|Win32.Build.0 = Debug|Win32
+		{50C465FE-B308-42BC-894D-89484482AF06}.Debug-CUDA|x64.ActiveCfg = Debug|x64
+		{50C465FE-B308-42BC-894D-89484482AF06}.Debug-CUDA|x64.Build.0 = Debug|x64
 		{50C465FE-B308-42BC-894D-89484482AF06}.Release|Mixed Platforms.ActiveCfg = Release|x64
 		{50C465FE-B308-42BC-894D-89484482AF06}.Release|Mixed Platforms.Build.0 = Release|x64
 		{50C465FE-B308-42BC-894D-89484482AF06}.Release|Win32.ActiveCfg = Release|Win32
 		{50C465FE-B308-42BC-894D-89484482AF06}.Release|Win32.Build.0 = Release|Win32
 		{50C465FE-B308-42BC-894D-89484482AF06}.Release|x64.ActiveCfg = Release|x64
 		{50C465FE-B308-42BC-894D-89484482AF06}.Release|x64.Build.0 = Release|x64
-		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64
-		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64
-		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32
-		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug (no cuda)|Win32.Build.0 = Debug|Win32
-		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug (no cuda)|x64.ActiveCfg = Debug|x64
-		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug (no cuda)|x64.Build.0 = Debug|x64
+		{50C465FE-B308-42BC-894D-89484482AF06}.Release-CUDA|Mixed Platforms.ActiveCfg = Release|x64
+		{50C465FE-B308-42BC-894D-89484482AF06}.Release-CUDA|Mixed Platforms.Build.0 = Release|x64
+		{50C465FE-B308-42BC-894D-89484482AF06}.Release-CUDA|Win32.ActiveCfg = Release|Win32
+		{50C465FE-B308-42BC-894D-89484482AF06}.Release-CUDA|Win32.Build.0 = Release|Win32
+		{50C465FE-B308-42BC-894D-89484482AF06}.Release-CUDA|x64.ActiveCfg = Release|x64
+		{50C465FE-B308-42BC-894D-89484482AF06}.Release-CUDA|x64.Build.0 = Release|x64
 		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug|Mixed Platforms.ActiveCfg = Debug|x64
 		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug|Mixed Platforms.Build.0 = Debug|x64
 		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug|Win32.ActiveCfg = Debug|Win32
 		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug|Win32.Build.0 = Debug|Win32
 		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug|x64.ActiveCfg = Debug|x64
 		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug|x64.Build.0 = Debug|x64
-		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64
-		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64
-		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
-		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release (no cuda)|Win32.Build.0 = Release|Win32
-		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release (no cuda)|x64.ActiveCfg = Release|x64
-		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release (no cuda)|x64.Build.0 = Release|x64
+		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug-CUDA|Mixed Platforms.ActiveCfg = Debug|x64
+		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug-CUDA|Mixed Platforms.Build.0 = Debug|x64
+		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug-CUDA|Win32.ActiveCfg = Debug|Win32
+		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug-CUDA|Win32.Build.0 = Debug|Win32
+		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug-CUDA|x64.ActiveCfg = Debug|x64
+		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug-CUDA|x64.Build.0 = Debug|x64
 		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release|Mixed Platforms.ActiveCfg = Release|x64
 		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release|Mixed Platforms.Build.0 = Release|x64
 		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release|Win32.ActiveCfg = Release|Win32
 		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release|Win32.Build.0 = Release|Win32
 		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release|x64.ActiveCfg = Release|x64
 		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release|x64.Build.0 = Release|x64
-		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64
-		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64
-		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32
-		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug (no cuda)|Win32.Build.0 = Debug|Win32
-		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug (no cuda)|x64.ActiveCfg = Debug|x64
-		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug (no cuda)|x64.Build.0 = Debug|x64
+		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release-CUDA|Mixed Platforms.ActiveCfg = Release|x64
+		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release-CUDA|Mixed Platforms.Build.0 = Release|x64
+		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release-CUDA|Win32.ActiveCfg = Release|Win32
+		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release-CUDA|Win32.Build.0 = Release|Win32
+		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release-CUDA|x64.ActiveCfg = Release|x64
+		{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release-CUDA|x64.Build.0 = Release|x64
 		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug|Mixed Platforms.ActiveCfg = Debug|x64
 		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug|Mixed Platforms.Build.0 = Debug|x64
 		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug|Win32.ActiveCfg = Debug|Win32
 		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug|Win32.Build.0 = Debug|Win32
 		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug|x64.ActiveCfg = Debug|x64
 		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug|x64.Build.0 = Debug|x64
-		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64
-		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64
-		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
-		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Release (no cuda)|Win32.Build.0 = Release|Win32
-		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Release (no cuda)|x64.ActiveCfg = Release|x64
-		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Release (no cuda)|x64.Build.0 = Release|x64
+		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug-CUDA|Mixed Platforms.ActiveCfg = Debug|x64
+		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug-CUDA|Mixed Platforms.Build.0 = Debug|x64
+		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug-CUDA|Win32.ActiveCfg = Debug|Win32
+		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug-CUDA|Win32.Build.0 = Debug|Win32
+		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug-CUDA|x64.ActiveCfg = Debug|x64
+		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug-CUDA|x64.Build.0 = Debug|x64
 		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Release|Mixed Platforms.ActiveCfg = Release|x64
 		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Release|Mixed Platforms.Build.0 = Release|x64
 		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Release|Win32.ActiveCfg = Release|Win32
 		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Release|Win32.Build.0 = Release|Win32
 		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Release|x64.ActiveCfg = Release|x64
 		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Release|x64.Build.0 = Release|x64
-		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64
-		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64
-		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32
-		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug (no cuda)|Win32.Build.0 = Debug|Win32
-		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug (no cuda)|x64.ActiveCfg = Debug|x64
-		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug (no cuda)|x64.Build.0 = Debug|x64
+		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Release-CUDA|Mixed Platforms.ActiveCfg = Release|x64
+		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Release-CUDA|Mixed Platforms.Build.0 = Release|x64
+		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Release-CUDA|Win32.ActiveCfg = Release|Win32
+		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Release-CUDA|Win32.Build.0 = Release|Win32
+		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Release-CUDA|x64.ActiveCfg = Release|x64
+		{841B73C5-C679-4EEF-A50A-7D6106642B49}.Release-CUDA|x64.Build.0 = Release|x64
 		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug|Mixed Platforms.ActiveCfg = Debug|x64
 		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug|Mixed Platforms.Build.0 = Debug|x64
 		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug|Win32.ActiveCfg = Debug|Win32
 		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug|Win32.Build.0 = Debug|Win32
 		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug|x64.ActiveCfg = Debug|x64
 		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug|x64.Build.0 = Debug|x64
-		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64
-		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64
-		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
-		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release (no cuda)|Win32.Build.0 = Release|Win32
-		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release (no cuda)|x64.ActiveCfg = Release|x64
-		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release (no cuda)|x64.Build.0 = Release|x64
+		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug-CUDA|Mixed Platforms.ActiveCfg = Debug|x64
+		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug-CUDA|Mixed Platforms.Build.0 = Debug|x64
+		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug-CUDA|Win32.ActiveCfg = Debug|Win32
+		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug-CUDA|Win32.Build.0 = Debug|Win32
+		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug-CUDA|x64.ActiveCfg = Debug|x64
+		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug-CUDA|x64.Build.0 = Debug|x64
 		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release|Mixed Platforms.ActiveCfg = Release|x64
 		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release|Mixed Platforms.Build.0 = Release|x64
 		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release|Win32.ActiveCfg = Release|Win32
 		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release|Win32.Build.0 = Release|Win32
 		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release|x64.ActiveCfg = Release|x64
 		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release|x64.Build.0 = Release|x64
-		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64
-		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64
-		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32
-		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug (no cuda)|Win32.Build.0 = Debug|Win32
-		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug (no cuda)|x64.ActiveCfg = Debug|x64
-		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug (no cuda)|x64.Build.0 = Debug|x64
+		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release-CUDA|Mixed Platforms.ActiveCfg = Release|x64
+		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release-CUDA|Mixed Platforms.Build.0 = Release|x64
+		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release-CUDA|Win32.ActiveCfg = Release|Win32
+		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release-CUDA|Win32.Build.0 = Release|Win32
+		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release-CUDA|x64.ActiveCfg = Release|x64
+		{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release-CUDA|x64.Build.0 = Release|x64
 		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug|Mixed Platforms.ActiveCfg = Debug|x64
 		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug|Mixed Platforms.Build.0 = Debug|x64
 		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug|Win32.ActiveCfg = Debug|Win32
 		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug|Win32.Build.0 = Debug|Win32
 		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug|x64.ActiveCfg = Debug|x64
 		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug|x64.Build.0 = Debug|x64
-		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64
-		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64
-		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
-		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release (no cuda)|Win32.Build.0 = Release|Win32
-		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release (no cuda)|x64.ActiveCfg = Release|x64
-		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release (no cuda)|x64.Build.0 = Release|x64
+		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug-CUDA|Mixed Platforms.ActiveCfg = Debug|x64
+		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug-CUDA|Mixed Platforms.Build.0 = Debug|x64
+		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug-CUDA|Win32.ActiveCfg = Debug|Win32
+		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug-CUDA|Win32.Build.0 = Debug|Win32
+		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug-CUDA|x64.ActiveCfg = Debug|x64
+		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug-CUDA|x64.Build.0 = Debug|x64
 		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release|Mixed Platforms.ActiveCfg = Release|x64
 		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release|Mixed Platforms.Build.0 = Release|x64
 		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release|Win32.ActiveCfg = Release|Win32
 		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release|Win32.Build.0 = Release|Win32
 		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release|x64.ActiveCfg = Release|x64
 		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release|x64.Build.0 = Release|x64
-		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64
-		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64
-		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32
-		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug (no cuda)|Win32.Build.0 = Debug|Win32
-		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug (no cuda)|x64.ActiveCfg = Debug|x64
-		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug (no cuda)|x64.Build.0 = Debug|x64
+		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release-CUDA|Mixed Platforms.ActiveCfg = Release|x64
+		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release-CUDA|Mixed Platforms.Build.0 = Release|x64
+		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release-CUDA|Win32.ActiveCfg = Release|Win32
+		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release-CUDA|Win32.Build.0 = Release|Win32
+		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release-CUDA|x64.ActiveCfg = Release|x64
+		{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release-CUDA|x64.Build.0 = Release|x64
 		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug|Mixed Platforms.ActiveCfg = Debug|x64
 		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug|Mixed Platforms.Build.0 = Debug|x64
 		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug|Win32.ActiveCfg = Debug|Win32
 		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug|Win32.Build.0 = Debug|Win32
 		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug|x64.ActiveCfg = Debug|x64
 		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug|x64.Build.0 = Debug|x64
-		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64
-		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64
-		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
-		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release (no cuda)|Win32.Build.0 = Release|Win32
-		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release (no cuda)|x64.ActiveCfg = Release|x64
-		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release (no cuda)|x64.Build.0 = Release|x64
+		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug-CUDA|Mixed Platforms.ActiveCfg = Debug|x64
+		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug-CUDA|Mixed Platforms.Build.0 = Debug|x64
+		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug-CUDA|Win32.ActiveCfg = Debug|Win32
+		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug-CUDA|Win32.Build.0 = Debug|Win32
+		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug-CUDA|x64.ActiveCfg = Debug|x64
+		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug-CUDA|x64.Build.0 = Debug|x64
 		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release|Mixed Platforms.ActiveCfg = Release|x64
 		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release|Mixed Platforms.Build.0 = Release|x64
 		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release|Win32.ActiveCfg = Release|Win32
 		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release|Win32.Build.0 = Release|Win32
 		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release|x64.ActiveCfg = Release|x64
 		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release|x64.Build.0 = Release|x64
-		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64
-		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64
-		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32
-		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug (no cuda)|Win32.Build.0 = Debug|Win32
-		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug (no cuda)|x64.ActiveCfg = Debug|x64
-		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug (no cuda)|x64.Build.0 = Debug|x64
+		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release-CUDA|Mixed Platforms.ActiveCfg = Release|x64
+		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release-CUDA|Mixed Platforms.Build.0 = Release|x64
+		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release-CUDA|Win32.ActiveCfg = Release|Win32
+		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release-CUDA|Win32.Build.0 = Release|Win32
+		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release-CUDA|x64.ActiveCfg = Release|x64
+		{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release-CUDA|x64.Build.0 = Release|x64
 		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug|Mixed Platforms.ActiveCfg = Debug|x64
 		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug|Mixed Platforms.Build.0 = Debug|x64
 		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug|Win32.ActiveCfg = Debug|Win32
 		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug|Win32.Build.0 = Debug|Win32
 		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug|x64.ActiveCfg = Debug|x64
 		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug|x64.Build.0 = Debug|x64
-		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64
-		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64
-		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
-		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release (no cuda)|Win32.Build.0 = Release|Win32
-		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release (no cuda)|x64.ActiveCfg = Release|x64
-		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release (no cuda)|x64.Build.0 = Release|x64
+		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug-CUDA|Mixed Platforms.ActiveCfg = Debug|x64
+		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug-CUDA|Mixed Platforms.Build.0 = Debug|x64
+		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug-CUDA|Win32.ActiveCfg = Debug|Win32
+		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug-CUDA|Win32.Build.0 = Debug|Win32
+		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug-CUDA|x64.ActiveCfg = Debug|x64
+		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug-CUDA|x64.Build.0 = Debug|x64
 		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release|Mixed Platforms.ActiveCfg = Release|x64
 		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release|Mixed Platforms.Build.0 = Release|x64
 		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release|Win32.ActiveCfg = Release|Win32
 		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release|Win32.Build.0 = Release|Win32
 		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release|x64.ActiveCfg = Release|x64
 		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release|x64.Build.0 = Release|x64
-		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|Any CPU
-		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|Any CPU
-		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug (no cuda)|Win32.ActiveCfg = Debug|Any CPU
-		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug (no cuda)|Win32.Build.0 = Debug|Any CPU
-		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug (no cuda)|x64.ActiveCfg = Debug|Any CPU
-		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug (no cuda)|x64.Build.0 = Debug|Any CPU
+		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release-CUDA|Mixed Platforms.ActiveCfg = Release|x64
+		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release-CUDA|Mixed Platforms.Build.0 = Release|x64
+		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release-CUDA|Win32.ActiveCfg = Release|Win32
+		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release-CUDA|Win32.Build.0 = Release|Win32
+		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release-CUDA|x64.ActiveCfg = Release|x64
+		{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release-CUDA|x64.Build.0 = Release|x64
 		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
 		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
 		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug|Win32.ActiveCfg = Debug|Any CPU
 		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug|Win32.Build.0 = Debug|Any CPU
 		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug|x64.ActiveCfg = Debug|Any CPU
-		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|Any CPU
-		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release (no cuda)|Mixed Platforms.Build.0 = Release|Any CPU
-		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release (no cuda)|Win32.ActiveCfg = Release|Any CPU
-		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release (no cuda)|Win32.Build.0 = Release|Any CPU
-		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release (no cuda)|x64.ActiveCfg = Release|Any CPU
+		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug|x64.Build.0 = Debug|Any CPU
+		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug-CUDA|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug-CUDA|Mixed Platforms.Build.0 = Debug|Any CPU
+		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug-CUDA|Win32.ActiveCfg = Debug|Any CPU
+		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug-CUDA|Win32.Build.0 = Debug|Any CPU
+		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug-CUDA|x64.ActiveCfg = Debug|Any CPU
 		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
 		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Mixed Platforms.Build.0 = Release|Any CPU
 		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Win32.ActiveCfg = Release|Any CPU
 		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Win32.Build.0 = Release|Any CPU
 		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|x64.ActiveCfg = Release|Any CPU
-		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|Win32
-		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|Win32
-		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32
-		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.Build.0 = Debug|Win32
-		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|x64.ActiveCfg = Debug|Win32
+		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release-CUDA|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release-CUDA|Mixed Platforms.Build.0 = Release|Any CPU
+		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release-CUDA|Win32.ActiveCfg = Release|Any CPU
+		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release-CUDA|Win32.Build.0 = Release|Any CPU
+		{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release-CUDA|x64.ActiveCfg = Release|Any CPU
 		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32
 		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Mixed Platforms.Build.0 = Debug|Win32
 		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.ActiveCfg = Debug|Win32
 		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.Build.0 = Debug|Win32
 		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|x64.ActiveCfg = Debug|Win32
-		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|Win32
-		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Mixed Platforms.Build.0 = Release|Win32
-		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
-		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.Build.0 = Release|Win32
-		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|x64.ActiveCfg = Release|Win32
+		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug-CUDA|Mixed Platforms.ActiveCfg = Debug|Win32
+		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug-CUDA|Mixed Platforms.Build.0 = Debug|Win32
+		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug-CUDA|Win32.ActiveCfg = Debug|Win32
+		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug-CUDA|Win32.Build.0 = Debug|Win32
+		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug-CUDA|x64.ActiveCfg = Debug|Win32
 		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Mixed Platforms.ActiveCfg = Release|Win32
 		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Mixed Platforms.Build.0 = Release|Win32
 		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.ActiveCfg = Release|Win32
 		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.Build.0 = Release|Win32
 		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|x64.ActiveCfg = Release|Win32
-		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|Win32
-		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|Win32
-		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32
-		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Debug (no cuda)|Win32.Build.0 = Debug|Win32
-		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Debug (no cuda)|x64.ActiveCfg = Debug|x64
-		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Debug (no cuda)|x64.Build.0 = Debug|x64
+		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release-CUDA|Mixed Platforms.ActiveCfg = Release|Win32
+		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release-CUDA|Mixed Platforms.Build.0 = Release|Win32
+		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release-CUDA|Win32.ActiveCfg = Release|Win32
+		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release-CUDA|Win32.Build.0 = Release|Win32
+		{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release-CUDA|x64.ActiveCfg = Release|Win32
 		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32
 		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Debug|Mixed Platforms.Build.0 = Debug|Win32
 		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Debug|Win32.ActiveCfg = Debug|Win32
 		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Debug|Win32.Build.0 = Debug|Win32
 		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Debug|x64.ActiveCfg = Debug|x64
 		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Debug|x64.Build.0 = Debug|x64
-		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64
-		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64
-		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
-		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release (no cuda)|Win32.Build.0 = Release|Win32
-		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release (no cuda)|x64.ActiveCfg = Release|x64
-		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release (no cuda)|x64.Build.0 = Release|x64
-		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release|Mixed Platforms.ActiveCfg = Release|Win32
-		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release|Mixed Platforms.Build.0 = Release|Win32
+		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Debug-CUDA|Mixed Platforms.ActiveCfg = Debug|Win32
+		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Debug-CUDA|Mixed Platforms.Build.0 = Debug|Win32
+		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Debug-CUDA|Win32.ActiveCfg = Debug|Win32
+		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Debug-CUDA|Win32.Build.0 = Debug|Win32
+		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Debug-CUDA|x64.ActiveCfg = Debug|x64
+		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Debug-CUDA|x64.Build.0 = Debug|x64
+		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release|Mixed Platforms.ActiveCfg = Release|x64
+		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release|Mixed Platforms.Build.0 = Release|x64
 		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release|Win32.ActiveCfg = Release|Win32
 		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release|Win32.Build.0 = Release|Win32
 		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release|x64.ActiveCfg = Release|x64
 		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release|x64.Build.0 = Release|x64
-		{C33787E3-5564-4834-9FE3-A9020455A669}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|Win32
-		{C33787E3-5564-4834-9FE3-A9020455A669}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|Win32
-		{C33787E3-5564-4834-9FE3-A9020455A669}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32
-		{C33787E3-5564-4834-9FE3-A9020455A669}.Debug (no cuda)|Win32.Build.0 = Debug|Win32
-		{C33787E3-5564-4834-9FE3-A9020455A669}.Debug (no cuda)|x64.ActiveCfg = Debug|x64
-		{C33787E3-5564-4834-9FE3-A9020455A669}.Debug (no cuda)|x64.Build.0 = Debug|x64
+		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release-CUDA|Mixed Platforms.ActiveCfg = Release|Win32
+		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release-CUDA|Mixed Platforms.Build.0 = Release|Win32
+		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release-CUDA|Win32.ActiveCfg = Release|Win32
+		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release-CUDA|Win32.Build.0 = Release|Win32
+		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release-CUDA|x64.ActiveCfg = Release|x64
+		{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release-CUDA|x64.Build.0 = Release|x64
 		{C33787E3-5564-4834-9FE3-A9020455A669}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32
 		{C33787E3-5564-4834-9FE3-A9020455A669}.Debug|Mixed Platforms.Build.0 = Debug|Win32
 		{C33787E3-5564-4834-9FE3-A9020455A669}.Debug|Win32.ActiveCfg = Debug|Win32
 		{C33787E3-5564-4834-9FE3-A9020455A669}.Debug|Win32.Build.0 = Debug|Win32
 		{C33787E3-5564-4834-9FE3-A9020455A669}.Debug|x64.ActiveCfg = Debug|x64
 		{C33787E3-5564-4834-9FE3-A9020455A669}.Debug|x64.Build.0 = Debug|x64
-		{C33787E3-5564-4834-9FE3-A9020455A669}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|Win32
-		{C33787E3-5564-4834-9FE3-A9020455A669}.Release (no cuda)|Mixed Platforms.Build.0 = Release|Win32
-		{C33787E3-5564-4834-9FE3-A9020455A669}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
-		{C33787E3-5564-4834-9FE3-A9020455A669}.Release (no cuda)|Win32.Build.0 = Release|Win32
-		{C33787E3-5564-4834-9FE3-A9020455A669}.Release (no cuda)|x64.ActiveCfg = Release|x64
-		{C33787E3-5564-4834-9FE3-A9020455A669}.Release (no cuda)|x64.Build.0 = Release|x64
+		{C33787E3-5564-4834-9FE3-A9020455A669}.Debug-CUDA|Mixed Platforms.ActiveCfg = Debug|Win32
+		{C33787E3-5564-4834-9FE3-A9020455A669}.Debug-CUDA|Mixed Platforms.Build.0 = Debug|Win32
+		{C33787E3-5564-4834-9FE3-A9020455A669}.Debug-CUDA|Win32.ActiveCfg = Debug|Win32
+		{C33787E3-5564-4834-9FE3-A9020455A669}.Debug-CUDA|Win32.Build.0 = Debug|Win32
+		{C33787E3-5564-4834-9FE3-A9020455A669}.Debug-CUDA|x64.ActiveCfg = Debug|x64
+		{C33787E3-5564-4834-9FE3-A9020455A669}.Debug-CUDA|x64.Build.0 = Debug|x64
 		{C33787E3-5564-4834-9FE3-A9020455A669}.Release|Mixed Platforms.ActiveCfg = Release|Win32
 		{C33787E3-5564-4834-9FE3-A9020455A669}.Release|Mixed Platforms.Build.0 = Release|Win32
 		{C33787E3-5564-4834-9FE3-A9020455A669}.Release|Win32.ActiveCfg = Release|Win32
 		{C33787E3-5564-4834-9FE3-A9020455A669}.Release|Win32.Build.0 = Release|Win32
 		{C33787E3-5564-4834-9FE3-A9020455A669}.Release|x64.ActiveCfg = Release|x64
 		{C33787E3-5564-4834-9FE3-A9020455A669}.Release|x64.Build.0 = Release|x64
-		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64
-		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64
-		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug (no cuda)|Win32.ActiveCfg = Debug|x64
-		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug (no cuda)|x64.ActiveCfg = Debug|x64
-		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug (no cuda)|x64.Build.0 = Debug|x64
+		{C33787E3-5564-4834-9FE3-A9020455A669}.Release-CUDA|Mixed Platforms.ActiveCfg = Release|Win32
+		{C33787E3-5564-4834-9FE3-A9020455A669}.Release-CUDA|Mixed Platforms.Build.0 = Release|Win32
+		{C33787E3-5564-4834-9FE3-A9020455A669}.Release-CUDA|Win32.ActiveCfg = Release|Win32
+		{C33787E3-5564-4834-9FE3-A9020455A669}.Release-CUDA|Win32.Build.0 = Release|Win32
+		{C33787E3-5564-4834-9FE3-A9020455A669}.Release-CUDA|x64.ActiveCfg = Release|x64
+		{C33787E3-5564-4834-9FE3-A9020455A669}.Release-CUDA|x64.Build.0 = Release|x64
 		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|Mixed Platforms.ActiveCfg = Debug|x64
 		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|Mixed Platforms.Build.0 = Debug|x64
 		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|Win32.ActiveCfg = Debug|Win32
 		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|Win32.Build.0 = Debug|Win32
 		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|x64.ActiveCfg = Debug|x64
 		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|x64.Build.0 = Debug|x64
-		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64
-		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64
-		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release (no cuda)|Win32.ActiveCfg = Release|x64
-		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release (no cuda)|x64.ActiveCfg = Release|x64
-		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release (no cuda)|x64.Build.0 = Release|x64
+		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug-CUDA|Mixed Platforms.ActiveCfg = Debug|x64
+		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug-CUDA|Mixed Platforms.Build.0 = Debug|x64
+		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug-CUDA|Win32.ActiveCfg = Debug|Win32
+		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug-CUDA|Win32.Build.0 = Debug|Win32
+		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug-CUDA|x64.ActiveCfg = Debug|x64
+		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug-CUDA|x64.Build.0 = Debug|x64
 		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|Mixed Platforms.ActiveCfg = Release|x64
 		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|Mixed Platforms.Build.0 = Release|x64
 		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|Win32.ActiveCfg = Release|Win32
 		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|Win32.Build.0 = Release|Win32
 		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|x64.ActiveCfg = Release|x64
 		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|x64.Build.0 = Release|x64
-		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64
-		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64
-		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Debug (no cuda)|Win32.ActiveCfg = Debug|x64
-		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Debug (no cuda)|x64.ActiveCfg = Debug|x64
-		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Debug (no cuda)|x64.Build.0 = Debug|x64
+		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release-CUDA|Mixed Platforms.ActiveCfg = Release|x64
+		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release-CUDA|Mixed Platforms.Build.0 = Release|x64
+		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release-CUDA|Win32.ActiveCfg = Release|Win32
+		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release-CUDA|Win32.Build.0 = Release|Win32
+		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release-CUDA|x64.ActiveCfg = Release|x64
+		{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release-CUDA|x64.Build.0 = Release|x64
 		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Debug|Mixed Platforms.ActiveCfg = Debug|x64
 		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Debug|Mixed Platforms.Build.0 = Debug|x64
 		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Debug|Win32.ActiveCfg = Debug|Win32
 		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Debug|Win32.Build.0 = Debug|Win32
 		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Debug|x64.ActiveCfg = Debug|x64
 		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Debug|x64.Build.0 = Debug|x64
-		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64
-		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64
-		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Release (no cuda)|Win32.ActiveCfg = Release|x64
-		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Release (no cuda)|x64.ActiveCfg = Release|x64
-		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Release (no cuda)|x64.Build.0 = Release|x64
+		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Debug-CUDA|Mixed Platforms.ActiveCfg = Debug|x64
+		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Debug-CUDA|Mixed Platforms.Build.0 = Debug|x64
+		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Debug-CUDA|Win32.ActiveCfg = Debug|Win32
+		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Debug-CUDA|Win32.Build.0 = Debug|Win32
+		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Debug-CUDA|x64.ActiveCfg = Debug|x64
+		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Debug-CUDA|x64.Build.0 = Debug|x64
 		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Release|Mixed Platforms.ActiveCfg = Release|x64
 		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Release|Mixed Platforms.Build.0 = Release|x64
 		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Release|Win32.ActiveCfg = Release|Win32
 		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Release|Win32.Build.0 = Release|Win32
 		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Release|x64.ActiveCfg = Release|x64
 		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Release|x64.Build.0 = Release|x64
+		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Release-CUDA|Mixed Platforms.ActiveCfg = Release|x64
+		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Release-CUDA|Mixed Platforms.Build.0 = Release|x64
+		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Release-CUDA|Win32.ActiveCfg = Release|Win32
+		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Release-CUDA|Win32.Build.0 = Release|Win32
+		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Release-CUDA|x64.ActiveCfg = Release|x64
+		{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Release-CUDA|x64.Build.0 = Release|x64
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
diff --git a/project/vc9/nvtt/nvtt.vcproj b/project/vc9/nvtt/nvtt.vcproj
index 8a867c8..eab12ce 100644
--- a/project/vc9/nvtt/nvtt.vcproj
+++ b/project/vc9/nvtt/nvtt.vcproj
@@ -20,7 +20,7 @@
 	</ToolFiles>
 	<Configurations>
 		<Configuration
-			Name="Debug|Win32"
+			Name="Debug-CUDA|Win32"
 			OutputDirectory="$(ConfigurationName)\$(PlatformName)"
 			IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
 			ConfigurationType="2"
@@ -101,7 +101,7 @@
 			/>
 		</Configuration>
 		<Configuration
-			Name="Debug|x64"
+			Name="Debug-CUDA|x64"
 			OutputDirectory="$(ConfigurationName)\$(PlatformName)"
 			IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
 			ConfigurationType="2"
@@ -181,7 +181,7 @@
 			/>
 		</Configuration>
 		<Configuration
-			Name="Release|Win32"
+			Name="Release-CUDA|Win32"
 			OutputDirectory="$(ConfigurationName)\$(PlatformName)"
 			IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
 			ConfigurationType="2"
@@ -270,7 +270,7 @@
 			/>
 		</Configuration>
 		<Configuration
-			Name="Release|x64"
+			Name="Release-CUDA|x64"
 			OutputDirectory="$(ConfigurationName)\$(PlatformName)"
 			IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
 			ConfigurationType="2"
@@ -355,7 +355,7 @@
 			/>
 		</Configuration>
 		<Configuration
-			Name="Debug (no cuda)|Win32"
+			Name="Debug|Win32"
 			OutputDirectory="$(ConfigurationName)\$(PlatformName)"
 			IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
 			ConfigurationType="2"
@@ -436,7 +436,7 @@
 			/>
 		</Configuration>
 		<Configuration
-			Name="Debug (no cuda)|x64"
+			Name="Debug|x64"
 			OutputDirectory="$(ConfigurationName)\$(PlatformName)"
 			IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
 			ConfigurationType="2"
@@ -512,7 +512,7 @@
 			/>
 		</Configuration>
 		<Configuration
-			Name="Release (no cuda)|Win32"
+			Name="Release|Win32"
 			OutputDirectory="$(ConfigurationName)\$(PlatformName)"
 			IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
 			ConfigurationType="2"
@@ -596,7 +596,7 @@
 			/>
 		</Configuration>
 		<Configuration
-			Name="Release (no cuda)|x64"
+			Name="Release|x64"
 			OutputDirectory="$(ConfigurationName)\$(PlatformName)"
 			IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
 			ConfigurationType="2"
@@ -640,7 +640,7 @@
 				Name="VCLinkerTool"
 				OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).dll"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="&quot;$(CUDA_LIB_PATH)\..\lib64&quot;"
+				AdditionalLibraryDirectories=""
 				SubSystem="2"
 				OptimizeReferences="2"
 				EnableCOMDATFolding="2"
@@ -697,7 +697,7 @@
 				RelativePath="..\..\..\src\nvtt\cuda\CompressKernel.cu"
 				>
 				<FileConfiguration
-					Name="Debug|Win32"
+					Name="Debug-CUDA|Win32"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
@@ -707,7 +707,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Debug-CUDA|x64"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
@@ -717,7 +717,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Release-CUDA|Win32"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
@@ -727,7 +727,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|x64"
+					Name="Release-CUDA|x64"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
@@ -737,7 +737,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug (no cuda)|Win32"
+					Name="Debug|Win32"
 					ExcludedFromBuild="true"
 					>
 					<Tool
@@ -748,7 +748,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug (no cuda)|x64"
+					Name="Debug|x64"
 					ExcludedFromBuild="true"
 					>
 					<Tool
@@ -756,7 +756,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release (no cuda)|Win32"
+					Name="Release|Win32"
 					ExcludedFromBuild="true"
 					>
 					<Tool
@@ -767,7 +767,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release (no cuda)|x64"
+					Name="Release|x64"
 					ExcludedFromBuild="true"
 					>
 					<Tool
@@ -779,7 +779,7 @@
 				RelativePath="..\..\..\src\nvtt\cuda\ConvolveKernel.cu"
 				>
 				<FileConfiguration
-					Name="Debug|Win32"
+					Name="Debug-CUDA|Win32"
 					ExcludedFromBuild="true"
 					>
 					<Tool
@@ -787,7 +787,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Debug-CUDA|x64"
 					ExcludedFromBuild="true"
 					>
 					<Tool
@@ -795,7 +795,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Release-CUDA|Win32"
 					ExcludedFromBuild="true"
 					>
 					<Tool
@@ -803,7 +803,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|x64"
+					Name="Release-CUDA|x64"
 					ExcludedFromBuild="true"
 					>
 					<Tool
@@ -811,7 +811,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug (no cuda)|Win32"
+					Name="Debug|Win32"
 					ExcludedFromBuild="true"
 					>
 					<Tool
@@ -819,7 +819,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug (no cuda)|x64"
+					Name="Debug|x64"
 					ExcludedFromBuild="true"
 					>
 					<Tool
@@ -827,7 +827,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release (no cuda)|Win32"
+					Name="Release|Win32"
 					ExcludedFromBuild="true"
 					>
 					<Tool
@@ -835,7 +835,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release (no cuda)|x64"
+					Name="Release|x64"
 					ExcludedFromBuild="true"
 					>
 					<Tool
@@ -896,7 +896,7 @@
 			RelativePath="..\..\..\src\nvtt\CompressorDX11.cpp"
 			>
 			<FileConfiguration
-				Name="Debug|Win32"
+				Name="Debug-CUDA|Win32"
 				ExcludedFromBuild="true"
 				>
 				<Tool
@@ -904,7 +904,7 @@
 				/>
 			</FileConfiguration>
 			<FileConfiguration
-				Name="Debug|x64"
+				Name="Debug-CUDA|x64"
 				ExcludedFromBuild="true"
 				>
 				<Tool
@@ -912,7 +912,7 @@
 				/>
 			</FileConfiguration>
 			<FileConfiguration
-				Name="Release|Win32"
+				Name="Release-CUDA|Win32"
 				ExcludedFromBuild="true"
 				>
 				<Tool
@@ -920,7 +920,7 @@
 				/>
 			</FileConfiguration>
 			<FileConfiguration
-				Name="Release|x64"
+				Name="Release-CUDA|x64"
 				ExcludedFromBuild="true"
 				>
 				<Tool
@@ -928,7 +928,7 @@
 				/>
 			</FileConfiguration>
 			<FileConfiguration
-				Name="Debug (no cuda)|Win32"
+				Name="Debug|Win32"
 				ExcludedFromBuild="true"
 				>
 				<Tool
@@ -936,7 +936,7 @@
 				/>
 			</FileConfiguration>
 			<FileConfiguration
-				Name="Debug (no cuda)|x64"
+				Name="Debug|x64"
 				ExcludedFromBuild="true"
 				>
 				<Tool
@@ -944,7 +944,7 @@
 				/>
 			</FileConfiguration>
 			<FileConfiguration
-				Name="Release (no cuda)|Win32"
+				Name="Release|Win32"
 				ExcludedFromBuild="true"
 				>
 				<Tool
@@ -952,7 +952,7 @@
 				/>
 			</FileConfiguration>
 			<FileConfiguration
-				Name="Release (no cuda)|x64"
+				Name="Release|x64"
 				ExcludedFromBuild="true"
 				>
 				<Tool
@@ -964,7 +964,7 @@
 			RelativePath="..\..\..\src\nvtt\CompressorDX11.h"
 			>
 			<FileConfiguration
-				Name="Debug|Win32"
+				Name="Debug-CUDA|Win32"
 				ExcludedFromBuild="true"
 				>
 				<Tool
@@ -972,7 +972,7 @@
 				/>
 			</FileConfiguration>
 			<FileConfiguration
-				Name="Debug|x64"
+				Name="Debug-CUDA|x64"
 				ExcludedFromBuild="true"
 				>
 				<Tool
@@ -980,7 +980,7 @@
 				/>
 			</FileConfiguration>
 			<FileConfiguration
-				Name="Release|Win32"
+				Name="Release-CUDA|Win32"
 				ExcludedFromBuild="true"
 				>
 				<Tool
@@ -988,7 +988,7 @@
 				/>
 			</FileConfiguration>
 			<FileConfiguration
-				Name="Release|x64"
+				Name="Release-CUDA|x64"
 				ExcludedFromBuild="true"
 				>
 				<Tool
@@ -996,7 +996,7 @@
 				/>
 			</FileConfiguration>
 			<FileConfiguration
-				Name="Debug (no cuda)|Win32"
+				Name="Debug|Win32"
 				ExcludedFromBuild="true"
 				>
 				<Tool
@@ -1004,7 +1004,7 @@
 				/>
 			</FileConfiguration>
 			<FileConfiguration
-				Name="Debug (no cuda)|x64"
+				Name="Debug|x64"
 				ExcludedFromBuild="true"
 				>
 				<Tool
@@ -1012,7 +1012,7 @@
 				/>
 			</FileConfiguration>
 			<FileConfiguration
-				Name="Release (no cuda)|Win32"
+				Name="Release|Win32"
 				ExcludedFromBuild="true"
 				>
 				<Tool
@@ -1020,7 +1020,7 @@
 				/>
 			</FileConfiguration>
 			<FileConfiguration
-				Name="Release (no cuda)|x64"
+				Name="Release|x64"
 				ExcludedFromBuild="true"
 				>
 				<Tool
@@ -1121,15 +1121,15 @@
 			>
 		</File>
 		<File
-			RelativePath="..\..\..\src\nvtt\TaskDispatcher.h"
+			RelativePath="..\..\..\src\nvtt\Surface.cpp"
 			>
 		</File>
 		<File
-			RelativePath="..\..\..\src\nvtt\Surface.cpp"
+			RelativePath="..\..\..\src\nvtt\Surface.h"
 			>
 		</File>
 		<File
-			RelativePath="..\..\..\src\nvtt\Surface.h"
+			RelativePath="..\..\..\src\nvtt\TaskDispatcher.h"
 			>
 		</File>
 	</Files>
diff --git a/src/nvimage/ColorBlock.cpp b/src/nvimage/ColorBlock.cpp
index 2087e85..ddf02cb 100644
--- a/src/nvimage/ColorBlock.cpp
+++ b/src/nvimage/ColorBlock.cpp
@@ -461,15 +461,30 @@ float ColorBlock::volume() const
 }*/
 
 
+void ColorSet::allocate(uint w, uint h)
+{
+    nvDebugCheck(w <= 4 && h <= 4);
+
+    this->colorCount = w * h;
+    this->indexCount = 16;
+    this->w = 4;
+    this->h = 4;
+
+    //colors = new Vector4[colorCount];
+    //weights = new float[colorCount];
+    //indices = new int[indexCount];
+}
+
+// Allocate a 4x4 block and fill it with the image colors starting at (img_x, img_y).
 void ColorSet::setColors(const float * data, uint img_w, uint img_h, uint img_x, uint img_y)
 {
     nvDebugCheck(img_x < img_w && img_y < img_h);
 
-    w = min(4U, img_w - img_x);
-    h = min(4U, img_h - img_y);
-    nvDebugCheck(w != 0 && h != 0);
+    const uint block_w = min(4U, img_w - img_x);
+    const uint block_h = min(4U, img_h - img_y);
+    nvDebugCheck(block_w != 0 && block_h != 0);
 
-    count = w * h;
+    allocate(block_w, block_h);
 
     const float * r = data + img_w * img_h * 0;
     const float * g = data + img_w * img_h * 1;
@@ -477,9 +492,9 @@ void ColorSet::setColors(const float * data, uint img_w, uint img_h, uint img_x,
     const float * a = data + img_w * img_h * 3;
 
     // Set colors.
-    for (uint y = 0, i = 0; y < h; y++)
+    for (uint y = 0, i = 0; y < block_h; y++)
     {
-        for (uint x = 0; x < w; x++, i++)
+        for (uint x = 0; x < block_w; x++, i++)
         {
             uint idx = x + img_x + (y + img_y) * img_w;
             colors[i].x = r[idx];
@@ -488,11 +503,25 @@ void ColorSet::setColors(const float * data, uint img_w, uint img_h, uint img_x,
             colors[i].w = a[idx];
         }
     }
+
+    // Set default indices.
+    for (uint y = 0, i = 0; y < 4; y++)
+    {
+        for (uint x = 0; x < 4; x++)
+        {
+            if (x < block_w && y < block_h) {
+                indices[y*4+x] = i++;
+            }
+            else {
+                indices[y*4+x] = -1;
+            }
+        }
+    }
 }
 
 void ColorSet::setAlphaWeights()
 {
-    for (uint i = 0; i < count; i++)
+    for (uint i = 0; i < colorCount; i++)
     {
         weights[i] = max(colors[i].w, 0.001f); // Avoid division by zero.
     }
@@ -500,72 +529,71 @@ void ColorSet::setAlphaWeights()
 
 void ColorSet::setUniformWeights()
 {
-    for (uint i = 0; i < count; i++)
+    for (uint i = 0; i < colorCount; i++)
     {
         weights[i] = 1.0f;
     }
 }
 
 
+// @@ Handle complex blocks (not 4x4).
 void ColorSet::createMinimalSet(bool ignoreTransparent)
 {
-    nvDebugCheck(count == w*h); // Do not call this method multiple times.
+    nvDebugCheck(colorCount <= 16);
 
     Vector4 C[16];
     float W[16];
-    memcpy(C, colors, sizeof(Vector4)*count);
-    memcpy(W, weights, sizeof(float)*count);
+    memcpy(C, colors, sizeof(Vector4)*colorCount);
+    memcpy(W, weights, sizeof(float)*colorCount);
 
     uint n = 0;
-    for (uint y = 0, i = 0; y < h; y++)
+    for (uint i = 0; i < indexCount; i++)
     {
-        for (uint x = 0; x < w; x++, i++)
-        {
-            if (ignoreTransparent && C[i].w == 0) {
-                continue;
-            }
+        if (indices[i] < 0) {
+            continue;
+        }
+
+        Vector4 ci = C[indices[i]];
+        float wi = W[indices[i]];
+
+        if (ignoreTransparent && ci.w == 0) {
+            indices[i] = -1;
+            continue;
+        }
 
-            uint idx = y * 4 + x;
-
-            // loop over previous points for a match
-            for (int j = 0; ; j++)
-            {
-                // allocate a new point
-                if (j == i)
-                {
-                    colors[n] = C[i];
-                    weights[n] = W[i];
-                    remap[idx] = n;
-                    n++;
-                    break;
-                }
-
-                // check for a match
-                bool colorMatch = (C[i].x == C[j].x) && (C[i].w == C[j].w) && (C[i].z == C[j].z);
-                //bool alphaMatch = (C[i].w == C[j].w);
-
-                if (colorMatch)
-                {
-                    // get the index of the match
-                    int index = remap[j];
-
-                    // map to this point and increase the weight
-                    weights[index] += W[i];
-                    remap[idx] = index;
-                    break;
-                }
+        // Find matching color.
+        uint j;
+        for (j = 0; j < n; j++) {
+            bool colorMatch = equal(colors[j].x, ci.x) && equal(colors[j].y, ci.y) && equal(colors[j].z, ci.z);
+            //bool alphaMatch = equal(colors[j].w, ci.w);
+
+            if (colorMatch) {
+                weights[j] += wi;
+                indices[i] = j;
+                break;
             }
         }
+
+        // No match found. Add new color.
+        if (j == n) {
+            colors[n] = ci;
+            weights[n] = wi;
+            indices[i] = n;
+            n++;
+        }
+    }
+    nvDebugCheck(n != 0);
+
+    for (uint i = n; i < colorCount; i++) {
+        weights[i] = 0;
     }
 
-    count = n;
+    colorCount = n;
 
     // Avoid empty blocks.
-    if (count == 0) {
-        count = 1;
-        //colors[0] = C[0];
-        //weights[0] = W[0];
-        memset(remap, 0, sizeof(int)*16);
+    if (colorCount == 0) {
+        colorCount = 1;
+        indices[0] = 0;
     }
 }
 
@@ -578,7 +606,7 @@ void ColorSet::wrapIndices()
         uint base = (y % h) * w;
         for (uint x = w; x < 4; x++)
         {
-            remap[y*4+3] = remap[base + (x % w)];
+            indices[y*4+3] = indices[base + (x % w)];
         }
     }
 }
@@ -588,7 +616,7 @@ bool ColorSet::isSingleColor(bool ignoreAlpha) const
     Vector4 v = colors[0];
     if (ignoreAlpha) v.w = 1.0f;
 
-    for (uint i = 1; i < count; i++)
+    for (uint i = 1; i < colorCount; i++)
     {
         Vector4 c = colors[i];
         if (ignoreAlpha) c.w = 1.0f;
@@ -615,7 +643,7 @@ static inline float component(Vector4::Arg c, uint i)
 
 void ColorSet::swizzle(uint x, uint y, uint z, uint w)
 {
-    for (uint i = 0; i < count; i++)
+    for (uint i = 0; i < colorCount; i++)
     {
         Vector4 c = colors[i];
         colors[i].x = component(c, x);
@@ -627,7 +655,7 @@ void ColorSet::swizzle(uint x, uint y, uint z, uint w)
 
 bool ColorSet::hasAlpha() const
 {
-    for (uint i = 0; i < count; i++)
+    for (uint i = 0; i < colorCount; i++)
     {
         if (colors[i].w != 0.0f) return true;
     }
diff --git a/src/nvimage/ColorBlock.h b/src/nvimage/ColorBlock.h
index ea0aaec..6541fa8 100644
--- a/src/nvimage/ColorBlock.h
+++ b/src/nvimage/ColorBlock.h
@@ -83,6 +83,11 @@ namespace nv
 
     struct ColorSet
     {
+        ColorSet() : colorCount(0), indexCount(0), w(0), h(0) {}
+        //~ColorSet() {}
+
+        void allocate(uint w, uint h);
+
         void setColors(const float * data, uint img_w, uint img_h, uint img_x, uint img_y);
 
         void setAlphaWeights();
@@ -97,19 +102,22 @@ namespace nv
         bool hasAlpha() const;
 
         // These methods require indices to be set:
-        Vector4 color(uint x, uint y) const { nvDebugCheck(x < w && y < h); return colors[remap[y * 4 + x]]; }
-        Vector4 & color(uint x, uint y) { nvDebugCheck(x < w && y < h); return colors[remap[y * 4 + x]]; }
+        Vector4 color(uint x, uint y) const { nvDebugCheck(x < w && y < h); return colors[indices[y * 4 + x]]; }
+        Vector4 & color(uint x, uint y) { nvDebugCheck(x < w && y < h); return colors[indices[y * 4 + x]]; }
 
-        Vector4 color(uint i) const { nvDebugCheck(i < 16); return colors[remap[i]]; }
-        Vector4 & color(uint i) { nvDebugCheck(i < 16); return colors[remap[i]]; }
+        Vector4 color(uint i) const { nvDebugCheck(i < indexCount); return colors[indices[i]]; }
+        Vector4 & color(uint i) { nvDebugCheck(i < indexCount); return colors[indices[i]]; }
 
+        bool isValidIndex(uint i) const { return i < indexCount && indices[i] >= 0; }
 
-        uint count;
-        uint w, h;
+        uint colorCount;
+        uint indexCount;    // Fixed to 16
+        uint w, h;          // Fixed to 4x4
 
+        // @@ Allocate color set dynamically and add support for sets larger than 4x4.
         Vector4 colors[16];
         float weights[16];
-        int remap[16];
+        int indices[16];
     };
 
 } // nv namespace
diff --git a/src/nvmath/Box.h b/src/nvmath/Box.h
index dcbfd39..74e4bf3 100644
--- a/src/nvmath/Box.h
+++ b/src/nvmath/Box.h
@@ -43,7 +43,7 @@ namespace nv
         Vector3 extents() const;
 
         // Return extents of the box.
-        scalar extents(uint axis) const;
+        float extents(uint axis) const;
 
         // Add a point to this box.
         void addPointToBounds(const Vector3 & p);
diff --git a/src/nvmath/Box.inl b/src/nvmath/Box.inl
index 33623e9..9b69828 100644
--- a/src/nvmath/Box.inl
+++ b/src/nvmath/Box.inl
@@ -56,7 +56,7 @@ namespace nv
     }
 
     // Return extents of the box.
-    scalar Box::extents(uint axis) const
+    float Box::extents(uint axis) const
     {
         nvDebugCheck(axis < 3);
         if (axis == 0) return (maxCorner.x - minCorner.x) * 0.5f;
diff --git a/src/nvmath/Matrix.h b/src/nvmath/Matrix.h
index 1601c3d..3edb8af 100644
--- a/src/nvmath/Matrix.h
+++ b/src/nvmath/Matrix.h
@@ -19,9 +19,9 @@ namespace nv
         Matrix3(const Matrix3 & m);
         Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2);
 
-        scalar get(uint row, uint col) const;
-        scalar operator()(uint row, uint col) const;
-        scalar & operator()(uint row, uint col);
+        float get(uint row, uint col) const;
+        float operator()(uint row, uint col) const;
+        float & operator()(uint row, uint col);
 
         Vector3 row(uint i) const;
         Vector3 column(uint i) const;
@@ -34,7 +34,7 @@ namespace nv
         float determinant() const;
 
     private:
-        scalar m_data[9];
+        float m_data[9];
     };
 
 
@@ -52,28 +52,28 @@ namespace nv
         explicit Matrix(identity_t);
         Matrix(const Matrix & m);
         Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3);
-        //explicit Matrix(const scalar m[]);	// m is assumed to contain 16 elements
+        //explicit Matrix(const float m[]);	// m is assumed to contain 16 elements
 
-        scalar data(uint idx) const;
-        scalar & data(uint idx);
-        scalar get(uint row, uint col) const;
-        scalar operator()(uint row, uint col) const;
-        scalar & operator()(uint row, uint col);
-        const scalar * ptr() const;
+        float data(uint idx) const;
+        float & data(uint idx);
+        float get(uint row, uint col) const;
+        float operator()(uint row, uint col) const;
+        float & operator()(uint row, uint col);
+        const float * ptr() const;
 
         Vector4 row(uint i) const;
         Vector4 column(uint i) const;
 
-        void scale(scalar s);
+        void scale(float s);
         void scale(Vector3::Arg s);
         void translate(Vector3::Arg t);
-        void rotate(scalar theta, scalar v0, scalar v1, scalar v2);
-        scalar determinant() const;
+        void rotate(float theta, float v0, float v1, float v2);
+        float determinant() const;
 
         void apply(Matrix::Arg m);
 
     private:
-        scalar m_data[16];
+        float m_data[16];
     };
 
 } // nv namespace
diff --git a/src/nvmath/Matrix.inl b/src/nvmath/Matrix.inl
index dd8b84d..9ef3f10 100644
--- a/src/nvmath/Matrix.inl
+++ b/src/nvmath/Matrix.inl
@@ -40,17 +40,17 @@ namespace nv
         m_data[6] = v2.x; m_data[7] = v2.y; m_data[8] = v2.z;
     }
 
-    inline scalar Matrix3::get(uint row, uint col) const
+    inline float Matrix3::get(uint row, uint col) const
     {
         nvDebugCheck(row < 3 && col < 3);
         return m_data[col * 3 + row];
     }
-    inline scalar Matrix3::operator()(uint row, uint col) const
+    inline float Matrix3::operator()(uint row, uint col) const
     {
         nvDebugCheck(row < 3 && col < 3);
         return m_data[col * 3 + row];
     }
-    inline scalar & Matrix3::operator()(uint row, uint col)
+    inline float & Matrix3::operator()(uint row, uint col)
     {
         nvDebugCheck(row < 3 && col < 3);
         return m_data[col * 3 + row];
@@ -136,7 +136,7 @@ namespace nv
         Matrix3 m;
 
         for(int i = 0; i < 3; i++) {
-            const scalar ai0 = a(i,0), ai1 = a(i,1), ai2 = a(i,2);
+            const float ai0 = a(i,0), ai1 = a(i,1), ai2 = a(i,2);
             m(i, 0) = ai0 * b(0,0) + ai1 * b(1,0) + ai2 * b(2,0);
             m(i, 1) = ai0 * b(0,1) + ai1 * b(1,1) + ai2 * b(2,1);
             m(i, 2) = ai0 * b(0,2) + ai1 * b(1,2) + ai2 * b(2,2);
@@ -198,7 +198,7 @@ namespace nv
         m_data[12] = v3.x; m_data[13] = v3.y; m_data[14] = v3.z; m_data[15] = v3.w;
     }
 
-    /*inline Matrix::Matrix(const scalar m[])
+    /*inline Matrix::Matrix(const float m[])
     {
         for(int i = 0; i < 16; i++) {
             m_data[i] = m[i];
@@ -207,33 +207,33 @@ namespace nv
 
 
     // Accessors
-    inline scalar Matrix::data(uint idx) const
+    inline float Matrix::data(uint idx) const
     {
         nvDebugCheck(idx < 16);
         return m_data[idx];
     }
-    inline scalar & Matrix::data(uint idx)
+    inline float & Matrix::data(uint idx)
     {
         nvDebugCheck(idx < 16);
         return m_data[idx];
     }
-    inline scalar Matrix::get(uint row, uint col) const
+    inline float Matrix::get(uint row, uint col) const
     {
         nvDebugCheck(row < 4 && col < 4);
         return m_data[col * 4 + row];
     }
-    inline scalar Matrix::operator()(uint row, uint col) const
+    inline float Matrix::operator()(uint row, uint col) const
     {
         nvDebugCheck(row < 4 && col < 4);
         return m_data[col * 4 + row];
     }
-    inline scalar & Matrix::operator()(uint row, uint col)
+    inline float & Matrix::operator()(uint row, uint col)
     {
         nvDebugCheck(row < 4 && col < 4);
         return m_data[col * 4 + row];
     }
 
-    inline const scalar * Matrix::ptr() const
+    inline const float * Matrix::ptr() const
     {
         return m_data;
     }
@@ -251,7 +251,7 @@ namespace nv
     }
 
     // Apply scale.
-    inline void Matrix::scale(scalar s)
+    inline void Matrix::scale(float s)
     {
         m_data[0] *= s; m_data[1] *= s; m_data[2] *= s; m_data[3] *= s;
         m_data[4] *= s; m_data[5] *= s; m_data[6] *= s; m_data[7] *= s;
@@ -276,10 +276,10 @@ namespace nv
         m_data[15] = m_data[3] * t.x + m_data[7] * t.y + m_data[11] * t.z + m_data[15];
     }
 
-    Matrix rotation(scalar theta, scalar v0, scalar v1, scalar v2);
+    Matrix rotation(float theta, float v0, float v1, float v2);
 
     // Apply rotation.
-    inline void Matrix::rotate(scalar theta, scalar v0, scalar v1, scalar v2)
+    inline void Matrix::rotate(float theta, float v0, float v1, float v2)
     {
         Matrix R(rotation(theta, v0, v1, v2));
         apply(R);
@@ -291,7 +291,7 @@ namespace nv
         nvDebugCheck(this != &m);
 
         for(int i = 0; i < 4; i++) {
-            const scalar ai0 = get(i,0), ai1 = get(i,1), ai2 = get(i,2), ai3 = get(i,3);
+            const float ai0 = get(i,0), ai1 = get(i,1), ai2 = get(i,2), ai3 = get(i,3);
             m_data[0 + i] = ai0 * m(0,0) + ai1 * m(1,0) + ai2 * m(2,0) + ai3 * m(3,0);
             m_data[4 + i] = ai0 * m(0,1) + ai1 * m(1,1) + ai2 * m(2,1) + ai3 * m(3,1);
             m_data[8 + i] = ai0 * m(0,2) + ai1 * m(1,2) + ai2 * m(2,2) + ai3 * m(3,2);
@@ -310,7 +310,7 @@ namespace nv
     }
 
     // Get scale matrix.
-    inline Matrix scale(scalar s)
+    inline Matrix scale(float s)
     {
         Matrix m(identity);
         m(0,0) = m(1,1) = m(2,2) = s;
@@ -328,10 +328,10 @@ namespace nv
     }
 
     // Get rotation matrix.
-    inline Matrix rotation(scalar theta, scalar v0, scalar v1, scalar v2)
+    inline Matrix rotation(float theta, float v0, float v1, float v2)
     {
-        scalar cost = cosf(theta);
-        scalar sint = sinf(theta);
+        float cost = cosf(theta);
+        float sint = sinf(theta);
 
         Matrix m(identity);
 
@@ -348,18 +348,18 @@ namespace nv
             m(0,1) = sint; m(1,1) = cost;
         } 
         else {
-            scalar a2, b2, c2;
+            float a2, b2, c2;
             a2 = v0 * v0;
             b2 = v1 * v1;
             c2 = v2 * v2;
 
-            scalar iscale = 1.0f / sqrtf(a2 + b2 + c2);
+            float iscale = 1.0f / sqrtf(a2 + b2 + c2);
             v0 *= iscale;
             v1 *= iscale;
             v2 *= iscale;
 
-            scalar abm, acm, bcm;
-            scalar mcos, asin, bsin, csin;
+            float abm, acm, bcm;
+            float mcos, asin, bsin, csin;
             mcos = 1.0f - cost;
             abm = v0 * v1 * mcos;
             acm = v0 * v2 * mcos;
@@ -380,18 +380,18 @@ namespace nv
         return m;
     }
 
-    //Matrix rotation(scalar yaw, scalar pitch, scalar roll);
-    //Matrix skew(scalar angle, Vector3::Arg v1, Vector3::Arg v2);
+    //Matrix rotation(float yaw, float pitch, float roll);
+    //Matrix skew(float angle, Vector3::Arg v1, Vector3::Arg v2);
 
     // Get frustum matrix.
-    inline Matrix frustum(scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear, scalar zFar)
+    inline Matrix frustum(float xmin, float xmax, float ymin, float ymax, float zNear, float zFar)
     {
         Matrix m(0.0f);
 
-        scalar doubleznear = 2.0f * zNear;
-        scalar one_deltax = 1.0f / (xmax - xmin);
-        scalar one_deltay = 1.0f / (ymax - ymin);
-        scalar one_deltaz = 1.0f / (zFar - zNear);
+        float doubleznear = 2.0f * zNear;
+        float one_deltax = 1.0f / (xmax - xmin);
+        float one_deltay = 1.0f / (ymax - ymin);
+        float one_deltaz = 1.0f / (zFar - zNear);
 
         m(0,0) = doubleznear * one_deltax;
         m(1,1) = doubleznear * one_deltay;
@@ -405,14 +405,14 @@ namespace nv
     }
 
     // Get infinite frustum matrix.
-    inline Matrix frustum(scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear)
+    inline Matrix frustum(float xmin, float xmax, float ymin, float ymax, float zNear)
     {
         Matrix m(0.0f);
 
-        scalar doubleznear = 2.0f * zNear;
-        scalar one_deltax = 1.0f / (xmax - xmin);
-        scalar one_deltay = 1.0f / (ymax - ymin);
-        scalar nudge = 1.0; // 0.999;
+        float doubleznear = 2.0f * zNear;
+        float one_deltax = 1.0f / (xmax - xmin);
+        float one_deltay = 1.0f / (ymax - ymin);
+        float nudge = 1.0; // 0.999;
 
         m(0,0) = doubleznear * one_deltax;
         m(1,1) = doubleznear * one_deltay;
@@ -426,27 +426,27 @@ namespace nv
     }
 
     // Get perspective matrix.
-    inline Matrix perspective(scalar fovy, scalar aspect, scalar zNear, scalar zFar)
+    inline Matrix perspective(float fovy, float aspect, float zNear, float zFar)
     {
-        scalar xmax = zNear * tan(fovy / 2);
-        scalar xmin = -xmax;
+        float xmax = zNear * tan(fovy / 2);
+        float xmin = -xmax;
 
-        scalar ymax = xmax / aspect;
-        scalar ymin = -ymax;
+        float ymax = xmax / aspect;
+        float ymin = -ymax;
 
         return frustum(xmin, xmax, ymin, ymax, zNear, zFar);	
     }
 
     // Get infinite perspective matrix.
-    inline Matrix perspective(scalar fovy, scalar aspect, scalar zNear)
+    inline Matrix perspective(float fovy, float aspect, float zNear)
     {
-        scalar x = zNear * tan(fovy / 2);
-        scalar y = x / aspect;
+        float x = zNear * tan(fovy / 2);
+        float y = x / aspect;
         return frustum( -x, x, -y, y, zNear );	
     }
 
     // Get matrix determinant.
-    inline scalar Matrix::determinant() const
+    inline float Matrix::determinant() const
     {
         return 
             m_data[3] * m_data[6] * m_data[ 9] * m_data[12] - m_data[2] * m_data[7] * m_data[ 9] * m_data[12] - m_data[3] * m_data[5] * m_data[10] * m_data[12] + m_data[1] * m_data[7] * m_data[10] * m_data[12] +
@@ -563,9 +563,9 @@ void TranslationMatrix(const Vec3 & v) {
 }
 
 /** Rotate theta degrees around v. */
-void RotationMatrix( scalar theta, scalar v0, scalar v1, scalar v2 ) {
-    scalar cost = cos(theta);
-    scalar sint = sin(theta);
+void RotationMatrix( float theta, float v0, float v1, float v2 ) {
+    float cost = cos(theta);
+    float sint = sin(theta);
 
     if( 1 == v0 && 0 == v1 && 0 == v2 ) {
         data[0] = 1.0f;	data[1] = 0.0f;	data[2] = 0.0f;	data[3] = 0.0f;
@@ -587,18 +587,18 @@ void RotationMatrix( scalar theta, scalar v0, scalar v1, scalar v2 ) {
     } 
     else {
         //we need scale a,b,c to unit length.
-        scalar a2, b2, c2;
+        float a2, b2, c2;
         a2 = v0 * v0;
         b2 = v1 * v1;
         c2 = v2 * v2;
 
-        scalar iscale = 1.0f / sqrtf(a2 + b2 + c2);
+        float iscale = 1.0f / sqrtf(a2 + b2 + c2);
         v0 *= iscale;
         v1 *= iscale;
         v2 *= iscale;
 
-        scalar abm, acm, bcm;
-        scalar mcos, asin, bsin, csin;
+        float abm, acm, bcm;
+        float mcos, asin, bsin, csin;
         mcos = 1.0f - cost;
         abm = v0 * v1 * mcos;
         acm = v0 * v2 * mcos;
@@ -626,7 +626,7 @@ void RotationMatrix( scalar theta, scalar v0, scalar v1, scalar v2 ) {
 }
 
 /*
-void SkewMatrix(scalar angle, const Vec3 & v1, const Vec3 & v2) {
+void SkewMatrix(float angle, const Vec3 & v1, const Vec3 & v2) {
 v1.Normalize();
 v2.Normalize();
 
@@ -635,9 +635,9 @@ v3.Cross(v1, v2);
 v3.Normalize();
 
 // Get skew factor.
-scalar costheta = Vec3DotProduct(v1, v2);
-scalar sintheta = Real.Sqrt(1 - costheta * costheta);
-scalar skew = tan(Trig.DegreesToRadians(angle) + acos(sintheta)) * sintheta - costheta;
+float costheta = Vec3DotProduct(v1, v2);
+float sintheta = Real.Sqrt(1 - costheta * costheta);
+float skew = tan(Trig.DegreesToRadians(angle) + acos(sintheta)) * sintheta - costheta;
 
 // Build orthonormal matrix.
 v1 = FXVector3.Cross(v3, v2);
@@ -669,13 +669,13 @@ return R * S * R.Transpose;	// Not sure this is in the correct order...
 *
 * @todo Have to recompute this code for our new convention.
 **/
-void RotationMatrix( scalar yaw, scalar pitch, scalar roll ) {
-    scalar sy = sin(yaw+ToRadian(90));
-    scalar cy = cos(yaw+ToRadian(90));
-    scalar sp = sin(pitch-ToRadian(90));
-    scalar cp = cos(pitch-ToRadian(90));
-    scalar sr = sin(roll);
-    scalar cr = cos(roll);
+void RotationMatrix( float yaw, float pitch, float roll ) {
+    float sy = sin(yaw+ToRadian(90));
+    float cy = cos(yaw+ToRadian(90));
+    float sp = sin(pitch-ToRadian(90));
+    float cp = cos(pitch-ToRadian(90));
+    float sr = sin(roll);
+    float cr = cos(roll);
 
     data[0] = cr*cy + sr*sp*sy;
     data[1] = cp*sy;
@@ -699,35 +699,35 @@ void RotationMatrix( scalar yaw, scalar pitch, scalar roll ) {
 }
 
 /** Create a frustum matrix with the far plane at the infinity. */
-void Frustum( scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear, scalar zFar ) {
-    scalar one_deltax, one_deltay, one_deltaz, doubleznear;
+void Frustum( float xmin, float xmax, float ymin, float ymax, float zNear, float zFar ) {
+    float one_deltax, one_deltay, one_deltaz, doubleznear;
 
     doubleznear = 2.0f * zNear;
     one_deltax = 1.0f / (xmax - xmin);
     one_deltay = 1.0f / (ymax - ymin);
     one_deltaz = 1.0f / (zFar - zNear);
 
-    data[0] = (scalar)(doubleznear * one_deltax);
+    data[0] = (float)(doubleznear * one_deltax);
     data[1] = 0.0f;
     data[2] = 0.0f;
     data[3] = 0.0f;
     data[4] = 0.0f;
-    data[5] = (scalar)(doubleznear * one_deltay);
+    data[5] = (float)(doubleznear * one_deltay);
     data[6] = 0.f;
     data[7] = 0.f;
-    data[8] = (scalar)((xmax + xmin) * one_deltax);
-    data[9] = (scalar)((ymax + ymin) * one_deltay);
-    data[10] = (scalar)(-(zFar + zNear) * one_deltaz);
+    data[8] = (float)((xmax + xmin) * one_deltax);
+    data[9] = (float)((ymax + ymin) * one_deltay);
+    data[10] = (float)(-(zFar + zNear) * one_deltaz);
     data[11] = -1.f;
     data[12] = 0.f;
     data[13] = 0.f;
-    data[14] = (scalar)(-(zFar * doubleznear) * one_deltaz);
+    data[14] = (float)(-(zFar * doubleznear) * one_deltaz);
     data[15] = 0.f;
 }
 
 /** Create a frustum matrix with the far plane at the infinity. */
-void FrustumInf( scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear ) {
-    scalar one_deltax, one_deltay, doubleznear, nudge;
+void FrustumInf( float xmin, float xmax, float ymin, float ymax, float zNear ) {
+    float one_deltax, one_deltay, doubleznear, nudge;
 
     doubleznear = 2.0f * zNear;
     one_deltax = 1.0f / (xmax - xmin);
@@ -756,8 +756,8 @@ void FrustumInf( scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNea
 }
 
 /** Create an inverse frustum matrix with the far plane at the infinity. */
-void FrustumInfInv( scalar left, scalar right, scalar bottom, scalar top, scalar zNear ) {
-    // this matrix is wrong (not tested scalarly) I think it should be transposed.
+void FrustumInfInv( float left, float right, float bottom, float top, float zNear ) {
+    // this matrix is wrong (not really tested); I think it should be transposed.
     data[0] = (right - left) / (2 * zNear);
     data[1] = 0;
     data[2] = 0;
@@ -777,8 +777,8 @@ void FrustumInfInv( scalar left, scalar right, scalar bottom, scalar top, scalar
 }
 
 /** Create an homogeneous projection matrix. */
-void Perspective( scalar fov, scalar aspect, scalar zNear, scalar zFar ) {
-    scalar xmin, xmax, ymin, ymax;
+void Perspective( float fov, float aspect, float zNear, float zFar ) {
+    float xmin, xmax, ymin, ymax;
 
     xmax = zNear * tan( fov/2 );
     xmin = -xmax;
@@ -790,22 +790,22 @@ void Perspective( scalar fov, scalar aspect, scalar zNear, scalar zFar ) {
 }
 
 /** Create a projection matrix with the far plane at the infinity. */
-void PerspectiveInf( scalar fov, scalar aspect, scalar zNear ) {
-    scalar x = zNear * tan( fov/2 );
-    scalar y = x / aspect;
+void PerspectiveInf( float fov, float aspect, float zNear ) {
+    float x = zNear * tan( fov/2 );
+    float y = x / aspect;
     FrustumInf( -x, x, -y, y, zNear );
 }
 
 /** Create an inverse projection matrix with far plane at the infinity. */
-void PerspectiveInfInv( scalar fov, scalar aspect, scalar zNear ) {
-    scalar x = zNear * tan( fov/2 );
-    scalar y = x / aspect;
+void PerspectiveInfInv( float fov, float aspect, float zNear ) {
+    float x = zNear * tan( fov/2 );
+    float y = x / aspect;
     FrustumInfInv( -x, x, -y, y, zNear );
 }
 
 /** Build bone matrix from quatertion and offset. */
 void BoneMatrix(const Quat & q, const Vec3 & offset) {
-    scalar x2, y2, z2, xx, xy, xz, yy, yz, zz, wx, wy, wz;
+    float x2, y2, z2, xx, xy, xz, yy, yz, zz, wx, wy, wz;
 
     // calculate coefficients
     x2 = q.x + q.x;
@@ -844,7 +844,7 @@ void BoneMatrix(const Quat & q, const Vec3 & offset) {
 //@{
 
 /** Apply a general scale. */
-void Scale( scalar x, scalar y, scalar z ) {
+void Scale( float x, float y, float z ) {
     data[0] *= x;	data[4] *= y;	data[8]  *= z;
     data[1] *= x;	data[5] *= y;	data[9]  *= z;
     data[2] *= x;	data[6] *= y;	data[10] *= z;
@@ -852,14 +852,14 @@ void Scale( scalar x, scalar y, scalar z ) {
 }
 
 /** Apply a rotation of theta degrees around the axis v*/
-void Rotate( scalar theta, const Vec3 & v ) {
+void Rotate( float theta, const Vec3 & v ) {
     Matrix b;
     b.RotationMatrix( theta, v[0], v[1], v[2] );
     Multiply4x3( b );
 }
 
 /** Apply a rotation of theta degrees around the axis v*/
-void Rotate( scalar theta, scalar v0, scalar v1, scalar v2 ) {
+void Rotate( float theta, float v0, float v1, float v2 ) {
     Matrix b;
     b.RotationMatrix( theta, v0, v1, v2 );
     Multiply4x3( b );
@@ -881,7 +881,7 @@ void Translate( const Vec3 &t ) {
 * Translate the matrix by x, y, z. This is the same as multiplying by a 
 * translation matrix with the given offsets.
 */
-void Translate( scalar x, scalar y, scalar z ) {
+void Translate( float x, float y, float z ) {
     data[12] = data[0] * x + data[4] * y + data[8]  * z + data[12];
     data[13] = data[1] * x + data[5] * y + data[9]  * z + data[13];
     data[14] = data[2] * x + data[6] * y + data[10] * z + data[14];
@@ -922,7 +922,7 @@ void AffineInverse() {
 //@{
 
 /** Return the determinant of this matrix. */
-scalar Determinant() const {
+float Determinant() const {
     return	data[0] * data[5] * data[10] * data[15] + 
         data[1] * data[6] * data[11] * data[12] +
         data[2] * data[7] * data[ 8] * data[13] +
@@ -944,7 +944,7 @@ void Multiply4x4( const Matrix & A, const Matrix & restrict B ) {
     piDebugCheck(this != &B);
 
     for(int i = 0; i < 4; i++) {
-        const scalar ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3);
+        const float ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3);
         GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0);
         GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1);
         GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2);
@@ -981,7 +981,7 @@ void Multiply4x3( const Matrix & A, const Matrix & restrict B ) {
     piDebugCheck(this != &B);
 
     for(int i = 0; i < 3; i++) {
-        const scalar ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3);
+        const float ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3);
         GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0);
         GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1);
         GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2);
@@ -1038,9 +1038,9 @@ void TransformPoint(const Vec3 & restrict orig, Vec3 * restrict dest) const {
 }
 
 /** Transform a point, normalize it, and return w. */
-scalar TransformPointAndNormalize(const Vec3 & restrict orig, Vec3 * restrict dest) const {
+float TransformPointAndNormalize(const Vec3 & restrict orig, Vec3 * restrict dest) const {
     piDebugCheck(&orig != dest);
-    scalar w;
+    float w;
     dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12];
     dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13];
     dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14];
@@ -1050,7 +1050,7 @@ scalar TransformPointAndNormalize(const Vec3 & restrict orig, Vec3 * restrict de
 }
 
 /** Transform a point and return w. */
-scalar TransformPointReturnW(const Vec3 & restrict orig, Vec3 * restrict dest) const {
+float TransformPointReturnW(const Vec3 & restrict orig, Vec3 * restrict dest) const {
     piDebugCheck(&orig != dest);
     dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12];
     dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13];
@@ -1071,7 +1071,7 @@ void TransformVec4(const Vec3 & orig, Vec4 * dest) const {
 //@{
 
 /** Get the ZYZ euler angles from the matrix. Assumes the matrix is orthonormal. */
-void GetEulerAnglesZYZ(scalar * s, scalar * t, scalar * r) const {
+void GetEulerAnglesZYZ(float * s, float * t, float * r) const {
     if( GetElem(2,2) < 1.0f ) {
         if( GetElem(2,2) > -1.0f ) {
             // 	cs*ct*cr-ss*sr 		-ss*ct*cr-cs*sr		st*cr
@@ -1115,7 +1115,7 @@ void Print() const {
 
 public:
 
-    scalar data[16];
+    float data[16];
 
 };
 #endif
diff --git a/src/nvmath/Plane.h b/src/nvmath/Plane.h
index 0552801..b81fb2b 100644
--- a/src/nvmath/Plane.h
+++ b/src/nvmath/Plane.h
@@ -25,12 +25,12 @@ namespace nv
         const Plane & operator=(Plane::Arg v);
 
         Vector3 vector() const;
-        scalar offset() const;
+        float offset() const;
 
         const Vector4 & asVector() const;
         Vector4 & asVector();
 
-        void operator*=(scalar s);
+        void operator*=(float s);
 
     private:
         Vector4 p;
diff --git a/src/nvmath/Plane.inl b/src/nvmath/Plane.inl
index c8bd3ea..4bc87f1 100644
--- a/src/nvmath/Plane.inl
+++ b/src/nvmath/Plane.inl
@@ -18,7 +18,7 @@ namespace nv
     inline const Plane & Plane::operator=(Plane::Arg v) { p = v.p; return *this; }
 
     inline Vector3 Plane::vector() const { return p.xyz(); }
-    inline scalar Plane::offset() const { return p.w; }
+    inline float Plane::offset() const { return p.w; }
 
     inline const Vector4 & Plane::asVector() const { return p; }
     inline Vector4 & Plane::asVector() { return p; }
@@ -38,7 +38,7 @@ namespace nv
         return dot(plane.vector(), point) - plane.offset();
     }
 
-    inline void Plane::operator*=(scalar s)
+    inline void Plane::operator*=(float s)
     {
         scale(p, s);
     }
diff --git a/src/nvmath/Vector.h b/src/nvmath/Vector.h
index a54ccce..a0cc539 100644
--- a/src/nvmath/Vector.h
+++ b/src/nvmath/Vector.h
@@ -8,30 +8,26 @@
 
 namespace nv
 {
-
-    // I should probably use templates.
-    typedef float scalar;
-
     class NVMATH_CLASS Vector2
     {
     public:
         typedef Vector2 const & Arg;
 
         Vector2();
-        explicit Vector2(scalar f);
-        Vector2(scalar x, scalar y);
+        explicit Vector2(float f);
+        Vector2(float x, float y);
         Vector2(Vector2::Arg v);
 
         const Vector2 & operator=(Vector2::Arg v);
 
-        const scalar * ptr() const;
+        const float * ptr() const;
 
-        void set(scalar x, scalar y);
+        void set(float x, float y);
 
         Vector2 operator-() const;
         void operator+=(Vector2::Arg v);
         void operator-=(Vector2::Arg v);
-        void operator*=(scalar s);
+        void operator*=(float s);
         void operator*=(Vector2::Arg v);
 
         friend bool operator==(Vector2::Arg a, Vector2::Arg b);
@@ -39,9 +35,9 @@ namespace nv
 
         union {
             struct {
-                scalar x, y;
+                float x, y;
             };
-            scalar component[2];
+            float component[2];
         };
     };
 
@@ -55,24 +51,24 @@ namespace nv
         typedef Vector3 const & Arg;
 
         Vector3();
-        explicit Vector3(scalar x);
-        Vector3(scalar x, scalar y, scalar z);
-        Vector3(Vector2::Arg v, scalar z);
+        explicit Vector3(float x);
+        Vector3(float x, float y, float z);
+        Vector3(Vector2::Arg v, float z);
         Vector3(Vector3::Arg v);
 
         const Vector3 & operator=(Vector3::Arg v);
 
         Vector2 xy() const;
 
-        const scalar * ptr() const;
+        const float * ptr() const;
 
-        void set(scalar x, scalar y, scalar z);
+        void set(float x, float y, float z);
 
         Vector3 operator-() const;
         void operator+=(Vector3::Arg v);
         void operator-=(Vector3::Arg v);
-        void operator*=(scalar s);
-        void operator/=(scalar s);
+        void operator*=(float s);
+        void operator/=(float s);
         void operator*=(Vector3::Arg v);
 
         friend bool operator==(Vector3::Arg a, Vector3::Arg b);
@@ -80,9 +76,9 @@ namespace nv
 
         union {
             struct {
-                scalar x, y, z;
+                float x, y, z;
             };
-            scalar component[3];
+            float component[3];
         };
     };
 
@@ -96,11 +92,11 @@ namespace nv
         typedef Vector4 const & Arg;
 
         Vector4();
-        explicit Vector4(scalar x);
-        Vector4(scalar x, scalar y, scalar z, scalar w);
-        Vector4(Vector2::Arg v, scalar z, scalar w);
+        explicit Vector4(float x);
+        Vector4(float x, float y, float z, float w);
+        Vector4(Vector2::Arg v, float z, float w);
         Vector4(Vector2::Arg v, Vector2::Arg u);
-        Vector4(Vector3::Arg v, scalar w);
+        Vector4(Vector3::Arg v, float w);
         Vector4(Vector4::Arg v);
         //	Vector4(const Quaternion & v);
 
@@ -110,14 +106,14 @@ namespace nv
         Vector2 zw() const;
         Vector3 xyz() const;
 
-        const scalar * ptr() const;
+        const float * ptr() const;
 
-        void set(scalar x, scalar y, scalar z, scalar w);
+        void set(float x, float y, float z, float w);
 
         Vector4 operator-() const;
         void operator+=(Vector4::Arg v);
         void operator-=(Vector4::Arg v);
-        void operator*=(scalar s);
+        void operator*=(float s);
         void operator*=(Vector4::Arg v);
 
         friend bool operator==(Vector4::Arg a, Vector4::Arg b);
@@ -125,9 +121,9 @@ namespace nv
 
         union {
             struct {
-                scalar x, y, z, w;
+                float x, y, z, w;
             };
-            scalar component[4];
+            float component[4];
         };
     };
 
diff --git a/src/nvmath/Vector.inl b/src/nvmath/Vector.inl
index 9b0ec0a..d2d3341 100644
--- a/src/nvmath/Vector.inl
+++ b/src/nvmath/Vector.inl
@@ -22,8 +22,8 @@ namespace nv
 
     // Vector2
     inline Vector2::Vector2() {}
-    inline Vector2::Vector2(scalar f) : x(f), y(f) {}
-    inline Vector2::Vector2(scalar x, scalar y) : x(x), y(y) {}
+    inline Vector2::Vector2(float f) : x(f), y(f) {}
+    inline Vector2::Vector2(float x, float y) : x(x), y(y) {}
     inline Vector2::Vector2(Vector2::Arg v) : x(v.x), y(v.y) {}
 
     inline const Vector2 & Vector2::operator=(Vector2::Arg v)
@@ -33,12 +33,12 @@ namespace nv
         return *this;
     }
 
-    inline const scalar * Vector2::ptr() const
+    inline const float * Vector2::ptr() const
     {
         return &x;
     }
 
-    inline void Vector2::set(scalar x, scalar y)
+    inline void Vector2::set(float x, float y)
     {
         this->x = x;
         this->y = y;
@@ -61,7 +61,7 @@ namespace nv
         y -= v.y;
     }
 
-    inline void Vector2::operator*=(scalar s)
+    inline void Vector2::operator*=(float s)
     {
         x *= s;
         y *= s;
@@ -85,9 +85,9 @@ namespace nv
 
     // Vector3
     inline Vector3::Vector3() {}
-    inline Vector3::Vector3(scalar f) : x(f), y(f), z(f) {}
-    inline Vector3::Vector3(scalar x, scalar y, scalar z) : x(x), y(y), z(z) {}
-    inline Vector3::Vector3(Vector2::Arg v, scalar z) : x(v.x), y(v.y), z(z) {}
+    inline Vector3::Vector3(float f) : x(f), y(f), z(f) {}
+    inline Vector3::Vector3(float x, float y, float z) : x(x), y(y), z(z) {}
+    inline Vector3::Vector3(Vector2::Arg v, float z) : x(v.x), y(v.y), z(z) {}
     inline Vector3::Vector3(Vector3::Arg v) : x(v.x), y(v.y), z(v.z) {}
 
     inline const Vector3 & Vector3::operator=(Vector3::Arg v)
@@ -104,12 +104,12 @@ namespace nv
         return Vector2(x, y);
     }
 
-    inline const scalar * Vector3::ptr() const
+    inline const float * Vector3::ptr() const
     {
         return &x;
     }
 
-    inline void Vector3::set(scalar x, scalar y, scalar z)
+    inline void Vector3::set(float x, float y, float z)
     {
         this->x = x;
         this->y = y;
@@ -135,14 +135,14 @@ namespace nv
         z -= v.z;
     }
 
-    inline void Vector3::operator*=(scalar s)
+    inline void Vector3::operator*=(float s)
     {
         x *= s;
         y *= s;
         z *= s;
     }
 
-    inline void Vector3::operator/=(scalar s)
+    inline void Vector3::operator/=(float s)
     {
         float is = 1.0f / s;
         x *= is;
@@ -169,11 +169,11 @@ namespace nv
 
     // Vector4
     inline Vector4::Vector4() {}
-    inline Vector4::Vector4(scalar f) : x(f), y(f), z(f), w(f) {}
-    inline Vector4::Vector4(scalar x, scalar y, scalar z, scalar w) : x(x), y(y), z(z), w(w) {}
-    inline Vector4::Vector4(Vector2::Arg v, scalar z, scalar w) : x(v.x), y(v.y), z(z), w(w) {}
+    inline Vector4::Vector4(float f) : x(f), y(f), z(f), w(f) {}
+    inline Vector4::Vector4(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {}
+    inline Vector4::Vector4(Vector2::Arg v, float z, float w) : x(v.x), y(v.y), z(z), w(w) {}
     inline Vector4::Vector4(Vector2::Arg v, Vector2::Arg u) : x(v.x), y(v.y), z(u.x), w(u.y) {}
-    inline Vector4::Vector4(Vector3::Arg v, scalar w) : x(v.x), y(v.y), z(v.z), w(w) {}
+    inline Vector4::Vector4(Vector3::Arg v, float w) : x(v.x), y(v.y), z(v.z), w(w) {}
     inline Vector4::Vector4(Vector4::Arg v) : x(v.x), y(v.y), z(v.z), w(v.w) {}
 
     inline const Vector4 & Vector4::operator=(const Vector4 & v)
@@ -200,12 +200,12 @@ namespace nv
         return Vector3(x, y, z);
     }
 
-    inline const scalar * Vector4::ptr() const
+    inline const float * Vector4::ptr() const
     {
         return &x;
     }
 
-    inline void Vector4::set(scalar x, scalar y, scalar z, scalar w)
+    inline void Vector4::set(float x, float y, float z, float w)
     {
         this->x = x;
         this->y = y;
@@ -234,7 +234,7 @@ namespace nv
         w -= v.w;
     }
 
-    inline void Vector4::operator*=(scalar s)
+    inline void Vector4::operator*=(float s)
     {
         x *= s;
         y *= s;
@@ -284,7 +284,7 @@ namespace nv
         return sub(a, b);
     }
 
-    inline Vector2 scale(Vector2::Arg v, scalar s)
+    inline Vector2 scale(Vector2::Arg v, float s)
     {
         return Vector2(v.x * s, v.y * s);
     }
@@ -294,7 +294,7 @@ namespace nv
         return Vector2(v.x * s.x, v.y * s.y);
     }
 
-    inline Vector2 operator*(Vector2::Arg v, scalar s)
+    inline Vector2 operator*(Vector2::Arg v, float s)
     {
         return scale(v, s);
     }
@@ -304,32 +304,32 @@ namespace nv
         return Vector2(v1.x*v2.x, v1.y*v2.y);
     }
 
-    inline Vector2 operator*(scalar s, Vector2::Arg v)
+    inline Vector2 operator*(float s, Vector2::Arg v)
     {
         return scale(v, s);
     }
 
-    inline Vector2 operator/(Vector2::Arg v, scalar s)
+    inline Vector2 operator/(Vector2::Arg v, float s)
     {
         return scale(v, 1.0f/s);
     }
 
-    inline scalar dot(Vector2::Arg a, Vector2::Arg b)
+    inline float dot(Vector2::Arg a, Vector2::Arg b)
     {
         return a.x * b.x + a.y * b.y;
     }
 
-    inline scalar lengthSquared(Vector2::Arg v)
+    inline float lengthSquared(Vector2::Arg v)
     {
         return v.x * v.x + v.y * v.y;
     }
 
-    inline scalar length(Vector2::Arg v)
+    inline float length(Vector2::Arg v)
     {
         return sqrtf(lengthSquared(v));
     }
 
-    inline scalar inverseLength(Vector2::Arg v)
+    inline float inverseLength(Vector2::Arg v)
     {
         return 1.0f / sqrtf(lengthSquared(v));
     }
@@ -444,7 +444,7 @@ namespace nv
         return Vector3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);
     }
 
-    inline Vector3 scale(Vector3::Arg v, scalar s)
+    inline Vector3 scale(Vector3::Arg v, float s)
     {
         return Vector3(v.x * s, v.y * s, v.z * s);
     }
@@ -454,12 +454,12 @@ namespace nv
         return Vector3(v.x * s.x, v.y * s.y, v.z * s.z);
     }
 
-    inline Vector3 operator*(Vector3::Arg v, scalar s)
+    inline Vector3 operator*(Vector3::Arg v, float s)
     {
         return scale(v, s);
     }
 
-    inline Vector3 operator*(scalar s, Vector3::Arg v)
+    inline Vector3 operator*(float s, Vector3::Arg v)
     {
         return scale(v, s);
     }
@@ -469,38 +469,38 @@ namespace nv
         return scale(v, s);
     }
 
-    inline Vector3 operator/(Vector3::Arg v, scalar s)
+    inline Vector3 operator/(Vector3::Arg v, float s)
     {
         return scale(v, 1.0f/s);
     }
 
-    /*inline Vector3 add_scaled(Vector3::Arg a, Vector3::Arg b, scalar s)
+    /*inline Vector3 add_scaled(Vector3::Arg a, Vector3::Arg b, float s)
     {
         return Vector3(a.x + b.x * s, a.y + b.y * s, a.z + b.z * s);
     }*/
 
-    inline Vector3 lerp(Vector3::Arg v1, Vector3::Arg v2, scalar t)
+    inline Vector3 lerp(Vector3::Arg v1, Vector3::Arg v2, float t)
     {
-        const scalar s = 1.0f - t;
+        const float s = 1.0f - t;
         return Vector3(v1.x * s + t * v2.x, v1.y * s + t * v2.y, v1.z * s + t * v2.z);
     }
 
-    inline scalar dot(Vector3::Arg a, Vector3::Arg b)
+    inline float dot(Vector3::Arg a, Vector3::Arg b)
     {
         return a.x * b.x + a.y * b.y + a.z * b.z;
     }
 
-    inline scalar lengthSquared(Vector3::Arg v)
+    inline float lengthSquared(Vector3::Arg v)
     {
         return v.x * v.x + v.y * v.y + v.z * v.z;
     }
 
-    inline scalar length(Vector3::Arg v)
+    inline float length(Vector3::Arg v)
     {
         return sqrtf(lengthSquared(v));
     }
 
-    inline scalar inverseLength(Vector3::Arg v)
+    inline float inverseLength(Vector3::Arg v)
     {
         return 1.0f / sqrtf(lengthSquared(v));
     }
@@ -602,7 +602,7 @@ namespace nv
         return sub(a, b);
     }
 
-    inline Vector4 scale(Vector4::Arg v, scalar s)
+    inline Vector4 scale(Vector4::Arg v, float s)
     {
         return Vector4(v.x * s, v.y * s, v.z * s, v.w * s);
     }
@@ -612,42 +612,42 @@ namespace nv
         return Vector4(v.x * s.x, v.y * s.y, v.z * s.z, v.w * s.w);
     }
 
-    inline Vector4 operator*(Vector4::Arg v, scalar s)
+    inline Vector4 operator*(Vector4::Arg v, float s)
     {
         return scale(v, s);
     }
 
-    inline Vector4 operator*(scalar s, Vector4::Arg v)
+    inline Vector4 operator*(float s, Vector4::Arg v)
     {
         return scale(v, s);
     }
 
-    inline Vector4 operator/(Vector4::Arg v, scalar s)
+    inline Vector4 operator/(Vector4::Arg v, float s)
     {
         return scale(v, 1.0f/s);
     }
 
-    inline Vector4 add_scaled(Vector4::Arg a, Vector4::Arg b, scalar s)
+    inline Vector4 add_scaled(Vector4::Arg a, Vector4::Arg b, float s)
     {
         return Vector4(a.x + b.x * s, a.y + b.y * s, a.z + b.z * s, a.w + b.w * s);
     }
 
-    inline scalar dot(Vector4::Arg a, Vector4::Arg b)
+    inline float dot(Vector4::Arg a, Vector4::Arg b)
     {
         return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
     }
 
-    inline scalar lengthSquared(Vector4::Arg v)
+    inline float lengthSquared(Vector4::Arg v)
     {
         return v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w;
     }
 
-    inline scalar length(Vector4::Arg v)
+    inline float length(Vector4::Arg v)
     {
         return sqrtf(lengthSquared(v));
     }
 
-    inline scalar inverseLength(Vector4::Arg v)
+    inline float inverseLength(Vector4::Arg v)
     {
         return 1.0f / sqrtf(lengthSquared(v));
     }
diff --git a/src/nvtt/ClusterFit.cpp b/src/nvtt/ClusterFit.cpp
index c191c35..6717a84 100644
--- a/src/nvtt/ClusterFit.cpp
+++ b/src/nvtt/ClusterFit.cpp
@@ -49,7 +49,7 @@ void ClusterFit::setColourSet(const ColorSet * set)
 #endif
 
     // cache some values
-    m_count = set->count;
+    m_count = set->colorCount;
 
     Vector3 values[16];
     for (uint i = 0; i < m_count; i++)
@@ -148,7 +148,7 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
     SimdVector besterror = SimdVector( FLT_MAX );
 
     SimdVector x0 = zero;
-	
+
     int b0 = 0, b1 = 0;
 
     // check all possible clusters for this total order
@@ -191,22 +191,22 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
             SimdVector e3 = negativeMultiplySubtract( b, betax_sum, e2 );
             SimdVector e4 = multiplyAdd( two, e3, e1 );
 
-	    // apply the metric to the error term
-	    SimdVector e5 = e4 * m_metricSqr;
-	    SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ();
+            // apply the metric to the error term
+            SimdVector e5 = e4 * m_metricSqr;
+            SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ();
 
-	    // keep the solution if it wins
-	    if( compareAnyLessThan( error, besterror ) )
-	    {
-		besterror = error;
-		beststart = a;
-		bestend = b;
-		b0 = c0;
-		b1 = c1;
-	    }
+            // keep the solution if it wins
+            if( compareAnyLessThan( error, besterror ) )
+            {
+                besterror = error;
+                beststart = a;
+                bestend = b;
+                b0 = c0;
+                b1 = c1;
+            }
 
-	    x1 += m_weighted[c0+c1];
-	}
+            x1 += m_weighted[c0+c1];
+        }
 
         x0 += m_weighted[c0];
     }
@@ -218,8 +218,8 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
         *start = beststart.toVector3();
         *end = bestend.toVector3();
 
-	// save the error
-	m_besterror = besterror;
+        // save the error
+        m_besterror = besterror;
 
         return true;
     }
@@ -308,10 +308,10 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
                 }
 
                 x2 += m_weighted[c0+c1+c2];
-	    }
+            }
 
-	    x1 += m_weighted[c0+c1];
-	}
+            x1 += m_weighted[c0+c1];
+        }
 
         x0 += m_weighted[c0];
     }
@@ -321,9 +321,9 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
     {
         *start = beststart.toVector3();
         *end = bestend.toVector3();
-		
-	// save the error
-	m_besterror = besterror;
+
+        // save the error
+        m_besterror = besterror;
 
         return true;
     }
@@ -404,12 +404,12 @@ bool ClusterFit::compress3(Vector3 * start, Vector3 * end)
     // save the block if necessary
     if( besterror < m_besterror )
     {
-		
+
         *start = beststart;
         *end = bestend;
 
-	// save the error
-	m_besterror = besterror;
+        // save the error
+        m_besterror = besterror;
 
         return true;
     }
@@ -420,8 +420,8 @@ bool ClusterFit::compress3(Vector3 * start, Vector3 * end)
 bool ClusterFit::compress4(Vector3 * start, Vector3 * end)
 {
     const uint count = m_count;
-    Vector3 const grid( 31.0f, 63.0f, 31.0f );
-    Vector3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
+    const Vector3 grid( 31.0f, 63.0f, 31.0f );
+    const Vector3 gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
 
     // declare variables
     Vector3 beststart( 0.0f );
diff --git a/src/nvtt/QuickCompressDXT.cpp b/src/nvtt/QuickCompressDXT.cpp
index b6e788a..214f66d 100644
--- a/src/nvtt/QuickCompressDXT.cpp
+++ b/src/nvtt/QuickCompressDXT.cpp
@@ -179,8 +179,13 @@ inline static uint computeIndices4(const ColorSet & set, Vector3::Arg maxColor,
 	palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f);
 	
 	uint indices = 0;
-	for(int i = 0; i < 16; i++)
+    for(int i = 0; i < 16; i++)
 	{
+        if (!set.isValidIndex(i)) {
+            // Skip masked pixels and out of bounds.
+            continue;
+        }
+
         Vector3 color = set.color(i).xyz();
 
 		float d0 = colorDistance(palette[0], color);
@@ -237,16 +242,20 @@ inline static uint computeIndices3(const ColorSet & set, Vector3::Arg maxColor,
 	uint indices = 0;
 	for(int i = 0; i < 16; i++)
 	{
+        if (!set.isValidIndex(i)) {
+            // Skip masked pixels and out of bounds.
+            indices |= 3 << (2 * i);
+            continue;
+        }
+
         Vector3 color = set.color(i).xyz();
-		float alpha = set.color(i).w;
 		
 		float d0 = colorDistance(palette[0], color);
 		float d1 = colorDistance(palette[1], color);
 		float d2 = colorDistance(palette[2], color);
 		
 		uint index;
-		if (alpha == 0) index = 3;
-		else if (d0 < d1 && d0 < d2) index = 0;
+		if (d0 < d1 && d0 < d2) index = 0;
 		else if (d1 < d2) index = 1;
 		else index = 2;
 		
diff --git a/src/nvtt/tools/imgdiff.cpp b/src/nvtt/tools/imgdiff.cpp
index bc00fb5..3e8ea1e 100644
--- a/src/nvtt/tools/imgdiff.cpp
+++ b/src/nvtt/tools/imgdiff.cpp
@@ -72,11 +72,11 @@ struct Error
 		mse = 0.0f;
 	}
 
-	void addSample(float e)
+	void addSample(double e)
 	{
 		samples++;
-		mabse += fabsf(e);
-		maxabse = nv::max(maxabse, fabsf(e));
+		mabse += fabs(e);
+		maxabse = nv::max(maxabse, fabs(e));
 		mse += e * e;
 	}
 
@@ -84,8 +84,8 @@ struct Error
 	{
 		mabse /= samples;
 		mse /= samples;
-		rmse = sqrtf(mse);
-		psnr = (rmse == 0) ? 999.0f : 20.0f * log10(255.0f / rmse);
+		rmse = sqrt(mse);
+		psnr = (rmse == 0) ? 999.0 : 20.0 * log10(255.0 / rmse);
 	}
 
 	void print()
@@ -97,11 +97,11 @@ struct Error
 	}
 
 	int samples;
-	float mabse;
-	float maxabse;
-	float mse;
-	float rmse;
-	float psnr;
+	double mabse;
+	double maxabse;
+	double mse;
+	double rmse;
+	double psnr;
 };
 
 struct NormalError
@@ -230,10 +230,10 @@ int main(int argc, char *argv[])
 			const nv::Color32 c0(image0.pixel(e, i));
 			const nv::Color32 c1(image1.pixel(e, i));
 
-			float r = float(c0.r - c1.r);
-			float g = float(c0.g - c1.g);
-			float b = float(c0.b - c1.b);
-			float a = float(c0.a - c1.a);
+			double r = float(c0.r - c1.r);
+			double g = float(c0.g - c1.g);
+			double b = float(c0.b - c1.b);
+			double a = float(c0.a - c1.a);
 
 			error_r.addSample(r);
 			error_g.addSample(g);
@@ -247,9 +247,9 @@ int main(int argc, char *argv[])
 
 			if (compareAlpha)
 			{
-				error_total.addSample(r * c0.a / 255.0f);
-				error_total.addSample(g * c0.a / 255.0f);
-				error_total.addSample(b * c0.a / 255.0f);
+				error_total.addSample(r * c0.a / 255.0);
+				error_total.addSample(g * c0.a / 255.0);
+				error_total.addSample(b * c0.a / 255.0);
 			}
 			else
 			{