From d11d7a5f386461409b0b92620d60611938a14802 Mon Sep 17 00:00:00 2001
From: castano <castano@95f4ed2b-212e-0410-8b90-d31948207fce>
Date: Tue, 11 Oct 2011 06:40:40 +0000
Subject: [PATCH] seamless cubemap filtering.

---
 src/nvimage/DirectDrawSurface.cpp |  56 ++--
 src/nvimage/DirectDrawSurface.h   |  45 ++-
 src/nvmath/CMakeLists.txt         |  13 +-
 src/nvmath/Color.inl              |  20 +-
 src/nvmath/SphericalHarmonic.cpp  | 243 ++++++++++++++
 src/nvmath/SphericalHarmonic.h    | 418 ++++++++++++++++++++++++
 src/nvmath/nvmath.h               |   4 +-
 src/nvthread/ParallelFor.cpp      |   2 +-
 src/nvtt/CubeSurface.cpp          | 512 +++++++++++++-----------------
 src/nvtt/CubeSurface.h            |  40 +--
 src/nvtt/nvtt.h                   |   5 +-
 src/nvtt/tests/cubemaptest.cpp    |   2 +-
 12 files changed, 978 insertions(+), 382 deletions(-)
 create mode 100644 src/nvmath/SphericalHarmonic.cpp
 create mode 100644 src/nvmath/SphericalHarmonic.h

diff --git a/src/nvimage/DirectDrawSurface.cpp b/src/nvimage/DirectDrawSurface.cpp
index c9faa76..0f3e29d 100644
--- a/src/nvimage/DirectDrawSurface.cpp
+++ b/src/nvimage/DirectDrawSurface.cpp
@@ -36,32 +36,9 @@
 
 using namespace nv;
 
-
-const uint nv::FOURCC_NVTT = MAKEFOURCC('N', 'V', 'T', 'T');
-
-const uint nv::FOURCC_DDS = MAKEFOURCC('D', 'D', 'S', ' ');
-const uint nv::FOURCC_DXT1 = MAKEFOURCC('D', 'X', 'T', '1');
-const uint nv::FOURCC_DXT2 = MAKEFOURCC('D', 'X', 'T', '2');
-const uint nv::FOURCC_DXT3 = MAKEFOURCC('D', 'X', 'T', '3');
-const uint nv::FOURCC_DXT4 = MAKEFOURCC('D', 'X', 'T', '4');
-const uint nv::FOURCC_DXT5 = MAKEFOURCC('D', 'X', 'T', '5');
-const uint nv::FOURCC_RXGB = MAKEFOURCC('R', 'X', 'G', 'B');
-const uint nv::FOURCC_ATI1 = MAKEFOURCC('A', 'T', 'I', '1');
-const uint nv::FOURCC_ATI2 = MAKEFOURCC('A', 'T', 'I', '2');
-
-
-
 namespace
 {
 
-    static const uint FOURCC_A2XY = MAKEFOURCC('A', '2', 'X', 'Y');
-
-    static const uint FOURCC_DX10 = MAKEFOURCC('D', 'X', '1', '0');
-
-    static const uint FOURCC_UVER = MAKEFOURCC('U', 'V', 'E', 'R');
-
-
-
     static const uint DDSD_CAPS = 0x00000001U;
     static const uint DDSD_PIXELFORMAT = 0x00001000U;
     static const uint DDSD_WIDTH = 0x00000004U;
@@ -210,16 +187,16 @@ namespace
 #undef CASE
     }
 
-    const char * getD3d10ResourceDimensionString(D3D10_RESOURCE_DIMENSION resourceDimension)
+    const char * getD3d10ResourceDimensionString(DDS_DIMENSION resourceDimension)
     {
         switch(resourceDimension)
         {
             default:
-            case D3D10_RESOURCE_DIMENSION_UNKNOWN: return "UNKNOWN";
-            case D3D10_RESOURCE_DIMENSION_BUFFER: return "BUFFER";
-            case D3D10_RESOURCE_DIMENSION_TEXTURE1D: return "TEXTURE1D";
-            case D3D10_RESOURCE_DIMENSION_TEXTURE2D: return "TEXTURE2D";
-            case D3D10_RESOURCE_DIMENSION_TEXTURE3D: return "TEXTURE3D";
+            case DDS_DIMENSION_UNKNOWN: return "UNKNOWN";
+            case DDS_DIMENSION_BUFFER: return "BUFFER";
+            case DDS_DIMENSION_TEXTURE1D: return "TEXTURE1D";
+            case DDS_DIMENSION_TEXTURE2D: return "TEXTURE2D";
+            case DDS_DIMENSION_TEXTURE3D: return "TEXTURE3D";
         }
     }
 
@@ -531,7 +508,7 @@ DDSHeader::DDSHeader()
     this->notused = 0;
 
     this->header10.dxgiFormat = DXGI_FORMAT_UNKNOWN;
-    this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_UNKNOWN;
+    this->header10.resourceDimension = DDS_DIMENSION_UNKNOWN;
     this->header10.miscFlag = 0;
     this->header10.arraySize = 0;
     this->header10.reserved = 0;
@@ -580,7 +557,8 @@ void DDSHeader::setMipmapCount(uint count)
 
 void DDSHeader::setTexture2D()
 {
-    this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE2D;
+    this->header10.resourceDimension = DDS_DIMENSION_TEXTURE2D;
+    this->header10.miscFlag = 0;
     this->header10.arraySize = 1;
 }
 
@@ -588,7 +566,8 @@ void DDSHeader::setTexture3D()
 {
     this->caps.caps2 = DDSCAPS2_VOLUME;
 
-    this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE3D;
+    this->header10.resourceDimension = DDS_DIMENSION_TEXTURE3D;
+    this->header10.miscFlag = 0;
     this->header10.arraySize = 1;
 }
 
@@ -597,8 +576,9 @@ void DDSHeader::setTextureCube()
     this->caps.caps1 |= DDSCAPS_COMPLEX;
     this->caps.caps2 = DDSCAPS2_CUBEMAP | DDSCAPS2_CUBEMAP_ALL_FACES;
 
-    this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE2D;
-    this->header10.arraySize = 6;
+    this->header10.resourceDimension = DDS_DIMENSION_TEXTURE2D;
+    this->header10.miscFlag = DDS_MISC_TEXTURECUBE;
+    this->header10.arraySize = 1;
 }
 
 void DDSHeader::setLinearSize(uint size)
@@ -1084,7 +1064,7 @@ bool DirectDrawSurface::isTexture1D() const
     nvDebugCheck(isValid());
     if (header.hasDX10Header())
     {
-        return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE1D;
+        return header.header10.resourceDimension == DDS_DIMENSION_TEXTURE1D;
     }
     return false;
 }
@@ -1094,7 +1074,7 @@ bool DirectDrawSurface::isTexture2D() const
     nvDebugCheck(isValid());
     if (header.hasDX10Header())
     {
-        return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE2D;
+        return header.header10.resourceDimension == DDS_DIMENSION_TEXTURE2D;
     }
     else
     {
@@ -1107,7 +1087,7 @@ bool DirectDrawSurface::isTexture3D() const
     nvDebugCheck(isValid());
     if (header.hasDX10Header())
     {
-        return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE3D;
+        return header.header10.resourceDimension == DDS_DIMENSION_TEXTURE3D;
     }
     else
     {
@@ -1597,7 +1577,7 @@ void DirectDrawSurface::printInfo() const
     {
         printf("DX10 Header:\n");
         printf("\tDXGI Format: %u (%s)\n", header.header10.dxgiFormat, getDxgiFormatString((DXGI_FORMAT)header.header10.dxgiFormat));
-        printf("\tResource dimension: %u (%s)\n", header.header10.resourceDimension, getD3d10ResourceDimensionString((D3D10_RESOURCE_DIMENSION)header.header10.resourceDimension));
+        printf("\tResource dimension: %u (%s)\n", header.header10.resourceDimension, getD3d10ResourceDimensionString((DDS_DIMENSION)header.header10.resourceDimension));
         printf("\tMisc flag: %u\n", header.header10.miscFlag);
         printf("\tArray size: %u\n", header.header10.arraySize);
     }
diff --git a/src/nvimage/DirectDrawSurface.h b/src/nvimage/DirectDrawSurface.h
index 5a8c62b..07d135c 100644
--- a/src/nvimage/DirectDrawSurface.h
+++ b/src/nvimage/DirectDrawSurface.h
@@ -39,17 +39,6 @@ namespace nv
     class Stream;
     struct ColorBlock;
 
-    extern const uint FOURCC_NVTT;
-    extern const uint FOURCC_DDS;
-    extern const uint FOURCC_DXT1;
-    extern const uint FOURCC_DXT2;
-    extern const uint FOURCC_DXT3;
-    extern const uint FOURCC_DXT4;
-    extern const uint FOURCC_DXT5;
-    extern const uint FOURCC_RXGB;
-    extern const uint FOURCC_ATI1;
-    extern const uint FOURCC_ATI2;
-
     enum DDPF
     {
         DDPF_ALPHAPIXELS = 0x00000001U,
@@ -110,15 +99,37 @@ namespace nv
         D3DFMT_A32B32G32R32F = 116,
     };
 
+    enum FOURCC
+    {
+        FOURCC_NVTT = MAKEFOURCC('N', 'V', 'T', 'T'),
+        FOURCC_DDS = MAKEFOURCC('D', 'D', 'S', ' '),
+        FOURCC_DXT1 = MAKEFOURCC('D', 'X', 'T', '1'),
+        FOURCC_DXT2 = MAKEFOURCC('D', 'X', 'T', '2'),
+        FOURCC_DXT3 = MAKEFOURCC('D', 'X', 'T', '3'),
+        FOURCC_DXT4 = MAKEFOURCC('D', 'X', 'T', '4'),
+        FOURCC_DXT5 = MAKEFOURCC('D', 'X', 'T', '5'),
+        FOURCC_RXGB = MAKEFOURCC('R', 'X', 'G', 'B'),
+        FOURCC_ATI1 = MAKEFOURCC('A', 'T', 'I', '1'),
+        FOURCC_ATI2 = MAKEFOURCC('A', 'T', 'I', '2'),
+        FOURCC_A2XY = MAKEFOURCC('A', '2', 'X', 'Y'),
+        FOURCC_DX10 = MAKEFOURCC('D', 'X', '1', '0'),
+        FOURCC_UVER = MAKEFOURCC('U', 'V', 'E', 'R'),
+    };
+
 
     // D3D1x resource dimensions.
-    enum D3D10_RESOURCE_DIMENSION
+    enum DDS_DIMENSION // D3D10_RESOURCE_DIMENSION
+    {
+        DDS_DIMENSION_UNKNOWN = 0,
+        DDS_DIMENSION_BUFFER = 1,
+        DDS_DIMENSION_TEXTURE1D = 2,
+        DDS_DIMENSION_TEXTURE2D = 3,
+        DDS_DIMENSION_TEXTURE3D = 4,
+    };
+
+    enum DDS_MISC_FLAG
     {
-        D3D10_RESOURCE_DIMENSION_UNKNOWN = 0,
-        D3D10_RESOURCE_DIMENSION_BUFFER = 1,
-        D3D10_RESOURCE_DIMENSION_TEXTURE1D = 2,
-        D3D10_RESOURCE_DIMENSION_TEXTURE2D = 3,
-        D3D10_RESOURCE_DIMENSION_TEXTURE3D = 4,
+        DDS_MISC_TEXTURECUBE = 0x4,
     };
 
     // DXGI formats.
diff --git a/src/nvmath/CMakeLists.txt b/src/nvmath/CMakeLists.txt
index 53c6b13..2460661 100644
--- a/src/nvmath/CMakeLists.txt
+++ b/src/nvmath/CMakeLists.txt
@@ -2,13 +2,14 @@ PROJECT(nvmath)
 
 SET(MATH_SRCS
     nvmath.h
-    Vector.h
-    Matrix.h
-    Plane.h Plane.cpp
-    Box.h 
-    Color.h
+    Box.h Box.inl
+    Color.h Color.inl
+    Fitting.h Fitting.cpp
     Half.h Half.cpp
-    Fitting.h Fitting.cpp)
+    Matrix.h
+    Plane.h Plane.inl Plane.cpp
+    SphericalHarmonic.h SphericalHarmonic.cpp
+    Vector.h Vector.inl)
 
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
 
diff --git a/src/nvmath/Color.inl b/src/nvmath/Color.inl
index 6da2f3a..bdbe03d 100644
--- a/src/nvmath/Color.inl
+++ b/src/nvmath/Color.inl
@@ -11,13 +11,13 @@
 namespace nv
 {
 
-    /// Clamp color components.
+    // Clamp color components.
     inline Vector3 colorClamp(Vector3::Arg c)
     {
         return Vector3(clamp(c.x, 0.0f, 1.0f), clamp(c.y, 0.0f, 1.0f), clamp(c.z, 0.0f, 1.0f));
     }
 
-    /// Clamp without allowing the hue to change.
+    // Clamp without allowing the hue to change.
     inline Vector3 colorNormalize(Vector3::Arg c)
     {
         float scale = 1.0f;
@@ -27,15 +27,15 @@ namespace nv
         return c / scale;
     }
 
-    /// Convert Color32 to Color16.
+    // Convert Color32 to Color16.
     inline Color16 toColor16(Color32 c)
     {
         Color16 color;
         //         rrrrrggggggbbbbb
         // rrrrr000gggggg00bbbbb000
-        //	color.u = (c.u >> 3) & 0x1F;
-        //	color.u |= (c.u >> 5) & 0x7E0;
-        //	color.u |= (c.u >> 8) & 0xF800;
+        // color.u = (c.u >> 3) & 0x1F;
+        // color.u |= (c.u >> 5) & 0x7E0;
+        // color.u |= (c.u >> 8) & 0xF800;
 
         color.r = c.r >> 3;
         color.g = c.g >> 2;
@@ -44,13 +44,13 @@ namespace nv
     }
 
 
-    /// Promote 16 bit color to 32 bit using regular bit expansion.
+    // Promote 16 bit color to 32 bit using regular bit expansion.
     inline Color32 toColor32(Color16 c)
     {
         Color32 color;
-        //	c.u = ((col0.u << 3) & 0xf8) | ((col0.u << 5) & 0xfc00) | ((col0.u << 8) & 0xf80000);
-        //	c.u |= (c.u >> 5) & 0x070007;
-        //	c.u |= (c.u >> 6) & 0x000300;
+        // c.u = ((col0.u << 3) & 0xf8) | ((col0.u << 5) & 0xfc00) | ((col0.u << 8) & 0xf80000);
+        // c.u |= (c.u >> 5) & 0x070007;
+        // c.u |= (c.u >> 6) & 0x000300;
 
         color.b = (c.b << 3) | (c.b >> 2);
         color.g = (c.g << 2) | (c.g >> 4);
diff --git a/src/nvmath/SphericalHarmonic.cpp b/src/nvmath/SphericalHarmonic.cpp
new file mode 100644
index 0000000..25832e7
--- /dev/null
+++ b/src/nvmath/SphericalHarmonic.cpp
@@ -0,0 +1,243 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#include <nvmath/SphericalHarmonic.h>
+
+using namespace nv;
+
+
+namespace
+{
+	
+	// Factorial of a non-negative integer. Returned as float because 13! and above overflow int.
+	inline static float factorial( int v )
+	{
+		const static int fac_table[] = { 1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800, 39916800 };
+
+		nvDebugCheck( v >= 0 );	// A negative argument would index before the table.
+		if(v <= 11){
+			return float(fac_table[v]);	// Every table entry is exactly representable in float.
+		}
+		float result = float(v);
+		while (--v > 0) {
+			result *= float(v);
+		}
+		return result;
+	}
+	
+	
+	// Double factorial. 
+	// Defined as: n!! = n*(n - 2)*(n - 4)..., n!!(0,-1) = 1.
+	inline static int doubleFactorial( int x )
+	{
+		if (x == 0 || x == -1) {
+			return 1;
+		}
+	
+		int result = x;
+		while ((x -= 2) > 0) {
+			result *= x;
+		}
+	
+		return result;
+	}	
+	
+	/// Normalization constant for spherical harmonic.
+	/// @param l is the band.
+	/// @param m is the argument, in the range [0, l]
+	inline static float K( int l, int m )
+	{
+		nvDebugCheck( m >= 0 );
+		return sqrtf(((2 * l + 1) * factorial(l - m)) / (4 * PI * factorial(l + m)));
+	}
+	
+	/// Normalization constant for hemispherical harmonic.
+	inline static float HK( int l, int m )
+	{
+		nvDebugCheck( m >= 0 );
+		return sqrtf(((2 * l + 1) * factorial(l - m)) / (2 * PI * factorial(l + m)));
+	}
+
+	/// Evaluate the associated Legendre polynomial using the recursive rules below.
+	static float legendre( int l, int m, float x )
+	{
+	//	piDebugCheck( m >= 0 );
+	//	piDebugCheck( m <= l );
+	//	piDebugCheck( fabs(x) <= 1 );
+	
+		// Rule 2 needs no previous results
+		if (l == m) {
+			return powf(-1.0f, float(m)) * doubleFactorial(2 * m - 1) * powf(1 - x*x, 0.5f * m);
+		}
+	
+		// Rule 3 requires the result for the same argument of the previous band
+		if (l == m + 1) {
+			return x * (2 * m + 1) * legendrePolynomial(m, m, x);
+		}
+	
+		// Main recurrence used by rule 1 that uses result of the same argument from
+		// the previous two bands
+		return (x * (2 * l - 1) * legendrePolynomial(l - 1, m, x) - (l + m - 1) * legendrePolynomial(l - 2, m, x)) / (l - m);
+	}
+	
+	
+	template <int l, int m> float legendre(float x);
+	
+	template <> float legendre<0, 0>(float ) {
+		return 1;
+	}
+	
+	template <> float legendre<1, 0>(float x) {
+		return x;
+	}
+	template <> float legendre<1, 1>(float x) {
+		return -sqrtf(1 - x * x);
+	}
+	
+	template <> float legendre<2, 0>(float x) {
+		return -0.5f + (3 * x * x) / 2;
+	}
+	template <> float legendre<2, 1>(float x) {
+		return -3 * x * sqrtf(1 - x * x);
+	}
+	template <> float legendre<2, 2>(float x) {
+		return -3 * (-1 + x * x);
+	}
+	
+	template <> float legendre<3, 0>(float x) {
+		return -(3 * x) / 2 + (5 * x * x * x) / 2;
+	}
+	template <> float legendre<3, 1>(float x) {
+		return -3 * sqrtf(1 - x * x) / 2 * (-1 + 5 * x * x);
+	}
+	template <> float legendre<3, 2>(float x) {
+		return -15 * (-x + x * x * x);
+	}
+	template <> float legendre<3, 3>(float x) {
+		return -15 * powf(1 - x * x, 1.5f);
+	}
+	
+	template <> float legendre<4, 0>(float x) {
+		return 0.125f * (3.0f - 30.0f * x * x + 35.0f * x * x * x * x);
+	}
+	template <> float legendre<4, 1>(float x) {
+		return -2.5f * x * sqrtf(1.0f - x * x) * (7.0f * x * x - 3.0f);
+	}
+	template <> float legendre<4, 2>(float x) {
+		return -7.5f * (1.0f - 8.0f * x * x + 7.0f * x * x * x * x);
+	}
+	template <> float legendre<4, 3>(float x) {
+		return -105.0f * x * powf(1 - x * x, 1.5f);
+	}
+	template <> float legendre<4, 4>(float x) {
+		return 105.0f * (x * x - 1.0f) * (x * x - 1.0f);
+	}
+
+} // namespace
+
+
+float nv::legendrePolynomial(int l, int m, float x)
+{
+	switch(l)
+	{
+		case 0:
+			return legendre<0, 0>(x);
+		case 1:
+			if(m == 0) return legendre<1, 0>(x);
+			return legendre<1, 1>(x);
+		case 2:
+			if(m == 0) return legendre<2, 0>(x);
+			else if(m == 1) return legendre<2, 1>(x);
+			return legendre<2, 2>(x);
+		case 3:
+			if(m == 0) return legendre<3, 0>(x);
+			else if(m == 1) return legendre<3, 1>(x);
+			else if(m == 2) return legendre<3, 2>(x);
+			return legendre<3, 3>(x);
+		case 4:
+			if(m == 0) return legendre<4, 0>(x);
+			else if(m == 1) return legendre<4, 1>(x);
+			else if(m == 2) return legendre<4, 2>(x);
+			else if(m == 3) return legendre<4, 3>(x);
+			else return legendre<4, 4>(x);
+	}
+	
+	// Fallback to the expensive version.
+	return legendre(l, m, x);
+}
+
+
+/** 
+ * Evaluate the spherical harmonic function for the given angles.
+ * @param l is the band.
+ * @param m is the argument, in the range [-l,l]
+ * @param theta is the altitude, in the range [0, PI]
+ * @param phi is the azimuth, in the range [0, 2*PI]
+ */
+float nv::shBasis( int l, int m, float theta, float phi )
+{
+	if( m == 0 ) {
+		// K(l, 0) = sqrt((2*l+1)/(4*PI))
+		return sqrtf((2 * l + 1) / (4 * PI)) * legendrePolynomial(l, 0, cosf(theta));
+	}
+	else if( m > 0 ) {
+		return sqrtf(2.0f) * K(l, m) * cosf(m * phi) * legendrePolynomial(l, m, cosf(theta));
+	}
+	else {
+		return sqrtf(2.0f) * K(l, -m) * sinf(-m * phi) * legendrePolynomial(l, -m, cosf(theta));
+	}
+}
+
+
+/**
+ * Real spherical harmonic function of a unit vector. Uses the following
+ * equalities to call the angular function:
+ * x = sin(theta)*cos(phi)
+ * y = sin(theta)*sin(phi)
+ * z = cos(theta), clamped to [-1, 1] so rounding in v cannot make acosf return NaN.
+ */
+float nv::shBasis( int l, int m, Vector3::Arg v )
+{
+	float theta = acosf(clamp(v.z, -1.0f, 1.0f));
+	float phi = atan2f(v.y, v.x);
+	return shBasis( l, m, theta, phi );
+}
+
+
+/**
+ * Evaluate the hemispherical harmonic function for the given angles.
+ * @param l is the band.
+ * @param m is the argument, in the range [-l,l]
+ * @param theta is the altitude, in the range [0, PI/2]
+ * @param phi is the azimuth, in the range [0, 2*PI]
+ */
+float nv::hshBasis( int l, int m, float theta, float phi )
+{
+	if( m == 0 ) {
+		// HK(l, 0) = sqrt((2*l+1)/(2*PI))
+		return sqrtf((2 * l + 1) / (2 * PI)) * legendrePolynomial(l, 0, 2*cosf(theta)-1);
+	}
+	else if( m > 0 ) {
+		return sqrtf(2.0f) * HK(l, m) * cosf(m * phi) * legendrePolynomial(l, m, 2*cosf(theta)-1);
+	}
+	else {
+		return sqrtf(2.0f) * HK(l, -m) * sinf(-m * phi) * legendrePolynomial(l, -m, 2*cosf(theta)-1);
+	}
+}
+
+
+/**
+ * Real hemispherical harmonic function of a unit vector. Uses the following
+ * equalities to call the angular function:
+ * x = sin(theta)*cos(phi)
+ * y = sin(theta)*sin(phi)
+ * z = cos(theta), clamped to [-1, 1] so rounding in v cannot make acosf return NaN.
+ */
+float nv::hshBasis( int l, int m, Vector3::Arg v )
+{
+	float theta = acosf(clamp(v.z, -1.0f, 1.0f));
+	float phi = atan2f(v.y, v.x);
+	return hshBasis( l, m, theta, phi );
+}
+
+
+
diff --git a/src/nvmath/SphericalHarmonic.h b/src/nvmath/SphericalHarmonic.h
new file mode 100644
index 0000000..3847d57
--- /dev/null
+++ b/src/nvmath/SphericalHarmonic.h
@@ -0,0 +1,418 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#ifndef NV_MATH_SPHERICALHARMONIC_H
+#define NV_MATH_SPHERICALHARMONIC_H
+
+#include "Vector.h"
+
+#include <string.h> // memcpy
+
+
+namespace nv
+{
+    class Matrix;
+
+    NVMATH_API float legendrePolynomial( int l, int m, float x ) NV_CONST;
+    NVMATH_API float shBasis( int l, int m, float theta, float phi ) NV_CONST;
+    NVMATH_API float shBasis( int l, int m, Vector3::Arg v ) NV_CONST;
+    NVMATH_API float hshBasis( int l, int m, float theta, float phi ) NV_CONST;
+    NVMATH_API float hshBasis( int l, int m, Vector3::Arg v ) NV_CONST;
+
+    class Sh;
+    float dot(const Sh & a, const Sh & b) NV_CONST;
+
+
+    /// Spherical harmonic class.
+    class Sh
+    {
+        friend class Sh2;
+        friend class ShMatrix;
+    public:
+
+        /// Construct a spherical harmonic of the given order.
+        Sh(int o) : m_order(o)
+        {
+            m_elemArray = new float[basisNum()];
+        }
+
+        /// Copy constructor.
+        Sh(const Sh & sh) : m_order(sh.order())
+        {
+            m_elemArray = new float[basisNum()];
+            memcpy(m_elemArray, sh.m_elemArray, sizeof(float) * basisNum());
+        }
+
+        /// Destructor.
+        ~Sh()
+        {
+            delete [] m_elemArray;
+            m_elemArray = NULL;
+        }
+
+        /// Get number of bands.
+        static int bandNum(int m_order) {
+            return m_order + 1;
+        }
+
+        /// Get number of sh basis.
+        static int basisNum(int m_order) {
+            return (m_order + 1) * (m_order + 1);
+        }
+
+        /// Get the index for the given coefficients.
+        static int index( int l, int m ) {
+            return l * l + l + m;
+        }
+
+        /// Get sh order.
+        int order() const
+        {
+            return m_order;
+        }
+
+        /// Get number of bands.
+        int bandNum() const
+        {
+            return bandNum(m_order);
+        }
+
+        /// Get number of sh basis.
+        int basisNum() const
+        {
+            return basisNum(m_order);
+        }
+
+        /// Get sh coefficient indexed by l,m.
+        float elem( int l, int m ) const
+        {
+            return m_elemArray[index(l, m)];
+        }
+
+        /// Get sh coefficient indexed by l,m.
+        float & elem( int l, int m )
+        {
+            return m_elemArray[index(l, m)];
+        }
+
+
+        /// Get sh coefficient indexed by i.
+        float elemAt( int i ) const {
+            return m_elemArray[i];
+        }
+
+        /// Get sh coefficient indexed by i.
+        float & elemAt( int i )
+        {
+            return m_elemArray[i];
+        }
+
+
+        /// Reset the sh coefficients.
+        void reset()
+        {
+            for( int i = 0; i < basisNum(); i++ ) {
+                m_elemArray[i] = 0.0f;
+            }
+        }
+
+        /// Copy spherical harmonic.
+        void operator= ( const Sh & sh )
+        {
+            nvDebugCheck(order() <= sh.order());
+
+            for(int i = 0; i < basisNum(); i++) {
+                m_elemArray[i] = sh.m_elemArray[i];
+            }
+        }
+
+        /// Add spherical harmonics.
+        void operator+= ( const Sh & sh )
+        {
+            nvDebugCheck(order() == sh.order());
+
+            for(int i = 0; i < basisNum(); i++) {
+                m_elemArray[i] += sh.m_elemArray[i];
+            }
+        }
+
+        /// Subtract spherical harmonics.
+        void operator-= ( const Sh & sh )
+        {
+            nvDebugCheck(order() == sh.order());
+
+            for(int i = 0; i < basisNum(); i++) {
+                m_elemArray[i] -= sh.m_elemArray[i];
+            }
+        }
+
+        // Not exactly convolution, nor product.
+        void operator*= ( const Sh & sh )
+        {
+            nvDebugCheck(order() == sh.order());
+
+            for(int i = 0; i < basisNum(); i++) {
+                m_elemArray[i] *= sh.m_elemArray[i];
+            }
+        }
+
+        /// Scale spherical harmonics.
+        void operator*= ( float f )
+        {
+            for(int i = 0; i < basisNum(); i++) {
+                m_elemArray[i] *= f;
+            }
+        }
+
+        /// Add scaled spherical harmonics.
+        void addScaled( const Sh & sh, float f )
+        {
+            nvDebugCheck(order() == sh.order());
+
+            for(int i = 0; i < basisNum(); i++) {
+                m_elemArray[i] += sh.m_elemArray[i] * f;
+            }
+        }
+
+
+        /*/// Add a weighted sample to the sh coefficients.
+        void AddSample( const Vec3 & dir, const Color3f & color, float w=1.0f ) {
+        for(int l = 0; l <= order; l++) {
+        for(int m = -l; m <= l; m++) {
+        Color3f & elem = GetElem(l, m);
+        elem.Mad( elem, color, w * shBasis(l, m, dir) );
+        }
+        }
+        }*/
+
+        /// Evaluate the sh basis functions for the given direction and store them as the coefficients.
+        void eval(Vector3::Arg dir)
+        {
+            for(int l = 0; l <= m_order; l++) {
+                for(int m = -l; m <= l; m++) {
+                    elem(l, m) = shBasis(l, m, dir);
+                }
+            }
+        }
+
+
+        /// Evaluate the spherical harmonic function.
+        float sample(Vector3::Arg dir) const
+        {
+            Sh sh(order());
+            sh.eval(dir);
+
+            return dot(sh, *this);
+        }
+
+
+    protected:
+
+        const int m_order;
+        float * m_elemArray;
+
+    };
+
+
+    /// Compute dot product of the spherical harmonics.
+    inline float dot(const Sh & a, const Sh & b)
+    {
+        nvDebugCheck(a.order() == b.order());
+
+        float sum = 0;
+        for( int i = 0; i < Sh::basisNum(a.order()); i++ ) {
+            sum += a.elemAt(i) * b.elemAt(i);
+        }
+
+        return sum;
+    }
+
+
+    /// Second order spherical harmonic.
+    class Sh2 : public Sh
+    {
+    public:
+
+        /// Constructor.
+        Sh2() : Sh(2) {}
+
+        /// Copy constructor.
+        Sh2(const Sh2 & sh) : Sh(sh) {}
+
+        /// Spherical harmonic resulting from projecting the clamped cosine transfer function to the SH basis.
+        void cosineTransfer()
+        {
+            const float c1 = 0.282095f;	// K(0, 0)
+            const float c2 = 0.488603f; // K(1, 0)
+            const float c3 = 1.092548f; // sqrt(15.0f / PI) / 2.0f = K(2, -2)
+            const float c4 = 0.315392f; // sqrt(5.0f / PI) / 4.0f) = K(2, 0)
+            const float c5 = 0.546274f; // sqrt(15.0f / PI) / 4.0f) = K(2, 2)
+
+            const float normalization = PI * 16.0f / 17.0f;
+
+            const float const1 = c1 * normalization * 1.0f;
+            const float const2 = c2 * normalization * (2.0f / 3.0f);
+            const float const3 = c3 * normalization * (1.0f / 4.0f);
+            const float const4 = c4 * normalization * (1.0f / 4.0f);
+            const float const5 = c5 * normalization * (1.0f / 4.0f);
+
+            m_elemArray[0] = const1;
+
+            m_elemArray[1] = -const2;
+            m_elemArray[2] = const2;
+            m_elemArray[3] = -const2;
+
+            m_elemArray[4] = const3;
+            m_elemArray[5] = -const3;
+            m_elemArray[6] = const4;
+            m_elemArray[7] = -const3;
+            m_elemArray[8] = const5;
+        }
+    };
+
+
+
+    /// Spherical harmonic matrix.
+    class ShMatrix
+    {
+    public:
+
+        /// Create an identity matrix of the given order.
+        ShMatrix(int o = 2) : m_order(o), m_identity(true)
+        {
+            nvCheck(m_order > 0);
+            m_e = new float[size()];
+            m_band = new float *[bandNum()];		
+            setupBands();
+        }
+
+        /// Destroy the matrix, releasing the element array and the band pointer table.
+        ~ShMatrix()
+        {
+            delete [] m_e;      // Allocated with new float[]; the array form is required.
+            delete [] m_band;   // Allocated with new float *[].
+        }
+
+        /// Set identity matrix.
+        void setIdentity()
+        {
+            m_identity = true;
+        }
+
+        /// Return true if this is an identity matrix, false in other case.
+        bool isIdentity() const {
+            return m_identity;
+        }
+
+        /// Get number of bands of this matrix.
+        int bandNum() const
+        {
+            return m_order+1;
+        }
+
+        /// Get total number of elements in the matrix.
+        int size() const
+        {
+            int size = 0;
+            for (int i = 0; i < bandNum(); i++) {
+                size += square(i * 2 + 1);
+            }
+            return size;
+        }
+
+        /// Get element at the given raw index.
+        float element(int idx) const
+        {
+            return m_e[idx];
+        }
+
+        /// Get a reference to the element of band b at the given indices.
+        float & element(int b, int x, int y)
+        {
+            nvDebugCheck(b >= 0);
+            nvDebugCheck(b < bandNum());
+            return m_band[b][(b + y) * (b * 2 + 1) + (b + x)];
+        }
+
+        /// Get the element of band b at the given indices.
+        float element(int b, int x, int y) const
+        {
+            nvDebugCheck(b >= 0);
+            nvDebugCheck(b < bandNum());
+            return m_band[b][(b + y) * (b * 2 + 1) + (b + x)];
+        }
+
+        /// Copy the elements and the identity flag of a matrix of the same order.
+        void copy(const ShMatrix & m) {
+            nvDebugCheck(m_order == m.m_order);
+            memcpy(m_e, m.m_e, size() * sizeof(float));
+            m_identity = m.m_identity;  // Otherwise isIdentity() keeps reporting the destination's old state.
+        }
+
+        /// Rotate the given coefficients.
+        /*void transform( const Sh & restrict source,  Sh * restrict dest ) const {
+            nvCheck( &source != dest );	// Make sure there's no aliasing.
+            nvCheck( dest->m_order <= m_order );
+            nvCheck( m_order <= source.m_order );
+
+            if (m_identity) {
+                *dest = source;
+                return;
+            }
+
+            // Loop through each band.
+            for (int l = 0; l <= dest->m_order; l++) {
+
+                for (int mo = -l; mo <= l; mo++) {
+
+                    Color3f rgb = Color3f::Black;
+
+                    for( int mi = -l; mi <= l; mi++ ) {
+                        rgb.Mad( rgb, source.elem(l, mi), elem(l, mo, mi) );
+                    }
+
+                    dest->elem(l, mo) = rgb;
+                }
+            }
+        }*/
+
+
+        NVMATH_API void multiply( const ShMatrix &A, const ShMatrix &B );
+        NVMATH_API void rotation( const Matrix & m );
+        NVMATH_API void rotation( int axis, float angles );
+        NVMATH_API void print();
+
+
+    private:
+
+        // @@ These could be static indices precomputed only once.
+        /// Setup the band pointers.
+        void setupBands()
+        {
+            int size = 0;
+            for( int i = 0; i < bandNum(); i++ ) {
+                m_band[i] = &m_e[size];
+                size += square(i * 2 + 1);
+            }
+        }
+
+
+    private:
+
+        // Matrix order.
+        const int m_order;
+
+        // Identity flag for quick transform.
+        bool m_identity;
+
+        // Array of elements.
+        float * m_e;
+
+        // Band pointers.
+        float ** m_band;
+
+    };
+
+
+} // nv namespace
+
+#endif // NV_MATH_SPHERICALHARMONIC_H
diff --git a/src/nvmath/nvmath.h b/src/nvmath/nvmath.h
index b9a1bad..f486743 100644
--- a/src/nvmath/nvmath.h
+++ b/src/nvmath/nvmath.h
@@ -6,7 +6,7 @@
 
 #include "nvcore/nvcore.h"
 #include "nvcore/Debug.h"   // nvDebugCheck
-#include "nvcore/Utils.h"   // clamp
+#include "nvcore/Utils.h"   // max, clamp
 
 #include <math.h>
 
@@ -109,7 +109,7 @@ namespace nv
     inline bool equal(const float f0, const float f1, const float epsilon = NV_EPSILON)
     {
         //return fabs(f0-f1) <= epsilon;
-        return fabs(f0-f1) <= epsilon * max(1.0f, fabs(f0), fabs(f1));
+        return fabs(f0-f1) <= epsilon * max(1.0f, fabsf(f0), fabsf(f1));
     }
 
     inline bool isZero(const float f, const float epsilon = NV_EPSILON)
diff --git a/src/nvthread/ParallelFor.cpp b/src/nvthread/ParallelFor.cpp
index fe15416..cf30504 100644
--- a/src/nvthread/ParallelFor.cpp
+++ b/src/nvthread/ParallelFor.cpp
@@ -7,7 +7,7 @@
 
 using namespace nv;
 
-#define ENABLE_PARALLEL_FOR 1
+#define ENABLE_PARALLEL_FOR 0
 
 
 void worker(void * arg) {
diff --git a/src/nvtt/CubeSurface.cpp b/src/nvtt/CubeSurface.cpp
index 99c3c7e..b194574 100644
--- a/src/nvtt/CubeSurface.cpp
+++ b/src/nvtt/CubeSurface.cpp
@@ -37,6 +37,199 @@ using namespace nvtt;
 
 
 
+// Solid angle subtended at the origin by the axis aligned quad from (0,0,1) to (x,y,1).
+// See: http://www.fizzmoll11.com/thesis/ for a derivation of this formula.
+static float areaElement(float x, float y) {
+    return atan2(x*y, sqrtf(x*x + y*y + 1));
+}
+
+// Solid angle of texel (x, y) of a cube face. inverseEdgeLength must be 1 / (face edge length in texels).
+static float solidAngleTerm(uint x, uint y, float inverseEdgeLength) {
+    // Transform x,y to [-1, 1] range, offset by 0.5 to point to texel center.
+    float u = (float(x) + 0.5f) * (2 * inverseEdgeLength) - 1.0f;
+    float v = (float(y) + 0.5f) * (2 * inverseEdgeLength) - 1.0f;
+    nvDebugCheck(u >= -1.0f && u <= 1.0f);
+    nvDebugCheck(v >= -1.0f && v <= 1.0f);
+
+#if 1
+    // Exact solid angle: combine areaElement() at the four texel corners (center +/- half a texel).
+    float x0 = u - inverseEdgeLength;
+    float y0 = v - inverseEdgeLength;
+    float x1 = u + inverseEdgeLength;
+    float y1 = v + inverseEdgeLength;
+    float solidAngle = areaElement(x0, y0) - areaElement(x0, y1) - areaElement(x1, y0) + areaElement(x1, y1);
+    nvDebugCheck(solidAngle > 0.0f);
+
+    return solidAngle;
+#else
+    // This formula is equivalent, but not as precise.
+    float pixel_area = nv::square(2.0f * inverseEdgeLength);
+    float dist_square = 1.0f + nv::square(u) + nv::square(v);
+    float cos_theta = 1.0f / sqrt(dist_square);
+    float cos_theta_d2 = cos_theta / dist_square; // Funny this is just 1/dist^3 or cos(theta)^3
+
+    return pixel_area * cos_theta_d2;
+#endif
+}
+
+
+// Normalized direction through texel (x, y) of cube face 'face' (0..5). With 'seamless' the first and
+// last texels lie exactly on the face edges; otherwise the direction goes through the texel center.
+static Vector3 texelDirection(uint face, uint x, uint y, int edgeLength, bool seamless)
+{
+    float u, v;
+    if (seamless && edgeLength > 1) {   // A 1x1 face would divide by (edgeLength - 1) == 0; use the texel center instead.
+        // Transform x,y to [-1, 1] range, match up edges exactly.
+        u = float(x) * 2 / (edgeLength - 1) - 1.0f;
+        v = float(y) * 2 / (edgeLength - 1) - 1.0f;
+    }
+    else {
+        // Transform x,y to [-1, 1] range, offset by 0.5 to point to texel center.
+        u = (float(x) + 0.5f) * (2.0f / edgeLength) - 1.0f;    // 2.0f: '2 / edgeLength' is an integer division (0 for edgeLength > 2).
+        v = (float(y) + 0.5f) * (2.0f / edgeLength) - 1.0f;
+    }
+    nvDebugCheck(u >= -1.0f && u <= 1.0f);
+    nvDebugCheck(v >= -1.0f && v <= 1.0f);
+
+    Vector3 n;
+
+    if (face == 0) {
+        n.x = 1;
+        n.y = -v;
+        n.z = -u;
+    }
+    if (face == 1) {
+        n.x = -1;
+        n.y = -v;
+        n.z = u;
+    }
+    if (face == 2) {
+        n.x = u;
+        n.y = 1;
+        n.z = v;
+    }
+    if (face == 3) {
+        n.x = u;
+        n.y = -1;
+        n.z = -v;
+    }
+    if (face == 4) {
+        n.x = u;
+        n.y = -v;
+        n.z = 1;
+    }
+    if (face == 5) {
+        n.x = -u;
+        n.y = -v;
+        n.z = -1;
+    }
+
+    return normalizeFast(n);
+}
+
+
+// Precompute the per-texel solid angles and directions for a cube map with the given edge length.
+TexelTable::TexelTable(uint edgeLength, bool seamless) : size(edgeLength) {
+
+    uint hsize = size/2;
+
+    // Allocate a small solid angle table that takes into account cube map symmetry.
+    // Only the quadrant of texels hsize..size-1 is stored; solidAngle() mirrors the others onto it.
+    solidAngleArray.resize(hsize * hsize);
+
+    for (uint y = 0; y < hsize; y++) {
+        for (uint x = 0; x < hsize; x++) {
+            solidAngleArray[y * hsize + x] = solidAngleTerm(hsize+x, hsize+y, 1.0f / edgeLength);  // Expects the inverse edge length.
+        }
+    }
+
+    // One direction per texel of every face, indexed as (f * size + y) * size + x.
+    directionArray.resize(size*size*6);
+
+    for (uint f = 0; f < 6; f++) {
+        for (uint y = 0; y < size; y++) {
+            for (uint x = 0; x < size; x++) {
+                directionArray[(f * size + y) * size + x] = texelDirection(f, x, y, edgeLength, seamless);
+            }
+        }
+    }
+}
+
+const Vector3 & TexelTable::direction(uint f, uint x, uint y) const {  // Precomputed direction of texel (x, y) on face f.
+    nvDebugCheck(f < 6 && x < size && y < size);
+    return directionArray[(f * size + y) * size + x];   // Same layout the constructor fills.
+}
+
+float TexelTable::solidAngle(uint f, uint x, uint y) const {   // f is not used: one quadrant table serves every face.
+    uint hsize = size/2;
+    if (x >= hsize) x -= hsize;             // Upper half maps directly onto the stored quadrant.
+    else if (x < hsize) x = hsize - x - 1;  // Lower half is mirrored onto it.
+    if (y >= hsize) y -= hsize;
+    else if (y < hsize) y = hsize - y - 1;
+
+    return solidAngleArray[y * hsize + x];  // NOTE(review): with an odd size, x == size-1 maps to hsize, past the hsize*hsize table - confirm sizes are always even.
+}
+
+
+static const Vector3 faceNormals[6] = {    // Outward axis of each cube face, indexed by face number.
+    Vector3(1, 0, 0),   // face 0: +x
+    Vector3(-1, 0, 0),  // face 1: -x
+    Vector3(0, 1, 0),   // face 2: +y
+    Vector3(0, -1, 0),  // face 3: -y
+    Vector3(0, 0, 1),   // face 4: +z
+    Vector3(0, 0, -1),  // face 5: -z
+};
+
+static const Vector3 faceU[6] = {  // Direction in which u grows on each face; matches the u terms in texelDirection().
+    Vector3(0, 0, -1),
+    Vector3(0, 0, 1),
+    Vector3(1, 0, 0),
+    Vector3(1, 0, 0),
+    Vector3(1, 0, 0),
+    Vector3(-1, 0, 0),
+};
+
+static const Vector3 faceV[6] = {  // Direction in which v grows on each face; matches the v terms in texelDirection().
+    Vector3(0, -1, 0),
+    Vector3(0, -1, 0),
+    Vector3(0, 0, 1),
+    Vector3(0, 0, -1),
+    Vector3(0, -1, 0),
+    Vector3(0, -1, 0),
+};
+
+
+static Vector2 toPolar(Vector3::Arg v) {    // Returns (theta, phi) in (x, y).
+    Vector2 p;
+    p.x = atan2(v.x, v.y);  // theta  NOTE(review): toPlane(theta, phi) uses x ~ cos(theta), y ~ sin(theta), i.e. atan2(v.y, v.x) - confirm the intended convention.
+    p.y = acosf(v.z);       // phi, measured from +z; presumably v is unit length so v.z is in [-1, 1] - verify against callers.
+    return p;
+}
+
+static Vector2 toPlane(float theta, float phi) {   // Spherical angles -> direction scaled so that |z| == 1; returns its (x, y).
+    float x = sin(phi) * cos(theta);
+    float y = sin(phi) * sin(theta);
+    float z = cos(phi);
+
+    Vector2 p;
+    p.x = x / fabs(z);  // Divides by zero when z == 0.
+    p.y = y / fabs(z);
+    //p.x = tan(phi) * cos(theta);
+    //p.y = tan(phi) * sin(theta);
+
+    return p;
+}
+
+static Vector2 toPlane(Vector3::Arg v) {    // Scales v so that |z| == 1 and returns its (x, y).
+    Vector2 p;
+    p.x = v.x / fabs(v.z);  // Divides by zero when v.z == 0.
+    p.y = v.y / fabs(v.z);
+    return p;
+}
+
+
+
+
 
 CubeSurface::CubeSurface() : m(new CubeSurface::Private())
 {
@@ -183,169 +376,50 @@ Surface CubeSurface::unfold(CubeLayout layout) const
 }
 
 
-float CubeSurface::average(int channel) const
-{
-    const uint edgeLength = m->edgeLength;
-
-    // These tables along with the surface so that we only compute them once.
-    if (m->solidAngleTable == NULL) {
-        m->solidAngleTable = new SolidAngleTable(edgeLength);
-    }
-
-    float total = 0.0f;
-    float sum = 0.0f;
-
-    for (int f = 0; f < 6; f++) {
-        float * c = m->face[f].m->image->channel(channel);
-
-        for (uint y = 0; y < edgeLength; y++) {
-            for (uint x = 0; x < edgeLength; x++) {
-                float solidAngle = m->solidAngleTable->lookup(x, y);
-
-                total += solidAngle;
-                sum += c[y * edgeLength + x] * solidAngle;
-            }
-        }
-    }
+#include "nvmath/SphericalHarmonic.h"
 
-    return sum / total;
-}
-
-
-CubeSurface CubeSurface::irradianceFilter(int size) const
+CubeSurface CubeSurface::irradianceFilter(int size, bool seamless) const
 {
-    // @@ TODO
-    return CubeSurface();
-}
-
-
-
-// Solid angle of an axis aligned quad from (0,0,1) to (x,y,1)
-// See: http://www.fizzmoll11.com/thesis/ for a derivation of this formula.
-static float areaElement(float x, float y) {
-    return atan2(x*y, sqrtf(x*x + y*y + 1));
-}
-
-// Solid angle of a hemicube texel.
-static float solidAngleTerm(uint x, uint y, float inverseEdgeLength) {
-    // Transform x,y to [-1, 1] range, offset by 0.5 to point to texel center.
-    float u = (float(x) + 0.5f) * (2 * inverseEdgeLength) - 1.0f;
-    float v = (float(y) + 0.5f) * (2 * inverseEdgeLength) - 1.0f;
-    nvDebugCheck(u >= -1.0f && u <= 1.0f);
-    nvDebugCheck(v >= -1.0f && v <= 1.0f);
-
-#if 1   
-    // Exact solid angle:
-    float x0 = u - inverseEdgeLength;
-    float y0 = v - inverseEdgeLength;
-    float x1 = u + inverseEdgeLength;
-    float y1 = v + inverseEdgeLength;
-    float solidAngle = areaElement(x0, y0) - areaElement(x0, y1) - areaElement(x1, y0) + areaElement(x1, y1);
-    nvDebugCheck(solidAngle > 0.0f);
-    
-    return solidAngle;
-#else
-    // This formula is equivalent, but not as precise.
-    float pixel_area = nv::square(2.0f * inverseEdgeLength);
-    float dist_square = 1.0f + nv::square(u) + nv::square(v);
-    float cos_theta = 1.0f / sqrt(dist_square);
-    float cos_theta_d2 = cos_theta / dist_square; // Funny this is just 1/dist^3 or cos(tetha)^3
+    m->allocateTexelTable();    // Per-texel directions and solid angles of this (input) cube.
 
-    return pixel_area * cos_theta_d2;
-#endif
-}
+    // Transform this cube to spherical harmonic basis
+    Sh2 sh;
 
+    // For each texel of the input cube.
+    const uint edgeLength = m->edgeLength;
+    for (uint f = 0; f < 6; f++) {
+        for (uint y = 0; y < edgeLength; y++) {     // uint, to match edgeLength and the texel table API.
+            for (uint x = 0; x < edgeLength; x++) {
 
-// Small solid angle table that takes into account cube map symmetry.
-SolidAngleTable::SolidAngleTable(uint edgeLength) : size(edgeLength/2) {
-    // Allocate table.
-    data.resize(size * size);
+                Vector3 dir = m->texelTable->direction(f, x, y);
+                float solidAngle = m->texelTable->solidAngle(f, x, y);
 
-    // Init table.
-    const float inverseEdgeLength = 1.0f / edgeLength;
+                Sh2 shDir;
+                shDir.eval(dir);
 
-    for (uint y = 0; y < size; y++) {
-        for (uint x = 0; x < size; x++) {
-            data[y * size + x] = solidAngleTerm(size+x, size+y, inverseEdgeLength);
+                sh.addScaled(shDir, solidAngle);    // Accumulate this texel's basis (was 'sh' into itself). @@ Texel color is not applied yet.
+            }
         }
     }
-}
 
-float SolidAngleTable::lookup(uint x, uint y) const {
-    if (x >= size) x -= size;
-    else if (x < size) x = size - x - 1;
-    if (y >= size) y -= size;
-    else if (y < size) y = size - y - 1;
 
-    return data[y * size + x];
-}
+    // Evaluate spherical harmonic for each output texel.
+    CubeSurface output;
+    output.m->allocate(size);
 
 
-static Vector3 texelDirection(uint face, uint x, uint y, float inverseEdgeLength)
-{
-    // Transform x,y to [-1, 1] range, offset by 0.5 to point to texel center.
-    float u = (float(x) + 0.5f) * (2 * inverseEdgeLength) - 1.0f;
-    float v = (float(y) + 0.5f) * (2 * inverseEdgeLength) - 1.0f;
-    nvDebugCheck(u >= -1.0f && u <= 1.0f);
-    nvDebugCheck(v >= -1.0f && v <= 1.0f);
 
-    Vector3 n;
 
-    if (face == 0) {
-        n.x = 1;
-        n.y = -v;
-        n.z = -u;
-    }
-    if (face == 1) {
-        n.x = -1;
-        n.y = -v;
-        n.z = u;
-    }
-
-    if (face == 2) {
-        n.x = u;
-        n.y = 1;
-        n.z = v;
-    }
-    if (face == 3) {
-        n.x = u;
-        n.y = -1;
-        n.z = -v;
-    }
-
-    if (face == 4) {
-        n.x = u;
-        n.y = -v;
-        n.z = 1;
-    }
-    if (face == 5) {
-        n.x = -u;
-        n.y = -v;
-        n.z = -1;
-    }
-
-    return normalizeFast(n);
+    // @@ TODO: evaluate 'sh' into 'output' and return it; 'output' and 'seamless' are still unused.
+    return CubeSurface();
 }
 
 
-VectorTable::VectorTable(uint edgeLength) : size(edgeLength) {
-    float invEdgeLength = 1.0f / edgeLength;
-
-    data.resize(size*size*6);
+// Warp uv coordinate from [-1, 1] to ... @@ Unfinished: the target range and the role of 'size' are not decided yet.
+float warp(float u, int size) { return u;  // Identity placeholder: a non-void function must not fall off its end (undefined behavior).
 
-    for (uint f = 0; f < 6; f++) {
-        for (uint y = 0; y < size; y++) {
-            for (uint x = 0; x < size; x++) {
-                data[(f * size + y) * size + x] = texelDirection(f, x, y, invEdgeLength);
-            }
-        }
-    }
 }
 
-const Vector3 & VectorTable::lookup(uint f, uint x, uint y) const {
-    nvDebugCheck(f < 6 && x < size && y < size);
-    return data[(f * size + y) * size + x];
-}
 
 
 
@@ -359,68 +433,9 @@ const Vector3 & VectorTable::lookup(uint f, uint x, uint y) const {
 // -
 
 // Other speedups:
-// - parallelize.
+// - parallelize. Done.
 // - use ISPC?
 
-static const Vector3 faceNormals[6] = {
-    Vector3(1, 0, 0),
-    Vector3(-1, 0, 0),
-    Vector3(0, 1, 0),
-    Vector3(0, -1, 0),
-    Vector3(0, 0, 1),
-    Vector3(0, 0, -1),
-};
-
-static const Vector3 faceU[6] = {
-    Vector3(0, 0, -1),
-    Vector3(0, 0, 1),
-    Vector3(1, 0, 0),
-    Vector3(1, 0, 0),
-    Vector3(1, 0, 0),
-    Vector3(-1, 0, 0),
-};
-
-static const Vector3 faceV[6] = {
-    Vector3(0, -1, 0),
-    Vector3(0, -1, 0),
-    Vector3(0, 0, 1),
-    Vector3(0, 0, -1),
-    Vector3(0, -1, 0),
-    Vector3(0, -1, 0),
-};
-
-
-static Vector2 toPolar(Vector3::Arg v) {
-    Vector2 p;
-    p.x = atan2(v.x, v.y);  // theta
-    p.y = acosf(v.z);       // phi
-    return p;
-}
-
-static Vector2 toPlane(float theta, float phi) {
-    float x = sin(phi) * cos(theta);
-    float y = sin(phi) * sin(theta);
-    float z = cos(phi);
-
-    Vector2 p;
-    p.x = x / fabs(z);
-    p.y = y / fabs(z);
-    //p.x = tan(phi) * cos(theta);
-    //p.y = tan(phi) * sin(theta);
-
-    return p;
-}
-
-static Vector2 toPlane(Vector3::Arg v) {
-    Vector2 p;
-    p.x = v.x / fabs(v.z);
-    p.y = v.y / fabs(v.z);
-    return p;
-}
-
-
-
-
 // Convolve filter against this cube.
 Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir, float coneAngle, float cosinePower)
 {
@@ -503,7 +518,7 @@ Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir,
             // Focal point in polar coordinates:
             Vector2 Fp = toPolar(F);
             nvCheck(Fp.y >= 0.0f);  // top
-            //nvCheck(Fp.y <= PI/2);  // horizon    @@ We should cull this earlier.
+            nvCheck(Fp.y <= PI/2);  // horizon
 
             // If this is an ellipse:
             if (Fp.y + coneAngle < PI/2) {
@@ -589,11 +604,11 @@ Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir,
             bool inside = false;
             for (int x = x0; x <= x1; x++) {
 
-                Vector3 dir = vectorTable->lookup(f, x, y);
+                Vector3 dir = texelTable->direction(f, x, y);
                 float cosineAngle = dot(dir, filterDir);
 
                 if (cosineAngle > cosineConeAngle) {
-                    float solidAngle = solidAngleTable->lookup(x, y);
+                    float solidAngle = texelTable->solidAngle(f, x, y);
                     float scale = powf(saturate(cosineAngle), cosinePower);
                     float contribution = solidAngle * scale;
 
@@ -641,7 +656,7 @@ void ApplyCosinePowerFilterTask(void * context, int id)
     nvtt::Surface & filteredFace = ctx->filteredCube->face[f];
     FloatImage * filteredImage = filteredFace.m->image;
 
-    const Vector3 filterDir = texelDirection(f, x, y, 1.0f / size);
+    const Vector3 filterDir = texelDirection(f, x, y, size, ctx->filteredCube->seamless);
 
     // Convolve filter against cube.
     Vector3 color = ctx->inputCube->applyCosinePowerFilter(filterDir, ctx->coneAngle, ctx->cosinePower);
@@ -652,33 +667,22 @@ void ApplyCosinePowerFilterTask(void * context, int id)
 }
 
 
-CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower) const
+CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower, bool seamless) const
 {
     const uint edgeLength = m->edgeLength;
 
     // Allocate output cube.
     CubeSurface filteredCube;
     filteredCube.m->allocate(size);
+    filteredCube.m->seamless = seamless;
 
-    // These tables along with the surface so that we only compute them once.
-    if (m->solidAngleTable == NULL) {
-        m->solidAngleTable = new SolidAngleTable(edgeLength);
-    }
-    if (m->vectorTable == NULL) {
-        m->vectorTable = new VectorTable(edgeLength);
-    }
+    // Texel table is stored along with the surface so that it's computed only once.
+    m->allocateTexelTable();
 
     const float threshold = 0.001f;
     const float coneAngle = acosf(powf(threshold, 1.0f/cosinePower));
 
 
-#if 1
-    // Gather approach. This should be easier to parallelize, because there's no contention in the filtered output.
-
-    // For each texel of the output cube.
-    // - Determine what texels of the input cube contribute to it.
-    // - Add weighted contributions. Normalize.
-
     // For each texel of the output cube.
     /*for (uint f = 0; f < 6; f++) {
         nvtt::Surface filteredFace = filteredCube.m->face[f];
@@ -687,10 +691,10 @@ CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower) const
         for (uint y = 0; y < uint(size); y++) {
             for (uint x = 0; x < uint(size); x++) {
 
-                const Vector3 filterDir = texelDirection(f, x, y, 1.0f / size);
+                const Vector3 filterDir = texelDirection(f, x, y, size, seamless);
 
                 // Convolve filter against cube.
-                Vector3 color = m->applyCosinePowerFilter(filterDir, coneAngle, cosinePower);
+                Vector3 color = m->applyCosinePowerFilter(filterDir, coneAngle, cosinePower, seamless);
 
                 filteredImage->pixel(0, x, y, 0) = color.x;
                 filteredImage->pixel(1, x, y, 0) = color.y;
@@ -708,68 +712,6 @@ CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower) const
     nv::ParallelFor parallelFor(ApplyCosinePowerFilterTask, &context);
     parallelFor.run(6 * size * size);
 
-#else
-    // Scatter approach.
-
-    // For each texel of the input cube.
-    // - Lookup our solid angle.
-    // - Determine to what texels of the output cube we contribute.
-    // - Add our contribution to the texels whose power is above threshold.
-
-    for (uint f = 0; f < 6; f++) {
-        const Surface & face = m->face[f];
-
-        for (uint y = 0; y < edgeLength; y++) {
-            for (uint x = 0; x < edgeLength; x++) {
-                float solidAngle = solidAngleTable.lookup(x, y);
-                float r = face.m->image->pixel(0, x, y, 0) * solidAngle;;
-                float g = face.m->image->pixel(1, x, y, 0) * solidAngle;;
-                float b = face.m->image->pixel(2, x, y, 0) * solidAngle;;
-
-                Vector3 texelDir = texelDirection(f, x, y, 1.0f / edgeLength);
-
-                for (uint ff = 0; ff < 6; ff++) {
-                    FloatImage * filteredFace = filteredCube.m->face[ff].m->image;
-
-                    for (uint yy = 0; yy < uint(size); yy++) {
-                        for (uint xx = 0; xx < uint(size); xx++) {
-
-                            Vector3 filterDir = texelDirection(ff, xx, yy, 1.0f / size);
-
-                            float scale = powf(saturate(dot(texelDir, filterDir)), cosinePower);
-
-                            if (scale > threshold) {
-                                filteredFace->pixel(0, xx, yy, 0) += r * scale;
-                                filteredFace->pixel(1, xx, yy, 0) += g * scale;
-                                filteredFace->pixel(2, xx, yy, 0) += b * scale;
-                                filteredFace->pixel(3, xx, yy, 0) += solidAngle * scale;
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    // Normalize contributions.
-    for (uint f = 0; f < 6; f++) {
-        FloatImage * filteredFace = filteredCube.m->face[f].m->image;
-
-        for (int i = 0; i < size*size; i++) {
-            float & r = filteredFace->pixel(0, i);
-            float & g = filteredFace->pixel(1, i);
-            float & b = filteredFace->pixel(2, i);
-            float & sum = filteredFace->pixel(3, i);
-            float isum = 1.0f / sum;
-            r *= isum;
-            g *= isum;
-            b *= isum;
-            sum = 1;
-        }
-    }
-
-#endif
-
     return filteredCube;
 }
 
diff --git a/src/nvtt/CubeSurface.h b/src/nvtt/CubeSurface.h
index 19a42e0..8427b64 100644
--- a/src/nvtt/CubeSurface.h
+++ b/src/nvtt/CubeSurface.h
@@ -38,21 +38,15 @@
 
 namespace nvtt
 {
-    struct SolidAngleTable {
-        SolidAngleTable(uint edgeLength);
-        float lookup(uint x, uint y) const;
+    struct TexelTable {    // Precomputed per-texel solid angles and directions of a cube map.
+        TexelTable(uint edgeLength, bool seamless);
 
-        uint size;
-        nv::Array<float> data;
-
-    };
-
-    struct VectorTable {
-        VectorTable(uint edgeLength);
-        const nv::Vector3 & lookup(uint f, uint x, uint y) const;
+        float solidAngle(uint f, uint x, uint y) const;                 // Solid angle of texel (x, y) on face f.
+        const nv::Vector3 & direction(uint f, uint x, uint y) const;    // Direction of texel (x, y) on face f.
 
         uint size;
-        nv::Array<nv::Vector3> data;
+        nv::Array<float> solidAngleArray;       // (size/2)^2 entries: one quadrant of a face.
+        nv::Array<nv::Vector3> directionArray;  // 6 * size * size entries.
     };
 
 
@@ -65,24 +59,23 @@ namespace nvtt
             nvDebugCheck( refCount() == 0 );
 
             edgeLength = 0;
-            solidAngleTable = NULL;
-            vectorTable = NULL;
+            seamless = false;
+            texelTable = NULL;
         }
         Private(const Private & p) : RefCounted() // Copy ctor. inits refcount to 0.
         {
             nvDebugCheck( refCount() == 0 );
 
             edgeLength = p.edgeLength;
+            seamless = p.seamless;
             for (uint i = 0; i < 6; i++) {
                 face[i] = p.face[i];
             }
-            solidAngleTable = NULL; // @@ Transfer tables. Needs refcounting?
-            vectorTable = NULL;
+            texelTable = NULL; // @@ Transfer tables. Needs refcounting?
         }
         ~Private()
         {
-            delete solidAngleTable;
-            delete vectorTable;
+            delete texelTable;
         }
 
         void allocate(uint edgeLength)
@@ -95,13 +88,20 @@ namespace nvtt
             }
         }
 
+        void allocateTexelTable()   // Creates texelTable on first call from the current edgeLength and seamless; later calls do nothing.
+        {
+            if (texelTable == NULL) {
+                texelTable = new TexelTable(edgeLength, seamless);
+            }
+        }
+
         // Filtering helpers:
         nv::Vector3 applyCosinePowerFilter(const nv::Vector3 & dir, float coneAngle, float cosinePower);
 
         uint edgeLength;
+        bool seamless;
         Surface face[6];
-        SolidAngleTable * solidAngleTable;
-        VectorTable * vectorTable;
+        TexelTable * texelTable;
     };
 
 } // nvtt namespace
diff --git a/src/nvtt/nvtt.h b/src/nvtt/nvtt.h
index 4f2b068..c8c901b 100644
--- a/src/nvtt/nvtt.h
+++ b/src/nvtt/nvtt.h
@@ -548,6 +548,7 @@ namespace nvtt
         NVTT_API bool isNull() const;
         NVTT_API int edgeLength() const;
         NVTT_API int countMipmaps() const;
+        NVTT_API bool isSeamless() const;
 
         // Texture data.
         NVTT_API bool load(const char * fileName, int mipmap);
@@ -569,8 +570,8 @@ namespace nvtt
         NVTT_API float average(int channel) const;
 
         // Filtering.
-        NVTT_API CubeSurface irradianceFilter(int size) const;
-        NVTT_API CubeSurface cosinePowerFilter(int size, float cosinePower) const;
+        NVTT_API CubeSurface irradianceFilter(int size, bool seamless) const;
+        NVTT_API CubeSurface cosinePowerFilter(int size, float cosinePower, bool seamless) const;
 
 
         /*
diff --git a/src/nvtt/tests/cubemaptest.cpp b/src/nvtt/tests/cubemaptest.cpp
index e1c87ed..1db2579 100644
--- a/src/nvtt/tests/cubemaptest.cpp
+++ b/src/nvtt/tests/cubemaptest.cpp
@@ -86,7 +86,7 @@ int main(int argc, char *argv[])
 
         printf("filtering step: %d/%d\n", m+1, mipmapCount);
 
-        filteredEnvmap[m] = envmap.cosinePowerFilter(size, cosine_power);
+        filteredEnvmap[m] = envmap.cosinePowerFilter(size, cosine_power, false);
         filteredEnvmap[m].toGamma(2.2f);
     }