From d11d7a5f386461409b0b92620d60611938a14802 Mon Sep 17 00:00:00 2001 From: castano Date: Tue, 11 Oct 2011 06:40:40 +0000 Subject: [PATCH] seamless cubemap filtering. --- src/nvimage/DirectDrawSurface.cpp | 56 ++-- src/nvimage/DirectDrawSurface.h | 45 ++- src/nvmath/CMakeLists.txt | 13 +- src/nvmath/Color.inl | 20 +- src/nvmath/SphericalHarmonic.cpp | 243 ++++++++++++++ src/nvmath/SphericalHarmonic.h | 418 ++++++++++++++++++++++++ src/nvmath/nvmath.h | 4 +- src/nvthread/ParallelFor.cpp | 2 +- src/nvtt/CubeSurface.cpp | 512 +++++++++++++----------------- src/nvtt/CubeSurface.h | 40 +-- src/nvtt/nvtt.h | 5 +- src/nvtt/tests/cubemaptest.cpp | 2 +- 12 files changed, 978 insertions(+), 382 deletions(-) create mode 100644 src/nvmath/SphericalHarmonic.cpp create mode 100644 src/nvmath/SphericalHarmonic.h diff --git a/src/nvimage/DirectDrawSurface.cpp b/src/nvimage/DirectDrawSurface.cpp index c9faa76..0f3e29d 100644 --- a/src/nvimage/DirectDrawSurface.cpp +++ b/src/nvimage/DirectDrawSurface.cpp @@ -36,32 +36,9 @@ using namespace nv; - -const uint nv::FOURCC_NVTT = MAKEFOURCC('N', 'V', 'T', 'T'); - -const uint nv::FOURCC_DDS = MAKEFOURCC('D', 'D', 'S', ' '); -const uint nv::FOURCC_DXT1 = MAKEFOURCC('D', 'X', 'T', '1'); -const uint nv::FOURCC_DXT2 = MAKEFOURCC('D', 'X', 'T', '2'); -const uint nv::FOURCC_DXT3 = MAKEFOURCC('D', 'X', 'T', '3'); -const uint nv::FOURCC_DXT4 = MAKEFOURCC('D', 'X', 'T', '4'); -const uint nv::FOURCC_DXT5 = MAKEFOURCC('D', 'X', 'T', '5'); -const uint nv::FOURCC_RXGB = MAKEFOURCC('R', 'X', 'G', 'B'); -const uint nv::FOURCC_ATI1 = MAKEFOURCC('A', 'T', 'I', '1'); -const uint nv::FOURCC_ATI2 = MAKEFOURCC('A', 'T', 'I', '2'); - - - namespace { - static const uint FOURCC_A2XY = MAKEFOURCC('A', '2', 'X', 'Y'); - - static const uint FOURCC_DX10 = MAKEFOURCC('D', 'X', '1', '0'); - - static const uint FOURCC_UVER = MAKEFOURCC('U', 'V', 'E', 'R'); - - - static const uint DDSD_CAPS = 0x00000001U; static const uint DDSD_PIXELFORMAT = 0x00001000U; static const 
uint DDSD_WIDTH = 0x00000004U; @@ -210,16 +187,16 @@ namespace #undef CASE } - const char * getD3d10ResourceDimensionString(D3D10_RESOURCE_DIMENSION resourceDimension) + const char * getD3d10ResourceDimensionString(DDS_DIMENSION resourceDimension) { switch(resourceDimension) { default: - case D3D10_RESOURCE_DIMENSION_UNKNOWN: return "UNKNOWN"; - case D3D10_RESOURCE_DIMENSION_BUFFER: return "BUFFER"; - case D3D10_RESOURCE_DIMENSION_TEXTURE1D: return "TEXTURE1D"; - case D3D10_RESOURCE_DIMENSION_TEXTURE2D: return "TEXTURE2D"; - case D3D10_RESOURCE_DIMENSION_TEXTURE3D: return "TEXTURE3D"; + case DDS_DIMENSION_UNKNOWN: return "UNKNOWN"; + case DDS_DIMENSION_BUFFER: return "BUFFER"; + case DDS_DIMENSION_TEXTURE1D: return "TEXTURE1D"; + case DDS_DIMENSION_TEXTURE2D: return "TEXTURE2D"; + case DDS_DIMENSION_TEXTURE3D: return "TEXTURE3D"; } } @@ -531,7 +508,7 @@ DDSHeader::DDSHeader() this->notused = 0; this->header10.dxgiFormat = DXGI_FORMAT_UNKNOWN; - this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_UNKNOWN; + this->header10.resourceDimension = DDS_DIMENSION_UNKNOWN; this->header10.miscFlag = 0; this->header10.arraySize = 0; this->header10.reserved = 0; @@ -580,7 +557,8 @@ void DDSHeader::setMipmapCount(uint count) void DDSHeader::setTexture2D() { - this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE2D; + this->header10.resourceDimension = DDS_DIMENSION_TEXTURE2D; + this->header10.miscFlag = 0; this->header10.arraySize = 1; } @@ -588,7 +566,8 @@ void DDSHeader::setTexture3D() { this->caps.caps2 = DDSCAPS2_VOLUME; - this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE3D; + this->header10.resourceDimension = DDS_DIMENSION_TEXTURE3D; + this->header10.miscFlag = 0; this->header10.arraySize = 1; } @@ -597,8 +576,9 @@ void DDSHeader::setTextureCube() this->caps.caps1 |= DDSCAPS_COMPLEX; this->caps.caps2 = DDSCAPS2_CUBEMAP | DDSCAPS2_CUBEMAP_ALL_FACES; - this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE2D; - 
this->header10.arraySize = 6; + this->header10.resourceDimension = DDS_DIMENSION_TEXTURE2D; + this->header10.miscFlag = DDS_MISC_TEXTURECUBE; + this->header10.arraySize = 1; } void DDSHeader::setLinearSize(uint size) @@ -1084,7 +1064,7 @@ bool DirectDrawSurface::isTexture1D() const nvDebugCheck(isValid()); if (header.hasDX10Header()) { - return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE1D; + return header.header10.resourceDimension == DDS_DIMENSION_TEXTURE1D; } return false; } @@ -1094,7 +1074,7 @@ bool DirectDrawSurface::isTexture2D() const nvDebugCheck(isValid()); if (header.hasDX10Header()) { - return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE2D; + return header.header10.resourceDimension == DDS_DIMENSION_TEXTURE2D; } else { @@ -1107,7 +1087,7 @@ bool DirectDrawSurface::isTexture3D() const nvDebugCheck(isValid()); if (header.hasDX10Header()) { - return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE3D; + return header.header10.resourceDimension == DDS_DIMENSION_TEXTURE3D; } else { @@ -1597,7 +1577,7 @@ void DirectDrawSurface::printInfo() const { printf("DX10 Header:\n"); printf("\tDXGI Format: %u (%s)\n", header.header10.dxgiFormat, getDxgiFormatString((DXGI_FORMAT)header.header10.dxgiFormat)); - printf("\tResource dimension: %u (%s)\n", header.header10.resourceDimension, getD3d10ResourceDimensionString((D3D10_RESOURCE_DIMENSION)header.header10.resourceDimension)); + printf("\tResource dimension: %u (%s)\n", header.header10.resourceDimension, getD3d10ResourceDimensionString((DDS_DIMENSION)header.header10.resourceDimension)); printf("\tMisc flag: %u\n", header.header10.miscFlag); printf("\tArray size: %u\n", header.header10.arraySize); } diff --git a/src/nvimage/DirectDrawSurface.h b/src/nvimage/DirectDrawSurface.h index 5a8c62b..07d135c 100644 --- a/src/nvimage/DirectDrawSurface.h +++ b/src/nvimage/DirectDrawSurface.h @@ -39,17 +39,6 @@ namespace nv class Stream; struct ColorBlock; - 
extern const uint FOURCC_NVTT; - extern const uint FOURCC_DDS; - extern const uint FOURCC_DXT1; - extern const uint FOURCC_DXT2; - extern const uint FOURCC_DXT3; - extern const uint FOURCC_DXT4; - extern const uint FOURCC_DXT5; - extern const uint FOURCC_RXGB; - extern const uint FOURCC_ATI1; - extern const uint FOURCC_ATI2; - enum DDPF { DDPF_ALPHAPIXELS = 0x00000001U, @@ -110,15 +99,37 @@ namespace nv D3DFMT_A32B32G32R32F = 116, }; + enum FOURCC + { + FOURCC_NVTT = MAKEFOURCC('N', 'V', 'T', 'T'), + FOURCC_DDS = MAKEFOURCC('D', 'D', 'S', ' '), + FOURCC_DXT1 = MAKEFOURCC('D', 'X', 'T', '1'), + FOURCC_DXT2 = MAKEFOURCC('D', 'X', 'T', '2'), + FOURCC_DXT3 = MAKEFOURCC('D', 'X', 'T', '3'), + FOURCC_DXT4 = MAKEFOURCC('D', 'X', 'T', '4'), + FOURCC_DXT5 = MAKEFOURCC('D', 'X', 'T', '5'), + FOURCC_RXGB = MAKEFOURCC('R', 'X', 'G', 'B'), + FOURCC_ATI1 = MAKEFOURCC('A', 'T', 'I', '1'), + FOURCC_ATI2 = MAKEFOURCC('A', 'T', 'I', '2'), + FOURCC_A2XY = MAKEFOURCC('A', '2', 'X', 'Y'), + FOURCC_DX10 = MAKEFOURCC('D', 'X', '1', '0'), + FOURCC_UVER = MAKEFOURCC('U', 'V', 'E', 'R'), + }; + // D3D1x resource dimensions. - enum D3D10_RESOURCE_DIMENSION + enum DDS_DIMENSION // D3D10_RESOURCE_DIMENSION + { + DDS_DIMENSION_UNKNOWN = 0, + DDS_DIMENSION_BUFFER = 1, + DDS_DIMENSION_TEXTURE1D = 2, + DDS_DIMENSION_TEXTURE2D = 3, + DDS_DIMENSION_TEXTURE3D = 4, + }; + + enum DDS_MISC_FLAG { - D3D10_RESOURCE_DIMENSION_UNKNOWN = 0, - D3D10_RESOURCE_DIMENSION_BUFFER = 1, - D3D10_RESOURCE_DIMENSION_TEXTURE1D = 2, - D3D10_RESOURCE_DIMENSION_TEXTURE2D = 3, - D3D10_RESOURCE_DIMENSION_TEXTURE3D = 4, + DDS_MISC_TEXTURECUBE = 0x4, }; // DXGI formats. 
diff --git a/src/nvmath/CMakeLists.txt b/src/nvmath/CMakeLists.txt index 53c6b13..2460661 100644 --- a/src/nvmath/CMakeLists.txt +++ b/src/nvmath/CMakeLists.txt @@ -2,13 +2,14 @@ PROJECT(nvmath) SET(MATH_SRCS nvmath.h - Vector.h - Matrix.h - Plane.h Plane.cpp - Box.h - Color.h + Box.h Box.inl + Color.h Color.inl + Fitting.h Fitting.cpp Half.h Half.cpp - Fitting.h Fitting.cpp) + Matrix.h + Plane.h Plane.inl Plane.cpp + SphericalHarmonic.h SphericalHarmonic.cpp + Vector.h Vector.inl) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/src/nvmath/Color.inl b/src/nvmath/Color.inl index 6da2f3a..bdbe03d 100644 --- a/src/nvmath/Color.inl +++ b/src/nvmath/Color.inl @@ -11,13 +11,13 @@ namespace nv { - /// Clamp color components. + // Clamp color components. inline Vector3 colorClamp(Vector3::Arg c) { return Vector3(clamp(c.x, 0.0f, 1.0f), clamp(c.y, 0.0f, 1.0f), clamp(c.z, 0.0f, 1.0f)); } - /// Clamp without allowing the hue to change. + // Clamp without allowing the hue to change. inline Vector3 colorNormalize(Vector3::Arg c) { float scale = 1.0f; @@ -27,15 +27,15 @@ namespace nv return c / scale; } - /// Convert Color32 to Color16. + // Convert Color32 to Color16. inline Color16 toColor16(Color32 c) { Color16 color; // rrrrrggggggbbbbb // rrrrr000gggggg00bbbbb000 - // color.u = (c.u >> 3) & 0x1F; - // color.u |= (c.u >> 5) & 0x7E0; - // color.u |= (c.u >> 8) & 0xF800; + // color.u = (c.u >> 3) & 0x1F; + // color.u |= (c.u >> 5) & 0x7E0; + // color.u |= (c.u >> 8) & 0xF800; color.r = c.r >> 3; color.g = c.g >> 2; @@ -44,13 +44,13 @@ namespace nv } - /// Promote 16 bit color to 32 bit using regular bit expansion. + // Promote 16 bit color to 32 bit using regular bit expansion. 
inline Color32 toColor32(Color16 c) { Color32 color; - // c.u = ((col0.u << 3) & 0xf8) | ((col0.u << 5) & 0xfc00) | ((col0.u << 8) & 0xf80000); - // c.u |= (c.u >> 5) & 0x070007; - // c.u |= (c.u >> 6) & 0x000300; + // c.u = ((col0.u << 3) & 0xf8) | ((col0.u << 5) & 0xfc00) | ((col0.u << 8) & 0xf80000); + // c.u |= (c.u >> 5) & 0x070007; + // c.u |= (c.u >> 6) & 0x000300; color.b = (c.b << 3) | (c.b >> 2); color.g = (c.g << 2) | (c.g >> 4); diff --git a/src/nvmath/SphericalHarmonic.cpp b/src/nvmath/SphericalHarmonic.cpp new file mode 100644 index 0000000..25832e7 --- /dev/null +++ b/src/nvmath/SphericalHarmonic.cpp @@ -0,0 +1,243 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#include <nvmath/SphericalHarmonic.h> + +using namespace nv; + + +namespace +{ + + // Basic integer factorial. + inline static int factorial( int v ) + { + const static int fac_table[] = { 1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800, 39916800 }; + + if(v <= 11){ + return fac_table[v]; + } + + int result = v; + while (--v > 0) { + result *= v; + } + return result; + } + + + // Double factorial. + // Defined as: n!! = n*(n - 2)*(n - 4)..., n!!(0,-1) = 1. + inline static int doubleFactorial( int x ) + { + if (x == 0 || x == -1) { + return 1; + } + + int result = x; + while ((x -= 2) > 0) { + result *= x; + } + + return result; + } + + /// Normalization constant for spherical harmonic. + /// @param l is the band. + /// @param m is the argument, in the range [0, l] + inline static float K( int l, int m ) + { + nvDebugCheck( m >= 0 ); + return sqrtf(((2 * l + 1) * factorial(l - m)) / (4 * PI * factorial(l + m))); + } + + /// Normalization constant for hemispherical harmonic. + inline static float HK( int l, int m ) + { + nvDebugCheck( m >= 0 ); + return sqrtf(((2 * l + 1) * factorial(l - m)) / (2 * PI * factorial(l + m))); + } + + /// Evaluate Legendre polynomial. 
*/ + static float legendre( int l, int m, float x ) + { + // piDebugCheck( m >= 0 ); + // piDebugCheck( m <= l ); + // piDebugCheck( fabs(x) <= 1 ); + + // Rule 2 needs no previous results + if (l == m) { + return powf(-1.0f, float(m)) * doubleFactorial(2 * m - 1) * powf(1 - x*x, 0.5f * m); + } + + // Rule 3 requires the result for the same argument of the previous band + if (l == m + 1) { + return x * (2 * m + 1) * legendrePolynomial(m, m, x); + } + + // Main recurrence used by rule 1 that uses result of the same argument from + // the previous two bands + return (x * (2 * l - 1) * legendrePolynomial(l - 1, m, x) - (l + m - 1) * legendrePolynomial(l - 2, m, x)) / (l - m); + } + + + template <int l, int m> float legendre(float x); // primary template; closed forms are specialized below + + template <> float legendre<0, 0>(float ) { + return 1; + } + + template <> float legendre<1, 0>(float x) { + return x; + } + template <> float legendre<1, 1>(float x) { + return -sqrtf(1 - x * x); + } + + template <> float legendre<2, 0>(float x) { + return -0.5f + (3 * x * x) / 2; + } + template <> float legendre<2, 1>(float x) { + return -3 * x * sqrtf(1 - x * x); + } + template <> float legendre<2, 2>(float x) { + return -3 * (-1 + x * x); + } + + template <> float legendre<3, 0>(float x) { + return -(3 * x) / 2 + (5 * x * x * x) / 2; + } + template <> float legendre<3, 1>(float x) { + return -3 * sqrtf(1 - x * x) / 2 * (-1 + 5 * x * x); + } + template <> float legendre<3, 2>(float x) { + return -15 * (-x + x * x * x); + } + template <> float legendre<3, 3>(float x) { + return -15 * powf(1 - x * x, 1.5f); + } + + template <> float legendre<4, 0>(float x) { + return 0.125f * (3.0f - 30.0f * x * x + 35.0f * x * x * x * x); + } + template <> float legendre<4, 1>(float x) { + return -2.5f * x * sqrtf(1.0f - x * x) * (7.0f * x * x - 3.0f); + } + template <> float legendre<4, 2>(float x) { + return -7.5f * (1.0f - 8.0f * x * x + 7.0f * x * x * x * x); + } + template <> float legendre<4, 3>(float x) { + return -105.0f * x * powf(1 - x * x, 1.5f); + } + 
template <> float legendre<4, 4>(float x) { + return 105.0f * (x * x - 1.0f) * (x * x - 1.0f); + } + +} // namespace + + +float nv::legendrePolynomial(int l, int m, float x) +{ + switch(l) + { + case 0: + return legendre<0, 0>(x); + case 1: + if(m == 0) return legendre<1, 0>(x); + return legendre<1, 1>(x); + case 2: + if(m == 0) return legendre<2, 0>(x); + else if(m == 1) return legendre<2, 1>(x); + return legendre<2, 2>(x); + case 3: + if(m == 0) return legendre<3, 0>(x); + else if(m == 1) return legendre<3, 1>(x); + else if(m == 2) return legendre<3, 2>(x); + return legendre<3, 3>(x); + case 4: + if(m == 0) return legendre<4, 0>(x); + else if(m == 1) return legendre<4, 1>(x); + else if(m == 2) return legendre<4, 2>(x); + else if(m == 3) return legendre<4, 3>(x); + else return legendre<4, 4>(x); + } + + // Fallback to the expensive version. + return legendre(l, m, x); +} + + +/** + * Evaluate the spherical harmonic function for the given angles. + * @param l is the band. + * @param m is the argument, in the range [-l,l] + * @param theta is the altitude, in the range [0, PI] + * @param phi is the azimuth, in the range [0, 2*PI] + */ +float nv::shBasis( int l, int m, float theta, float phi ) +{ + if( m == 0 ) { + // K(l, 0) = sqrt((2*l+1)/(4*PI)) + return sqrtf((2 * l + 1) / (4 * PI)) * legendrePolynomial(l, 0, cosf(theta)); + } + else if( m > 0 ) { + return sqrtf(2.0f) * K(l, m) * cosf(m * phi) * legendrePolynomial(l, m, cosf(theta)); + } + else { + return sqrtf(2.0f) * K(l, -m) * sinf(-m * phi) * legendrePolynomial(l, -m, cosf(theta)); + } +} + + +/** + * Real spherical harmonic function of an unit vector. 
Uses the following + * equalities to call the angular function: + * x = sin(theta)*cos(phi) + * y = sin(theta)*sin(phi) + * z = cos(theta) + */ +float nv::shBasis( int l, int m, Vector3::Arg v ) +{ + float theta = acosf(v.z); + float phi = atan2f(v.y, v.x); + return shBasis( l, m, theta, phi ); +} + + +/** + * Evaluate the hemispherical harmonic function for the given angles. + * @param l is the band. + * @param m is the argument, in the range [-l,l] + * @param theta is the altitude, in the range [0, PI/2] + * @param phi is the azimuth, in the range [0, 2*PI] + */ +float nv::hshBasis( int l, int m, float theta, float phi ) +{ + if( m == 0 ) { + // HK(l, 0) = sqrt((2*l+1)/(2*PI)) + return sqrtf((2 * l + 1) / (2 * PI)) * legendrePolynomial(l, 0, 2*cosf(theta)-1); + } + else if( m > 0 ) { + return sqrtf(2.0f) * HK(l, m) * cosf(m * phi) * legendrePolynomial(l, m, 2*cosf(theta)-1); + } + else { + return sqrtf(2.0f) * HK(l, -m) * sinf(-m * phi) * legendrePolynomial(l, -m, 2*cosf(theta)-1); + } +} + + +/** + * Real hemispherical harmonic function of an unit vector. 
Uses the following + * equalities to call the angular function: + * x = sin(theta)*cos(phi) + * y = sin(theta)*sin(phi) + * z = cos(theta) + */ +float nv::hshBasis( int l, int m, Vector3::Arg v ) +{ + float theta = acosf(v.z); + float phi = atan2f(v.y, v.x); + return hshBasis( l, m, theta, phi ); +} + + + diff --git a/src/nvmath/SphericalHarmonic.h b/src/nvmath/SphericalHarmonic.h new file mode 100644 index 0000000..3847d57 --- /dev/null +++ b/src/nvmath/SphericalHarmonic.h @@ -0,0 +1,418 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#ifndef NV_MATH_SPHERICALHARMONIC_H +#define NV_MATH_SPHERICALHARMONIC_H + +#include "Vector.h" + +#include <string.h> // memcpy + + +namespace nv +{ + class Matrix; + + NVMATH_API float legendrePolynomial( int l, int m, float x ) NV_CONST; + NVMATH_API float shBasis( int l, int m, float theta, float phi ) NV_CONST; + NVMATH_API float shBasis( int l, int m, Vector3::Arg v ) NV_CONST; + NVMATH_API float hshBasis( int l, int m, float theta, float phi ) NV_CONST; + NVMATH_API float hshBasis( int l, int m, Vector3::Arg v ) NV_CONST; + + class Sh; + float dot(const Sh & a, const Sh & b) NV_CONST; + + + /// Spherical harmonic class. + class Sh + { + friend class Sh2; + friend class ShMatrix; + public: + + /// Construct a spherical harmonic of the given order. + Sh(int o) : m_order(o) + { + m_elemArray = new float[basisNum()]; + } + + /// Copy constructor. + Sh(const Sh & sh) : m_order(sh.order()) + { + m_elemArray = new float[basisNum()]; + memcpy(m_elemArray, sh.m_elemArray, sizeof(float) * basisNum()); + } + + /// Destructor. + ~Sh() + { + delete [] m_elemArray; + m_elemArray = NULL; + } + + /// Get number of bands. + static int bandNum(int m_order) { + return m_order + 1; + } + + /// Get number of sh basis. + static int basisNum(int m_order) { + return (m_order + 1) * (m_order + 1); + } + + /// Get the index for the given coefficients. + static int index( int l, int m ) { + return l * l + l + m; + } + + /// Get sh order. 
+ int order() const + { + return m_order; + } + + /// Get number of bands. + int bandNum() const + { + return bandNum(m_order); + } + + /// Get number of sh basis functions. + int basisNum() const + { + return basisNum(m_order); + } + + /// Get sh coefficient indexed by l,m. + float elem( int l, int m ) const + { + return m_elemArray[index(l, m)]; + } + + /// Get sh coefficient indexed by l,m. + float & elem( int l, int m ) + { + return m_elemArray[index(l, m)]; + } + + + /// Get sh coefficient indexed by i. + float elemAt( int i ) const { + return m_elemArray[i]; + } + + /// Get sh coefficient indexed by i. + float & elemAt( int i ) + { + return m_elemArray[i]; + } + + + /// Reset the sh coefficients. + void reset() + { + for( int i = 0; i < basisNum(); i++ ) { + m_elemArray[i] = 0.0f; + } + } + + /// Copy spherical harmonic. + void operator= ( const Sh & sh ) + { + nvDebugCheck(order() <= sh.order()); + + for(int i = 0; i < basisNum(); i++) { + m_elemArray[i] = sh.m_elemArray[i]; + } + } + + /// Add spherical harmonics. + void operator+= ( const Sh & sh ) + { + nvDebugCheck(order() == sh.order()); + + for(int i = 0; i < basisNum(); i++) { + m_elemArray[i] += sh.m_elemArray[i]; + } + } + + /// Subtract spherical harmonics. + void operator-= ( const Sh & sh ) + { + nvDebugCheck(order() == sh.order()); + + for(int i = 0; i < basisNum(); i++) { + m_elemArray[i] -= sh.m_elemArray[i]; + } + } + + // Not exactly convolution, nor product. + void operator*= ( const Sh & sh ) + { + nvDebugCheck(order() == sh.order()); + + for(int i = 0; i < basisNum(); i++) { + m_elemArray[i] *= sh.m_elemArray[i]; + } + } + + /// Scale spherical harmonics. + void operator*= ( float f ) + { + for(int i = 0; i < basisNum(); i++) { + m_elemArray[i] *= f; + } + } + + /// Add scaled spherical harmonics. 
+ void addScaled( const Sh & sh, float f ) + { + nvDebugCheck(order() == sh.order()); + + for(int i = 0; i < basisNum(); i++) { + m_elemArray[i] += sh.m_elemArray[i] * f; + } + } + + + /*/// Add a weighted sample to the sh coefficients. + void AddSample( const Vec3 & dir, const Color3f & color, float w=1.0f ) { + for(int l = 0; l <= order; l++) { + for(int m = -l; m <= l; m++) { + Color3f & elem = GetElem(l, m); + elem.Mad( elem, color, w * shBasis(l, m, dir) ); + } + } + }*/ + + /// Evaluate + void eval(Vector3::Arg dir) + { + for(int l = 0; l <= m_order; l++) { + for(int m = -l; m <= l; m++) { + elem(l, m) = shBasis(l, m, dir); + } + } + } + + + /// Evaluate the spherical harmonic function. + float sample(Vector3::Arg dir) const + { + Sh sh(order()); + sh.eval(dir); + + return dot(sh, *this); + } + + + protected: + + const int m_order; + float * m_elemArray; + + }; + + + /// Compute dot product of the spherical harmonics. + inline float dot(const Sh & a, const Sh & b) + { + nvDebugCheck(a.order() == b.order()); + + float sum = 0; + for( int i = 0; i < Sh::basisNum(a.order()); i++ ) { + sum += a.elemAt(i) * b.elemAt(i); + } + + return sum; + } + + + /// Second order spherical harmonic. + class Sh2 : public Sh + { + public: + + /// Constructor. + Sh2() : Sh(2) {} + + /// Copy constructor. + Sh2(const Sh2 & sh) : Sh(sh) {} + + /// Spherical harmonic resulting from projecting the clamped cosine transfer function to the SH basis. 
+ void cosineTransfer() + { + const float c1 = 0.282095f; // K(0, 0) + const float c2 = 0.488603f; // K(1, 0) + const float c3 = 1.092548f; // sqrt(15.0f / PI) / 2.0f = K(2, -2) + const float c4 = 0.315392f; // sqrt(5.0f / PI) / 4.0f = K(2, 0) + const float c5 = 0.546274f; // sqrt(15.0f / PI) / 4.0f = K(2, 2) + + const float normalization = PI * 16.0f / 17.0f; + + const float const1 = c1 * normalization * 1.0f; + const float const2 = c2 * normalization * (2.0f / 3.0f); + const float const3 = c3 * normalization * (1.0f / 4.0f); + const float const4 = c4 * normalization * (1.0f / 4.0f); + const float const5 = c5 * normalization * (1.0f / 4.0f); + + m_elemArray[0] = const1; + + m_elemArray[1] = -const2; + m_elemArray[2] = const2; + m_elemArray[3] = -const2; + + m_elemArray[4] = const3; + m_elemArray[5] = -const3; + m_elemArray[6] = const4; + m_elemArray[7] = -const3; + m_elemArray[8] = const5; + } + }; + + + + /// Spherical harmonic matrix. + class ShMatrix + { + public: + + /// Create an identity matrix of the given order. + ShMatrix(int o = 2) : m_order(o), m_identity(true) + { + nvCheck(m_order > 0); + m_e = new float[size()]; + m_band = new float *[bandNum()]; + setupBands(); + } + + /// Destroy and free matrix elements. + ~ShMatrix() + { + delete [] m_e; // allocated with new[] in the constructor + delete [] m_band; // allocated with new[] in the constructor + } + + /// Set identity matrix. + void setIdentity() + { + m_identity = true; + } + + /// Return true if this is an identity matrix, false in other case. + bool isIdentity() const { + return m_identity; + } + + /// Get number of bands of this matrix. + int bandNum() const + { + return m_order+1; + } + + /// Get total number of elements in the matrix. + int size() const + { + int size = 0; + for (int i = 0; i < bandNum(); i++) { + size += square(i * 2 + 1); + } + return size; + } + + /// Get element at the given raw index. + float element(int idx) const + { + return m_e[idx]; + } + + /// Get element at the given with the given indices. 
+ float & element(int b, int x, int y) + { + nvDebugCheck(b >= 0); + nvDebugCheck(b < bandNum()); + return m_band[b][(b + y) * (b * 2 + 1) + (b + x)]; + } + + /// Get element at the given with the given indices. + float element(int b, int x, int y) const + { + nvDebugCheck(b >= 0); + nvDebugCheck(b < bandNum()); + return m_band[b][(b + y) * (b * 2 + 1) + (b + x)]; + } + + /// Copy matrix. + void copy(const ShMatrix & m) + { + nvDebugCheck(m_order == m.m_order); + memcpy(m_e, m.m_e, size() * sizeof(float)); + } + + /// Rotate the given coefficients. + /*void transform( const Sh & restrict source, Sh * restrict dest ) const { + nvCheck( &source != dest ); // Make sure there's no aliasing. + nvCheck( dest->m_order <= m_order ); + nvCheck( m_order <= source.m_order ); + + if (m_identity) { + *dest = source; + return; + } + + // Loop through each band. + for (int l = 0; l <= dest->m_order; l++) { + + for (int mo = -l; mo <= l; mo++) { + + Color3f rgb = Color3f::Black; + + for( int mi = -l; mi <= l; mi++ ) { + rgb.Mad( rgb, source.elem(l, mi), elem(l, mo, mi) ); + } + + dest->elem(l, mo) = rgb; + } + } + }*/ + + + NVMATH_API void multiply( const ShMatrix &A, const ShMatrix &B ); + NVMATH_API void rotation( const Matrix & m ); + NVMATH_API void rotation( int axis, float angles ); + NVMATH_API void print(); + + + private: + + // @@ These could be static indices precomputed only once. + /// Setup the band pointers. + void setupBands() + { + int size = 0; + for( int i = 0; i < bandNum(); i++ ) { + m_band[i] = &m_e[size]; + size += square(i * 2 + 1); + } + } + + + private: + + // Matrix order. + const int m_order; + + // Identity flag for quick transform. + bool m_identity; + + // Array of elements. + float * m_e; + + // Band pointers. 
+ float ** m_band; + + }; + + +} // nv namespace + +#endif // NV_MATH_SPHERICALHARMONIC_H diff --git a/src/nvmath/nvmath.h b/src/nvmath/nvmath.h index b9a1bad..f486743 100644 --- a/src/nvmath/nvmath.h +++ b/src/nvmath/nvmath.h @@ -6,7 +6,7 @@ #include "nvcore/nvcore.h" #include "nvcore/Debug.h" // nvDebugCheck -#include "nvcore/Utils.h" // clamp +#include "nvcore/Utils.h" // max, clamp #include @@ -109,7 +109,7 @@ namespace nv inline bool equal(const float f0, const float f1, const float epsilon = NV_EPSILON) { //return fabs(f0-f1) <= epsilon; - return fabs(f0-f1) <= epsilon * max(1.0f, fabs(f0), fabs(f1)); + return fabs(f0-f1) <= epsilon * max(1.0f, fabsf(f0), fabsf(f1)); } inline bool isZero(const float f, const float epsilon = NV_EPSILON) diff --git a/src/nvthread/ParallelFor.cpp b/src/nvthread/ParallelFor.cpp index fe15416..cf30504 100644 --- a/src/nvthread/ParallelFor.cpp +++ b/src/nvthread/ParallelFor.cpp @@ -7,7 +7,7 @@ using namespace nv; -#define ENABLE_PARALLEL_FOR 1 +#define ENABLE_PARALLEL_FOR 0 void worker(void * arg) { diff --git a/src/nvtt/CubeSurface.cpp b/src/nvtt/CubeSurface.cpp index 99c3c7e..b194574 100644 --- a/src/nvtt/CubeSurface.cpp +++ b/src/nvtt/CubeSurface.cpp @@ -37,6 +37,199 @@ using namespace nvtt; +// Solid angle of an axis aligned quad from (0,0,1) to (x,y,1) +// See: http://www.fizzmoll11.com/thesis/ for a derivation of this formula. +static float areaElement(float x, float y) { + return atan2(x*y, sqrtf(x*x + y*y + 1)); +} + +// Solid angle of a hemicube texel. +static float solidAngleTerm(uint x, uint y, float inverseEdgeLength) { + // Transform x,y to [-1, 1] range, offset by 0.5 to point to texel center. 
+ float u = (float(x) + 0.5f) * (2 * inverseEdgeLength) - 1.0f; + float v = (float(y) + 0.5f) * (2 * inverseEdgeLength) - 1.0f; + nvDebugCheck(u >= -1.0f && u <= 1.0f); + nvDebugCheck(v >= -1.0f && v <= 1.0f); + +#if 1 + // Exact solid angle: + float x0 = u - inverseEdgeLength; + float y0 = v - inverseEdgeLength; + float x1 = u + inverseEdgeLength; + float y1 = v + inverseEdgeLength; + float solidAngle = areaElement(x0, y0) - areaElement(x0, y1) - areaElement(x1, y0) + areaElement(x1, y1); + nvDebugCheck(solidAngle > 0.0f); + + return solidAngle; +#else + // This formula is equivalent, but not as precise. + float pixel_area = nv::square(2.0f * inverseEdgeLength); + float dist_square = 1.0f + nv::square(u) + nv::square(v); + float cos_theta = 1.0f / sqrt(dist_square); + float cos_theta_d2 = cos_theta / dist_square; // Funny this is just 1/dist^3 or cos(tetha)^3 + + return pixel_area * cos_theta_d2; +#endif +} + + +static Vector3 texelDirection(uint face, uint x, uint y, int edgeLength, bool seamless) +{ + float u, v; + if (seamless) { + // Transform x,y to [-1, 1] range, match up edges exactly. + u = float(x) * 2 / (edgeLength - 1) - 1.0f; + v = float(y) * 2 / (edgeLength - 1) - 1.0f; + } + else { + // Transform x,y to [-1, 1] range, offset by 0.5 to point to texel center. 
+ u = (float(x) + 0.5f) * (2.0f / edgeLength) - 1.0f; // float divide: the int expression 2 / edgeLength is 0 for edgeLength > 2 + v = (float(y) + 0.5f) * (2.0f / edgeLength) - 1.0f; + } + nvDebugCheck(u >= -1.0f && u <= 1.0f); + nvDebugCheck(v >= -1.0f && v <= 1.0f); + + Vector3 n; + + if (face == 0) { + n.x = 1; + n.y = -v; + n.z = -u; + } + if (face == 1) { + n.x = -1; + n.y = -v; + n.z = u; + } + + if (face == 2) { + n.x = u; + n.y = 1; + n.z = v; + } + if (face == 3) { + n.x = u; + n.y = -1; + n.z = -v; + } + + if (face == 4) { + n.x = u; + n.y = -v; + n.z = 1; + } + if (face == 5) { + n.x = -u; + n.y = -v; + n.z = -1; + } + + return normalizeFast(n); +} + + +TexelTable::TexelTable(uint edgeLength, bool seamless) : size(edgeLength) { + + uint hsize = size/2; + + // Allocate a small solid angle table that takes into account cube map symmetry. + solidAngleArray.resize(hsize * hsize); + + for (uint y = 0; y < hsize; y++) { + for (uint x = 0; x < hsize; x++) { + solidAngleArray[y * hsize + x] = solidAngleTerm(hsize+x, hsize+y, 1.0f / edgeLength); // third argument is the inverse edge length + } + } + + + directionArray.resize(size*size*6); + + for (uint f = 0; f < 6; f++) { + for (uint y = 0; y < size; y++) { + for (uint x = 0; x < size; x++) { + directionArray[(f * size + y) * size + x] = texelDirection(f, x, y, edgeLength, seamless); + } + } + } + + +} + +const Vector3 & TexelTable::direction(uint f, uint x, uint y) const { + nvDebugCheck(f < 6 && x < size && y < size); + return directionArray[(f * size + y) * size + x]; +} + +float TexelTable::solidAngle(uint f, uint x, uint y) const { + uint hsize = size/2; + if (x >= hsize) x -= hsize; + else if (x < hsize) x = hsize - x - 1; + if (y >= hsize) y -= hsize; + else if (y < hsize) y = hsize - y - 1; + + return solidAngleArray[y * hsize + x]; +} + + +static const Vector3 faceNormals[6] = { + Vector3(1, 0, 0), + Vector3(-1, 0, 0), + Vector3(0, 1, 0), + Vector3(0, -1, 0), + Vector3(0, 0, 1), + Vector3(0, 0, -1), +}; + +static const Vector3 faceU[6] = { + Vector3(0, 0, -1), + Vector3(0, 0, 1), + Vector3(1, 0, 0), + Vector3(1, 0, 0), + Vector3(1, 
0, 0), + Vector3(-1, 0, 0), +}; + +static const Vector3 faceV[6] = { + Vector3(0, -1, 0), + Vector3(0, -1, 0), + Vector3(0, 0, 1), + Vector3(0, 0, -1), + Vector3(0, -1, 0), + Vector3(0, -1, 0), +}; + + +static Vector2 toPolar(Vector3::Arg v) { + Vector2 p; + p.x = atan2(v.x, v.y); // theta + p.y = acosf(v.z); // phi + return p; +} + +static Vector2 toPlane(float theta, float phi) { + float x = sin(phi) * cos(theta); + float y = sin(phi) * sin(theta); + float z = cos(phi); + + Vector2 p; + p.x = x / fabs(z); + p.y = y / fabs(z); + //p.x = tan(phi) * cos(theta); + //p.y = tan(phi) * sin(theta); + + return p; +} + +static Vector2 toPlane(Vector3::Arg v) { + Vector2 p; + p.x = v.x / fabs(v.z); + p.y = v.y / fabs(v.z); + return p; +} + + + + CubeSurface::CubeSurface() : m(new CubeSurface::Private()) { @@ -183,169 +376,50 @@ Surface CubeSurface::unfold(CubeLayout layout) const } -float CubeSurface::average(int channel) const -{ - const uint edgeLength = m->edgeLength; - - // These tables along with the surface so that we only compute them once. - if (m->solidAngleTable == NULL) { - m->solidAngleTable = new SolidAngleTable(edgeLength); - } - - float total = 0.0f; - float sum = 0.0f; - - for (int f = 0; f < 6; f++) { - float * c = m->face[f].m->image->channel(channel); - - for (uint y = 0; y < edgeLength; y++) { - for (uint x = 0; x < edgeLength; x++) { - float solidAngle = m->solidAngleTable->lookup(x, y); - - total += solidAngle; - sum += c[y * edgeLength + x] * solidAngle; - } - } - } +#include "nvmath/SphericalHarmonic.h" - return sum / total; -} - - -CubeSurface CubeSurface::irradianceFilter(int size) const +CubeSurface CubeSurface::irradianceFilter(int size, bool seamless) const { - // @@ TODO - return CubeSurface(); -} - - - -// Solid angle of an axis aligned quad from (0,0,1) to (x,y,1) -// See: http://www.fizzmoll11.com/thesis/ for a derivation of this formula. 
-static float areaElement(float x, float y) { - return atan2(x*y, sqrtf(x*x + y*y + 1)); -} - -// Solid angle of a hemicube texel. -static float solidAngleTerm(uint x, uint y, float inverseEdgeLength) { - // Transform x,y to [-1, 1] range, offset by 0.5 to point to texel center. - float u = (float(x) + 0.5f) * (2 * inverseEdgeLength) - 1.0f; - float v = (float(y) + 0.5f) * (2 * inverseEdgeLength) - 1.0f; - nvDebugCheck(u >= -1.0f && u <= 1.0f); - nvDebugCheck(v >= -1.0f && v <= 1.0f); - -#if 1 - // Exact solid angle: - float x0 = u - inverseEdgeLength; - float y0 = v - inverseEdgeLength; - float x1 = u + inverseEdgeLength; - float y1 = v + inverseEdgeLength; - float solidAngle = areaElement(x0, y0) - areaElement(x0, y1) - areaElement(x1, y0) + areaElement(x1, y1); - nvDebugCheck(solidAngle > 0.0f); - - return solidAngle; -#else - // This formula is equivalent, but not as precise. - float pixel_area = nv::square(2.0f * inverseEdgeLength); - float dist_square = 1.0f + nv::square(u) + nv::square(v); - float cos_theta = 1.0f / sqrt(dist_square); - float cos_theta_d2 = cos_theta / dist_square; // Funny this is just 1/dist^3 or cos(tetha)^3 + m->allocateTexelTable(); - return pixel_area * cos_theta_d2; -#endif -} + // Transform this cube to spherical harmonic basis + Sh2 sh; sh.reset(); // zero the accumulator: the Sh constructor leaves the coefficients uninitialized + // For each texel of the input cube. + const uint edgeLength = m->edgeLength; + for (uint f = 0; f < 6; f++) { + for (uint y = 0; y < edgeLength; y++) { + for (uint x = 0; x < edgeLength; x++) { -// Small solid angle table that takes into account cube map symmetry. -SolidAngleTable::SolidAngleTable(uint edgeLength) : size(edgeLength/2) { - // Allocate table. - data.resize(size * size); + Vector3 dir = m->texelTable->direction(f, x, y); + float solidAngle = m->texelTable->solidAngle(f, x, y); - // Init table. 
- const float inverseEdgeLength = 1.0f / edgeLength; + Sh2 shDir; + shDir.eval(dir); - for (uint y = 0; y < size; y++) { - for (uint x = 0; x < size; x++) { - data[y * size + x] = solidAngleTerm(size+x, size+y, inverseEdgeLength); + sh.addScaled(shDir, solidAngle); // accumulate the basis of this texel; the old call added sh to itself and ignored shDir + } } } -} -float SolidAngleTable::lookup(uint x, uint y) const { - if (x >= size) x -= size; - else if (x < size) x = size - x - 1; - if (y >= size) y -= size; - else if (y < size) y = size - y - 1; - return data[y * size + x]; -} + // Evaluate spherical harmonic for each output texel. + CubeSurface output; + output.m->allocate(size); -static Vector3 texelDirection(uint face, uint x, uint y, float inverseEdgeLength) -{ - // Transform x,y to [-1, 1] range, offset by 0.5 to point to texel center. - float u = (float(x) + 0.5f) * (2 * inverseEdgeLength) - 1.0f; - float v = (float(y) + 0.5f) * (2 * inverseEdgeLength) - 1.0f; - nvDebugCheck(u >= -1.0f && u <= 1.0f); - nvDebugCheck(v >= -1.0f && v <= 1.0f); - Vector3 n; - if (face == 0) { - n.x = 1; - n.y = -v; - n.z = -u; - } - if (face == 1) { - n.x = -1; - n.y = -v; - n.z = u; - } - - if (face == 2) { - n.x = u; - n.y = 1; - n.z = v; - } - if (face == 3) { - n.x = u; - n.y = -1; - n.z = -v; - } - - if (face == 4) { - n.x = u; - n.y = -v; - n.z = 1; - } - if (face == 5) { - n.x = -u; - n.y = -v; - n.z = -1; - } - - return normalizeFast(n); + // @@ TODO + return CubeSurface(); } -VectorTable::VectorTable(uint edgeLength) : size(edgeLength) { - float invEdgeLength = 1.0f / edgeLength; - - data.resize(size*size*6); +// Warp uv coordinate from [-1, 1] to +float warp(float u, int size) { return u; // TODO(review): warp is not implemented yet; identity placeholder so the function returns a value - for (uint f = 0; f < 6; f++) { - for (uint y = 0; y < size; y++) { - for (uint x = 0; x < size; x++) { - data[(f * size + y) * size + x] = texelDirection(f, x, y, invEdgeLength); - } - } - } } -const Vector3 & VectorTable::lookup(uint f, uint x, uint y) const { - nvDebugCheck(f < 6 && x < size && y < size); - return data[(f * size + y) * size + x]; -} @@ -359,68 +433,9 @@ const 
Vector3 & VectorTable::lookup(uint f, uint x, uint y) const { // - // Other speedups: -// - parallelize. +// - parallelize. Done. // - use ISPC? -static const Vector3 faceNormals[6] = { - Vector3(1, 0, 0), - Vector3(-1, 0, 0), - Vector3(0, 1, 0), - Vector3(0, -1, 0), - Vector3(0, 0, 1), - Vector3(0, 0, -1), -}; - -static const Vector3 faceU[6] = { - Vector3(0, 0, -1), - Vector3(0, 0, 1), - Vector3(1, 0, 0), - Vector3(1, 0, 0), - Vector3(1, 0, 0), - Vector3(-1, 0, 0), -}; - -static const Vector3 faceV[6] = { - Vector3(0, -1, 0), - Vector3(0, -1, 0), - Vector3(0, 0, 1), - Vector3(0, 0, -1), - Vector3(0, -1, 0), - Vector3(0, -1, 0), -}; - - -static Vector2 toPolar(Vector3::Arg v) { - Vector2 p; - p.x = atan2(v.x, v.y); // theta - p.y = acosf(v.z); // phi - return p; -} - -static Vector2 toPlane(float theta, float phi) { - float x = sin(phi) * cos(theta); - float y = sin(phi) * sin(theta); - float z = cos(phi); - - Vector2 p; - p.x = x / fabs(z); - p.y = y / fabs(z); - //p.x = tan(phi) * cos(theta); - //p.y = tan(phi) * sin(theta); - - return p; -} - -static Vector2 toPlane(Vector3::Arg v) { - Vector2 p; - p.x = v.x / fabs(v.z); - p.y = v.y / fabs(v.z); - return p; -} - - - - // Convolve filter against this cube. Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir, float coneAngle, float cosinePower) { @@ -503,7 +518,7 @@ Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir, // Focal point in polar coordinates: Vector2 Fp = toPolar(F); nvCheck(Fp.y >= 0.0f); // top - //nvCheck(Fp.y <= PI/2); // horizon @@ We should cull this earlier. 
+ nvCheck(Fp.y <= PI/2); // horizon // If this is an ellipse: if (Fp.y + coneAngle < PI/2) { @@ -589,11 +604,11 @@ Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir, bool inside = false; for (int x = x0; x <= x1; x++) { - Vector3 dir = vectorTable->lookup(f, x, y); + Vector3 dir = texelTable->direction(f, x, y); float cosineAngle = dot(dir, filterDir); if (cosineAngle > cosineConeAngle) { - float solidAngle = solidAngleTable->lookup(x, y); + float solidAngle = texelTable->solidAngle(f, x, y); float scale = powf(saturate(cosineAngle), cosinePower); float contribution = solidAngle * scale; @@ -641,7 +656,7 @@ void ApplyCosinePowerFilterTask(void * context, int id) nvtt::Surface & filteredFace = ctx->filteredCube->face[f]; FloatImage * filteredImage = filteredFace.m->image; - const Vector3 filterDir = texelDirection(f, x, y, 1.0f / size); + const Vector3 filterDir = texelDirection(f, x, y, size, ctx->filteredCube->seamless); // Convolve filter against cube. Vector3 color = ctx->inputCube->applyCosinePowerFilter(filterDir, ctx->coneAngle, ctx->cosinePower); @@ -652,33 +667,22 @@ void ApplyCosinePowerFilterTask(void * context, int id) } -CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower) const +CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower, bool seamless) const { const uint edgeLength = m->edgeLength; // Allocate output cube. CubeSurface filteredCube; filteredCube.m->allocate(size); + filteredCube.m->seamless = seamless; - // These tables along with the surface so that we only compute them once. - if (m->solidAngleTable == NULL) { - m->solidAngleTable = new SolidAngleTable(edgeLength); - } - if (m->vectorTable == NULL) { - m->vectorTable = new VectorTable(edgeLength); - } + // Texel table is stored along with the surface so that it's compute only once. 
+ m->allocateTexelTable(); const float threshold = 0.001f; const float coneAngle = acosf(powf(threshold, 1.0f/cosinePower)); -#if 1 - // Gather approach. This should be easier to parallelize, because there's no contention in the filtered output. - - // For each texel of the output cube. - // - Determine what texels of the input cube contribute to it. - // - Add weighted contributions. Normalize. - // For each texel of the output cube. /*for (uint f = 0; f < 6; f++) { nvtt::Surface filteredFace = filteredCube.m->face[f]; @@ -687,10 +691,10 @@ CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower) const for (uint y = 0; y < uint(size); y++) { for (uint x = 0; x < uint(size); x++) { - const Vector3 filterDir = texelDirection(f, x, y, 1.0f / size); + const Vector3 filterDir = texelDirection(f, x, y, size, seamless); // Convolve filter against cube. - Vector3 color = m->applyCosinePowerFilter(filterDir, coneAngle, cosinePower); + Vector3 color = m->applyCosinePowerFilter(filterDir, coneAngle, cosinePower, seamless); filteredImage->pixel(0, x, y, 0) = color.x; filteredImage->pixel(1, x, y, 0) = color.y; @@ -708,68 +712,6 @@ CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower) const nv::ParallelFor parallelFor(ApplyCosinePowerFilterTask, &context); parallelFor.run(6 * size * size); -#else - // Scatter approach. - - // For each texel of the input cube. - // - Lookup our solid angle. - // - Determine to what texels of the output cube we contribute. - // - Add our contribution to the texels whose power is above threshold. 
- - for (uint f = 0; f < 6; f++) { - const Surface & face = m->face[f]; - - for (uint y = 0; y < edgeLength; y++) { - for (uint x = 0; x < edgeLength; x++) { - float solidAngle = solidAngleTable.lookup(x, y); - float r = face.m->image->pixel(0, x, y, 0) * solidAngle;; - float g = face.m->image->pixel(1, x, y, 0) * solidAngle;; - float b = face.m->image->pixel(2, x, y, 0) * solidAngle;; - - Vector3 texelDir = texelDirection(f, x, y, 1.0f / edgeLength); - - for (uint ff = 0; ff < 6; ff++) { - FloatImage * filteredFace = filteredCube.m->face[ff].m->image; - - for (uint yy = 0; yy < uint(size); yy++) { - for (uint xx = 0; xx < uint(size); xx++) { - - Vector3 filterDir = texelDirection(ff, xx, yy, 1.0f / size); - - float scale = powf(saturate(dot(texelDir, filterDir)), cosinePower); - - if (scale > threshold) { - filteredFace->pixel(0, xx, yy, 0) += r * scale; - filteredFace->pixel(1, xx, yy, 0) += g * scale; - filteredFace->pixel(2, xx, yy, 0) += b * scale; - filteredFace->pixel(3, xx, yy, 0) += solidAngle * scale; - } - } - } - } - } - } - } - - // Normalize contributions. 
- for (uint f = 0; f < 6; f++) { - FloatImage * filteredFace = filteredCube.m->face[f].m->image; - - for (int i = 0; i < size*size; i++) { - float & r = filteredFace->pixel(0, i); - float & g = filteredFace->pixel(1, i); - float & b = filteredFace->pixel(2, i); - float & sum = filteredFace->pixel(3, i); - float isum = 1.0f / sum; - r *= isum; - g *= isum; - b *= isum; - sum = 1; - } - } - -#endif - return filteredCube; } diff --git a/src/nvtt/CubeSurface.h b/src/nvtt/CubeSurface.h index 19a42e0..8427b64 100644 --- a/src/nvtt/CubeSurface.h +++ b/src/nvtt/CubeSurface.h @@ -38,21 +38,15 @@ namespace nvtt { - struct SolidAngleTable { - SolidAngleTable(uint edgeLength); - float lookup(uint x, uint y) const; + struct TexelTable { + TexelTable(uint edgeLength, bool seamless); - uint size; - nv::Array data; - - }; - - struct VectorTable { - VectorTable(uint edgeLength); - const nv::Vector3 & lookup(uint f, uint x, uint y) const; + float solidAngle(uint f, uint x, uint y) const; + const nv::Vector3 & direction(uint f, uint x, uint y) const; uint size; - nv::Array data; + nv::Array solidAngleArray; + nv::Array directionArray; }; @@ -65,24 +59,23 @@ namespace nvtt nvDebugCheck( refCount() == 0 ); edgeLength = 0; - solidAngleTable = NULL; - vectorTable = NULL; + seamless = false; + texelTable = NULL; } Private(const Private & p) : RefCounted() // Copy ctor. inits refcount to 0. { nvDebugCheck( refCount() == 0 ); edgeLength = p.edgeLength; + seamless = p.seamless; for (uint i = 0; i < 6; i++) { face[i] = p.face[i]; } - solidAngleTable = NULL; // @@ Transfer tables. Needs refcounting? - vectorTable = NULL; + texelTable = NULL; // @@ Transfer tables. Needs refcounting? 
} ~Private() { - delete solidAngleTable; - delete vectorTable; + delete texelTable; } void allocate(uint edgeLength) @@ -95,13 +88,20 @@ namespace nvtt } } + void allocateTexelTable() + { + if (texelTable == NULL) { + texelTable = new TexelTable(edgeLength, seamless); + } + } + // Filtering helpers: nv::Vector3 applyCosinePowerFilter(const nv::Vector3 & dir, float coneAngle, float cosinePower); uint edgeLength; + bool seamless; Surface face[6]; - SolidAngleTable * solidAngleTable; - VectorTable * vectorTable; + TexelTable * texelTable; }; } // nvtt namespace diff --git a/src/nvtt/nvtt.h b/src/nvtt/nvtt.h index 4f2b068..c8c901b 100644 --- a/src/nvtt/nvtt.h +++ b/src/nvtt/nvtt.h @@ -548,6 +548,7 @@ namespace nvtt NVTT_API bool isNull() const; NVTT_API int edgeLength() const; NVTT_API int countMipmaps() const; + NVTT_API bool isSeamless() const; // Texture data. NVTT_API bool load(const char * fileName, int mipmap); @@ -569,8 +570,8 @@ namespace nvtt NVTT_API float average(int channel) const; // Filtering. - NVTT_API CubeSurface irradianceFilter(int size) const; - NVTT_API CubeSurface cosinePowerFilter(int size, float cosinePower) const; + NVTT_API CubeSurface irradianceFilter(int size, bool seamless) const; + NVTT_API CubeSurface cosinePowerFilter(int size, float cosinePower, bool seamless) const; /* diff --git a/src/nvtt/tests/cubemaptest.cpp b/src/nvtt/tests/cubemaptest.cpp index e1c87ed..1db2579 100644 --- a/src/nvtt/tests/cubemaptest.cpp +++ b/src/nvtt/tests/cubemaptest.cpp @@ -86,7 +86,7 @@ int main(int argc, char *argv[]) printf("filtering step: %d/%d\n", m+1, mipmapCount); - filteredEnvmap[m] = envmap.cosinePowerFilter(size, cosine_power); + filteredEnvmap[m] = envmap.cosinePowerFilter(size, cosine_power, false); filteredEnvmap[m].toGamma(2.2f); }