- weighted cuda compressor.

- faster and better BC3n compressor
- add normal map flag to DDS files that store normal maps.
- stop using RXGB fourcc code.
- move tools to tools/
- add tentative config dialog for UI-based tool.
- add experimental normal map mipmap generation.
- start adding support for input dds files in nvcompress.
- many other small fixes and cleanup.
This commit is contained in:
castano
2007-05-17 00:11:38 +00:00
parent d729ea51f6
commit babb7e8df7
53 changed files with 9935 additions and 6137 deletions

View File

@ -7,7 +7,10 @@ SET(MATH_SRCS
Box.h
Color.h
Eigen.h Eigen.cpp
Fitting.h Fitting.cpp)
Fitting.h Fitting.cpp
Montecarlo.h Montecarlo.cpp
Random.h Random.cpp
SphericalHarmonic.h SphericalHarmonic.cpp)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

View File

@ -33,11 +33,12 @@ public:
Vector4 row(uint i) const;
Vector4 column(uint i) const;
void scale(scalar s);
void scale(double s);
void scale(Vector3::Arg s);
void translate(Vector3::Arg t);
void rotate(scalar theta, scalar v0, scalar v1, scalar v2);
Matrix inverse();
double determinant();
void apply(Matrix::Arg m);
@ -109,11 +110,12 @@ inline Vector4 Matrix::column(uint i) const
}
/// Apply scale.
inline void Matrix::scale(scalar s)
inline void Matrix::scale(double s)
{
m_data[0] *= s; m_data[1] *= s; m_data[2] *= s; m_data[3] *= s;
m_data[4] *= s; m_data[5] *= s; m_data[6] *= s; m_data[7] *= s;
m_data[8] *= s; m_data[9] *= s; m_data[10] *= s; m_data[11] *= s;
m_data[12] *= s; m_data[13] *= s; m_data[14] *= s; m_data[15] *= s;
}
/// Apply scale.
@ -303,24 +305,42 @@ inline Matrix perspective(scalar fovy, scalar aspect, scalar zNear)
}
/// Get matrix determinant.
inline scalar determinant(Matrix::Arg m)
inline double Matrix::determinant()
{
// @@ not sure this is correct.
return m(0,0) * m(1,1) * m(2,2) * m(3,3) -
m(1,0) * m(2,1) * m(3,2) * m(0,3) +
m(2,0) * m(3,1) * m(0,2) * m(1,3) -
m(3,0) * m(0,1) * m(1,2) * m(2,3) -
m(3,0) * m(2,1) * m(1,2) * m(0,3) +
m(2,0) * m(1,1) * m(0,2) * m(3,3) -
m(1,0) * m(0,1) * m(3,2) * m(2,3) +
m(0,0) * m(3,1) * m(2,2) * m(1,3);
return m_data[3] * m_data[6] * m_data[9] * m_data[12]-m_data[2] * m_data[7] * m_data[9] * m_data[12]-m_data[3] * m_data[5] * m_data[10] * m_data[12]+m_data[1] * m_data[7] * m_data[10] * m_data[12]+
m_data[2] * m_data[5] * m_data[11] * m_data[12]-m_data[1] * m_data[6] * m_data[11] * m_data[12]-m_data[3] * m_data[6] * m_data[8] * m_data[13]+m_data[2] * m_data[7] * m_data[8] * m_data[13]+
m_data[3] * m_data[4] * m_data[10] * m_data[13]-m_data[0] * m_data[7] * m_data[10] * m_data[13]-m_data[2] * m_data[4] * m_data[11] * m_data[13]+m_data[0] * m_data[6] * m_data[11] * m_data[13]+
m_data[3] * m_data[5] * m_data[8] * m_data[14]-m_data[1] * m_data[7] * m_data[8] * m_data[14]-m_data[3] * m_data[4] * m_data[9] * m_data[14]+m_data[0] * m_data[7] * m_data[9] * m_data[14]+
m_data[1] * m_data[4] * m_data[11] * m_data[14]-m_data[0] * m_data[5] * m_data[11] * m_data[14]-m_data[2] * m_data[5] * m_data[8] * m_data[15]+m_data[1] * m_data[6] * m_data[8] * m_data[15]+
m_data[2] * m_data[4] * m_data[9] * m_data[15]-m_data[0] * m_data[6] * m_data[9] * m_data[15]-m_data[1] * m_data[4] * m_data[10] * m_data[15]+m_data[0] * m_data[5] * m_data[10] * m_data[15];
}
//inline Matrix transpose(Matrix::Arg m)
//{
//}
/// Compute the inverse of this matrix.
///
/// Every entry of the result is first filled with a hand-expanded signed sum
/// of triple products of this matrix' elements; the whole result is then
/// scaled by 1/determinant().
/// NOTE(review): there is no guard for a zero determinant, so a singular
/// matrix produces non-finite values in the result.
inline Matrix Matrix::inverse()
{
Matrix r;
// Unscaled entries of the inverse, one per element.
r.m_data[ 0] = m_data[6]*m_data[11]*m_data[13] - m_data[7]*m_data[10]*m_data[13] + m_data[7]*m_data[9]*m_data[14] - m_data[5]*m_data[11]*m_data[14] - m_data[6]*m_data[9]*m_data[15] + m_data[5]*m_data[10]*m_data[15];
r.m_data[ 1] = m_data[3]*m_data[10]*m_data[13] - m_data[2]*m_data[11]*m_data[13] - m_data[3]*m_data[9]*m_data[14] + m_data[1]*m_data[11]*m_data[14] + m_data[2]*m_data[9]*m_data[15] - m_data[1]*m_data[10]*m_data[15];
r.m_data[ 2] = m_data[2]*m_data[7]*m_data[13] - m_data[3]*m_data[6]*m_data[13] + m_data[3]*m_data[5]*m_data[14] - m_data[1]*m_data[7]*m_data[14] - m_data[2]*m_data[5]*m_data[15] + m_data[1]*m_data[6]*m_data[15];
r.m_data[ 3] = m_data[3]*m_data[6]*m_data[9] - m_data[2]*m_data[7]*m_data[9] - m_data[3]*m_data[5]*m_data[10] + m_data[1]*m_data[7]*m_data[10] + m_data[2]*m_data[5]*m_data[11] - m_data[1]*m_data[6]*m_data[11];
r.m_data[ 4] = m_data[7]*m_data[10]*m_data[12] - m_data[6]*m_data[11]*m_data[12] - m_data[7]*m_data[8]*m_data[14] + m_data[4]*m_data[11]*m_data[14] + m_data[6]*m_data[8]*m_data[15] - m_data[4]*m_data[10]*m_data[15];
r.m_data[ 5] = m_data[2]*m_data[11]*m_data[12] - m_data[3]*m_data[10]*m_data[12] + m_data[3]*m_data[8]*m_data[14] - m_data[0]*m_data[11]*m_data[14] - m_data[2]*m_data[8]*m_data[15] + m_data[0]*m_data[10]*m_data[15];
r.m_data[ 6] = m_data[3]*m_data[6]*m_data[12] - m_data[2]*m_data[7]*m_data[12] - m_data[3]*m_data[4]*m_data[14] + m_data[0]*m_data[7]*m_data[14] + m_data[2]*m_data[4]*m_data[15] - m_data[0]*m_data[6]*m_data[15];
r.m_data[ 7] = m_data[2]*m_data[7]*m_data[8] - m_data[3]*m_data[6]*m_data[8] + m_data[3]*m_data[4]*m_data[10] - m_data[0]*m_data[7]*m_data[10] - m_data[2]*m_data[4]*m_data[11] + m_data[0]*m_data[6]*m_data[11];
r.m_data[ 8] = m_data[5]*m_data[11]*m_data[12] - m_data[7]*m_data[9]*m_data[12] + m_data[7]*m_data[8]*m_data[13] - m_data[4]*m_data[11]*m_data[13] - m_data[5]*m_data[8]*m_data[15] + m_data[4]*m_data[9]*m_data[15];
r.m_data[ 9] = m_data[3]*m_data[9]*m_data[12] - m_data[1]*m_data[11]*m_data[12] - m_data[3]*m_data[8]*m_data[13] + m_data[0]*m_data[11]*m_data[13] + m_data[1]*m_data[8]*m_data[15] - m_data[0]*m_data[9]*m_data[15];
r.m_data[10] = m_data[1]*m_data[7]*m_data[12] - m_data[3]*m_data[5]*m_data[12] + m_data[3]*m_data[4]*m_data[13] - m_data[0]*m_data[7]*m_data[13] - m_data[1]*m_data[4]*m_data[15] + m_data[0]*m_data[5]*m_data[15];
r.m_data[11] = m_data[3]*m_data[5]*m_data[8] - m_data[1]*m_data[7]*m_data[8] - m_data[3]*m_data[4]*m_data[9] + m_data[0]*m_data[7]*m_data[9] + m_data[1]*m_data[4]*m_data[11] - m_data[0]*m_data[5]*m_data[11];
r.m_data[12] = m_data[6]*m_data[9]*m_data[12] - m_data[5]*m_data[10]*m_data[12] - m_data[6]*m_data[8]*m_data[13] + m_data[4]*m_data[10]*m_data[13] + m_data[5]*m_data[8]*m_data[14] - m_data[4]*m_data[9]*m_data[14];
r.m_data[13] = m_data[1]*m_data[10]*m_data[12] - m_data[2]*m_data[9]*m_data[12] + m_data[2]*m_data[8]*m_data[13] - m_data[0]*m_data[10]*m_data[13] - m_data[1]*m_data[8]*m_data[14] + m_data[0]*m_data[9]*m_data[14];
r.m_data[14] = m_data[2]*m_data[5]*m_data[12] - m_data[1]*m_data[6]*m_data[12] - m_data[2]*m_data[4]*m_data[13] + m_data[0]*m_data[6]*m_data[13] + m_data[1]*m_data[4]*m_data[14] - m_data[0]*m_data[5]*m_data[14];
r.m_data[15] = m_data[1]*m_data[6]*m_data[8] - m_data[2]*m_data[5]*m_data[8] + m_data[2]*m_data[4]*m_data[9] - m_data[0]*m_data[6]*m_data[9] - m_data[1]*m_data[4]*m_data[10] + m_data[0]*m_data[5]*m_data[10];
// Normalize by the determinant of this (the original) matrix.
r.scale(1./determinant());
return r;
}
//Matrix inverse(Matrix::Arg m);
//Matrix isometryInverse(Matrix::Arg m);
//Matrix affineInverse(Matrix::Arg m);

156
src/nvmath/Montecarlo.cpp Normal file
View File

@ -0,0 +1,156 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvmath/Montecarlo.h>
using namespace nv;
/// Regenerate all samples using the requested sampling method and
/// distribution. An unrecognized method leaves the samples untouched.
void SampleDistribution::redistribute(Method method/*=Method_NRook*/, Distribution dist/*=Distribution_Cosine*/)
{
    if (method == Method_Random) {
        redistributeRandom(dist);
    }
    else if (method == Method_Stratified) {
        redistributeStratified(dist);
    }
    else if (method == Method_NRook) {
        redistributeNRook(dist);
    }
}
/// Fill every sample with an independent random point.
/// Two random numbers are drawn per sample (x first, then y) and mapped from
/// the unit square to spherical angles according to `dist`.
void SampleDistribution::redistributeRandom(const Distribution dist)
{
    const uint total = m_sampleArray.count();

    // This is the worst method possible!
    for (uint s = 0; s < total; ++s)
    {
        const float x = m_rand.getReal();
        const float y = m_rand.getReal();

        // Map uniform distribution in the square to the (hemi)sphere.
        float theta;
        if (dist != Distribution_Uniform) {
            nvDebugCheck(dist == Distribution_Cosine);
            theta = acosf(sqrtf(x));
        }
        else {
            theta = acosf(1 - 2 * x);
        }

        m_sampleArray[s].setUV(theta, 2 * PI * y);
    }
}
/// Fill the samples with one jittered point per cell of a square grid.
/// The sample count is expected to be a perfect square (checked in debug
/// builds only).
void SampleDistribution::redistributeStratified(const Distribution dist)
{
    const uint total = m_sampleArray.count();
    const uint side = uint(sqrtf(float(total)));

    nvDebugCheck(side*side == total); // Must use exact powers!

    // Create a uniform distribution of points on the hemisphere with low variance.
    uint idx = 0;
    for (uint row = 0; row < side; ++row) {
        for (uint col = 0; col < side; ++col) {
            // Jitter inside the (col, row) cell; x is drawn before y.
            const float x = (col + m_rand.getReal()) / float(side);
            const float y = (row + m_rand.getReal()) / float(side);

            // Map uniform distribution in the square to the (hemi)sphere.
            float theta;
            if (dist != Distribution_Uniform) {
                nvDebugCheck(dist == Distribution_Cosine);
                theta = acosf(sqrtf(x));
            }
            else {
                theta = acosf(1 - 2 * x);
            }

            m_sampleArray[idx].setUV(theta, 2 * PI * y);
            ++idx;
        }
    }
}
/** Multi-Stage N-rooks Sampling Method.
 * See: http://www.acm.org/jgt/papers/WangSung9/9
 *
 * Recursively permutes the first `size` entries of `cells` in place: each
 * consecutive pair is randomly split between an "upper" and a "lower" half,
 * both halves are permuted recursively, and the result is the upper half
 * followed by the lower half.
 *
 * @param size  number of entries in `cells`; values <= 1 are a no-op.
 * @param cells array of at least `size` ints, modified in place.
 */
void SampleDistribution::multiStageNRooks(const int size, int* cells)
{
    // Nothing to permute. `size <= 0` must stop here as well: both halves
    // would again have size 0 and the recursion would never terminate.
    if (size <= 1) {
        return;
    }

    int size1 = size >> 1;
    int size2 = size >> 1;

    // For odd sizes one randomly chosen half receives the extra element.
    if (size & 1) {
        if (m_rand.getReal() > 0.5) {
            size1++;
        }
        else {
            size2++;
        }
    }

    int* upper_cells = new int[size1];
    int* lower_cells = new int[size2];

    // Randomly assign the two members of every pair to different halves.
    int i, j;
    for(i = 0, j = 0; i < size - 1; i += 2, j++) {
        if (m_rand.get() & 1) {
            upper_cells[j] = cells[i];
            lower_cells[j] = cells[i + 1];
        }
        else {
            upper_cells[j] = cells[i + 1];
            lower_cells[j] = cells[i];
        }
    }

    // Odd size: the leftover last element goes to the larger half.
    if (size1 != size2) {
        if (size1 > size2) {
            upper_cells[j] = cells[i];
        }
        else {
            lower_cells[j] = cells[i];
        }
    }

    multiStageNRooks(size1, upper_cells);
    memcpy(cells, upper_cells, size1 * sizeof(int));
    delete [] upper_cells;

    multiStageNRooks(size2, lower_cells);
    memcpy(cells + size1, lower_cells, size2 * sizeof(int));
    delete [] lower_cells;
}
/// Fill the samples using N-rooks sampling: sample i is jittered inside
/// column i and inside the row given by a random permutation produced by
/// multiStageNRooks(), then mapped to spherical angles according to `dist`.
void SampleDistribution::redistributeNRook(const Distribution dist)
{
    const uint sampleCount = m_sampleArray.count();

    // With no samples there is nothing to do; returning here also avoids
    // handing a size of 0 to multiStageNRooks(), which only stops at size 1.
    if (sampleCount == 0) {
        return;
    }

    // Generate nrook cells
    int * cells = new int[sampleCount];
    for(uint32 i = 0; i < sampleCount; i++)
    {
        cells[i] = i;
    }
    multiStageNRooks(sampleCount, cells);

    for(uint i = 0; i < sampleCount; i++)
    {
        // x is drawn before y, one random number each.
        float x = (i + m_rand.getReal()) / sampleCount;
        float y = (cells[i] + m_rand.getReal()) / sampleCount;

        // Map uniform distribution in the square to the (hemi)sphere.
        if( dist == Distribution_Uniform ) {
            m_sampleArray[i].setUV(acosf(1 - 2 * x), 2 * PI * y);
        }
        else {
            nvDebugCheck(dist == Distribution_Cosine);
            m_sampleArray[i].setUV(acosf(sqrtf(x)), 2 * PI * y);
        }
    }

    delete [] cells;
}

84
src/nvmath/Montecarlo.h Normal file
View File

@ -0,0 +1,84 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_MONTECARLO_H
#define NV_MATH_MONTECARLO_H
#include <nvmath/Vector.h>
#include <nvmath/Random.h>
namespace nv
{
/// A random sample distribution.
///
/// Holds an array of samples, each stored both as a pair of angles (uv) and
/// as a 3d direction (dir). The samples only receive meaningful values once
/// redistribute() has been called.
class SampleDistribution
{
public:
// Sampling method.
enum Method {
Method_Random,      // independent random points
Method_Stratified,  // one jittered point per cell of a square grid
Method_NRook        // N-rooks sampling
};
// Distribution functions.
enum Distribution {
Distribution_Uniform,   // first angle computed as acos(1 - 2x)
Distribution_Cosine     // first angle computed as acos(sqrt(x))
};
/// Constructor. Allocates room for `num` samples.
/// The random generator is default-constructed, i.e. MTRand with its default seed.
SampleDistribution(int num)
{
m_sampleArray.resize(num);
}
/// Regenerate all samples with the given method and distribution.
void redistribute(Method method=Method_NRook, Distribution dist=Distribution_Cosine);
/// Get parametric coordinates of the sample.
Vector2 sample(int i) { return m_sampleArray[i].uv; }
/// Get sample direction.
Vector3 sampleDir(int i) { return m_sampleArray[i].dir; }
/// Get number of samples.
uint sampleCount() const { return m_sampleArray.count(); }
private:
// One implementation per Method value.
void redistributeRandom(const Distribution dist);
void redistributeStratified(const Distribution dist);
// Helper of redistributeNRook: permutes `cells` in place.
void multiStageNRooks(const int size, int* cells);
void redistributeNRook(const Distribution dist);
/// A sample of the random distribution.
struct Sample
{
/// Set sample given the 3d coordinates.
/// The angles are derived as (acos(z), atan2(y, x)).
void setDir(float x, float y, float z) {
dir.set(x, y, z);
uv.set(acosf(z), atan2f(y, x));
}
/// Set sample given the 2d parametric coordinates.
/// The direction is (sin(u)cos(v), sin(u)sin(v), cos(u)).
void setUV(float u, float v) {
uv.set(u, v);
dir.set(sinf(u) * cosf(v), sinf(u) * sinf(v), cosf(u));
}
Vector2 uv;
Vector3 dir;
};
/// Random number generator used to draw the samples.
MTRand m_rand;
/// Samples.
Array<Sample> m_sampleArray;
};
} // nv namespace
#endif // NV_MATH_MONTECARLO_H

54
src/nvmath/Random.cpp Normal file
View File

@ -0,0 +1,54 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvmath/Random.h>
#include <time.h>
using namespace nv;
// Statics
// Constants of the Rand48 recurrence. The class documentation gives the
// multiplier as a = 0x5DEECE66D and the increment as c = 0xB; a0, a1, a2 hold
// the multiplier split into 16-bit pieces (a0 lowest) and c0 the increment.
const uint16 Rand48::a0 = 0xE66D;
const uint16 Rand48::a1 = 0xDEEC;
const uint16 Rand48::a2 = 0x0005;
const uint16 Rand48::c0 = 0x000B;
/// Derive a seed from the current calendar time as reported by time().
/// The time value is narrowed to uint.
uint Rand::randomSeed()
{
    const time_t now = time(NULL);
    return uint(now);
}
/// Fill the N-element state array from a single 32-bit seed.
void MTRand::initialize( uint32 seed )
{
    // Initialize generator state with seed
    // See Knuth TAOCP Vol 2, 3rd Ed, p.106 for multiplier.
    // In previous versions, most significant bits (MSBs) of the seed affect
    // only MSBs of the state array.  Modified 9 Jan 2002 by Makoto Matsumoto.
    state[0] = seed & 0xffffffffUL;

    // Every following entry is derived from its predecessor and its own index.
    for( int k = 1; k < N; ++k )
    {
        const uint32 prev = state[k - 1];
        state[k] = ( 1812433253UL * ( prev ^ (prev >> 30) ) + k ) & 0xffffffffUL;
    }
}
/// Regenerate the whole state array in place and rewind the read cursor.
void MTRand::reload()
{
// Generate N new values in state
// Made clearer and faster by Matthew Bellew (matthew.bellew@home.com)
uint32 *p = state;
int i;
// First N-M entries: combine with the entry M positions ahead.
for( i = N - M; i--; ++p )
*p = twist( p[M], p[0], p[1] );
// Next M-1 entries: the partner M positions ahead would be past the end, so
// it wraps around to the (already regenerated) entry at offset M-N.
for( i = M; --i; ++p )
*p = twist( p[M-N], p[0], p[1] );
// Last entry: its successor wraps around to state[0].
*p = twist( p[M-N], p[0], state[0] );
// All N values are available again, starting from the beginning.
left = N, next = state;
}

353
src/nvmath/Random.h Normal file
View File

@ -0,0 +1,353 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_RANDOM_H
#define NV_MATH_RANDOM_H
#include <nvcore/Containers.h> // nextPowerOfTwo
#include <nvmath/nvmath.h>
namespace nv
{
/// Interface of the random number generators.
///
/// Derived classes implement get(); the helpers below turn its output into
/// bounded integers and reals.
class Rand
{
public:

    virtual ~Rand() {}

    /// Tag type used to select the "seed from current time" constructors.
    enum time_e { Time };

    /// Provide a new seed. The base implementation ignores it.
    virtual void seed( uint s ) { /* empty */ }

    /// Get an integer random number.
    virtual uint get() = 0;

    /// Get a random number on [0, max] interval (both ends included).
    ///
    /// Uses rejection sampling: the raw value is masked with the smallest
    /// all-ones bit mask that covers `max`, and values above `max` are
    /// redrawn. Building the mask by smearing the bits of `max` itself keeps
    /// `max` reachable when it is an exact power of two and makes max == 0
    /// return immediately.
    uint getRange( uint max )
    {
        // Smallest mask of the form 2^k - 1 that is >= max.
        uint mask = max;
        mask |= mask >> 1;
        mask |= mask >> 2;
        mask |= mask >> 4;
        mask |= mask >> 8;
        mask |= mask >> 16;

        uint n;
        do { n = get() & mask; } while( n > max );
        return n;
    }

    /// Random number on [0.0, 1.0] interval.
    double getReal()
    {
        return double(get()) * (1.0/4294967295.0); // 2^32-1
    }

    /// Random number on [0.0, 1.0) interval.
    double getRealExclusive()
    {
        return double(get()) * (1.0/4294967296.0); // 2^32
    }

    /// Get the max value of the random number.
    uint max() const { return 4294967295U; }

    // Get a random seed.
    static uint randomSeed();
};
/// Very simple random number generator with low storage requirements.
/// The whole state is a single uint that is advanced on every get().
class SimpleRand : public Rand
{
public:

    /// Constructor that uses the current time as the seed.
    SimpleRand( time_e )
    {
        seed(randomSeed());
    }

    /// Constructor that uses the given seed.
    SimpleRand( uint s = 0 )
    {
        seed(s);
    }

    /// Set the given seed.
    virtual void seed( uint s )
    {
        current = s;
    }

    /// Advance the state by one step and return the new state.
    virtual uint get()
    {
        current = current * 1103515245 + 12345;
        return current;
    }

private:

    /// Current state; also the most recently returned value.
    uint current;
};
/// Mersenne twister random number generator.
///
/// Keeps N words of state; values are consumed one by one through `next`
/// and the whole state is regenerated by reload() once `left` reaches zero.
class MTRand : public Rand
{
public:
enum { N = 624 }; // length of state vector
enum { M = 397 }; // offset of the partner word used by reload()
/// Constructor that uses the current time as the seed.
MTRand( time_e )
{
seed(randomSeed());
}
/// Constructor that uses the given seed.
MTRand( uint s = 0 )
{
seed(s);
}
/// Constructor that uses the given seeds.
/// NOTE(review): only declared here; confirm a definition exists in the library.
NVMATH_API MTRand( const uint * seed_array, uint length );
/// Provide a new seed.
/// Rebuilds the state from `s` and immediately generates the first batch.
virtual void seed( uint s )
{
initialize(s);
reload();
}
/// Get the next random number: one state word passed through the
/// shift/mask tempering steps below.
virtual uint get()
{
// Pull a 32-bit integer from the generator state
// Every other access function simply transforms the numbers extracted here
if( left == 0 ) {
reload();
}
left--;
uint s1;
s1 = *next++;
s1 ^= (s1 >> 11);
s1 ^= (s1 << 7) & 0x9d2c5680U;
s1 ^= (s1 << 15) & 0xefc60000U;
return ( s1 ^ (s1 >> 18) );
};
private:
NVMATH_API void initialize( uint32 seed );
NVMATH_API void reload();
// Bit helpers used by twist().
uint hiBit( uint u ) const { return u & 0x80000000U; }    // top bit only
uint loBit( uint u ) const { return u & 0x00000001U; }    // lowest bit only
uint loBits( uint u ) const { return u & 0x7fffffffU; }   // everything but the top bit
uint mixBits( uint u, uint v ) const { return hiBit(u) | loBits(v); }   // top bit of u, rest of v
// (~loBit(s1)+1) is all ones when s1 is odd and zero when it is even, so the
// constant is xor-ed in only for odd s1.
uint twist( uint m, uint s0, uint s1 ) const { return m ^ (mixBits(s0,s1)>>1) ^ ((~loBit(s1)+1) & 0x9908b0dfU); }
private:
uint state[N]; // internal state
uint * next; // next value to get from state
int left; // number of values left before reload needed
};
/** George Marsaglia's random number generator.
 * Code based on Thatcher Ulrich public domain source code:
 * http://cvs.sourceforge.net/viewcvs.py/tu-testbed/tu-testbed/base/tu_random.cpp?rev=1.7&view=auto
 *
 * PRNG code adapted from the complementary-multiply-with-carry
 * code in the article: George Marsaglia, "Seeds for Random Number
 * Generators", Communications of the ACM, May 2003, Vol 46 No 5,
 * pp90-93.
 *
 * The article says:
 *
 * "Any one of the choices for seed table size and multiplier will
 * provide a RNG that has passed extensive tests of randomness,
 * particularly those in [3], yet is simple and fast --
 * approximately 30 million random 32-bit integers per second on a
 * 850MHz PC.  The period is a*b^n, where a is the multiplier, n
 * the size of the seed table and b=2^32-1.  (a is chosen so that
 * b is a primitive root of the prime a*b^n + 1.)"
 *
 * [3] Marsaglia, G., Zaman, A., and Tsang, W.  Toward a universal
 * random number generator.  _Statistics and Probability Letters
 * 8_ (1990), 35-39.
 */
class GMRand : public Rand
{
public:

    /// Size of the seed table. Must be a power of two: get() wraps the table
    /// index with `& (SEED_COUNT - 1)`.
    enum { SEED_COUNT = 8 };

//  const uint64 a = 123471786; // for SEED_COUNT=1024
//  const uint64 a = 123554632; // for SEED_COUNT=512
//  const uint64 a = 8001634;   // for SEED_COUNT=255
//  const uint64 a = 8007626;   // for SEED_COUNT=128
//  const uint64 a = 647535442; // for SEED_COUNT=64
//  const uint64 a = 547416522; // for SEED_COUNT=32
//  const uint64 a = 487198574; // for SEED_COUNT=16
//  const uint64 a = 716514398U;    // for SEED_COUNT=8
    /// Multiplier matching SEED_COUNT (see the table above).
    enum { a = 716514398U };

    /// Constructor that uses the current time as the seed.
    GMRand( time_e )
    {
        seed(randomSeed());
    }

    /// Constructor that uses the given seed.
    GMRand(uint s = 987654321)
    {
        seed(s);
    }

    /// Provide a new seed.
    /// Resets the carry and the table index and fills the seed table with
    /// successive shift/xor scrambles of `s`.
    virtual void seed( uint s )
    {
        c = 362436;
        i = SEED_COUNT - 1;

        // A separate loop variable keeps the member index `i` set above intact.
        for(int k = 0; k < SEED_COUNT; k++) {
            s = s ^ (s << 13);
            s = s ^ (s >> 17);
            s = s ^ (s << 5);
            Q[k] = s;
        }
    }

    /// Get the next 32-bit random number.
    virtual uint get()
    {
        const uint32 r = 0xFFFFFFFE;

        uint64 t;
        uint32 x;

        i = (i + 1) & (SEED_COUNT - 1);
        // The product must be formed in 64 bits: its upper half is the new
        // carry. Multiplying the enum constant by a 32-bit value directly
        // would truncate the product before the shift below.
        t = uint64(a) * Q[i] + c;
        c = uint32(t >> 32);
        x = uint32(t + c);

        if( x < c ) {
            x++;
            c++;
        }

        uint32 val = r - x;
        Q[i] = val;
        return val;
    }

private:

    uint32 c;               ///< Current carry.
    uint32 i;               ///< Index of the most recently used table entry.
    uint32 Q[SEED_COUNT];   ///< Seed table.
};
/** Random number implementation from the GNU Sci. Lib. (GSL).
 * Adapted from Nicholas Chapman version:
 *
 * Copyright (C) 1996, 1997, 1998, 1999, 2000  James Theiler, Brian Gough
 * This is the Unix rand48() generator. The generator returns the
 * upper 32 bits from each term of the sequence,
 *
 * x_{n+1} = (a x_n + c) mod m
 *
 * using 48-bit unsigned arithmetic, with a = 0x5DEECE66D , c = 0xB
 * and m = 2^48. The seed specifies the upper 32 bits of the initial
 * value, x_1, with the lower 16 bits set to 0x330E.
 *
 * The theoretical value of x_{10001} is 244131582646046.
 *
 * The period of this generator is ? FIXME (probably around 2^48).
 */
class Rand48 : public Rand
{
public:

    /// Constructor that uses the current time as the seed.
    Rand48( time_e )
    {
        seed(randomSeed());
    }

    /// Constructor that uses the given seed.
    Rand48( uint s = 0x1234ABCD )
    {
        seed(s);
    }

    /** Set the given seed. */
    virtual void seed( uint s ) {
        vstate.x0 = 0x330E;
        vstate.x1 = uint16(s & 0xFFFF);
        vstate.x2 = uint16((s >> 16) & 0xFFFF);
    }

    /** Get a random number: advances the state and returns its upper 32 bits.
     *
     * This is the override of Rand::get(). It was previously spelled `Get`,
     * which did not override the pure virtual and left the class abstract.
     */
    virtual uint get() {
        advance();

        uint x1 = vstate.x1;
        uint x2 = vstate.x2;
        return (x2 << 16) + x1;
    }

    /** Legacy spelling kept for source compatibility; forwards to get(). */
    uint Get() {
        return get();
    }

private:

    /// Advance the 48-bit state (kept as three 16-bit pieces, x0 lowest) by
    /// one step of the recurrence.
    void advance()
    {
        /* work with unsigned long ints throughout to get correct integer
           promotions of any unsigned short ints */
        const uint32 x0 = vstate.x0;
        const uint32 x1 = vstate.x1;
        const uint32 x2 = vstate.x2;

        uint32 a;
        a = a0 * x0 + c0;
        vstate.x0 = uint16(a & 0xFFFF);
        a >>= 16;

        /* although the next line may overflow we only need the top 16 bits
           in the following stage, so it does not matter */
        a += a0 * x1 + a1 * x0;
        vstate.x1 = uint16(a & 0xFFFF);
        a >>= 16;

        a += a0 * x2 + a1 * x1 + a2 * x0;
        vstate.x2 = uint16(a & 0xFFFF);
    }

private:

    /// 16-bit pieces of the multiplier (a0 lowest) and the increment.
    NVMATH_API static const uint16 a0, a1, a2, c0;

    /// Generator state: the 48-bit value split into 16-bit pieces, x0 lowest.
    struct rand48_state_t {
        uint16 x0, x1, x2;
    } vstate;
};
} // nv namespace
#endif // NV_MATH_RANDOM_H

View File

@ -0,0 +1,241 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvmath/SphericalHarmonic.h>
using namespace nv;
namespace
{
// Basic integer factorial: v * (v - 1) * ... * 1, with 0! = 1.
// Negative arguments are returned unchanged.
inline static int factorial( int v )
{
    if (v == 0) {
        return 1;
    }

    int product = v;
    for (int k = v - 1; k > 0; --k) {
        product *= k;
    }
    return product;
}
// Double factorial.
// Defined as: n!! = n*(n - 2)*(n - 4)..., n!!(0,-1) = 1.
// Arguments below -1 are returned unchanged.
inline static int doubleFactorial( int x )
{
    if (x == 0 || x == -1) {
        return 1;
    }

    int product = x;
    for (int k = x - 2; k > 0; k -= 2) {
        product *= k;
    }
    return product;
}
/// Normalization constant for spherical harmonic:
/// sqrt( (2l+1) * (l-m)! / (4*PI * (l+m)!) ).
/// @param l is the band.
/// @param m is the argument; must be >= 0 (checked in debug builds) and is
/// presumably expected to be at most l.
/// NOTE(review): factorial() works on int, so it overflows once l + m
/// exceeds 12; confirm callers stay below that.
inline static float K( int l, int m )
{
nvDebugCheck( m >= 0 );
return sqrtf(((2 * l + 1) * factorial(l - m)) / (4 * PI * factorial(l + m)));
}
/// Normalization constant for hemispherical harmonic:
/// sqrt( (2l+1) * (l-m)! / (2*PI * (l+m)!) ).
/// Same as K() except that the denominator uses 2*PI instead of 4*PI.
/// @param l is the band.
/// @param m is the argument; must be >= 0 (checked in debug builds).
inline static float HK( int l, int m )
{
nvDebugCheck( m >= 0 );
return sqrtf(((2 * l + 1) * factorial(l - m)) / (2 * PI * factorial(l + m)));
}
/// Evaluate Legendre polynomial.
/// General (slow) version used for bands without a hand-written
/// specialization; it recurses through legendrePolynomial() so that lower
/// bands can still hit the specialized versions.
/// @param l is the band.
/// @param m is the argument; the disabled checks below indicate 0 <= m <= l is expected.
/// @param x is the evaluation point; the disabled checks indicate |x| <= 1 is expected.
static float legendre( int l, int m, float x )
{
// piDebugCheck( m >= 0 );
// piDebugCheck( m <= l );
// piDebugCheck( fabs(x) <= 1 );
// Rule 2 needs no previous results
if (l == m) {
return powf(-1.0f, m) * doubleFactorial(2 * m - 1) * powf(1 - x*x, 0.5f * m);
}
// Rule 3 requires the result for the same argument of the previous band
if (l == m + 1) {
return x * (2 * m + 1) * legendrePolynomial(m, m, x);
}
// Main recurrence used by rule 1 that uses result of the same argument from
// the previous two bands
return (x * (2 * l - 1) * legendrePolynomial(l - 1, m, x) - (l + m - 1) * legendrePolynomial(l - 2, m, x)) / (l - m);
}
// Closed-form versions of the Legendre polynomial for bands 0 to 4, selected
// at compile time by <l, m>. Only the specializations below are defined; the
// primary template is declared but has no generic body.
template <int l, int m> float legendre(float x);
// Band 0.
template <> float legendre<0, 0>(float x) {
return 1;
}
// Band 1.
template <> float legendre<1, 0>(float x) {
return x;
}
template <> float legendre<1, 1>(float x) {
return -sqrtf(1 - x * x);
}
// Band 2.
template <> float legendre<2, 0>(float x) {
return -0.5f + (3 * x * x) / 2;
}
template <> float legendre<2, 1>(float x) {
return -3 * x * sqrtf(1 - x * x);
}
template <> float legendre<2, 2>(float x) {
return -3 * (-1 + x * x);
}
// Band 3.
template <> float legendre<3, 0>(float x) {
return -(3 * x) / 2 + (5 * x * x * x) / 2;
}
template <> float legendre<3, 1>(float x) {
return -3 * sqrtf(1 - x * x) / 2 * (-1 + 5 * x * x);
}
template <> float legendre<3, 2>(float x) {
return -15 * (-x + x * x * x);
}
template <> float legendre<3, 3>(float x) {
return -15 * powf(1 - x * x, 1.5f);
}
// Band 4.
template <> float legendre<4, 0>(float x) {
return 0.125f * (3.0f - 30.0f * x * x + 35.0f * x * x * x * x);
}
template <> float legendre<4, 1>(float x) {
return -2.5f * x * sqrtf(1.0f - x * x) * (7.0f * x * x - 3.0f);
}
template <> float legendre<4, 2>(float x) {
return -7.5f * (1.0f - 8.0f * x * x + 7.0f * x * x * x * x);
}
template <> float legendre<4, 3>(float x) {
return -105.0f * x * powf(1 - x * x, 1.5f);
}
template <> float legendre<4, 4>(float x) {
return 105.0f * (x * x - 1.0f) * (x * x - 1.0f);
}
} // namespace
/// Evaluate the Legendre polynomial for band l and argument m at x.
/// Bands 0 to 4 dispatch to the closed-form specializations; within a band
/// any m that is not one of the lower values selects the highest one. Other
/// bands use the general recursive version.
float nv::legendrePolynomial(int l, int m, float x)
{
    if (l == 0) {
        return legendre<0, 0>(x);
    }

    if (l == 1) {
        return (m == 0) ? legendre<1, 0>(x) : legendre<1, 1>(x);
    }

    if (l == 2) {
        switch (m) {
            case 0:  return legendre<2, 0>(x);
            case 1:  return legendre<2, 1>(x);
            default: return legendre<2, 2>(x);
        }
    }

    if (l == 3) {
        switch (m) {
            case 0:  return legendre<3, 0>(x);
            case 1:  return legendre<3, 1>(x);
            case 2:  return legendre<3, 2>(x);
            default: return legendre<3, 3>(x);
        }
    }

    if (l == 4) {
        switch (m) {
            case 0:  return legendre<4, 0>(x);
            case 1:  return legendre<4, 1>(x);
            case 2:  return legendre<4, 2>(x);
            case 3:  return legendre<4, 3>(x);
            default: return legendre<4, 4>(x);
        }
    }

    // Fallback to the expensive version.
    return legendre(l, m, x);
}
/**
 * Evaluate the spherical harmonic function for the given angles.
 * @param l is the band.
 * @param m is the argument, in the range [-l,l]
 * @param theta is the altitude, in the range [0, PI]
 * @param phi is the azimuth, in the range [0, 2*PI]
 * @return the value of the real basis function (l, m) at (theta, phi).
 */
float nv::y( int l, int m, float theta, float phi )
{
    // All three cases evaluate the polynomial at cos(theta).
    const float cosTheta = cosf(theta);

    if( m > 0 ) {
        return sqrtf(2.0f) * K(l, m) * cosf(m * phi) * legendrePolynomial(l, m, cosTheta);
    }

    if( m < 0 ) {
        return sqrtf(2.0f) * K(l, -m) * sinf(-m * phi) * legendrePolynomial(l, -m, cosTheta);
    }

    // m == 0
    // K(l, 0) = sqrt((2*l+1)/(4*PI))
    return sqrtf((2 * l + 1) / (4 * PI)) * legendrePolynomial(l, 0, cosTheta);
}
/**
 * Real spherical harmonic function of an unit vector. Converts the vector
 * to angles using the following equalities and forwards to the angular
 * overload:
 *    x = sin(theta)*cos(phi)
 *    y = sin(theta)*sin(phi)
 *    z = cos(theta)
 */
float nv::y( int l, int m, Vector3::Arg v )
{
    const float altitude = acosf(v.z());
    const float azimuth = atan2f(v.y(), v.x());

    return y( l, m, altitude, azimuth );
}
/**
 * Evaluate the hemispherical harmonic function for the given angles.
 * @param l is the band.
 * @param m is the argument, in the range [-l,l]
 * @param theta is the altitude, in the range [0, PI/2]
 * @param phi is the azimuth, in the range [0, 2*PI]
 * @return the value of the real hemispherical basis function (l, m).
 */
float nv::hy( int l, int m, float theta, float phi )
{
    // The polynomial is evaluated at 2*cos(theta)-1 in all three cases.
    const float shifted = 2*cosf(theta)-1;

    if( m > 0 ) {
        return sqrtf(2.0f) * HK(l, m) * cosf(m * phi) * legendrePolynomial(l, m, shifted);
    }

    if( m < 0 ) {
        return sqrtf(2.0f) * HK(l, -m) * sinf(-m * phi) * legendrePolynomial(l, -m, shifted);
    }

    // m == 0
    // HK(l, 0) = sqrt((2*l+1)/(2*PI))
    return sqrtf((2 * l + 1) / (2 * PI)) * legendrePolynomial(l, 0, shifted);
}
/**
* Real hemispherical harmonic function of an unit vector. Uses the following
* equalities to call the angular function:
* x = sin(theta)*cos(phi)
* y = sin(theta)*sin(phi)
* z = cos(theta)
*/
float nv::hy( int l, int m, Vector3::Arg v )
{
float theta = acosf(v.z());
float phi = atan2f(v.y(), v.x());
return y( l, m, theta, phi );
}

View File

@ -0,0 +1,421 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_SPHERICALHARMONIC_H
#define NV_MATH_SPHERICALHARMONIC_H
#include <nvmath/Vector.h>
namespace nv
{
// Legendre polynomial for band l and argument m, evaluated at x.
NVMATH_API float legendrePolynomial( int l, int m, float x ) NV_CONST;
// Real spherical harmonic basis function, by angles and by direction.
NVMATH_API float y( int l, int m, float theta, float phi ) NV_CONST;
NVMATH_API float y( int l, int m, Vector3::Arg v ) NV_CONST;
// Real hemispherical harmonic basis function, by angles and by direction.
NVMATH_API float hy( int l, int m, float theta, float phi ) NV_CONST;
NVMATH_API float hy( int l, int m, Vector3::Arg v ) NV_CONST;
// Forward declarations so that Sh::sample() can call dot().
class Sh;
float dot(const Sh & a, const Sh & b) NV_CONST;
/// Spherical harmonic class.
///
/// Owns a heap array of basisNum() float coefficients, stored band after
/// band; coefficient (l, m) lives at index l*l + l + m.
class Sh
{
    friend class Sh2;
    friend class ShMatrix;

public:

    /// Construct a spherical harmonic of the given order.
    /// The coefficients are not initialized; use reset() or eval() to fill them.
    Sh(int o) : m_order(o)
    {
        m_elemArray = new float[basisNum()];
    }

    /// Copy constructor. Allocates a new array and copies every coefficient.
    Sh(const Sh & sh) : m_order(sh.order())
    {
        const int count = basisNum();
        m_elemArray = new float[count];
        // Plain loop: keeps this header independent of <string.h>.
        for (int i = 0; i < count; i++) {
            m_elemArray[i] = sh.m_elemArray[i];
        }
    }

    /// Destructor.
    ~Sh()
    {
        delete [] m_elemArray;
        m_elemArray = 0;
    }

    /// Get number of bands.
    static int bandNum(int order) {
        return order + 1;
    }

    /// Get number of sh basis.
    static int basisNum(int order) {
        return (order + 1) * (order + 1);
    }

    /// Get the index for the given coefficients.
    static int index( int l, int m ) {
        return l * l + l + m;
    }

    /// Get sh order.
    int order() const
    {
        return m_order;
    }

    /// Get number of bands of this sh.
    int bandNum() const
    {
        return bandNum(m_order);
    }

    /// Get number of basis functions (coefficients) of this sh.
    int basisNum() const
    {
        return basisNum(m_order);
    }

    /// Get sh coefficient indexed by l,m.
    float elem( int l, int m ) const
    {
        return m_elemArray[index(l, m)];
    }

    /// Get sh coefficient indexed by l,m.
    float & elem( int l, int m )
    {
        return m_elemArray[index(l, m)];
    }

    /// Get sh coefficient indexed by i.
    float elemAt( int i ) const {
        return m_elemArray[i];
    }

    /// Get sh coefficient indexed by i.
    float & elemAt( int i )
    {
        return m_elemArray[i];
    }

    /// Reset the sh coefficients.
    void reset()
    {
        for( int i = 0; i < basisNum(); i++ ) {
            m_elemArray[i] = 0.0f;
        }
    }

    /// Copy spherical harmonic.
    /// The source must have at least this order; only the coefficients that
    /// fit in this sh are copied, the order itself never changes.
    void operator= ( const Sh & sh )
    {
        nvDebugCheck(order() <= sh.order());

        for(int i = 0; i < basisNum(); i++) {
            m_elemArray[i] = sh.m_elemArray[i];
        }
    }

    /// Add spherical harmonics.
    void operator+= ( const Sh & sh )
    {
        nvDebugCheck(order() == sh.order());

        for(int i = 0; i < basisNum(); i++) {
            m_elemArray[i] += sh.m_elemArray[i];
        }
    }

    /// Subtract spherical harmonics.
    void operator-= ( const Sh & sh )
    {
        nvDebugCheck(order() == sh.order());

        for(int i = 0; i < basisNum(); i++) {
            m_elemArray[i] -= sh.m_elemArray[i];
        }
    }

    // Not exactly convolution, nor product: coefficient-wise multiplication.
    void operator*= ( const Sh & sh )
    {
        nvDebugCheck(order() == sh.order());

        for(int i = 0; i < basisNum(); i++) {
            m_elemArray[i] *= sh.m_elemArray[i];
        }
    }

    /// Scale spherical harmonics.
    void operator*= ( float f )
    {
        for(int i = 0; i < basisNum(); i++) {
            m_elemArray[i] *= f;
        }
    }

    /// Add scaled spherical harmonics.
    void addScaled( const Sh & sh, float f )
    {
        nvDebugCheck(order() == sh.order());

        for(int i = 0; i < basisNum(); i++) {
            m_elemArray[i] += sh.m_elemArray[i] * f;
        }
    }


    /*/// Add a weighted sample to the sh coefficients.
    void AddSample( const Vec3 & dir, const Color3f & color, float w=1.0f ) {
        for(int l = 0; l <= order; l++) {
            for(int m = -l; m <= l; m++) {
                Color3f & elem = GetElem(l, m);
                elem.Mad( elem, color, w * y(l, m, dir) );
            }
        }
    }*/

    /// Evaluate: overwrite every coefficient (l, m) with the value of the
    /// basis function y(l, m) in the given direction.
    void eval(Vector3::Arg dir)
    {
        for(int l = 0; l <= m_order; l++) {
            for(int m = -l; m <= l; m++) {
                elem(l, m) = y(l, m, dir);
            }
        }
    }


    /// Evaluate the spherical harmonic function.
    /// Builds the basis values for `dir` in a temporary sh of the same order
    /// and returns its dot product with these coefficients.
    float sample(Vector3::Arg dir) const
    {
        Sh sh(order());
        sh.eval(dir);

        return dot(sh, *this);
    }


protected:

    /// Order of the sh; fixed at construction.
    const int m_order;

    /// Owned array of basisNum() coefficients.
    float * m_elemArray;

};
/// Compute dot product of the spherical harmonics: the sum of the products
/// of matching coefficients. Both operands must have the same order
/// (checked in debug builds).
inline float dot(const Sh & a, const Sh & b)
{
    nvDebugCheck(a.order() == b.order());

    const int count = Sh::basisNum(a.order());

    float acc = 0;
    for (int k = 0; k < count; ++k) {
        acc += a.elemAt(k) * b.elemAt(k);
    }

    return acc;
}
/// Second order spherical harmonic.
/// An Sh fixed to order 2, i.e. 9 coefficients (bands 0, 1 and 2).
class Sh2 : public Sh
{
public:
/// Constructor.
Sh2() : Sh(2) {}
/// Copy constructor.
Sh2(const Sh2 & sh) : Sh(sh) {}
/// Spherical harmonic resulting from projecting the clamped cosine transfer function to the SH basis.
/// Overwrites all 9 coefficients with precomputed constants.
void cosineTransfer()
{
const float c1 = 0.282095f; // K(0, 0)
const float c2 = 0.488603f; // K(1, 0)
const float c3 = 1.092548f; // sqrt(15.0f / PI) / 2.0f = K(2, -2)
const float c4 = 0.315392f; // sqrt(5.0f / PI) / 4.0f = K(2, 0)
const float c5 = 0.546274f; // sqrt(15.0f / PI) / 4.0f = K(2, 2)
const float normalization = PI * 16.0f / 17.0f;
// Each constant is scaled by the common normalization and a per-band weight.
const float const1 = c1 * normalization * 1.0f;
const float const2 = c2 * normalization * (2.0f / 3.0f);
const float const3 = c3 * normalization * (1.0f / 4.0f);
const float const4 = c4 * normalization * (1.0f / 4.0f);
const float const5 = c5 * normalization * (1.0f / 4.0f);
// Band 0: index 0.
m_elemArray[0] = const1;
// Band 1: indices 1..3.
m_elemArray[1] = -const2;
m_elemArray[2] = const2;
m_elemArray[3] = -const2;
// Band 2: indices 4..8.
m_elemArray[4] = const3;
m_elemArray[5] = -const3;
m_elemArray[6] = const4;
m_elemArray[7] = -const3;
m_elemArray[8] = const5;
}
};
#if 0
/// Spherical harmonic matrix.
///
/// NOTE(review): this class sits inside an `#if 0` region and is not compiled.
/// It is not self-consistent as written: the members are declared as
/// m_order / m_identity / m_e / m_band while the methods use order, identity,
/// e and band; Size() and GetBandNum() are called but the methods are named
/// size() and bandNum(); piCheck, Color3f and MATHLIB_API come from another
/// code base. All of this has to be reconciled before enabling it.
class ShMatrix
{
public:
/// Create an identity matrix of the given order.
ShMatrix(int o = 2) : order(o), identity(true)
{
nvCheck(order > 0);
e = new float[Size()];
band = new float *[GetBandNum()];
setupBands();
}
/// Destroy and free matrix elements.
/// NOTE(review): both pointers come from new[], so these should be delete [].
~ShMatrix()
{
delete e;
delete band;
}
/// Set identity matrix.
/// Only raises the flag; the stored elements are not modified.
void setIdentity()
{
identity = true;
}
/// Return true if this is an identity matrix, false in other case.
bool isIdentity() const {
return identity;
}
/// Get number of bands of this matrix.
int bandNum() const
{
return order+1;
}
/// Get total number of elements in the matrix.
/// Band i contributes a square block of (2i+1) x (2i+1) elements.
int size() const
{
int size = 0;
for( int i = 0; i < bandNum(); i++ ) {
size += SQ(i * 2 + 1);
}
return size;
}
/// Get element at the given raw index.
float elem(const int idx) const
{
return e[idx];
}
/// Get element of band b at the given indices.
/// x and y are offset by b before indexing the (2b+1)-wide block.
float & elem( const int b, const int x, const int y )
{
nvDebugCheck(b >= 0);
nvDebugCheck(b < bandNum());
return band[b][(b + y) * (b * 2 + 1) + (b + x)];
}
/// Get element of band b at the given indices.
float elem( const int b, const int x, const int y ) const
{
nvDebugCheck(b >= 0);
nvDebugCheck(b < bandNum());
return band[b][(b + y) * (b * 2 + 1) + (b + x)];
}
/** Copy matrix. */
void Copy( const ShMatrix & m )
{
nvDebugCheck(order == m.order);
memcpy(e, m.e, Size() * sizeof(float));
}
/** Rotate the given coefficients. */
void transform( const Sh & restrict source, Sh * restrict dest ) const {
piCheck( &source != dest ); // Make sure there's no aliasing.
piCheck( dest->order <= order );
piCheck( order <= source.order );
// An identity matrix reduces to a plain copy.
if( identity ) {
*dest = source;
return;
}
// Loop through each band.
for( int l = 0; l <= dest->order; l++ ) {
for( int mo = -l; mo <= l; mo++ ) {
// Accumulate the source coefficients of band l weighted by the matrix elements for output mo.
Color3f rgb = Color3f::Black;
for( int mi = -l; mi <= l; mi++ ) {
rgb.Mad( rgb, source.elem(l, mi), elem(l, mo, mi) );
}
dest->elem(l, mo) = rgb;
}
}
}
MATHLIB_API void multiply( const ShMatrix &A, const ShMatrix &B );
MATHLIB_API void rotation( const Matrix & m );
MATHLIB_API void rotation( int axis, float angles );
MATHLIB_API void print();
private:
// @@ These could be static indices precomputed only once.
/// Setup the band pointers.
/// band[i] points at the first element of band i inside the flat array.
void setupBands()
{
int size = 0;
for( int i = 0; i < bandNum(); i++ ) {
band[i] = &e[size];
size += SQ(i * 2 + 1);
}
}
private:
// Matrix order.
const int m_order;
// Identity flag for quick transform.
bool m_identity;
// Array of elements.
float * m_e;
// Band pointers.
float ** m_band;
};
#endif // 0
} // nv namespace
#endif // NV_MATH_SPHERICALHARMONIC_H

View File

@ -74,6 +74,8 @@ public:
void operator+=(Vector3::Arg v);
void operator-=(Vector3::Arg v);
void operator*=(scalar s);
inline void operator/=(scalar s)
{ m_x /= s; m_y /= s; m_z /= s; }
void operator*=(Vector3::Arg v);
friend bool operator==(Vector3::Arg a, Vector3::Arg b);