From fd6b8449bf0cbb23787c873fcb541c6a03fc4982 Mon Sep 17 00:00:00 2001 From: castano Date: Sat, 29 May 2010 02:47:57 +0000 Subject: [PATCH] Add bc6 and bc7 compressors from nvidia. --- src/nvtt/bc6h/arvo/ArvoMath.cpp | 342 +++++++++ src/nvtt/bc6h/arvo/ArvoMath.h | 187 +++++ src/nvtt/bc6h/arvo/Char.cpp | 420 +++++++++++ src/nvtt/bc6h/arvo/Char.h | 245 +++++++ src/nvtt/bc6h/arvo/Complex.cpp | 76 ++ src/nvtt/bc6h/arvo/Complex.h | 187 +++++ src/nvtt/bc6h/arvo/Matrix.cpp | 1201 ++++++++++++++++++++++++++++++ src/nvtt/bc6h/arvo/Matrix.h | 142 ++++ src/nvtt/bc6h/arvo/Perm.cpp | 503 +++++++++++++ src/nvtt/bc6h/arvo/Perm.h | 111 +++ src/nvtt/bc6h/arvo/Rand.cpp | 230 ++++++ src/nvtt/bc6h/arvo/Rand.h | 114 +++ src/nvtt/bc6h/arvo/SI_units.h | 232 ++++++ src/nvtt/bc6h/arvo/SVD.cpp | 398 ++++++++++ src/nvtt/bc6h/arvo/SVD.h | 54 ++ src/nvtt/bc6h/arvo/SphTri.cpp | 292 ++++++++ src/nvtt/bc6h/arvo/SphTri.h | 124 ++++ src/nvtt/bc6h/arvo/Token.cpp | 913 +++++++++++++++++++++++ src/nvtt/bc6h/arvo/Token.h | 203 +++++ src/nvtt/bc6h/arvo/Vec2.cpp | 94 +++ src/nvtt/bc6h/arvo/Vec2.h | 358 +++++++++ src/nvtt/bc6h/arvo/Vec3.cpp | 119 +++ src/nvtt/bc6h/arvo/Vec3.h | 517 +++++++++++++ src/nvtt/bc6h/arvo/Vector.cpp | 366 +++++++++ src/nvtt/bc6h/arvo/Vector.h | 103 +++ src/nvtt/bc6h/arvo/form.h | 26 + src/nvtt/bc6h/bits.h | 73 ++ src/nvtt/bc6h/exr.cpp | 51 ++ src/nvtt/bc6h/exr.h | 37 + src/nvtt/bc6h/shapes_two.h | 133 ++++ src/nvtt/bc6h/tile.h | 115 +++ src/nvtt/bc6h/utils.cpp | 466 ++++++++++++ src/nvtt/bc6h/utils.h | 79 ++ src/nvtt/bc6h/zoh.cpp | 205 ++++++ src/nvtt/bc6h/zoh.h | 78 ++ src/nvtt/bc6h/zoh.sln | 21 + src/nvtt/bc6h/zoh.vcproj | 281 +++++++ src/nvtt/bc6h/zohc.cpp | 301 ++++++++ src/nvtt/bc6h/zohone.cpp | 804 ++++++++++++++++++++ src/nvtt/bc6h/zohtwo.cpp | 892 ++++++++++++++++++++++ src/nvtt/bc7/ImfArray.h | 261 +++++++ src/nvtt/bc7/arvo/ArvoMath.cpp | 342 +++++++++ src/nvtt/bc7/arvo/ArvoMath.h | 212 ++++++ src/nvtt/bc7/arvo/Char.cpp | 420 +++++++++++ src/nvtt/bc7/arvo/Char.h | 245 
+++++++ src/nvtt/bc7/arvo/Complex.cpp | 76 ++ src/nvtt/bc7/arvo/Complex.h | 187 +++++ src/nvtt/bc7/arvo/Matrix.cpp | 1201 ++++++++++++++++++++++++++++++ src/nvtt/bc7/arvo/Matrix.h | 142 ++++ src/nvtt/bc7/arvo/Perm.cpp | 503 +++++++++++++ src/nvtt/bc7/arvo/Perm.h | 111 +++ src/nvtt/bc7/arvo/Rand.cpp | 230 ++++++ src/nvtt/bc7/arvo/Rand.h | 114 +++ src/nvtt/bc7/arvo/SI_units.h | 232 ++++++ src/nvtt/bc7/arvo/SVD.cpp | 398 ++++++++++ src/nvtt/bc7/arvo/SVD.h | 54 ++ src/nvtt/bc7/arvo/SphTri.cpp | 292 ++++++++ src/nvtt/bc7/arvo/SphTri.h | 124 ++++ src/nvtt/bc7/arvo/Token.cpp | 913 +++++++++++++++++++++++ src/nvtt/bc7/arvo/Token.h | 203 +++++ src/nvtt/bc7/arvo/Vec2.cpp | 94 +++ src/nvtt/bc7/arvo/Vec2.h | 358 +++++++++ src/nvtt/bc7/arvo/Vec3.cpp | 119 +++ src/nvtt/bc7/arvo/Vec3.h | 517 +++++++++++++ src/nvtt/bc7/arvo/Vec4.cpp | 79 ++ src/nvtt/bc7/arvo/Vec4.h | 238 ++++++ src/nvtt/bc7/arvo/Vector.cpp | 366 +++++++++ src/nvtt/bc7/arvo/Vector.h | 103 +++ src/nvtt/bc7/arvo/form.h | 26 + src/nvtt/bc7/avpcl.cpp | 263 +++++++ src/nvtt/bc7/avpcl.h | 107 +++ src/nvtt/bc7/avpcl.sln | 21 + src/nvtt/bc7/avpcl.vcproj | 314 ++++++++ src/nvtt/bc7/avpcl_mode0.cpp | 1068 +++++++++++++++++++++++++++ src/nvtt/bc7/avpcl_mode1.cpp | 1049 ++++++++++++++++++++++++++ src/nvtt/bc7/avpcl_mode2.cpp | 1005 +++++++++++++++++++++++++ src/nvtt/bc7/avpcl_mode3.cpp | 1061 +++++++++++++++++++++++++++ src/nvtt/bc7/avpcl_mode4.cpp | 1220 ++++++++++++++++++++++++++++++ src/nvtt/bc7/avpcl_mode5.cpp | 1222 +++++++++++++++++++++++++++++++ src/nvtt/bc7/avpcl_mode6.cpp | 1059 ++++++++++++++++++++++++++ src/nvtt/bc7/avpcl_mode7.cpp | 1098 +++++++++++++++++++++++++++ src/nvtt/bc7/avpclc.cpp | 348 +++++++++ src/nvtt/bc7/bits.h | 73 ++ src/nvtt/bc7/endpts.h | 80 ++ src/nvtt/bc7/rgba.h | 27 + src/nvtt/bc7/shapes_three.h | 132 ++++ src/nvtt/bc7/shapes_two.h | 133 ++++ src/nvtt/bc7/targa.cpp | 179 +++++ src/nvtt/bc7/targa.h | 30 + src/nvtt/bc7/tile.h | 67 ++ src/nvtt/bc7/utils.cpp | 391 ++++++++++ src/nvtt/bc7/utils.h | 
69 ++ 92 files changed, 30269 insertions(+) create mode 100755 src/nvtt/bc6h/arvo/ArvoMath.cpp create mode 100755 src/nvtt/bc6h/arvo/ArvoMath.h create mode 100755 src/nvtt/bc6h/arvo/Char.cpp create mode 100755 src/nvtt/bc6h/arvo/Char.h create mode 100755 src/nvtt/bc6h/arvo/Complex.cpp create mode 100755 src/nvtt/bc6h/arvo/Complex.h create mode 100755 src/nvtt/bc6h/arvo/Matrix.cpp create mode 100755 src/nvtt/bc6h/arvo/Matrix.h create mode 100755 src/nvtt/bc6h/arvo/Perm.cpp create mode 100755 src/nvtt/bc6h/arvo/Perm.h create mode 100755 src/nvtt/bc6h/arvo/Rand.cpp create mode 100755 src/nvtt/bc6h/arvo/Rand.h create mode 100755 src/nvtt/bc6h/arvo/SI_units.h create mode 100755 src/nvtt/bc6h/arvo/SVD.cpp create mode 100755 src/nvtt/bc6h/arvo/SVD.h create mode 100755 src/nvtt/bc6h/arvo/SphTri.cpp create mode 100755 src/nvtt/bc6h/arvo/SphTri.h create mode 100755 src/nvtt/bc6h/arvo/Token.cpp create mode 100755 src/nvtt/bc6h/arvo/Token.h create mode 100755 src/nvtt/bc6h/arvo/Vec2.cpp create mode 100755 src/nvtt/bc6h/arvo/Vec2.h create mode 100755 src/nvtt/bc6h/arvo/Vec3.cpp create mode 100755 src/nvtt/bc6h/arvo/Vec3.h create mode 100755 src/nvtt/bc6h/arvo/Vector.cpp create mode 100755 src/nvtt/bc6h/arvo/Vector.h create mode 100755 src/nvtt/bc6h/arvo/form.h create mode 100755 src/nvtt/bc6h/bits.h create mode 100755 src/nvtt/bc6h/exr.cpp create mode 100755 src/nvtt/bc6h/exr.h create mode 100755 src/nvtt/bc6h/shapes_two.h create mode 100755 src/nvtt/bc6h/tile.h create mode 100755 src/nvtt/bc6h/utils.cpp create mode 100755 src/nvtt/bc6h/utils.h create mode 100755 src/nvtt/bc6h/zoh.cpp create mode 100755 src/nvtt/bc6h/zoh.h create mode 100755 src/nvtt/bc6h/zoh.sln create mode 100755 src/nvtt/bc6h/zoh.vcproj create mode 100755 src/nvtt/bc6h/zohc.cpp create mode 100755 src/nvtt/bc6h/zohone.cpp create mode 100755 src/nvtt/bc6h/zohtwo.cpp create mode 100644 src/nvtt/bc7/ImfArray.h create mode 100644 src/nvtt/bc7/arvo/ArvoMath.cpp create mode 100644 src/nvtt/bc7/arvo/ArvoMath.h 
create mode 100644 src/nvtt/bc7/arvo/Char.cpp create mode 100644 src/nvtt/bc7/arvo/Char.h create mode 100644 src/nvtt/bc7/arvo/Complex.cpp create mode 100644 src/nvtt/bc7/arvo/Complex.h create mode 100644 src/nvtt/bc7/arvo/Matrix.cpp create mode 100644 src/nvtt/bc7/arvo/Matrix.h create mode 100644 src/nvtt/bc7/arvo/Perm.cpp create mode 100644 src/nvtt/bc7/arvo/Perm.h create mode 100644 src/nvtt/bc7/arvo/Rand.cpp create mode 100644 src/nvtt/bc7/arvo/Rand.h create mode 100644 src/nvtt/bc7/arvo/SI_units.h create mode 100644 src/nvtt/bc7/arvo/SVD.cpp create mode 100644 src/nvtt/bc7/arvo/SVD.h create mode 100644 src/nvtt/bc7/arvo/SphTri.cpp create mode 100644 src/nvtt/bc7/arvo/SphTri.h create mode 100644 src/nvtt/bc7/arvo/Token.cpp create mode 100644 src/nvtt/bc7/arvo/Token.h create mode 100644 src/nvtt/bc7/arvo/Vec2.cpp create mode 100644 src/nvtt/bc7/arvo/Vec2.h create mode 100644 src/nvtt/bc7/arvo/Vec3.cpp create mode 100644 src/nvtt/bc7/arvo/Vec3.h create mode 100644 src/nvtt/bc7/arvo/Vec4.cpp create mode 100644 src/nvtt/bc7/arvo/Vec4.h create mode 100644 src/nvtt/bc7/arvo/Vector.cpp create mode 100644 src/nvtt/bc7/arvo/Vector.h create mode 100644 src/nvtt/bc7/arvo/form.h create mode 100644 src/nvtt/bc7/avpcl.cpp create mode 100644 src/nvtt/bc7/avpcl.h create mode 100644 src/nvtt/bc7/avpcl.sln create mode 100644 src/nvtt/bc7/avpcl.vcproj create mode 100644 src/nvtt/bc7/avpcl_mode0.cpp create mode 100644 src/nvtt/bc7/avpcl_mode1.cpp create mode 100644 src/nvtt/bc7/avpcl_mode2.cpp create mode 100644 src/nvtt/bc7/avpcl_mode3.cpp create mode 100644 src/nvtt/bc7/avpcl_mode4.cpp create mode 100644 src/nvtt/bc7/avpcl_mode5.cpp create mode 100644 src/nvtt/bc7/avpcl_mode6.cpp create mode 100644 src/nvtt/bc7/avpcl_mode7.cpp create mode 100644 src/nvtt/bc7/avpclc.cpp create mode 100644 src/nvtt/bc7/bits.h create mode 100644 src/nvtt/bc7/endpts.h create mode 100644 src/nvtt/bc7/rgba.h create mode 100644 src/nvtt/bc7/shapes_three.h create mode 100644 src/nvtt/bc7/shapes_two.h 
create mode 100644 src/nvtt/bc7/targa.cpp create mode 100644 src/nvtt/bc7/targa.h create mode 100644 src/nvtt/bc7/tile.h create mode 100644 src/nvtt/bc7/utils.cpp create mode 100644 src/nvtt/bc7/utils.h diff --git a/src/nvtt/bc6h/arvo/ArvoMath.cpp b/src/nvtt/bc6h/arvo/ArvoMath.cpp new file mode 100755 index 0000000..95d1a7d --- /dev/null +++ b/src/nvtt/bc6h/arvo/ArvoMath.cpp @@ -0,0 +1,342 @@ +/*************************************************************************** +* Math.C * +* * +* Some basic math functions. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 06/21/93 Initial coding. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1999, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#include +#include +#include +#include +#include "ArvoMath.h" +#include "form.h" + +namespace ArvoMath { + static const float Epsilon = 1.0E-5; + static const double LogTwo = log( 2.0 ); + +#define BinCoeffMax 500 + + double RelErr( double x, double y ) + { + double z = x - y; + if( x < 0.0 ) x = -x; + if( y < 0.0 ) y = -y; + return z / ( x > y ? x : y ); + } + + /*************************************************************************** + * A R C Q U A D * + * * + * Returns the theta / ( 2*PI ) where the input variables x and y are * + * such that x == COS( theta ) and y == SIN( theta ). 
* + * * + ***************************************************************************/ + float ArcQuad( float x, float y ) + { + if( Abs( x ) > Epsilon ) + { + float temp = OverTwoPi * atan( Abs( y ) / Abs( x ) ); + if( x < 0.0 ) temp = 0.5 - temp; + if( y < 0.0 ) temp = 1.0 - temp; + return( temp ); + } + else if( y > Epsilon ) return( 0.25 ); + else if( y < -Epsilon ) return( 0.75 ); + else return( 0.0 ); + } + + /*************************************************************************** + * A R C T A N * + * * + * Returns the angle theta such that x = COS( theta ) & y = SIN( theta ). * + * * + ***************************************************************************/ + float ArcTan( float x, float y ) + { + if( Abs( x ) > Epsilon ) + { + float temp = atan( Abs( y ) / Abs( x ) ); + if( x < 0.0 ) temp = Pi - temp; + if( y < 0.0 ) temp = TwoPi - temp; + return( temp ); + } + else if( y > Epsilon ) return( PiOverTwo ); + else if( y < -Epsilon ) return( 3 * PiOverTwo ); + else return( 0.0 ); + } + + /*************************************************************************** + * M A C H I N E E P S I L O N * + * * + * Returns the machine epsilon. * + * * + ***************************************************************************/ + float MachineEpsilon() + { + float x = 1.0; + float y; + float z = 1.0 + x; + while( z > 1.0 ) + { + y = x; + x /= 2.0; + z = (float)( 1.0 + (float)x ); // Avoid double precision! + } + return (float)y; + } + + /*************************************************************************** + * L O G G A M M A * + * * + * Computes the natural log of the gamma function using the Lanczos * + * approximation formula. Gamma is defined by * + * * + * ( z - 1 ) -t * + * gamma( z ) = Integral[ t e dt ] * + * * + * * + * where the integral ranges from 0 to infinity. The gamma function * + * satisfies * + * gamma( n + 1 ) = n! * + * * + * This algorithm has been adapted from "Numerical Recipes", p. 157. 
* + * * + ***************************************************************************/ + double LogGamma( double x ) + { + static const double + coeff0 = 7.61800917300E+1, + coeff1 = -8.65053203300E+1, + coeff2 = 2.40140982200E+1, + coeff3 = -1.23173951600E+0, + coeff4 = 1.20858003000E-3, + coeff5 = -5.36382000000E-6, + stp = 2.50662827465E+0, + half = 5.00000000000E-1, + fourpf = 4.50000000000E+0, + one = 1.00000000000E+0, + two = 2.00000000000E+0, + three = 3.00000000000E+0, + four = 4.00000000000E+0, + five = 5.00000000000E+0; + double r = coeff0 / ( x ) + coeff1 / ( x + one ) + + coeff2 / ( x + two ) + coeff3 / ( x + three ) + + coeff4 / ( x + four ) + coeff5 / ( x + five ) ; + double s = x + fourpf; + double t = ( x - half ) * log( s ) - s; + return t + log( stp * ( r + one ) ); + } + + /*************************************************************************** + * L O G F A C T * + * * + * Returns the natural logarithm of n factorial. For efficiency, some * + * of the values are cached, so they need be computed only once. * + * * + ***************************************************************************/ + double LogFact( int n ) + { + static const int Cache_Size = 100; + static double c[ Cache_Size ] = { 0.0 }; // Cache some of the values. + if( n <= 1 ) return 0.0; + if( n < Cache_Size ) + { + if( c[n] == 0.0 ) c[n] = LogGamma((double)(n+1)); + return c[n]; + } + return LogGamma((double)(n+1)); // gamma(n+1) == n! + } + + /*************************************************************************** + * M U L T I N O M I A L C O E F F * + * * + * Returns the multinomial coefficient ( n; X1 X2 ... Xk ) which is * + * defined to be n! / ( X1! X2! ... Xk! ). This is done by computing * + * exp( log(n!) - log(X1!) - log(X2!) - ... - log(Xk!) ). The value of * + * n is obtained by summing the Xi's. 
* + * * + ***************************************************************************/ + double MultinomialCoeff( int k, int X[] ) + { + int i; + // Find n by summing the coefficients. + + int n = X[0]; + for( i = 1; i < k; i++ ) n += X[i]; + + // Compute log(n!) then subtract log(X!) for each X. + + double LogCoeff = LogFact( n ); + for( i = 0; i < k; i++ ) LogCoeff -= LogFact( X[i] ); + + // Round the exponential of the result to the nearest integer. + + return floor( exp( LogCoeff ) + 0.5 ); + } + + + double MultinomialCoeff( int i, int j, int k ) + { + int n = i + j + k; + double x = LogFact( n ) - LogFact( i ) - LogFact( j ) - LogFact( k ); + return floor( exp( x ) + 0.5 ); + } + + /*************************************************************************** + * B I N O M I A L C O E F F S * + * * + * Generate all n+1 binomial coefficents for a given n. This is done by * + * computing the n'th row of Pascal's triange, starting from the top. * + * No additional storage is required. * + * * + ***************************************************************************/ + void BinomialCoeffs( int n, long *coeff ) + { + coeff[0] = 1; + for( int i = 1; i <= n; i++ ) + { + long a = coeff[0]; + long b = coeff[1]; + for( int j = 1; j < i; j++ ) // Make next row of Pascal's triangle. + { + coeff[j] = a + b; // Overwrite the old row. + a = b; + b = coeff[j+1]; + } + coeff[i] = 1; // The last entry in any row is always 1. + } + } + + void BinomialCoeffs( int n, double *coeff ) + { + coeff[0] = 1.0; + for( int i = 1; i <= n; i++ ) + { + double a = coeff[0]; + double b = coeff[1]; + for( int j = 1; j < i; j++ ) // Make next row of Pascal's triangle. + { + coeff[j] = a + b; // Overwrite the old row. + a = b; + b = coeff[j+1]; + } + coeff[i] = 1.0; // The last entry in any row is always 1. 
+ } + } + + const double *BinomialCoeffs( int n ) + { + static double *coeff[ BinCoeffMax + 1 ] = { 0 }; + if( n > BinCoeffMax || n < 0 ) + { + std::cerr << form( "%d is outside of (0,%d) in BinomialCoeffs", n, BinCoeffMax ); + return NULL; + } + if( coeff[n] == NULL ) // Fill in this entry. + { + double *c = new double[ n + 1 ]; + if( c == NULL ) + { + std::cerr << form( "Could not allocate for BinomialCoeffs(%d)", n ); + return NULL; + } + BinomialCoeffs( n, c ); + coeff[n] = c; + } + return coeff[n]; + } + + /*************************************************************************** + * B I N O M I A L C O E F F * + * * + * Compute a given binomial coefficient. Several rows of Pascal's * + * triangle are stored for efficiently computing the small coefficients. * + * Higher-order terms are computed using LogFact. * + * * + ***************************************************************************/ + double BinomialCoeff( int n, int k ) + { + double b; + int p = n - k; + if( k <= 1 || p <= 1 ) // Check for errors and special cases. + { + if( k == 0 || p == 0 ) return 1; + if( k == 1 || p == 1 ) return n; + std::cerr << form( "BinomialCoeff(%d,%d) is undefined", n, k ); + return 0; + } + static const int // Store part of Pascal's triange for small coeffs. 
+ n0[] = { 1 }, + n1[] = { 1, 1 }, + n2[] = { 1, 2, 1 }, + n3[] = { 1, 3, 3, 1 }, + n4[] = { 1, 4, 6, 4, 1 }, + n5[] = { 1, 5, 10, 10, 5, 1 }, + n6[] = { 1, 6, 15, 20, 15, 6, 1 }, + n7[] = { 1, 7, 21, 35, 35, 21, 7, 1 }, + n8[] = { 1, 8, 28, 56, 70, 56, 28, 8, 1 }, + n9[] = { 1, 9, 36, 84, 126, 126, 84, 36, 9, 1 }; + switch( n ) + { + case 0 : b = n0[k]; break; + case 1 : b = n1[k]; break; + case 2 : b = n2[k]; break; + case 3 : b = n3[k]; break; + case 4 : b = n4[k]; break; + case 5 : b = n5[k]; break; + case 6 : b = n6[k]; break; + case 7 : b = n7[k]; break; + case 8 : b = n8[k]; break; + case 9 : b = n9[k]; break; + default: + { + double x = LogFact( n ) - LogFact( p ) - LogFact( k ); + b = floor( exp( x ) + 0.5 ); + } + } + return b; + } + + + /*************************************************************************** + * L O G D O U B L E F A C T (Log of double factorial) * + * * + * Return log( n!! ) where the double factorial is defined by * + * * + * (2 n + 1)!! = 1 * 3 * 5 * ... * (2n + 1) (Odd integers) * + * * + * (2 n)!! = 2 * 4 * 6 * ... * 2n (Even integers) * + * * + * and is related to the single factorial via * + * * + * (2 n + 1)!! = (2 n + 1)! / ( 2^n n! ) (Odd integers) * + * * + * (2 n)!! = 2^n n! (Even integers) * + * * + ***************************************************************************/ + double LogDoubleFact( int n ) // log( n!! ) + { + int k = n / 2; + double f = LogFact( k ) + k * LogTwo; + if( Odd(n) ) f = LogFact( n ) - f; + return f; + } +}; diff --git a/src/nvtt/bc6h/arvo/ArvoMath.h b/src/nvtt/bc6h/arvo/ArvoMath.h new file mode 100755 index 0000000..957cb25 --- /dev/null +++ b/src/nvtt/bc6h/arvo/ArvoMath.h @@ -0,0 +1,187 @@ +/*************************************************************************** +* Math.h * +* * +* Convenient constants, macros, and inline functions for basic math * +* functions. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 06/17/93 Initial coding. 
* +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1999, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#ifndef __MATH_INCLUDED__ +#define __MATH_INCLUDED__ + +#include +#include + +namespace ArvoMath { + +#ifndef MAXFLOAT +#define MAXFLOAT 1.0E+20 +#endif + + static const double + Pi = 3.14159265358979, + PiSquared = Pi * Pi, + TwoPi = 2.0 * Pi, + FourPi = 4.0 * Pi, + PiOverTwo = Pi / 2.0, + PiOverFour = Pi / 4.0, + OverPi = 1.0 / Pi, + OverTwoPi = 1.0 / TwoPi, + OverFourPi = 1.0 / FourPi, + Infinity = MAXFLOAT, + Tiny = 1.0 / MAXFLOAT, + DegreesToRad = Pi / 180.0, + RadToDegrees = 180.0 / Pi; + + inline int Odd ( int k ) { return k & 1; } + inline int Even ( int k ) { return !(k & 1); } + inline float Abs ( int x ) { return x > 0 ? x : -x; } + inline float Abs ( float x ) { return x > 0. ? x : -x; } + inline float Abs ( double x ) { return x > 0. ? x : -x; } + inline float Min ( float x, float y ) { return x < y ? x : y; } + inline float Max ( float x, float y ) { return x > y ? x : y; } + inline double dMin ( double x, double y ) { return x < y ? x : y; } + inline double dMax ( double x, double y ) { return x > y ? x : y; } + inline float Sqr ( int x ) { return x * x; } + inline float Sqr ( float x ) { return x * x; } + inline float Sqr ( double x ) { return x * x; } + inline float Sqrt ( double x ) { return x > 0. ? 
sqrt(x) : 0.; } + inline float Cubed ( float x ) { return x * x * x; } + inline int Sign ( float x ) { return x > 0. ? 1 : (x < 0. ? -1 : 0); } + inline void Swap ( float &a, float &b ) { float c = a; a = b; b = c; } + inline void Swap ( int &a, int &b ) { int c = a; a = b; b = c; } + inline double Sin ( double x, int n ) { return pow( sin(x), n ); } + inline double Cos ( double x, int n ) { return pow( cos(x), n ); } + inline float ToSin ( double x ) { return Sqrt( 1.0 - Sqr(x) ); } + inline float ToCos ( double x ) { return Sqrt( 1.0 - Sqr(x) ); } + inline float MaxAbs( float x, float y ) { return Max( Abs(x), Abs(y) ); } + inline float MinAbs( float x, float y ) { return Min( Abs(x), Abs(y) ); } + inline float Pythag( double x, double y ) { return Sqrt( x*x + y*y ); } + + inline double ArcCos( double x ) + { + double y; + if( -1.0 <= x && x <= 1.0 ) y = acos( x ); + else if( x > 1.0 ) y = 0.0; + else if( x < -1.0 ) y = Pi; + return y; + } + + inline double ArcSin( double x ) + { + if( x < -1.0 ) x = -1.0; + if( x > 1.0 ) x = 1.0; + return asin( x ); + } + + inline float Clamp( float min, float &x, float max ) + { + if( x < min ) x = min; else + if( x > max ) x = max; + return x; + } + + inline double Clamp( float min, double &x, float max ) + { + if( x < min ) x = min; else + if( x > max ) x = max; + return x; + } + + inline float Max( float x, float y, float z ) + { + float t; + if( x >= y && x >= z ) t = x; + else if( y >= z ) t = y; + else t = z; + return t; + } + + inline float Min( float x, float y, float z ) + { + float t; + if( x <= y && x <= z ) t = x; + else if( y <= z ) t = y; + else t = z; + return t; + } + + inline double dMax( double x, double y, double z ) + { + double t; + if( x >= y && x >= z ) t = x; + else if( y >= z ) t = y; + else t = z; + return t; + } + + inline double dMin( double x, double y, double z ) + { + double t; + if( x <= y && x <= z ) t = x; + else if( y <= z ) t = y; + else t = z; + return t; + } + + inline float MaxAbs( float 
x, float y, float z ) + { + return Max( Abs( x ), Abs( y ), Abs( z ) ); + } + + inline float Pythag( float x, float y, float z ) + { + return sqrt( x * x + y * y + z * z ); + } + + extern float ArcTan ( float x, float y ); + extern float ArcQuad ( float x, float y ); + extern float MachineEpsilon ( ); + extern double LogGamma ( double x ); + extern double LogFact ( int n ); + extern double LogDoubleFact ( int n ); // log( n!! ) + extern double BinomialCoeff ( int n, int k ); + extern void BinomialCoeffs ( int n, long *coeffs ); + extern void BinomialCoeffs ( int n, double *coeffs ); + extern double MultinomialCoeff( int i, int j, int k ); + extern double MultinomialCoeff( int k, int N[] ); + extern double RelErr ( double x, double y ); + +#ifndef ABS +#define ABS( x ) ((x) > 0 ? (x) : -(x)) +#endif + +#ifndef MAX +#define MAX( x, y ) ((x) > (y) ? (x) : (y)) +#endif + +#ifndef MIN +#define MIN( x, y ) ((x) < (y) ? (x) : (y)) +#endif + +}; + +#endif + + + + + + + diff --git a/src/nvtt/bc6h/arvo/Char.cpp b/src/nvtt/bc6h/arvo/Char.cpp new file mode 100755 index 0000000..cc450a5 --- /dev/null +++ b/src/nvtt/bc6h/arvo/Char.cpp @@ -0,0 +1,420 @@ +/*************************************************************************** +* Char.h * +* * +* Convenient constants, macros, and inline functions for manipulation of * +* characters and strings. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 07/01/93 Initial coding. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1999, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. 
See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#include +#include +#include +#include "Char.h" + +namespace ArvoMath { + + typedef char *charPtr; + + // Treat "str" as a file name, and return just the directory + // portion -- i.e. strip off the name of the leaf object (but + // leave the final "/". + const char *getPath( const char *str, char *buff ) + { + int k; + for( k = strlen( str ) - 1; k >= 0; k-- ) + { + if( str[k] == Slash ) break; + } + for( int i = 0; i <= k; i++ ) buff[i] = str[i]; + buff[k+1] = NullChar; + return buff; + } + + // Treat "str" as a file name, and return just the file name + // portion -- i.e. strip off everything up to and including + // the final "/". + const char *getFile( const char *str, char *buff ) + { + int k; + int len = strlen( str ); + for( k = len - 1; k >= 0; k-- ) + { + if( str[k] == Slash ) break; + } + for( int i = 0; i < len - k; i++ ) buff[i] = str[ i + k + 1 ]; + return buff; + } + + int getPrefix( const char *str, char *buff ) + { + int len = 0; + while( *str != NullChar && *str != Period ) + { + *buff++ = *str++; + len++; + } + *buff = NullChar; + return len; + } + + int getSuffix( const char *str, char *buff ) + { + int n = strlen( str ); + int k = n - 1; + while( k >= 0 && str[k] != Period ) k--; + for( int i = k + 1; i < n; i++ ) *buff++ = str[i]; + *buff = NullChar; + return n - k - 1; + } + + const char* toString( int number, char *buff ) + { + static char local_buff[32]; + char *str = ( buff == NULL ) ? local_buff : buff; + sprintf( str, "%d", number ); + return str; + } + + const char* toString( float number, char *buff ) + { + static char local_buff[32]; + char *str = ( buff == NULL ) ? 
local_buff : buff; + sprintf( str, "%g", number ); + return str; + } + + int isInteger( const char *str ) + { + int n = strlen( str ); + for( int i = 0; i < n; i++ ) + { + char c = str[i]; + if( isDigit(c) ) continue; + if( c == Plus || c == Minus ) continue; + if( c == Space ) continue; + return 0; + } + return 1; + } + + // Test to see if a string has a given suffix. + int hasSuffix( const char *string, const char *suffix ) + { + if( suffix == NULL ) return 1; // The null suffix always matches. + if( string == NULL ) return 0; // The null string can only have a null suffix. + int m = strlen( string ); + int k = strlen( suffix ); + if( k <= 0 ) return 1; // Empty suffix always matches. + if( m < k + 1 ) return 0; // String is too short to have this suffix. + + // See if the file has the given suffix. + int s = m - k; // Beginning of suffix (if it matches). + for( int i = 0; i < k; i++ ) + if( string[ s + i ] != suffix[ i ] ) return 0; + return s; // Always > 0. + } + + // Test to see if a string has a given prefix. + int hasPrefix( const char *string, const char *prefix ) + { + if( prefix == NULL ) return 1; // The null prefix always matches. + if( string == NULL ) return 0; // The null string can only have a null suffix. + while( *prefix ) + { + if( *prefix++ != *string++ ) return 0; + } + return 1; + } + + // Test to see if the string contains the given character. 
+ int inString( char c, const char *str ) + { + if( str == NULL || str[0] == NullChar ) return 0; + while( *str != '\0' ) + if( *str++ == c ) return 1; + return 0; + } + + int nullString( const char *str ) + { + return str == NULL || str[0] == NullChar; + } + + const char *stripSuffix( const char *string, const char *suffix, char *buff ) + { + static char local_buff[256]; + if( buff == NULL ) buff = local_buff; + buff[0] = NullChar; + if( !hasSuffix( string, suffix ) ) return NULL; + int s = strlen( string ) - strlen( suffix ); + for( int i = 0; i < s; i++ ) + { + buff[i] = string[i]; + } + buff[s] = NullChar; + return buff; + } + + int getIndex( const char *pat, const char *str ) + { + int p_len = strlen( pat ); + int s_len = strlen( str ); + if( p_len == 0 || s_len == 0 ) return -1; + for( int i = 0; i <= s_len - p_len; i++ ) + { + int match = 1; + for( int j = 0; j < p_len; j++ ) + { + if( str[ i + j ] != pat[ j ] ) { match = 0; break; } + } + if( match ) return i; + } + return -1; + } + + int getSubstringAfter( const char *pat, const char *str, char *buff ) + { + int ind = getIndex( pat, str ); + if( ind < 0 ) return -1; + int p_len = strlen( pat ); + int k = 0; + for( int i = ind + p_len; ; i++ ) + { + buff[ k++ ] = str[ i ]; + if( str[ i ] == NullChar ) break; + } + return k; + } + + const char *SubstringAfter( const char *pat, const char *str, char *user_buff ) + { + static char temp[128]; + char *buff = ( user_buff != NULL ) ? user_buff : temp; + int k = getSubstringAfter( pat, str, buff ); + if( k > 0 ) return buff; + return str; + } + + const char *metaString( const char *str, char *user_buff ) + { + static char temp[128]; + char *buff = ( user_buff != NULL ) ? user_buff : temp; + sprintf( buff, "\"%s\"", str ); + return buff; + } + + // This is the opposite of metaString. + const char *stripQuotes( const char *str, char *user_buff ) + { + static char temp[128]; + char *buff = ( user_buff != NULL ) ? 
user_buff : temp; + char *b = buff; + for(;;) + { + if( *str != DoubleQuote ) *b++ = *str; + if( *str == NullChar ) break; + str++; + } + return buff; + } + + int getIntFlag( const char *flags, const char *flag, int &value ) + { + while( *flags ) + { + if( hasPrefix( flags, flag ) ) + { + int k = strlen( flag ); + if( flags[k] == '=' ) + { + value = atoi( flags + k + 1 ); + return 1; + } + } + flags++; + } + return 0; + } + + int getFloatFlag( const char *flags, const char *flag, float &value ) + { + while( *flags ) + { + if( hasPrefix( flags, flag ) ) + { + int k = strlen( flag ); + if( flags[k] == '=' ) + { + value = atof( flags + k + 1 ); + return 1; + } + } + flags++; + } + return 0; + } + + SortedList::SortedList( sort_type type_, int ascend_ ) + { + type = type_; + ascend = ascend_; + num_elements = 0; + max_elements = 0; + sorted = 1; + list = NULL; + } + + SortedList::~SortedList() + { + Clear(); + delete[] list; + } + + void SortedList::Clear() + { + // Delete all the private copies of the strings and re-initialize the + // list. Reuse the same list, expanding it when necessary. + for( int i = 0; i < num_elements; i++ ) + { + delete list[i]; + list[i] = NULL; + } + num_elements = 0; + sorted = 1; + } + + SortedList &SortedList::operator<<( const char *str ) + { + // Add a new string to the end of the list, expanding the list if necessary. + // Mark the list as unsorted, so that the next reference to an element will + // cause the list to be sorted again. + if( num_elements == max_elements ) Expand(); + list[ num_elements++ ] = strdup( str ); + sorted = 0; + return *this; + } + + const char *SortedList::operator()( int i ) + { + // Return the i'th element of the list. Sort first if necessary. + static char *null = ""; + if( num_elements == 0 || i < 0 || i >= num_elements ) return null; + if( !sorted ) Sort(); + return list[i]; + } + + void SortedList::Expand() + { + // Create a new list of twice the size and copy the old list into it. 
+ // This doubles "max_elements", but leaves "num_elements" unchanged. + if( max_elements == 0 ) max_elements = 1; + max_elements *= 2; + charPtr *new_list = new charPtr[ max_elements ]; + for( int i = 0; i < max_elements; i++ ) + new_list[i] = ( i < num_elements ) ? list[i] : NULL; + delete[] list; + list = new_list; + } + + void SortedList::Swap( int i, int j ) + { + char *temp = list[i]; + list[i] = list[j]; + list[j] = temp; + } + + int SortedList::inOrder( int p, int q ) const + { + int test; + if( type == sort_alphabetic ) + test = ( strcmp( list[p], list[q] ) <= 0 ); + else + { + int len_p = strlen( list[p] ); + int len_q = strlen( list[q] ); + test = ( len_p < len_q ) || + ( len_p == len_q && strcmp( list[p], list[q] ) <= 0 ); + } + if( ascend ) return test; + return !test; + } + + // This is an insertion sort that operates on subsets of the + // input defined by the step length. + void SortedList::InsertionSort( int start, int size, int step ) + { + for( int i = 0; i + step < size; i += step ) + { + for( int j = i; j >= 0; j -= step ) + { + int p = start + j; + int q = p + step; + if( inOrder( p, q ) ) break; + Swap( p, q ); + } + } + } + + // This is a Shell sort. 
+ void SortedList::Sort() + { + for( int step = num_elements / 2; step > 1; step /= 2 ) + for( int start = 0; start < step; start++ ) + InsertionSort( start, num_elements - start, step ); + InsertionSort( 0, num_elements, 1 ); + sorted = 1; + } + + void SortedList::SetOrder( sort_type type_, int ascend_ ) + { + if( type_ != type || ascend_ != ascend ) + { + type = type_; + ascend = ascend_; + sorted = 0; + } + } + + int getstring( std::istream &in, const char *str ) + { + char ch; + if( str == NULL ) return 1; + while( *str != NullChar ) + { + in >> ch; + if( *str != ch ) return 0; + str++; + } + return 1; + } + + std::istream &skipWhite( std::istream &in ) + { + char c; + while( in.get(c) ) + { + if( !isWhite( c ) ) + { + in.putback(c); + break; + } + } + return in; + } +}; diff --git a/src/nvtt/bc6h/arvo/Char.h b/src/nvtt/bc6h/arvo/Char.h new file mode 100755 index 0000000..2742c1d --- /dev/null +++ b/src/nvtt/bc6h/arvo/Char.h @@ -0,0 +1,245 @@ +/*************************************************************************** +* Char.h * +* * +* Convenient constants, macros, and inline functions for manipulation of * +* characters and strings. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 07/01/93 Initial coding. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1999, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. 
* +* * +***************************************************************************/ +#ifndef __CHAR_INCLUDED__ +#define __CHAR_INCLUDED__ + +#include +#include + +namespace ArvoMath { + + static const char + Apostrophe = '\'' , + Asterisk = '*' , + Atsign = '@' , + Backslash = '\\' , + Bell = '\7' , + Colon = ':' , + Comma = ',' , + Dash = '-' , + DoubleQuote = '"' , + EqualSign = '=' , + Exclamation = '!' , + GreaterThan = '>' , + Hash = '#' , + Lbrack = '[' , + Lcurley = '{' , + LessThan = '<' , + Lparen = '(' , + Minus = '-' , + NewLine = '\n' , + NullChar = '\0' , + Percent = '%' , + Period = '.' , + Pound = '#' , + Plus = '+' , + Rbrack = ']' , + Rcurley = '}' , + Rparen = ')' , + Semicolon = ';' , + Space = ' ' , + Slash = '/' , + Star = '*' , + Tab = '\t' , + Tilde = '~' , + Underscore = '_' ; + + inline int isWhite( char c ) { return c == Space || c == NewLine || c == Tab; } + inline int isUcase( char c ) { return 'A' <= c && c <= 'Z'; } + inline int isLcase( char c ) { return 'a' <= c && c <= 'z'; } + inline int isAlpha( char c ) { return isUcase( c ) || isLcase( c ); } + inline int isDigit( char c ) { return '0' <= c && c <= '9'; } + inline char ToLower( char c ) { return isUcase( c ) ? c + ( 'a' - 'A' ) : c; } + inline char ToUpper( char c ) { return isLcase( c ) ? c + ( 'A' - 'a' ) : c; } + + extern const char *getPath( + const char *str, + char *buff + ); + + extern const char *getFile( + const char *str, + char *buff + ); + + extern int getPrefix( + const char *str, + char *buff + ); + + extern int getSuffix( + const char *str, + char *buff + ); + + extern int isInteger( + const char *str + ); + + extern int hasSuffix( + const char *string, + const char *suffix + ); + + extern int hasPrefix( + const char *string, + const char *prefix + ); + + extern int inString( + char c, + const char *str + ); + + extern int nullString( + const char *str + ); + + extern const char *stripSuffix( // Return NULL if unsuccessful. 
+        const char *string, // The string to truncate.
+        const char *suffix, // The suffix to remove.
+        char *buff = NULL   // Defaults to internal buffer.
+        );
+
+    extern const char* toString(
+        int n,              // An integer to convert to a string.
+        char *buff = NULL   // Defaults to internal buffer.
+        );
+
+    extern const char* toString(
+        float x,            // A float to convert to a string.
+        char *buff = NULL   // Defaults to internal buffer.
+        );
+
+    extern int getIndex(    // The index of the start of a pattern in a string.
+        const char *pat,    // The pattern to look for.
+        const char *str     // The string to search.
+        );
+
+    extern int getSubstringAfter(
+        const char *pat,
+        const char *str,
+        char *buff
+        );
+
+    extern const char *SubstringAfter(
+        const char *pat,
+        const char *str,
+        char *buff = NULL   // Defaults to internal buffer.
+        );
+
+    extern const char *metaString(
+        const char *str,    // Make this a string within a string.
+        char *buff = NULL   // Defaults to internal buffer.
+        );
+
+    extern const char *stripQuotes(
+        const char *str,    // This is the opposite of metaString.
+        char *buff = NULL   // Defaults to internal buffer.
+        );
+
+    extern int getIntFlag(
+        const char *flags,  // List of assignment statements.
+        const char *flag,   // A specific flag to look for.
+        int &value          // The variable to assign the value to.
+        );
+
+    extern int getFloatFlag(
+        const char *flags,  // List of assignment statements.
+        const char *flag,   // A specific flag to look for.
+        float &value        // The variable to assign the value to.
+        );
+
+    extern int getstring(
+        std::istream &in,
+        const char *str
+        );
+
+    enum sort_type {
+        sort_alphabetic,    // Standard dictionary ordering.
+        sort_lexicographic  // Sort first by length, then alphabetically.
+ }; + + class SortedList { + + public: + SortedList( sort_type = sort_alphabetic, int ascending = 1 ); + ~SortedList(); + SortedList &operator<<( const char * ); + int Size() const { return num_elements; } + const char *operator()( int i ); + void Clear(); + void SetOrder( sort_type = sort_alphabetic, int ascending = 1 ); + + private: + void Sort(); + void InsertionSort( int start, int size, int step ); + void Swap( int i, int j ); + void Expand(); + int inOrder( int i, int j ) const; + int num_elements; + int max_elements; + int sorted; + int ascend; + sort_type type; + char **list; + }; + + + inline int Match( const char *s, const char *t ) + { + return s != NULL && + (t != NULL && strcmp( s, t ) == 0); + } + + inline int Match( const char *s, const char *t1, const char *t2 ) + { + return s != NULL && ( + (t1 != NULL && strcmp( s, t1 ) == 0) || + (t2 != NULL && strcmp( s, t2 ) == 0) ); + } + + union long_union_float { + long i; + float f; + }; + + inline long float_as_long( float x ) + { + long_union_float u; + u.f = x; + return u.i; + } + + inline float long_as_float( long i ) + { + long_union_float u; + u.i = i; + return u.f; + } + + extern std::istream &skipWhite( std::istream &in ); +}; +#endif diff --git a/src/nvtt/bc6h/arvo/Complex.cpp b/src/nvtt/bc6h/arvo/Complex.cpp new file mode 100755 index 0000000..468704f --- /dev/null +++ b/src/nvtt/bc6h/arvo/Complex.cpp @@ -0,0 +1,76 @@ +/*************************************************************************** +* Complex.C * +* * +* Complex numbers, complex arithmetic, and functions of a complex * +* variable. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 03/02/2000 Initial coding. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 2000, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. 
See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#include "Complex.h" +#include "form.h" + +namespace ArvoMath { + const Complex Complex::i( 0.0, 1.0 ); + + std::ostream &operator<<( std::ostream &out, const Complex &z ) + { + out << form( "(%f,%f) ", z.Real(), z.Imag() ); + return out; + } + + Complex cos( const Complex &z ) + { + return Complex( + ::cos( z.Real() ) * ::cosh( z.Imag() ), + -::sin( z.Real() ) * ::sinh( z.Imag() ) + ); + } + + Complex sin( const Complex &z ) + { + return Complex( + ::sin( z.Real() ) * ::cosh( z.Imag() ), + ::cos( z.Real() ) * ::sinh( z.Imag() ) + ); + } + + Complex cosh( const Complex &z ) + { + return Complex( + ::cosh( z.Real() ) * ::cos( z.Imag() ), + ::sinh( z.Real() ) * ::sin( z.Imag() ) + ); + } + + Complex sinh( const Complex &z ) + { + return Complex( + ::sinh( z.Real() ) * ::cos( z.Imag() ), + ::cosh( z.Real() ) * ::sin( z.Imag() ) + ); + } + + Complex log( const Complex &z ) + { + float r = ::sqrt( z.Real() * z.Real() + z.Imag() * z.Imag() ); + float t = ::acos( z.Real() / r ); + if( z.Imag() < 0.0 ) t = 2.0 * 3.1415926 - t; + return Complex( ::log(r), t ); + } +}; diff --git a/src/nvtt/bc6h/arvo/Complex.h b/src/nvtt/bc6h/arvo/Complex.h new file mode 100755 index 0000000..671fd57 --- /dev/null +++ b/src/nvtt/bc6h/arvo/Complex.h @@ -0,0 +1,187 @@ +/*************************************************************************** +* Complex.h * +* * +* Complex numbers, complex arithmetic, and functions of a complex * +* variable. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 03/02/2000 Initial coding. 
* +* * +*--------------------------------------------------------------------------* +* Copyright (C) 2000, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#ifndef __COMPLEX_INCLUDED__ +#define __COMPLEX_INCLUDED__ + +#include +#include + +namespace ArvoMath { + + class Complex { + public: + Complex() { x = 0; y = 0; } + Complex( float a ) { x = a; y = 0; } + Complex( float a, float b ) { x = a; y = b; } + Complex( const Complex &z ) { *this = z; } + float &Real() { return x; } + float &Imag() { return y; } + float Real() const { return x; } + float Imag() const { return y; } + inline Complex &operator=( const Complex &z ); + static const Complex i; + private: + float x; + float y; + }; + + inline Complex &Complex::operator=( const Complex &z ) + { + x = z.Real(); + y = z.Imag(); + return *this; + } + + inline float Real( const Complex &z ) + { + return z.Real(); + } + + inline float Imag( const Complex &z ) + { + return z.Imag(); + } + + inline Complex conj( const Complex &z ) + { + return Complex( z.Real(), -z.Imag() ); + } + + inline double modsqr( const Complex &z ) + { + return z.Real() * z.Real() + z.Imag() * z.Imag(); + } + + inline double modulus( const Complex &z ) + { + return sqrt( z.Real() * z.Real() + z.Imag() * z.Imag() ); + } + + inline double arg( const Complex &z ) + { + float t = acos( z.Real() / modulus(z) ); + if( z.Imag() < 0.0 ) t = 2.0 * 3.1415926 - t; + return t; + } + + inline Complex operator*( const Complex &z, float a ) + { + 
return Complex( a * z.Real(), a * z.Imag() );
+    }
+
+    // Scalar times complex.
+    inline Complex operator*( float a, const Complex &z )
+    {
+        const float re = a * z.Real();
+        const float im = a * z.Imag();
+        return Complex( re, im );
+    }
+
+    // Complex product: (a + bi)(c + di) = (ac - bd) + (ad + bc)i.
+    inline Complex operator*( const Complex &z, const Complex &w )
+    {
+        const float re = z.Real() * w.Real() - z.Imag() * w.Imag();
+        const float im = z.Real() * w.Imag() + z.Imag() * w.Real();
+        return Complex( re, im );
+    }
+
+    // Component-wise sum.
+    inline Complex operator+( const Complex &z, const Complex &w )
+    {
+        const float re = z.Real() + w.Real();
+        const float im = z.Imag() + w.Imag();
+        return Complex( re, im );
+    }
+
+    // Component-wise difference.
+    inline Complex operator-( const Complex &z, const Complex &w )
+    {
+        const float re = z.Real() - w.Real();
+        const float im = z.Imag() - w.Imag();
+        return Complex( re, im );
+    }
+
+    // Negation.
+    inline Complex operator-( const Complex &z )
+    {
+        const float re = -z.Real();
+        const float im = -z.Imag();
+        return Complex( re, im );
+    }
+
+    // Complex divided by a scalar.
+    inline Complex operator/( const Complex &z, float w )
+    {
+        const float re = z.Real() / w;
+        const float im = z.Imag() / w;
+        return Complex( re, im );
+    }
+
+    // Complex quotient: z / w = z * conj(w) / |w|^2.
+    inline Complex operator/( const Complex &z, const Complex &w )
+    {
+        const Complex numerator = z * conj(w);
+        return numerator / modsqr(w);
+    }
+
+    // Scalar divided by complex: a / w = conj(w) * ( a / |w|^2 ).
+    inline Complex operator/( float a, const Complex &w )
+    {
+        const float scale = a / modsqr(w);
+        return conj(w) * scale;
+    }
+
+    inline Complex &operator+=( Complex &z, const Complex &w )
+    {
+        z.Real() += w.Real();
+        z.Imag() += w.Imag();
+        return z;
+    }
+
+    inline Complex &operator*=( Complex &z, const Complex &w )
+    {
+        z = ( z * w );
+        return z;
+    }
+
+    inline Complex &operator-=( Complex &z, const Complex &w )
+    {
+        z.Real() -= w.Real();
+        z.Imag() -= w.Imag();
+        return z;
+    }
+
+    // Exponential: exp( x + iy ) = exp(x) ( cos y + i sin y ).
+    inline Complex exp( const Complex &z )
+    {
+        const float magnitude = ::exp( z.Real() );
+        return Complex( magnitude * cos( z.Imag() ), magnitude * sin( z.Imag() ) );
+    }
+
+    // Integer power in polar form: |z|^n at angle n * arg(z).
+    inline Complex pow( const Complex &z, int n )
+    {
+        const float magnitude = ::pow( modulus( z ), (double)n );
+        const float angle = arg( z );
+        return Complex( magnitude * cos( n * angle ), magnitude * sin( n * angle ) );
+    }
+
+    // The complex number with modulus r and angle theta.
+    inline Complex polar( float r, float theta )
+    {
+        const float re = r * cos( theta );
+        const float im = r * sin( theta );
+        return Complex( re, im );
+    }
+
+
+    extern Complex cos ( const Complex &z );
+    extern Complex sin ( const Complex &z );
+    extern Complex cosh( const Complex &z );
+    extern
Complex sinh( const Complex &z ); + extern Complex log ( const Complex &z ); + + extern std::ostream &operator<<( + std::ostream &out, + const Complex & + ); +}; +#endif + diff --git a/src/nvtt/bc6h/arvo/Matrix.cpp b/src/nvtt/bc6h/arvo/Matrix.cpp new file mode 100755 index 0000000..d84b7ef --- /dev/null +++ b/src/nvtt/bc6h/arvo/Matrix.cpp @@ -0,0 +1,1201 @@ +/*************************************************************************** +* Matrix.C * +* * +* General Vector and Matrix classes, with all the associated methods. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 08/16/2000 Revamped for CIT tools. * +* arvo 10/31/1994 Combined RowVec & ColVec into Vector. * +* arvo 06/30/1993 Added singular value decomposition class. * +* arvo 06/25/1993 Major revisions. * +* arvo 09/08/1991 Initial implementation. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 2000, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#include +#include +#include +#include "ArvoMath.h" +#include "Vector.h" +#include "Matrix.h" +#include "form.h" + +namespace ArvoMath { + const Matrix Matrix::Null(0); + + /*-------------------------------------------------------------------------* + * * + * C O N S T R U C T O R S * + * * + *-------------------------------------------------------------------------*/ + + // Create a new matrix of the given size. 
If n_cols is zero (the default), + // it is assumed that the matrix is to be square; that is, n_rows x n_rows. + // The matrix is filled with "value", which defaults to zero. + Matrix::Matrix( int n_rows, int n_cols, float value ) + { + assert( n_rows >= 0 && n_cols >= 0 ); + rows = 0; + cols = 0; + elem = NULL; + SetSize( n_rows, n_cols ); + float *e = elem; + for( register int i = 0; i < rows * cols; i++ ) *e++ = value; + } + + // Copy constructor. + Matrix::Matrix( const Matrix &M ) + { + rows = 0; + cols = 0; + elem = NULL; + SetSize( M.Rows(), M.Cols() ); + register float *e = elem; + register float *m = M.Array(); + for( register int i = 0; i < rows * cols; i++ ) *e++ = *m++; + } + + Matrix::~Matrix() + { + SetSize( 0, 0 ); + } + + /*-------------------------------------------------------------------------* + * * + * M I S C E L L A N E O U S M E T H O D S * + * * + *-------------------------------------------------------------------------*/ + + // Re-shape the matrix. If the number of elements in the new matrix is + // different from the original matrix, the original data is deleted and + // replaced with a new array. If new_cols is zero (the default), it is + // assumed to be the same as new_rows -- i.e. a square matrix. + void Matrix::SetSize( int new_rows, int new_cols ) + { + if( new_cols == 0 ) new_cols = new_rows; + int n = new_rows * new_cols; + if( rows * cols != n ) + { + if( elem != NULL ) delete[] elem; + elem = ( n == 0 ) ? 
NULL : new float[ n ];
+        }
+        rows = new_rows;
+        cols = new_cols;
+    }
+
+    // Return column j as a vector of length Rows().
+    Vector Matrix::GetCol( int j ) const
+    {
+        assert( 0 <= j && j < cols );  // The column is reached by raw pointer arithmetic.
+        Vector C( rows );
+        float *e = elem + j;
+        float *c = C.Array();
+        for( int i = 0; i < rows; i++ )
+        {
+            *c++ = *e;
+            e += cols;  // Step to the same column of the next row.
+        }
+        return C;
+    }
+
+    // Return row i as a vector of length Cols().
+    Vector Matrix::GetRow( int i ) const
+    {
+        assert( 0 <= i && i < rows );
+        Vector R( cols );
+        float *e = elem + ( i * cols );
+        float *r = R.Array();
+        for( int j = 0; j < cols; j++ ) *r++ = *e++;
+        return R;
+    }
+
+    // Overwrite column j with the vector C, which must have Rows() elements.
+    void Matrix::SetCol( int j, const Vector &C )
+    {
+        assert( 0 <= j && j < cols );
+        assert( rows == C.Size() );
+        float *e = elem + j;
+        float *c = C.Array();
+        for( int i = 0; i < rows; i++ )
+        {
+            *e = *c++;
+            e += cols;
+        }
+    }
+
+    // Overwrite row i with the vector R, which must have Cols() elements.
+    void Matrix::SetRow( int i, const Vector &R )
+    {
+        assert( 0 <= i && i < rows );
+        assert( cols == R.Size() );
+        float *e = elem + ( i * cols );
+        float *r = R.Array();
+        for( int j = 0; j < cols; j++ ) *e++ = *r++;
+    }
+
+    // Return a copy of rows imin..imax and columns jmin..jmax (inclusive).
+    // An empty index range yields an empty matrix.
+    Matrix Matrix::GetBlock( int imin, int imax, int jmin, int jmax ) const
+    {
+        if( imax < imin || jmax < jmin ) return Matrix(0,0);
+        Matrix M( imax - imin + 1, jmax - jmin + 1 );
+        for( int i = imin; i <= imax; i++ )
+        for( int j = jmin; j <= jmax; j++ )
+        {
+            M( i - imin, j - jmin ) = (*this)( i, j );
+        }
+        return M;
+    }
+
+    // Overwrite rows imin..imax and columns jmin..jmax (inclusive) with the
+    // matrix B, whose shape must match the block.
+    void Matrix::SetBlock( int imin, int imax, int jmin, int jmax, const Matrix &B )
+    {
+        int ni = imax - imin + 1;
+        int nj = jmax - jmin + 1;
+        assert( ni == B.Rows() );
+        assert( nj == B.Cols() );
+        // The block is written through raw offsets, so it must lie inside the matrix.
+        assert( 0 <= imin && imax < rows );
+        assert( 0 <= jmin && jmax < cols );
+        int k = imin * cols + jmin;
+        for( int i = 0; i < ni; i++ )
+        for( int j = 0; j < nj; j++ )
+        {
+            elem[ k + i * cols + j ] = B(i,j);
+        }
+    }
+
+    // Overwrite part of a single row (imin == imax) or a single column
+    // (jmin == jmax) with the vector V, whose length must match the range.
+    void Matrix::SetBlock( int imin, int imax, int jmin, int jmax, const Vector &V )
+    {
+        int k = imin * cols + jmin;
+        if( imin == imax )
+        {
+            int nj = jmax - jmin + 1;
+            assert( nj == V.Size() );
+            for( int j = 0; j < nj; j++ ) elem[ k + j ] = V(j);
+        }
+        else if( jmin == jmax )
+        {
+            int ni = imax - imin + 1;
+            assert( ni == V.Size() );
+            for( int i = 0; i < ni; i++ ) elem[ k + i * cols ] = V(i);
+        }
+        else
+        {
+            // This assertion will be false, and will signal an error.
+            assert( imin == imax || jmin == jmax );
+        }
+    }
+
+    // Exchange rows i1 and i2 in place.  Returns *this.
+    Matrix &Matrix::SwapRows( int i1, int i2 )
+    {
+        assert( 0 <= i1 && i1 < rows );
+        assert( 0 <= i2 && i2 < rows );
+        float temp;
+        float *r1 = elem + ( i1 * cols );
+        float *r2 = elem + ( i2 * cols );
+        // "register" is no longer a valid storage class in C++17.
+        for( int j = 0; j < cols; j++ )
+        {
+            temp = *r1;
+            *r1 = *r2;
+            *r2 = temp;
+            r1++;
+            r2++;
+        }
+        return *this;
+    }
+
+    // Exchange columns j1 and j2 in place.  Returns *this.
+    Matrix &Matrix::SwapCols( int j1, int j2 )
+    {
+        assert( 0 <= j1 && j1 < cols );
+        assert( 0 <= j2 && j2 < cols );
+        float temp;
+        float *c1 = elem + j1;
+        float *c2 = elem + j2;
+        for( int i = 0; i < rows; i++ )
+        {
+            temp = *c1;
+            *c1 = *c2;
+            *c2 = temp;
+            c1 += cols;  // Step to the same column of the next row.
+            c2 += cols;
+        }
+        return *this;
+    }
+
+    /*-------------------------------------------------------------------------*
+     *                                                                         *
+     *              A S S I G N M E N T   O P E R A T O R S                    *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    // Take on the shape of M and copy its elements.
+    Matrix& Matrix::operator=( const Matrix &M )
+    {
+        SetSize( M.Rows(), M.Cols() );
+        float *e = elem;
+        float *m = M.Array();
+        for( int i = 0; i < rows * cols; i++ ) *e++ = *m++;
+        return *this;
+    }
+
+    // Set every element to s; the shape is unchanged.
+    Matrix& Matrix::operator=( float s )
+    {
+        float *e = elem;
+        for( int i = 0; i < rows * cols; i++ ) *e++ = s;
+        return *this;
+    }
+
+    /*-------------------------------------------------------------------------*
+     *                                                                         *
+     *                          O P E R A T O R S                              *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    Vector operator*( const Matrix &M, const Vector &A )
+    {
+        // Handle the special case with translation built in.
+ if( M.Cols() == 4 && M.Rows() == 4 && A.Size() == 3 ) + { + Vector C(3); + C(0) = M(0,0) * A(0) + M(0,1) * A(1) + M(0,2) * A(2) + M(0,3); + C(1) = M(1,0) * A(0) + M(1,1) * A(1) + M(1,2) * A(2) + M(1,3); + C(2) = M(2,0) * A(0) + M(2,1) * A(1) + M(2,2) * A(2) + M(2,3); + return C; + } + assert( M.Cols() == A.Size() ); + Vector C( M.Rows() ); + float *m = M.Array(); + for( int i = 0; i < M.Rows(); i++ ) + { + register float *a = A.Array(); + register double sum = (*m++) * (*a++); + for( register int j = 1; j < M.Cols(); j++ ) + sum += (*m++) * (*a++); + C(i) = sum; + } + return C; + } + + Vector operator*( const Vector &A, const Matrix &M ) + { + assert( A.Size() == M.Rows() ); + Vector C( M.Cols() ); + for( register int j = 0; j < M.Cols(); j++ ) + { + register double sum = 0.0; + register float *a = A.Array(); + for( register int i = 0; i < M.Rows(); i++ ) + sum += (*a++) * M(i,j); + C(j) = sum; + } + return C; + } + + Vector& operator*=( Vector &A, const Matrix &M ) + { + // Handle the special case with translation built in. 
+ if( M.Cols() == 4 && M.Rows() == 4 && A.Size() == 3 ) + { + float x = M(0,0) * A(0) + M(0,1) * A(1) + M(0,2) * A(2) + M(0,3); + float y = M(1,0) * A(0) + M(1,1) * A(1) + M(1,2) * A(2) + M(1,3); + float z = M(2,0) * A(0) + M(2,1) * A(1) + M(2,2) * A(2) + M(2,3); + A(0) = x; + A(1) = y; + A(2) = z; + return A; + } + assert( M.Cols() == A.Size() ); + Vector C( M.Rows() ); + float *m = M.Array(); + for( register int i = 0; i < M.Rows(); i++ ) + { + double sum = 0.0; + for( register int j = 0; j < A.Size(); j++ ) + sum += (*m++) * A(j); + C(i) = sum; + } + return A = C; + } + + Matrix& operator*=( Matrix &M, float s ) + { + register float *m = M.Array(); + for( register int i = 0; i < M.Rows() * M.Cols(); i++ ) *m++ *= s; + return M; + } + + Matrix& operator/=( Matrix &M, float s ) + { + assert( s != 0.0 ); + register float *m = M.Array(); + for( register int i = 0; i < M.Rows() * M.Cols(); i++ ) *m++ /= s; + return M; + } + + Matrix operator+( const Matrix &A, const Matrix &B ) + { + assert( A.Rows() == B.Rows() ); + assert( A.Cols() == B.Cols() ); + Matrix C( A.Rows(), A.Cols() ); + register float *a = A.Array(); + register float *b = B.Array(); + register float *c = C.Array(); + for( register int i = 0; i < A.Rows() * A.Cols(); i++ ) (*c++) = (*a++) + (*b++); + return C; + } + + Matrix operator-( const Matrix &A, const Matrix &B ) + { + assert( A.Rows() == B.Rows() ); + assert( A.Cols() == B.Cols() ); + Matrix C( A.Rows(), A.Cols() ); + register float *a = A.Array(); + register float *b = B.Array(); + register float *c = C.Array(); + for( register int i = 0; i < A.Rows() * A.Cols(); i++ ) (*c++) = (*a++) - (*b++); + return C; + } + + Matrix operator-( const Matrix &A ) + { + Matrix B( A.Cols(), A.Rows() ); + register float *a = A.Array(); + register float *b = B.Array(); + for( register int i = 0; i < A.Rows() * A.Cols(); i++ ) + { + *b++ = -(*a++); + } + return B; + } + + Matrix& operator+=( Matrix &A, const Matrix &B ) + { + assert( A.Rows() == B.Rows() ); + 
assert( A.Cols() == B.Cols() ); + register float *a = A.Array(); + register float *b = B.Array(); + for( register int i = 0; i < A.Rows() * A.Cols(); i++ ) (*a++) += (*b++); + return A; + } + + Matrix operator*( const Matrix &A, const Matrix &B ) + { + assert( A.Cols() == B.Rows() ); + Matrix M( A.Rows(), B.Cols() ); + for( register int i = 0; i < A.Rows(); i++ ) + for( register int j = 0; j < B.Cols(); j++ ) + { + double sum = 0.0; + for( register int k = 0; k < A.Cols(); k++ ) sum += A(i,k) * B(k,j); + M(i,j) = sum; + } + return M; + } + + Matrix operator*( float s, const Matrix &A ) + { + Matrix B( A.Cols(), A.Rows() ); + register float *a = A.Array(); + register float *b = B.Array(); + for( register int i = 0; i < A.Rows() * A.Cols(); i++ ) + { + *b++ = s * (*a++); + } + return B; + } + + Matrix operator*( const Matrix &A, float s ) + { + Matrix B( A.Cols(), A.Rows() ); + register float *a = A.Array(); + register float *b = B.Array(); + for( register int i = 0; i < A.Rows() * A.Cols(); i++ ) + { + *b++ = s * (*a++); + } + return B; + } + + Matrix operator/( const Matrix &A, float s ) + { + assert( s != 0.0 ); + Matrix B( A.Cols(), A.Rows() ); + register float *a = A.Array(); + register float *b = B.Array(); + for( register int i = 0; i < A.Rows() * A.Cols(); i++ ) + { + *b++ = (*a++) / s; + } + return B; + } + + Matrix& operator*=( Matrix &A, const Matrix &B ) + { + assert( A.Cols() == B.Rows() ); + Vector R( B.Cols() ); + for( register int i = 0; i < A.Rows(); i++ ) + { + for( register int j = 0; j < B.Cols(); j++ ) // Compute the ith row of A * B. + { + double sum = A(i,0) * B(0,j); + for( register int k = 1; k < A.Cols(); k++ ) sum += A(i,k) * B(k,j); + R(j) = sum; + } + // Copy the new i'th row back into A. 
+ for( register int k = 0; k < A.Cols(); k++ ) A(i,k) = R(k); + } + return A; + } + + /*-------------------------------------------------------------------------* + * * + * M I S C E L L A N E O U S F U N C T I O N S * + * * + *-------------------------------------------------------------------------*/ + Matrix Transp( const Matrix &M ) + { + Matrix T( M.Cols(), M.Rows() ); + register float *m = M.Array(); + for( register int i = 0; i < M.Rows(); i++ ) + for( register int j = 0; j < M.Cols(); j++ ) T(j,i) = *m++; + return T; + } + + // Computes A * Transp(A). + Matrix AATransp( const Matrix &A ) + { + int n = A.Rows(); + Matrix B( n, n ); + for( register int i = 0; i < n; i++ ) + for( register int j = 0; j < n; j++ ) + { + double sum = 0.0; + for( register int k = 0; k < A.Cols(); k++ ) + sum += A(i,k) * A(j,k); + B(i,j) = sum; + } + return B; + } + + // Computes Transp(A) * A. + Matrix ATranspA( const Matrix &A ) + { + int n = A.Cols(); + Matrix B( n, n ); + for( register int i = 0; i < n; i++ ) + for( register int j = 0; j < n; j++ ) + { + double sum = 0.0; + for( register int k = 0; k < A.Rows(); k++ ) + sum += A(k,i) * A(k,j); + B(i,j) = sum; + } + return B; + } + + // Computes the outer product of the vectors A and B. + Matrix Outer( const Vector &A, const Vector &B ) + { + Matrix M( A.Size(), B.Size() ); + for( register int i = 0; i < A.Size(); i++ ) + { + float c = A(i); + for( register int j = 0; j < B.Size(); j++ ) M(i,j) = c * B(j); + } + return M; + } + + // Computes the L1-norm of the matrix A, which is the maximum absolute + // row sum. + double OneNorm( const Matrix &A ) + { + double norm = 0.0; + for( register int i = 0; i < A.Rows(); i++ ) + { + double sum = 0.0; + for( register int j = 0; j < A.Cols(); j++ ) sum += Abs( A(i,j) ); + if( sum > norm ) norm = sum; + } + return norm; + } + + // Computes the L-infinity norm of the matrix A, which is the maximum + // absolute column sum. 
+    double SupNorm( const Matrix &A )
+    {
+        double norm = 0.0;
+        // "register" is no longer a valid storage class in C++17.
+        for( int j = 0; j < A.Cols(); j++ )
+        {
+            double sum = 0.0;
+            for( int i = 0; i < A.Rows(); i++ ) sum += Abs( A(i,j) );
+            if( sum > norm ) norm = sum;
+        }
+        return norm;
+    }
+
+    // Returns the square matrix with the elements of the vector d along
+    // its diagonal.
+    Matrix Diag( const Vector &d )
+    {
+        Matrix D( d.Size() );
+        for( int i = 0; i < d.Size(); i++ ) D(i,i) = d(i);
+        return D;
+    }
+
+    // Returns the 3 x 3 diagonal matrix with x, y, and z as its diagonal
+    // elements.
+    Matrix Diag( float x, float y, float z )
+    {
+        Matrix D(3,3);
+        D(0,0) = x;
+        D(1,1) = y;
+        D(2,2) = z;
+        return D;
+    }
+
+    // Returns the vector consisting of the diagonal elements of the
+    // matrix M, which need not be square.
+    Vector Diag( const Matrix &M )
+    {
+        int m = Min( M.Rows(), M.Cols() );
+        Vector V(m);
+        for( int i = 0; i < m; i++ ) V(i) = M(i,i);
+        return V;
+    }
+
+    // Returns the n x n identity matrix.
+    Matrix Ident( int n )
+    {
+        Matrix I( n );
+        for( int i = 0; i < n; i++ ) I(i,i) = 1.0;
+        return I;
+    }
+
+    // Determines whether the matrix M is "Null" -- i.e. has zero rows
+    // or columns.
+    int Null( const Matrix &M )
+    {
+        return M.Rows() == 0 || M.Cols() == 0;
+    }
+
+    // Determines whether the matrix M has as many rows as columns.
+    int Square( const Matrix &M )
+    {
+        return M.Rows() == M.Cols();
+    }
+
+    // Convert a "vector-shaped" matrix to a vector.  That is, represent a
+    // matrix with a single row or a single column as a vector.
+    Vector ToVector( const Matrix &M )
+    {
+        if( M.Rows() == 1 )
+        {
+            Vector V( M.Cols() );
+            for( int j = 0; j < M.Cols(); j++ ) V(j) = M(0,j);
+            return V;
+        }
+        else if( M.Cols() == 1 )
+        {
+            Vector V( M.Rows() );
+            for( int i = 0; i < M.Rows(); i++ ) V(i) = M(i,0);
+            return V;
+        }
+        else
+        {
+            // Report an error.
+            assert( M.Rows() == 1 || M.Cols() == 1 );
+        }
+        return Vector();
+    }
+
+    // Print M one row per line, each line prefixed with its row index.  A
+    // matrix with no rows or no columns is printed as "NULL".
+    std::ostream &operator<<( std::ostream &out, const Matrix &M )
+    {
+        if( M.Rows() == 0 || M.Cols() == 0 )
+        {
+            out << "NULL" << std::endl;
+        }
+        // "register" is no longer a valid storage class in C++17.
+        else for( int i = 0; i < M.Rows(); i++ )
+        {
+            out << form( "%3d: ", i );
+            for( int j = 0; j < M.Cols(); j++ )
+                out << form( " %10.5g", M(i,j) );
+            out << std::endl;
+        }
+        return out;
+    }
+
+    /*-------------------------------------------------------------------------*
+     *  R O T A T I O N                                                        *
+     *                                                                         *
+     *  Builds a 3x3 modeling matrix that performs a rotation about an         *
+     *  arbitrary axis.  The rotation is right-handed about this axis and      *
+     *  "angle" is taken to be in radians.  The only error that can occur is   *
+     *  when "axis" is the zero-vector.                                        *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    Matrix Rotation( const Vector &Axis, float angle )
+    {
+        // Compute a unit quaternion (a,b,c,d) that performs the rotation.
+
+        float t = TwoNormSqr( Axis );
+        if( t == 0.0 ) return Matrix(3,3);  // Zero axis: return a default-filled 3x3 matrix.
+        t = sin( angle * 0.5 ) / sqrt( t );
+
+        // Fill in the entries of the quaternion.
+
+        float a = cos( angle * 0.5 );
+        float b = t * Axis(0);
+        float c = t * Axis(1);
+        float d = t * Axis(2);
+
+        // Compute all the double products of a, b, c, and d, except a * a.
+
+        float bb = b * b;
+        float cc = c * c;
+        float dd = d * d;
+        float ab = a * b;
+        float ac = a * c;
+        float ad = a * d;
+        float bc = b * c;
+        float bd = b * d;
+        float cd = c * d;
+
+        // Fill in the entries of the rotation matrix.
+
+        Matrix R(3,3);
+
+        R(0,0) = 1.0 - 2.0 * ( cc + dd );
+        R(0,1) = 2.0 * ( bc + ad );
+        R(0,2) = 2.0 * ( bd - ac );
+
+        R(1,0) = 2.0 * ( bc - ad );
+        R(1,1) = 1.0 - 2.0 * ( bb + dd );
+        R(1,2) = 2.0 * ( cd + ab );
+
+        R(2,0) = 2.0 * ( bd + ac );
+        R(2,1) = 2.0 * ( cd - ab );
+        R(2,2) = 1.0 - 2.0 * ( bb + cc );
+
+        return R;
+    }
+
+    /*-------------------------------------------------------------------------*
+     *  R O T A T I O N                                                        *
+     *                                                                         *
+     *  Builds a 4x4 modeling matrix that performs a rotation about an         *
+     *  arbitrary axis through an arbitrary point.  The rotation is            *
+     *  right-handed about this axis and "angle" is taken to be in radians.    *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    Matrix Rotation( const Vector &Axis, const Vector &Origin, float angle )
+    {
+        Matrix R = Rotation( Axis, angle ); // A simple 3x3 rotation.
+        Matrix M = Ident(4);                // A 4x4 including translation.
+
+        // Compute the last column of the matrix (the translation) using the
+        // 3x3 rotation matrix.  We need to compute the last column of the 4x4
+        // matrix that performs Translate( -Origin ) * Rotate * Translate( Origin ).
+        //
+        //    | I  p | | R  0 | | I -p |     | R  p - Rp |
+        //    |      | |      | |      |  =  |           |
+        //    | 0  1 | | 0  1 | | 0  1 |     | 0    1    |
+        //
+        // So, the desired column is p - R p.
+
+        Vector V( Origin - R * Origin );
+        for( int i = 0; i < 3; i++ )
+        {
+            M(i,3) = V(i);              // Translation goes in the fourth column.
+            for( int j = 0; j < 3; j++ )
+                M(i,j) = R(i,j);        // Upper-left 3x3 block is the rotation.
+        }
+        return M;
+    }
+
+    /*-------------------------------------------------------------------------*
+     *  X R O T A T I O N                                                      *
+     *                                                                         *
+     *  Builds a 3x3 modeling matrix that performs a rotation about the X-axis.
* + * * + *-------------------------------------------------------------------------*/ + Matrix Xrotation( float angle ) + { + Matrix M = Ident(3); + float c = cos( angle ); + float s = sin( angle ); + M(1,1) = c; M(1,2) = -s; + M(2,1) = s; M(2,2) = c; + return M; + } + + /*-------------------------------------------------------------------------* + * Y R O T A T I O N * + * * + * Builds a 3x3 modeling matrix that performs a rotation about the Y-axis. * + * * + *-------------------------------------------------------------------------*/ + Matrix Yrotation( float angle ) + { + Matrix M = Ident(3); + float c = cos( angle ); + float s = sin( angle ); + M(0,0) = c; M(0,2) = -s; + M(2,0) = s; M(2,2) = c; + return M; + } + + /*-------------------------------------------------------------------------* + * Z R O T A T I O N * + * * + * Builds a 3x3 modeling matrix that performs a rotation about the Z-axis. * + * * + *-------------------------------------------------------------------------*/ + Matrix Zrotation( float angle ) + { + Matrix M = Ident(3); + float c = cos( angle ); + float s = sin( angle ); + M(0,0) = c; M(0,1) = -s; + M(1,0) = s; M(1,1) = c; + return M; + } + + /*-------------------------------------------------------------------------* + * H O U S E H O L D E R * + * * + * Returns the Householder reflection matrix that reflects through the * + * plane orthogonal to V. The vector V is not assumed to be normalized. * + * * + *-------------------------------------------------------------------------*/ + Matrix Householder( const Vector &V ) + { + Matrix I = Ident( V.Size() ); + float c = 2.0 / ( V * V ); + return I - Outer( c * V, V ); + } + + /*=========================================================================* + * R O T A T I O N Author: Jim Arvo, 1991 * + * * + * This routine maps three values (x1, x2, x3) in the range [0,1] into * + * a 3x3 rotation matrix, M. 
Uniformly distributed random variables * + * x1, x2, and x3 create uniformly distributed random rotation matrices. * + * To create small uniformly distributed "perturbations", supply * + * samples in the following ranges * + * * + * x1 in [ 0, d ] * + * x2 in [ 0, 1 ] * + * x3 in [ 0, d ] * + * * + * where 0 < d < 1 controls the size of the perturbation. Any of the * + * random variables may be stratified (or "jittered") for a slightly more * + * even distribution. * + * * + *=========================================================================*/ + Matrix Rotation( float x1, float x2, float x3 ) + { + Matrix M(3,3); + float theta = x1 * TwoPi; // Rotation about the pole (Z). + float phi = x2 * TwoPi; // For direction of pole deflection. + float z = x3 * 2.0; // For magnitude of pole deflection. + + // Compute a vector V used for distributing points over the sphere + // via the reflection I - V Transpose(V). This formulation of V + // will guarantee that if x1 and x2 are uniformly distributed, + // the reflected points will be uniform on the sphere. Note that V + // has length sqrt(2) to eliminate the 2 in the Householder matrix. + + float r = sqrt( z ); + float Vx = sin( phi ) * r; + float Vy = cos( phi ) * r; + float Vz = sqrt( 2.0 - z ); + + // Compute the row vector S = Transpose(V) * R, where R is a simple + // rotation by theta about the z-axis. No need to compute Sz since + // it's just Vz. + + float st = sin( theta ); + float ct = cos( theta ); + float Sx = Vx * ct - Vy * st; + float Sy = Vx * st + Vy * ct; + + // Construct the rotation matrix ( V Transpose(V) - I ) R, which + // is equivalent to V S - R. 
+ + M(0,0) = Vx * Sx - ct; + M(0,1) = Vx * Sy - st; + M(0,2) = Vx * Vz; + + M(1,0) = Vy * Sx + st; + M(1,1) = Vy * Sy - ct; + M(1,2) = Vy * Vz; + + M(2,0) = Vz * Sx; + M(2,1) = Vz * Sy; + M(2,2) = 1.0 - z; // This equals Vz * Vz - 1.0 + + return M; + } + + /*-------------------------------------------------------------------------* + * P A R T I A L P I V O T * + * * + * Look for the element with the largest magnitude on or below the * + * diagonal in column "col" of the matrix A. Bring this element to the * + * diagonal by a row interchange. Perform the same row interchange on b. * + * * + *-------------------------------------------------------------------------*/ + static int PartialPivot( int col, Matrix &A, Vector &b ) + { + int n = A.Cols(); + float a_max = Abs( A( col, col ) ); + int i_max = col; + for( int i = col + 1; i < n; i++ ) + { + float temp = Abs( A( i, col ) ); + if( temp > a_max ) + { + a_max = temp; + i_max = i; + } + } + if( a_max == 0.0 ) return 0; + if( i_max != col ) + { + A.SwapRows( col, i_max ); + b.Swap ( col, i_max ); + } + return 1; + } + + /*-------------------------------------------------------------------------* + * G A U S S I A N E L I M I N A T I O N * + * * + * Solves the linear system A x = b using Gaussian elimination, with or * + * without partial pivoting. * + * * + *-------------------------------------------------------------------------*/ + int GaussElimination( const Matrix &A, const Vector &b, Vector &x, pivot_type pivot ) + { + assert( Square( A ) ); + assert( A.Rows() == b.Size() ); + Matrix B( A ); + Vector c( b ); + x.SetSize( A.Cols() ); + int m = B.Rows(); + register int i, j, k; + + // Perform Gaussian elimination on the copies, B and c. 
+ + for( i = 0; i < m; i++ ) + { + if( pivot == pivot_partial ) PartialPivot( i, B, c ); + + for( j = i + 1; j < m; j++ ) + { + double scale = -B(j,i) / B(i,i); + for( k = i; k < m; k++ ) + B(j,k) += scale * B(i,k); + B(j,i) = 0.0; + c(j) += scale * c(i); + } + } + + // Now solve by back substitution. + + for( i = m - 1; i >= 0; i-- ) + { + double a = 0.0; + for( j = i + 1; j < m; j++ ) a += B(i,j) * x(j); + x(i) = ( c(i) - a ) / B(i,i); + } + + return 1; + } + + /*-------------------------------------------------------------------------* + * L E A S T S Q U A R E S * + * * + * Solves the normal equations associated with the system A x = b, which * + * are given by Transp(A) A x = Transp(A) b. * + * * + *-------------------------------------------------------------------------*/ + int LeastSquares( const Matrix &A, const Vector &b, Vector &x ) + { + // + // Set up and solve the normal equations Transp(A) A x = Transp(A) b. + // Note that Transp(A) * b is computed here as b * A. + // + GaussElimination( ATranspA(A), b * A, x ); + return 1; + } + + /*-------------------------------------------------------------------------* + * D E T E R M I N A N T * + * * + * Computes the determinant of the n by n matrix M using Householder * + * transformations. * + * * + *-------------------------------------------------------------------------*/ + double Determinant( const Matrix &M ) + { + static const float MachEps = MachineEpsilon(); + assert( Square(M) ); + + double dot; + int k; + Matrix A = M; // Make a copy that we can destroy. + double det = 1.0; // Multiply diagonal elements as they are generated. + int sign = 1; // Keep track of sign (each reflection has det -1). + int n = M.Cols(); + + for( int i = 0; i < n - 1; i++ ) + { + // Compute the 2-norm of the first column of the (n-i)x(n-i) submatrix. 
+ + dot = 0.0; + for( k = i; k < n; k++ ) dot += Sqr( A(k,i) ); + + double Xnorm = sqrt( dot ); + if( Xnorm == 0.0 ) return 0.0; + + // This norm is another diagonal element of the upper triangular + // matrix, so we multiply it into the running product for det. + + det *= Xnorm; + + // If X is already of the right form we must not perform the + // processing because V will be zero. + + float x1 = Abs( A(i,i) ); + float diff = Abs( Xnorm - x1 ); + if( diff < MachEps * Max( Xnorm, x1 ) ) continue; // This column is okay as is. + + // Each Householder transformation has a determinant of -1, + // so we must keep track of how many we apply. + + sign *= -1; + + // Compute the V vector, which will define the Householder + // transformation via H = I - V transp(V). Leave it in the + // i'th column of A. V = sqrt(2) * Normalized( X - ( Xnorm, 0, 0,... ) ). + + float scale = 1.0 / sqrt( Xnorm * Abs( A(i,i) - Xnorm ) ); // sqrt(2) / || p || + A(i,i) = ( A(i,i) - Xnorm ) * scale; + for( k = i + 1; k < n; k++ ) A(k,i) *= scale; + + // Now apply the transformation I - V Transp(V) to all the remaining columns, + // except for the first row. + + for( int j = i + 1; j < n; j++ ) + { + // Compute Y dot V. + + dot = 0.0; + for( k = i; k < n; k++ ) dot += A(k,i) * A(k,j); + + // Subtract V ( V dot A(*,j) ) from A(*,j), ignoring the first row. + + for( k = i + 1; k < n; k++ ) A(k,j) -= A(k,i) * dot; + + } // for j + + } // for i + + // Now multiply in the very last element of the matrix and + // the accumulated sign. + + return det * A(n-1,n-1) * sign; + } + + /*-------------------------------------------------------------------------* + * C O F A C T O R * + * * + * Computes the (i,j) cofactor of the n by n matrix M. 
* + * * + *-------------------------------------------------------------------------*/ + double Matrix::Cofactor( int omit_i, int omit_j ) const + { + assert( Square( *this ) ); + assert( omit_i >= 0 && omit_j >= 0 ); + assert( omit_i < Rows() ); + assert( omit_j < Cols() ); + + // Create a new matrix that is smaller by one in both dimensions and + // copy the old matrix into it, omitting the specified row and column. + + Matrix A( Rows() - 1, Cols() - 1 ); + for( int i = 0; i < Rows() - 1; i++ ) + { + int ii = ( i < omit_i ) ? i : i + 1; + for( int j = 0; j < Cols() - 1; j++ ) + { + int jj = ( j < omit_j ) ? j : j + 1; + A( i, j ) = (*this)(ii,jj); + } + } + + // Return the determinant of the smaller matrix. + + return Determinant( A ); + } + + /*-------------------------------------------------------------------------* + * A D J O I N T * + * * + * Computes the adjoint of a matrix. * + * * + *-------------------------------------------------------------------------*/ + Matrix Adjoint( const Matrix &M ) + { + double det; + return Adjoint( M, det ); // Discard the determinant. 
+ } + + Matrix Adjoint( const Matrix &M, double &det ) + { + int n = M.Rows(); + det = 0.0; + Matrix A( n, n ); + assert( Square(M) ); + if( n == 3 ) + { + A(0,0) = M(1,1) * M(2,2) - M(1,2) * M(2,1); + A(0,1) = M(1,2) * M(2,0) - M(1,0) * M(2,2); + A(0,2) = M(1,0) * M(2,1) - M(1,1) * M(2,0); + + A(1,0) = M(0,2) * M(2,1) - M(0,1) * M(2,2); + A(1,1) = M(0,0) * M(2,2) - M(0,2) * M(2,0); + A(1,2) = M(0,1) * M(2,0) - M(0,0) * M(2,1); + + A(2,0) = M(0,1) * M(1,2) - M(0,2) * M(1,1); + A(2,1) = M(0,2) * M(1,0) - M(0,0) * M(1,2); + A(2,2) = M(0,0) * M(1,1) - M(0,1) * M(1,0); + + det = A(0,0) * M(0,0) + A(1,0) * M(1,0) + A(2,0) * M(2,0); + } + else + { + for( register int i = 0; i < n; i++ ) + { + for( register int j = 0; j < n; j++ ) + { + if( Odd( i + j ) ) + A(i,j) = -M.Cofactor(i,j); + else A(i,j) = M.Cofactor(i,j); + } + det += M(i,0) * A(i,0); + } + } + return A; + } + + /*-------------------------------------------------------------------------* + * I N V E R S E * + * * + * Computes the inverse of a square matrix. * + * * + *-------------------------------------------------------------------------*/ + Matrix Inverse( const Matrix &M ) + { + assert( Square( M ) ); + int n = M.Cols(); + Matrix Inv( n, n ); + Vector b( n ), x( n ); + + for( int i = 0; i < n; i++ ) + { + if( i > 0 ) b( i - 1 ) = 0.0; + b(i) = 1.0; + GaussElimination( M, b, x ); + Inv.SetCol( i, x ); + } + return Inv; + } + + /*-------------------------------------------------------------------------* + * T R A C E * + * * + * Computes the trace of a square matrix. * + * * + *-------------------------------------------------------------------------*/ + extern double Trace( const Matrix &M ) + { + assert( Square(M) ); + double trace = M(0,0); + for( int i = 1; i < M.Cols(); i++ ) trace += M(i,i); + return trace; + } +}; + + + +/* + +C +C Subroutine GAUSS solves the the system Ax = b by using Gaussian elimination. 
+C + +SUBROUTINE GAUSS( A, B, X, LDA, N, IFLAG ) +REAL A( LDA, N ), B( N ), X( N ) + +DO 300 I = 1 , N - 1 +I2 = I +CALL PIVOT( A, B, LDA, N, I2, IFLAG ) +IF ( IFLAG .LT. 0 ) RETURN +DO 200 J = I + 1 , N +TEMP = A( J , I ) / A( I , I ) +A( J , I ) = 0.0 +B( J ) = B( J ) - TEMP * B( I ) +DO 100 K = I + 1 , N +A( J , K ) = A( J , K ) - TEMP * A( I , K ) +100 CONTINUE +200 CONTINUE +300 CONTINUE + +X( N ) = B( N ) / A( N , N ) +DO 500 I = N - 1 , 1 , -1 +TEMP = 0.0 +DO 400 J = I + 1 , N +TEMP = TEMP + A( I , J ) * X( J ) +400 CONTINUE +X( I ) = ( B( I ) - TEMP ) / A( I , I ) +500 CONTINUE + +RETURN +END + + + +SUBROUTINE PIVOT( A, B, LDA, N, J, IFLAG ) +REAL A( LDA, N ), B( N ), AMAX, TEMP +DATA TOL / 1.0E-6 / + +IFLAG = -1 +IF ( J .GT. N ) RETURN +IF ( J .EQ. N .AND. ABS( A(N,N) ) .LT. TOL ) RETURN +IF ( J .EQ. N ) GO TO 40 + +AMAX = ABS( A( J , J ) ) +INDEX = J +10 DO 20 I = J + 1 , N +IF ( ABS( A( I , J ) ) .LE. AMAX ) GO TO 20 +AMAX = ABS( A( I , J ) ) +INDEX = I +20 CONTINUE + +IF ( AMAX .LT. TOL ) RETURN + +TEMP = B( J ) +B( J ) = B( INDEX ) +B( INDEX ) = TEMP + +DO 30 K = 1 , N +TEMP = A( J , K ) +A( J , K ) = A( INDEX , K ) +A( INDEX , K ) = TEMP +30 CONTINUE + +40 IFLAG = 1 +RETURN +END + + +*/ + + + + + diff --git a/src/nvtt/bc6h/arvo/Matrix.h b/src/nvtt/bc6h/arvo/Matrix.h new file mode 100755 index 0000000..1832c8f --- /dev/null +++ b/src/nvtt/bc6h/arvo/Matrix.h @@ -0,0 +1,142 @@ +/*************************************************************************** +* Matrix.h * +* * +* General Vector and Matrix classes, with all the associated methods. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 08/16/2000 Revamped for CIT tools. * +* arvo 10/31/1994 Combined RowVec & ColVec into Vector. * +* arvo 06/30/1993 Added singular value decomposition class. * +* arvo 06/25/1993 Major revisions. * +* arvo 09/08/1991 Initial implementation. 
    // A dynamically sized matrix of floats.  The entries live in a single
    // array in row-major order: entry (i,j) is elem[ i * cols + j ].
    class Matrix {
        public:
            Matrix( const Matrix & );
            // Creates a num_rows x num_cols matrix; "value" is the initial fill value.
            Matrix( int num_rows = 0, int num_cols = 0, float value = 0.0 );
           ~Matrix();
            Matrix &operator=( const Matrix &M );
            Matrix &operator=( float s );
            Vector  GetCol( int col ) const;
            Vector  GetRow( int row ) const;
            void    SetCol( int col, const Vector & );
            void    SetRow( int row, const Vector & );
            Matrix  GetBlock( int imin, int imax, int jmin, int jmax ) const;
            void    SetBlock( int imin, int imax, int jmin, int jmax, const Matrix & );
            void    SetBlock( int imin, int imax, int jmin, int jmax, const Vector & );
            Matrix &SwapRows( int i1, int i2 );
            Matrix &SwapCols( int j1, int j2 );
            void    SetSize( int rows, int cols = 0 );
            // Determinant of this matrix with row i and column j removed (no sign applied).
            double  Cofactor( int i, int j ) const;
            static  const Matrix Null;

        public: // Inlined functions.
            // Element access.  Neither overload checks i or j against the
            // matrix dimensions.
            inline float  operator()( int i, int j ) const { return elem[ i * cols + j ]; }
            inline float &operator()( int i, int j )       { return elem[ i * cols + j ]; }
            inline int    Rows  () const { return rows; }
            inline int    Cols  () const { return cols; }
            // Raw pointer to the row-major storage.  The pointer is non-const
            // even when the matrix is const, so writes through it bypass const.
            inline float *Array () const { return elem; }

        private:
            int    rows; // Number of rows in the matrix.
            int    cols; // Number of columns in the matrix.
            float *elem; // Pointer to the actual data.
        };
    // Pivoting strategy requested from GaussElimination.
    // NOTE(review): the implementation in Matrix.cpp only tests for
    // pivot_partial; pivot_total appears to behave like pivot_off.
    enum pivot_type {
        pivot_off,
        pivot_partial,
        pivot_total
        };

    // Solves A x = b by Gaussian elimination and back substitution.
    extern int GaussElimination(
        const Matrix &A,
        const Vector &b, // This is the right-hand side.
        Vector &x,       // This is the vector we are solving for.
        pivot_type = pivot_off
        );

    // Solves the normal equations Transp(A) A x = Transp(A) b.
    extern int LeastSquares(
        const Matrix &A,
        const Vector &b,
        Vector &x
        );

    // Declared here; presumably the weighted variant of LeastSquares with
    // weights w -- TODO confirm against its definition.
    extern int WeightedLeastSquares(
        const Matrix &A,
        const Vector &b,
        const Vector &w,
        Vector &x
        );
As the set of permutations * +* forms a multiplicative group, a multiplication operator and an * +* exponentiation operator are also defined. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 07/01/93 Added the Partition class. * +* arvo 03/23/93 Initial coding. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1999, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#include +#include +#include "Perm.h" +#include "ArvoMath.h" +#include "Char.h" + +namespace ArvoMath { + + /*************************************************************************** + * + * L O C A L F U N C T I O N S + * + ***************************************************************************/ + + static void Reverse( int *p, int n ) + { + int k = n >> 1; + int m = n - 1; + for( int i = 0; i < k; i++ ) Swap( p[i], p[m-i] ); + } + + static void Error( char *msg ) + { + fprintf( stderr, "ERROR: Perm, %s.\n", msg ); + } + + /*************************************************************************** + ** + ** M E M B E R F U N C T I O N S + ** + ***************************************************************************/ + + Perm::Perm( int Left, int Right ) + { + a = ( Left < Right ) ? Left : Right; + b = ( Left > Right ) ? 
Left : Right; + p = new int[ Size() ]; + Reset( *this ); + } + + Perm::Perm( const Perm &Q ) + { + a = Q.Min(); + b = Q.Max(); + p = new int[ Q.Size() ]; + for( int i = 0; i < Size(); i++ ) p[i] = Q[i]; + } + + Perm::Perm( const char *str ) + { + (*this) = str; + } + + Perm &Perm::operator=( const char *str ) + { + int k, m = 0, n = 0; + char dig[10]; + char c; + if( p != NULL ) delete[] p; + p = new int[ strlen(str)/2 + 1 ]; + for(;;) + { + c = *str++; + if( isDigit(c) ) dig[m++] = c; + else if( m > 0 ) + { + dig[m] = NullChar; + sscanf( dig, "%d", &k ); + if( n == 0 ) a = k; else if( k < a ) a = k; + if( n == 0 ) b = k; else if( k > b ) b = k; + p[n++] = k; + m = 0; + } + if( c == NullChar ) break; + } + for( int i = 0; i < n; i++ ) + { + int N = i + a; + int okay = 0; + for( int j = 0; j < n; j++ ) + if( p[j] == N ) { okay = 1; break; } + if( !okay ) + { + Error( "string is not a valid permutation" ); + return *this; + } + } + return *this; + } + + void Perm::Get( char *str ) const + { + for( int i = 0; i < Size(); i++ ) + str += sprintf( str, "%d ", p[i] ); + *str = NullChar; + } + + int Perm::Next() + { + int i, m, k = 0; + int N, M = 0; + + // Look for the first element of p that is larger than its successor. + // If no such element exists, we are done. + + M = p[0]; // M is always the "previous" value. + for( i = 1; i < Size(); i++ ) // Now start with second element. + { + if( p[i] > M ) { k = i; break; } + M = p[i]; + } + if( k == 0 ) return 0; // Already in descending order. + m = k - 1; + + // Find the largest entry before k that is less than p[k]. + // One exists because p[k] is bigger than M, i.e. p[k-1]. + + N = p[k]; + for( i = 0; i < k - 1; i++ ) + { + if( p[i] < N && p[i] > M ) { M = p[i]; m = i; } + } + Swap( p[m], p[k] ); // Entries 0..k-1 are still decreasing. + Reverse( p, k ); // Make first k elements increasing. 
+ return 1; + } + + int Perm::Prev() + { + int i, m, k = 0; + int N, M = 0; + + // Look for the first element of p that is less than its successor. + // If no such element exists, we are done. + + M = p[0]; // M will always be the "previous" value. + for( i = 1; i < Size(); i++ ) // Start with the second element. + { + if( p[i] < M ) { k = i; break; } + M = p[i]; + } + if( k == 0 ) return 0; // Already in ascending order. + m = k - 1; + + // Find the smallest entry before k that is greater than p[k]. + // One exists because p[k] is less than M, i.e. p[k-1]. + + N = p[k]; + for( i = 0; i < k - 1; i++ ) + { + if( p[i] > N && p[i] < M ) { M = p[i]; m = i; } + } + Swap( p[m], p[k] ); // Entries 0..k-1 are still increasing. + Reverse( p, k ); // Make first k elements decreasing. + return 1; + } + + + /*************************************************************************** + ** + ** O P E R A T O R S + ** + ***************************************************************************/ + + int Perm::operator++() + { + return Next(); + } + + int Perm::operator--() + { + return Prev(); + } + + Perm &Perm::operator+=( int n ) + { + int i; + if( n > 0 ) for( i = 0; i < n; i++ ) if( !Next() ) break; + if( n < 0 ) for( i = n; i < 0; i++ ) if( !Prev() ) break; + return *this; + } + + Perm &Perm::operator-=( int n ) + { + int i; + if( n > 0 ) for( i = 0; i < n; i++ ) if( !Prev() ) break; + if( n < 0 ) for( i = n; i < 0; i++ ) if( !Next() ) break; + return *this; + } + + int Perm::operator[]( int n ) const + { + if( n < 0 || Size() <= n ) + { + Error( "permutation index[] out of range" ); + return 0; + } + return p[ n ]; + } + + int Perm::operator()( int n ) const + { + if( n < Min() || Max() < n ) + { + Error( "permutation index() out of range" ); + return 0; + } + return p[ n - Min() ]; + } + + Perm &Perm::operator=( const Perm &Q ) + { + if( Size() != Q.Size() ) + { + delete[] p; + p = new int[ Q.Size() ]; + } + a = Q.Min(); + b = Q.Max(); + for( int i = 0; i < Size(); i++ ) 
p[i] = Q[i]; + return *this; + } + + Perm Perm::operator*( const Perm &Q ) const + { + if( Min() != Q.Min() ) return Perm(0); + if( Max() != Q.Max() ) return Perm(0); + Perm A( Min(), Max() ); + for( int i = 0; i < Size(); i++ ) A.Elem(i) = p[ Q[i] - Min() ]; + return A; + } + + Perm Perm::operator^( int n ) const + { + Perm A( Min(), Max() ); + int pn = n; + if( n < 0 ) // First compute the inverse. + { + for( int i = 0; i < Size(); i++ ) + A.Elem( p[i] - Min() ) = i + Min(); + pn = -n; + } + for( int i = 0; i < Size(); i++ ) + { + int k = ( n < 0 ) ? A[i] : p[i]; + for( int j = 1; j < pn; j++ ) k = p[ k - Min() ]; + A.Elem(i) = k; + } + return A; + } + + Perm &Perm::operator()( int i, int j ) + { + Swap( p[ i - Min() ], p[ j - Min() ] ); + return *this; + } + + int Perm::operator==( const Perm &Q ) const + { + int i; + if( Min() != Q.Min() ) return 0; + if( Max() != Q.Max() ) return 0; + for( i = 0; i < Size(); i++ ) if( p[i] != Q[i] ) return 0; + return 1; + } + + int Perm::operator<=( const Perm &Q ) const + { + int i; + if( Min() != Q.Min() ) return 0; + if( Max() != Q.Max() ) return 0; + for( i = 0; i < Size(); i++ ) if( p[i] != Q[i] ) return p[i] < Q[i]; + return 1; + } + + void Reset( Perm &P ) + { + for( int i = 0; i < P.Size(); i++ ) P.Elem(i) = P.Min() + i; + } + + int End( const Perm &P ) + { + int c = P[0]; + for( int i = 1; i < P.Size(); i++ ) + { + if( c < P[i] ) return 0; + c = P[i]; + } + return 1; + } + + void Print( const Perm &P ) + { + if( P.Size() > 0 ) + { + printf( "%d", P[0] ); + for( int i = 1; i < P.Size(); i++ ) printf( " %d", P[i] ); + printf( "\n" ); + } + } + + int Even( const Perm &P ) + { + return !Odd( P ); + } + + int Odd( const Perm &P ) + { + int count = 0; + Perm Q( P ); + for( int i = P.Min(); i < P.Max(); i++ ) + { + if( Q(i) == i ) continue; + for( int j = P.Min(); j <= P.Max(); j++ ) + { + if( j == i ) continue; + if( Q(j) == i ) + { + Q(i,j); + count = ( j - i ) + ( count % 2 ); + } + } + } + return count % 2; + } + + + 
/*************************************************************************** + ** + ** P A R T I T I O N S + ** + ***************************************************************************/ + + Partition::Partition( ) + { + Bin = NULL; + bins = 0; + balls = 0; + } + + Partition::Partition( const Partition &Q ) + { + Bin = new int[ Q.Bins() ]; + bins = Q.Bins(); + balls = Q.Balls(); + for( int i = 0; i < bins; i++ ) Bin[i] = Q[i]; + } + + Partition::Partition( int bins_, int balls_ ) + { + bins = bins_; + balls = balls_; + Bin = new int[ bins ]; + Reset( *this ); + } + + void Partition::operator+=( int bin ) // Add a ball to this bin. + { + if( bin < 0 || bin >= bins ) fprintf( stderr, "ERROR -- bin number out of range.\n" ); + balls++; + Bin[ bin ]++; + } + + int Partition::operator==( const Partition &P ) const // Compare two partitions. + { + if( Balls() != P.Balls() ) return 0; + if( Bins () != P.Bins () ) return 0; + for( int i = 0; i < bins; i++ ) + { + if( Bin[i] != P[i] ) return 0; + } + return 1; + } + + void Partition::operator=( int n ) // Set to the n'th configuration. + { + Reset( *this ); + for( int i = 0; i < n; i++ ) ++(*this); + } + + int Partition::operator!=( const Partition &P ) const + { + return !( *this == P ); + } + + void Partition::operator=( const Partition &Q ) + { + if( bins != Q.Bins() ) + { + delete[] Bin; + Bin = new int[ Q.Bins() ]; + } + bins = Q.Bins(); + balls = Q.Balls(); + for( int i = 0; i < bins; i++ ) Bin[i] = Q[i]; + } + + void Partition::Get( char *str ) const + { + for( int i = 0; i < bins; i++ ) + str += sprintf( str, "%d ", Bin[i] ); + *str = NullChar; + } + + int Partition::operator[]( int i ) const + { + if( i < 0 || i >= bins ) return 0; + else return Bin[i]; + } + + long Partition::NumCombinations() const // How many distinct configurations. + { + // Think of the k "bins" as being k - 1 "partitions" mixed in with + // the n "balls". 
If the balls and partitions were each distinguishable + // objects, there would be (n + k - 1)! distinct configurations. + // But since both the balls and the partitions are indistinguishable, + // we simply divide by n! (k - 1)!. This is the binomial coefficient + // ( n + k - 1, n ). + // + if( balls == 0 ) return 0; + if( bins == 1 ) return 1; + return (long)floor( BinomialCoeff( balls + bins - 1, balls ) + 0.5 ); + } + + /*************************************************************************** + * O P E R A T O R + + (Next Partition) * + * * + * Rearranges the n "balls" in k "bins" into the next configuration. * + * The first config is assumed to be all balls in the first bin -- i.e. * + * Bin[0]. All possible groupings are generated, each exactly once. The * + * function returns 1 if successful, 0 if the last config has already been * + * reached. (Algorithm by Harold Zatz) * + * * + ***************************************************************************/ + int Partition::operator++() + { + int i; + if( Bin[0] > 0 ) + { + Bin[1] += 1; + Bin[0] -= 1; + } + else + { + for( i = 1; Bin[i] == 0; i++ ); + if( i == bins - 1 ) return 0; + Bin[i+1] += 1; + Bin[0] = Bin[i] - 1; + Bin[i] = 0; + } + return 1; + } + + void Reset( Partition &P ) + { + P.Bin[0] = P.Balls(); + for( int i = 1; i < P.Bins(); i++ ) P.Bin[i] = 0; + } + + int End( const Partition &P ) + { + return P[ P.Bins() - 1 ] == P.Balls(); + } + + void Print( const Partition &P ) + { + if( P.Bins() > 0 ) + { + printf( "%d", P[0] ); + for( int i = 1; i < P.Bins(); i++ ) printf( " %d", P[i] ); + printf( "\n" ); + } + } +}; diff --git a/src/nvtt/bc6h/arvo/Perm.h b/src/nvtt/bc6h/arvo/Perm.h new file mode 100755 index 0000000..2af4776 --- /dev/null +++ b/src/nvtt/bc6h/arvo/Perm.h @@ -0,0 +1,111 @@ +/*************************************************************************** +* Perm.h * +* * +* This file defines permutation class: that is, a class for creating and * +* manipulating finite sequences of 
distinct integers.  The main feature *
+* of the class is the "++" operator that can be used to step through all   *
+* N! permutations of a sequence of N integers.  As the set of permutations *
+* forms a multiplicative group, a multiplication operator and an           *
+* exponentiation operator are also defined.                                *
+*                                                                          *
+*   HISTORY                                                                *
+*      Name    Date        Description                                     *
+*                                                                          *
+*      arvo    07/01/93    Added the Partition class.                      *
+*      arvo    03/23/93    Initial coding.                                 *
+*                                                                          *
+*--------------------------------------------------------------------------*
+* Copyright (C) 1999, James Arvo                                           *
+*                                                                          *
+* This program is free software; you can redistribute it and/or modify it  *
+* under the terms of the GNU General Public License as published by the    *
+* Free Software Foundation.  See http://www.fsf.org/copyleft/gpl.html      *
+*                                                                          *
+* This program is distributed in the hope that it will be useful, but      *
+* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for    *
+* any particular purpose.  See the GNU General Public License for more     *
+* details.                                                                 *
+*                                                                          *
+***************************************************************************/
+#ifndef __PERM_INCLUDED__
+#define __PERM_INCLUDED__
+
+namespace ArvoMath {
+
+    // A permutation of the consecutive integers Min()..Max().  The element
+    // storage "p" is owned by the object; it is indexed as an array (see
+    // Elem and operator[]), so it is released with delete[].
+    // NOTE(review): the allocation lives in Perm.cpp -- confirm it uses new[].
+    class Perm {
+    public:
+        Perm( const Perm & );                      // Initialize from a permutation.
+        Perm( int a = 0, int b = 0 );              // Create permutation of ints a...b.
+        Perm( const char * );                      // Create from string of numbers.
+       ~Perm() { delete[] p; }                     // Destructor: frees the element array.
+        void  Get( char * ) const;                 // Gets a string representation.
+        int   Size() const { return b - a + 1;}    // The number of elements.
+        int   Min () const { return a; }           // The smallest value.
+        int   Max () const { return b; }           // The largest value.
+        int   operator++();                        // Make "next" permutation.
+        int   operator--();                        // Make "previous" permutation.
+        Perm &operator+=( int n );                 // Advances by n permutations.
+        Perm &operator-=( int n );                 // Decrement by n permutations.
+        Perm &operator =( const char * ) ;         // Resets from string of numbers.
+        Perm &operator =( const Perm & ) ;         // Copy from another permutation.
+        Perm &operator()( int i, int j ) ;         // Swap entries i and j.
+        int   operator()( int n ) const;           // Index from Min() to Max().
+        int   operator[]( int n ) const;           // Index from 0 to Size() - 1.
+        Perm  operator ^( int n ) const;           // Exponentiation: -1 means inverse.
+        Perm  operator *( const Perm & ) const;    // Multiplication means composition.
+        int   operator==( const Perm & ) const;    // True if all elements match.
+        int   operator<=( const Perm & ) const;    // Lexicographic order relation.
+    private:
+        int& Elem( int i ) { return p[i]; }        // Unchecked access to element i.
+        int  Next();
+        int  Prev();
+        int  a, b;                                 // Smallest and largest value.
+        int *p;                                    // Owned array of elements.
+        friend void Reset( Perm & );
+    };
+
+
+    // A "Partition" is a collection of k indistinguishable "balls" in n "bins".
+    // The Partition class encapsulates this notion and provides a convenient means
+    // of generating all possible partitions of k objects among n bins exactly once.
+    // Starting with all objects in bin zero, the ++ operator creates new and distinct
+    // distributions among the bins until all objects are in the last bin.
+    //
+    // The per-bin counts live in the owned array "Bin", released with delete[].
+    // NOTE(review): the allocation lives in Perm.cpp -- confirm it uses new[].
+
+    class Partition {
+    public:
+        Partition( );                              // Creates a null partition.
+        Partition( const Partition & );            // Initialize from another partition.
+        Partition( int bins, int balls );          // Specify # of bins & balls.
+       ~Partition() { delete[] Bin; }              // Destructor: frees the bin array.
+        void Get( char * ) const;                  // Gets a string representation.
+        int  Bins () const { return bins;  }       // The number of bins.
+        int  Balls() const { return balls; }       // The number of balls.
+        void operator+=( int bin );                // Add a ball to this bin.
+        void operator =( int n );                  // Set to the n'th configuration.
+        void operator =( const Partition& );       // Copy from another partition.
+        int  operator==( const Partition& ) const; // Compare two partitions.
+        int  operator!=( const Partition& ) const; // Compare two partitions.
+        int  operator++();                         // Make "next" partition.
+        int  operator[]( int i ) const;            // Return # of balls in bin i.
+        long NumCombinations() const;              // Number of distinct configurations.
+    private:
+        int  bins;                                 // Number of bins.
+        int  balls;                                // Number of balls.
+        int* Bin;                                  // Owned array of per-bin counts.
+        friend void Reset( Partition & );
+    };
+
+
+    // Predicates for determining when a permutation or partition is the last of
+    // the sequence, functions for printing, resetting, and miscellaneous operations.
+
+    extern int  End  ( const Partition & ); // True if all balls in last bin.
+    extern int  End  ( const Perm & );      // True if descending.
+    extern int  Even ( const Perm & );      // True if even # of 2-cycles.
+    extern int  Odd  ( const Perm & );      // True if odd  # of 2-cycles.
+    extern void Print( const Partition & ); // Write to standard out.
+    extern void Print( const Perm & );      // Write to standard out.
+    extern void Reset( Partition & );       // Reset to all balls in bin 0.
+    extern void Reset( Perm & );            // Reset to ascending order.
+};
+#endif
diff --git a/src/nvtt/bc6h/arvo/Rand.cpp b/src/nvtt/bc6h/arvo/Rand.cpp
new file mode 100755
index 0000000..5f3025b
--- /dev/null
+++ b/src/nvtt/bc6h/arvo/Rand.cpp
@@ -0,0 +1,230 @@
+/***************************************************************************
+* Rand.C   (Random Number Generators)                                      *
+*                                                                          *
+* Source file for pseudo-random number utilities.  Rand is the             *
+* base class for several different algorithms for generating pseudo-random *
+* numbers.  Any method can generate individual samples or arrays of        *
+* samples using "Eval".  The random seed can be reset at any time by       *
+* calling "Seed" with any integer.  Random permutations of the integers    *
+* 0,1,...(n-1) are generated by "Perm(n,P)".                               *
+*                                                                          *
+*   HISTORY                                                                *
+*      Name    Date        Description                                     *
+*                                                                          *
+*      arvo    08/04/97    Changed to virtual functions.                   *
+*      arvo    06/06/93    Optimization, especially for array evaluators.  *
+*      arvo    10/06/91    Converted to C++                                *
+*      arvo    11/20/89    Added "gen_seed" function to handle.            *
+*      arvo    10/30/89    "state" allocation now done in rand_alloc.      *
+*      arvo    07/08/89    Initial coding.
* +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1989, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#include +#include +#include "Rand.h" + +namespace ArvoMath { +#ifndef ABS +#define ABS( x ) ((x) > 0 ? (x) : -(x)) +#endif + + /*-------------------------------------------------------------------------* + * M E T H O D 1 * + * * + * From "Numerical Recipes," by William H. Press, Brian P. Flannery, * + * Saul A. Teukolsky, and William T. Vetterling, p. 197. 
*
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    static const long   M1 = 714025;       // Modulus of the congruential sequence.
+    static const long   IA = 1366;         // Multiplier.
+    static const long   IC = 150889;       // Increment.
+    static const double RM = 1.400512E-6;  // Scale applied to a table value (about 1/M1).
+
+    // Returns the next sample.  The previous output selects a slot of the
+    // shuffle table; that slot's value is returned (scaled by RM) and the slot
+    // is refilled from the linear congruential sequence held in "seed".
+    float RandGen_1::Eval()
+    {
+        long offset = 1 + ( 97 * index ) / M1;
+        if( offset > 97 ) offset = 97;      // Clamp to the used slots 1..97.
+        if( offset <  1 ) offset = 1;
+        long *elem = shuffle + offset;
+        const float sample = ( index = *elem ) * RM;
+        *elem = ( seed = ( IA * seed + IC ) % M1 );
+        return sample;
+    }
+
+    // Fills array[0..n-1] with the next n samples; equivalent to calling the
+    // scalar Eval n times, but without the per-sample call.
+    void RandGen_1::Eval( int n, float *array )
+    {
+        long *shfl = shuffle;
+        for( int i = 0; i < n; i++ )
+        {
+            long offset = 1 + ( 97 * index ) / M1;
+            if( offset > 97 ) offset = 97;
+            if( offset <  1 ) offset = 1;
+            long *elem = shfl + offset;
+            *array++ = ( index = *elem ) * RM;
+            *elem = ( seed = ( IA * seed + IC ) % M1 );
+        }
+    }
+
+    // Re-initializes the generator from an arbitrary integer: fills shuffle
+    // slots 1..97 and sets both the congruential state and the first index.
+    void RandGen_1::Seed( long seed )
+    {
+        long t = ( IC + ABS( seed ) + 1 ) % M1;
+        for( int k = 1; k <= 97; k++ )
+        {
+            t = ( IA * t + IC ) % M1;
+            shuffle[k] = ABS( t );
+        }
+        t = ( IA * t + IC ) % M1;
+        // The parameter shadows the data member of the same name, so the
+        // member must be named explicitly; a plain "seed = ..." only changed
+        // the local copy and left the generator state uninitialized.
+        this->seed = ABS( t );
+        index      = ABS( t );
+    }
+
+    /*-------------------------------------------------------------------------*
+     *                         M E T H O D    2                                *
+     *                                                                         *
+     *  From "The Multiple Prime Random Number Generator," by Alexander Haas,  *
+     *  ACM Transactions on Mathematical Software, Vol. 13, No. 4, December    *
+     *  1987, pp. 368-381.
*
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+
+    // Advances the four state words by one step and returns the new value
+    // of r, which lies in 0..9999.  Shared by both Eval overloads so the
+    // recurrence is written only once.
+    static inline long HaasStep( long &r, long &m, long &i, long &j )
+    {
+        if( (m +=     7) >=   9973 ) m -=  9871;
+        if( (i +=  1907) >=  99991 ) i -= 89989;
+        if( (j += 73939) >= 224729 ) j -= 96233;
+        r = ((r * m + i + j) % 100000) / 10;
+        return r;
+    }
+
+    // Returns the next sample: the new r scaled by 1.00010001E-4.
+    float RandGen_2::Eval()
+    {
+        return HaasStep( r, m, i, j ) * 1.00010001E-4;
+    }
+
+    // Fills array[0..n-1] with the next n samples.
+    void RandGen_2::Eval( int n, float *array )
+    {
+        for( int k = 0; k < n; k++ )
+        {
+            *array++ = HaasStep( r, m, i, j ) * 1.00010001E-4;
+        }
+    }
+
+    // Derives the four state words from the seed, then raises m, i and j to
+    // at least 100, 10000 and 128000 respectively.
+    void RandGen_2::Seed( long seed )
+    {
+        r = ABS( seed );
+        m = ABS( seed *  7 );
+        i = ABS( seed * 11 );
+        j = ABS( seed * 13 );
+        if( m <    100 ) m +=    100;
+        if( i <  10000 ) i +=  10000;
+        if( j < 128000 ) j += 128000;
+    }
+
+    /*-------------------------------------------------------------------------*
+     *                         M E T H O D    3                                *
+     *                                                                         *
+     *  From "A More Portable Fortran Random Number Generator," by Linus       *
+     *  Schrage, ACM Transactions on Mathematical Software, Vol. 5, No, 2,     *
+     *  June 1979, pp. 132-138.
*
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    static const long A3 = 16807;        // Multiplier.
+    static const long P3 = 2147483647;   // Modulus.
+
+    // One step of the generator: splits ix into 16-bit halves, multiplies by
+    // A3 and folds the pieces back together, adding P3 if the result went
+    // negative.  Shared by both Eval overloads.
+    static inline long SchrageStep( long ix )
+    {
+        const long xhi    = ix >> 16;
+        const long xalo   = ( ix & 0xFFFF ) * A3;
+        const long leftlo = xalo >> 16;
+        const long fhi    = xhi * A3 + leftlo;
+        const long k      = fhi >> 15;
+        ix = ( ((xalo - (leftlo << 16)) - P3) +
+               ((fhi  - (k      << 15)) << 16) ) + k;
+        if( ix < 0 ) ix += P3;
+        return ix;
+    }
+
+    // Returns the next sample: the new state scaled by 4.656612875E-10.
+    float RandGen_3::Eval()
+    {
+        ix = SchrageStep( ix );
+        return ix * 4.656612875E-10;
+    }
+
+    // Fills array[0..n-1] with the next n samples.
+    void RandGen_3::Eval( int n, float *array )
+    {
+        for( int i = 0; i < n; i++ )
+        {
+            ix = SchrageStep( ix );
+            *array++ = ix * 4.656612875E-10;
+        }
+    }
+
+    // Sets the state from an arbitrary integer.  The state is reduced into
+    // 1..P3-1: a state of 0 maps to itself on every step (the generator would
+    // return 0 forever), and values of P3 or more do not fit the 16-bit split
+    // used by SchrageStep.  Seeds already in 1..P3-1 are used unchanged.
+    void RandGen_3::Seed( long seed )
+    {
+        ix = ABS( seed ) % P3;
+        if( ix == 0 ) ix = 1;
+    }
+
+    /*-------------------------------------------------------------------------*
+     *  R A N D : : P E R M                                   (Permutation)    *
+     *                                                                         *
+     *  This routine fills an integer array of length "len" with a random      *
+     *  permutation of the integers 0, 1, 2, ... (len-1).                      *
+     *                                                                         *
+     *  For efficiency, the random numbers are generated in batches of up to   *
+     *  "Nmax" at a time.  The constant Nmax can be set to any value >= 1.     *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    static const int Nmax = 20;
+
+    void RandGen::Perm( int len, int perm[] )
+    {
+        float R[ Nmax ];    // A buffer for getting random numbers.
+        int   L = len - 1;  // Total number of random numbers needed.
+        int   N = 0;        // How many to generate when we call Eval.
+        int   n = 0;        // The array index into R.
+
+        // First initialize the array "perm" to the identity permutation.
+
+        for( int j = 0; j < len; j++ ) perm[j] = j;
+
+        // Now swap a random element in the front with the i'th element.
+        // When i gets down to 0, we're done.
+
+        for( int i = len - 1; i > 0; i-- )      // Element i is a swap candidate.
+        {
+            if( n == N )                        // Generate more random numbers.
+            {
+                N = ( L < Nmax ) ? L : Nmax;    // Can't get more than "Nmax".
+                Eval( N, R );                   // Generate N random numbers.
+                L -= N;                         // Decrement total counter.
+                n = 0;                          // Start index at beginning of R.
+            }
+            float r = ( i + 1 ) * R[ n++ ];     // Pick a float in [0,i+1].
+            int   k = (int)r;                   // Truncate r to an integer.
+            if( k < i )                         // Disregard k == i and k == i+1.
+            {
+                int tmp = perm[i];              // Swap elements i and k.
+                perm[i] = perm[k];
+                perm[k] = tmp;
+            }
+        }
+    }
+};
diff --git a/src/nvtt/bc6h/arvo/Rand.h b/src/nvtt/bc6h/arvo/Rand.h
new file mode 100755
index 0000000..a8ef5d9
--- /dev/null
+++ b/src/nvtt/bc6h/arvo/Rand.h
@@ -0,0 +1,114 @@
+/***************************************************************************
+* Rand.h   (Random Number Generators)                                      *
+*                                                                          *
+* Header file for Rand.C, pseudo-random number utilities.  Rand is the     *
+* base class for several different algorithms for generating pseudo-random *
+* numbers.  Any method can generate individual samples or arrays of        *
+* samples using "Eval".  The random seed can be reset at any time by       *
+* calling "Seed" with any integer.  Random permutations of the integers    *
+* 0,1,...(n-1) are generated by "Perm(n,P)".                               *
+*                                                                          *
+*   HISTORY                                                                *
+*      Name    Date        Description                                     *
+*                                                                          *
+*      arvo    08/04/97    Changed to virtual functions.                   *
+*      arvo    06/06/93    Optimization, especially for array evaluators.  *
+*      arvo    10/06/91    Converted to C++                                *
+*      arvo    11/20/89    Added "gen_seed" function to handle.            *
+*      arvo    10/30/89    "state" allocation now done in rand_alloc.      *
+*      arvo    07/08/89    Initial coding.                                 *
+*                                                                          *
+*--------------------------------------------------------------------------*
+* Copyright (C) 1989, James Arvo                                           *
+*                                                                          *
+* This program is free software; you can redistribute it and/or modify it  *
+* under the terms of the GNU General Public License as published by the    *
+* Free Software Foundation.
See http://www.fsf.org/copyleft/gpl.html      *
+*                                                                          *
+* This program is distributed in the hope that it will be useful, but      *
+* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for    *
+* any particular purpose.  See the GNU General Public License for more     *
+* details.                                                                 *
+*                                                                          *
+***************************************************************************/
+#ifndef __RAND_INCLUDED__
+#define __RAND_INCLUDED__
+
+namespace ArvoMath {
+
+    // Base class for random number generators.  This class contains
+    // several pure virtual functions, so it cannot be instanced directly.
+    // Concrete generators supply the two Eval overloads and Seed; Perm,
+    // Interval and Eval(float&) are built on top of them.
+
+    class RandGen {
+    public:
+        RandGen() {}
+        // Virtual so that a generator deleted through a RandGen pointer is
+        // destroyed as its concrete type.
+        virtual ~RandGen() {}
+        virtual float Eval(                   ) = 0;  // Next single sample.
+        virtual void  Eval( int n, float x[]  ) = 0;  // Next n samples into x.
+        virtual void  Seed( long seed         ) = 0;  // Restart from a seed.
+    public:
+        void  Perm( int n, int P[] );                 // Random permutation of 0..n-1.
+        float Interval( float a, float b );           // Sample scaled between a and b.
+        void  Eval( float &x ) { x = Eval(); }        // Next sample, by reference.
+    };
+
+
+    // Method 1: From "Numerical Recipes," by William H. Press, Brian P.
+    // Flannery, Saul A. Teukolsky, and William T. Vetterling, p. 197.
+
+    class RandGen_1 : public RandGen {
+    public:
+        RandGen_1(           ) { Seed( 1    ); }
+        RandGen_1( long seed ) { Seed( seed ); }
+        virtual float Eval(                   );
+        virtual void  Eval( int n, float x[]  );
+        virtual void  Seed( long seed         );
+    private:
+        long index;          // Last table value drawn; selects the next slot.
+        long seed;           // State of the congruential sequence.
+        long shuffle[ 98 ];  // Shuffle table; Rand.cpp uses slots 1..97.
+    };
+
+
+    // Method 2: From "The Multiple Prime Random Number Generator," by
+    // Alexander Haas, ACM Transactions on Mathematical Software,
+    // Vol. 13, No. 4, December 1987, pp. 368-381.
+
+    class RandGen_2 : public RandGen {
+    public:
+        RandGen_2(           ) { Seed( 1    ); }
+        RandGen_2( long seed ) { Seed( seed ); }
+        virtual float Eval(                   );
+        virtual void  Eval( int n, float x[]  );
+        virtual void  Seed( long seed         );
+    private:
+        long r;  // The four state words advanced by every Eval.
+        long m;
+        long i;
+        long j;
+    };
+
+
+    // Method 3: From "A More Portable Fortran Random Number Generator,"
+    // by Linus Schrage, ACM Transactions on Mathematical Software,
+    // Vol. 5, No, 2, June 1979, pp. 132-138.
*
+
+    // Concrete generator for Method 3; its whole state is the single word ix.
+    class RandGen_3 : public RandGen {
+    public:
+        RandGen_3(           ) { Seed( 1    ); }
+        RandGen_3( long seed ) { Seed( seed ); }
+        virtual float Eval(                   );
+        virtual void  Eval( int n, float x[]  );
+        virtual void  Seed( long seed         );
+    private:
+        long ix;
+    };
+
+
+    // Draws one sample with Eval() and scales it onto the interval spanned
+    // by a and b; the arguments may be given in either order.
+    inline float RandGen::Interval( float a, float b )
+    {
+        const float lo = ( a < b ) ? a : b;
+        const float hi = ( a < b ) ? b : a;
+        return lo + Eval() * ( hi - lo );
+    }
+};
+#endif
diff --git a/src/nvtt/bc6h/arvo/SI_units.h b/src/nvtt/bc6h/arvo/SI_units.h
new file mode 100755
index 0000000..69cc8cc
--- /dev/null
+++ b/src/nvtt/bc6h/arvo/SI_units.h
@@ -0,0 +1,232 @@
+/*****************************************************************************
+**
+**  MODULE NAME  SI_units.h     International System of Units (SI)
+**
+**  DESCRIPTION
+**      The purpose of this header file is to provide a simple and efficient
+**      mechanism for associating physically meaningful units with floating
+**      point numbers.  No extra space is required, and no runtime overhead
+**      is introduced; all type-checking occurs at compile time.
+**
+**
+**  HISTORY
+**      Name    Date        Description
+**
+**      arvo    02/09/92    Replaced conversion macros with inline functions.
+**      arvo    10/16/91    Initial implementation.
+**
+**
+**   (c) Copyright 1991, 1992
+**       Program of Computer Graphics, Cornell University, Ithaca, NY
+**       ALL RIGHTS RESERVED
+**
+*****************************************************************************/
+
+#ifndef SI_UNITS_H
+#define SI_UNITS_H
+
+// NOTE(review): the header name on this line was lost from the patch text;
+// <iostream> is supplied because the code below streams to std::ostream.
+#include <iostream>
+
+namespace ArvoMath {
+
+    // Decimal scale factors for the SI prefixes.
+    const float
+        SI_deci  = 1.0E-1,
+        SI_centi = 1.0E-2,
+        SI_milli = 1.0E-3,
+        SI_micro = 1.0E-6,
+        SI_nano  = 1.0E-9,
+        SI_kilo  = 1.0E+3,
+        SI_mega  = 1.0E+6,
+        SI_giga  = 1.0E+9,
+        SI_tera  = 1.0E+12;
+
+    /*******************************************************************************
+     *                                                                             *
+     *        I N T E R N A T I O N A L    S Y S T E M    O F    U N I T S         *
+     *                                                                             *
+     *******************************************************************************
+     *                                                                             *
+     *  DIMENSION           CLASS            INITIALIZER      SYMBOL   BASE UNITS  *
+     *                                                                             *
+     *  length              SI_length        meter            m        m           *
+     *  time                SI_time          second           s        s           *
+     *  mass                SI_mass          kilogram         kg       kg          *
+     *  angle               SI_angle         radian           rad      rad         *
+     *  solid angle         SI_solid_angle   steradian        sr       sr          *
+     *  temperature         SI_temperature   kelvin           K        K           *
+     *  luminous intensity  SI_lum_inten     candela          cd       cd          *
+     *  area                SI_area          meter2           m2       m2          *
+     *  volume              SI_volume        meter3           m3       m3          *
+     *  frequency           SI_frequency     hertz            Hz       1/s         *
+     *  force               SI_force         newton           N        m kg/s2     *
+     *  energy              SI_energy        joule            J        m2 kg/s2    *
+     *  power               SI_power         watt             W        m2 kg/s3    *
+     *  radiance            SI_radiance      watts_per_m2sr   W/m2sr   kg/(s3 sr)  *
+     *  irradiance          SI_irradiance    watts_per_m2     W/m2     kg/s3       *
+     *  radiant intensity   SI_rad_inten     watts_per_sr     W/sr     m2 kg/(s3 sr) *
+     *  luminance           SI_luminance     candela_per_m2   cd/m2    cd/m2       *
+     *  illuminance         SI_illuminance   lux              lx       cd sr/m2    *
+     *  luminous flux       SI_lum_flux      lumen            lm       cd sr       *
+     *  luminous energy     SI_lum_energy    talbot           tb       cd sr s     *
+     *                                                                             *
+     *******************************************************************************/
+
+    // Common base of every quantity class: a single float plus the means to
+    // read it and to print it followed by a unit symbol.  The constructors
+    // are protected, so only the classes made by SI_Make can create values.
+    class SI_dimensionless {
+    public:
+        float Value() const { return value; }
+        // Writes "<value> <a>" to s.  Takes a const string so that the
+        // string-literal symbols passed by SI_Make are accepted.
+        std::ostream& Put( std::ostream &s, const char *a ) const
+        {
+            return s << value << " " << a;
+        }
+    protected:
+        SI_dimensionless(         ) { value = 0; }
+        SI_dimensionless( float x ) { value = x; }
+        float value;
+    };
+
+    /*******************************************************************************
+     * The following macro is used for creating new quantity classes and their     *
+     * corresponding initializing functions and abbreviations.  This macro is      *
+     * not intended to be used outside of this file -- it is a compact means of    *
+     * defining generic operations for each quantity (e.g. scaling & comparing).   *
+     *                                                                             *
+     * SI_Make( C, Initializer, Symbol ) defines struct C, a stream inserter that  *
+     * prints the value followed by Symbol, the function Initializer(float) that   *
+     * builds a C, and "float * C".  Operators that do not modify the object are   *
+     * const so they can be applied to const quantities.                           *
+     *******************************************************************************/
+
+#define SI_Make( C, Initializer, Symbol )                                    \
+    struct C : SI_dimensionless {                                            \
+        C (         ) : SI_dimensionless(   ) {}                             \
+        C ( float x ) : SI_dimensionless( x ) {}                             \
+        C operator *  ( float x ) const { return C( value *  x ); }          \
+        C operator /  ( float x ) const { return C( value /  x ); }          \
+        C operator /= ( float x ) { return C( value /= x ); }                \
+        C operator *= ( float x ) { return C( value *= x ); }                \
+        C operator +  ( C x ) const { return C( value +  x.Value() ); }      \
+        C operator -  (     ) const { return C(-value              ); }      \
+        C operator -  ( C x ) const { return C( value -  x.Value() ); }      \
+        C operator += ( C x ) { return C( value += x.Value() ); }            \
+        C operator -= ( C x ) { return C( value -= x.Value() ); }            \
+        C operator =  ( C x ) { return C( value =  x.Value() ); }            \
+        int operator >  ( C x ) const { return ( value >  x.Value() ); }     \
+        int operator <  ( C x ) const { return ( value <  x.Value() ); }     \
+        int operator >= ( C x ) const { return ( value >= x.Value() ); }     \
+        int operator <= ( C x ) const { return ( value <= x.Value() ); }     \
+        float operator / ( C x ) const { return ( value / x.Value() ); }     \
+        };                                                                   \
+    inline std::ostream& operator<<(std::ostream &s, C x) {return x.Put(s,Symbol);} \
+    inline C Initializer( float x ) { return C( x ); }                       \
+    inline C operator * ( float x, C y ) { return C( x * y.Value() ); }
+
+    /*******************************************************************************
+     * The following macros define permissible arithmetic operations among         *
+     * variables with different physical meanings.
This ensures that the       *
+     * result of any such operation is ALWAYS another meaningful quantity.         *
+     *******************************************************************************/
+
+// SI_Square( A, B ): declares that B is the square of A, i.e. A * A gives B
+// and B / A gives A.
+#define SI_Square( A, B )                                                    \
+    inline B operator*( A x, A y ) { return B( x.Value() * y.Value() ); }    \
+    inline A operator/( B x, A y ) { return A( x.Value() / y.Value() ); }
+
+// SI_Recip( A, B ): declares that A and B are reciprocals, i.e. float / A
+// gives B, float / B gives A, and A * B (in either order) is a plain float.
+#define SI_Recip( A, B )                                                     \
+    inline B operator/( float x, A y ) { return B( x / y.Value() ); }        \
+    inline A operator/( float x, B y ) { return A( x / y.Value() ); }        \
+    inline float operator*( A x, B y ) { return x.Value() * y.Value(); }     \
+    inline float operator*( B x, A y ) { return x.Value() * y.Value(); }
+
+// SI_Times( A, B, C ): declares that C is the product of A and B, i.e.
+// A * B and B * A give C, C / B gives A, and C / A gives B.
+#define SI_Times( A, B, C )                                                  \
+    inline C operator*( A x, B y ) { return C( x.Value() * y.Value() ); }    \
+    inline C operator*( B x, A y ) { return C( x.Value() * y.Value() ); }    \
+    inline A operator/( C x, B y ) { return A( x.Value() / y.Value() ); }    \
+    inline B operator/( C x, A y ) { return B( x.Value() / y.Value() ); }
+
+    /*******************************************************************************
+     * The following macros create classes for a variety of quantities.  These     *
+     * include base quantities such as "time" and "length" as well as derived      *
+     * quantities such as "power" and "volume".  Each quantity is provided with    *
+     * an initialization function in SI units and an abbreviation for printing.
*
+     *******************************************************************************/
+
+    SI_Make( SI_length          , meter          , "m"      );  // Base Units:
+    SI_Make( SI_mass            , kilogram       , "kg"     );
+    SI_Make( SI_time            , second         , "s"      );
+    SI_Make( SI_lum_inten       , candela        , "cd"     );
+    SI_Make( SI_temperature     , kelvin         , "K"      );
+    SI_Make( SI_angle           , radian         , "rad"    );  // Supplementary:
+    SI_Make( SI_solid_angle     , steradian      , "sr"     );
+    SI_Make( SI_area            , meter2         , "m2"     );  // Derived units:
+    SI_Make( SI_volume          , meter3         , "m3"     );
+    SI_Make( SI_frequency       , hertz          , "Hz"     );
+    SI_Make( SI_force           , newton         , "N"      );
+    SI_Make( SI_energy          , joule          , "J"      );
+    SI_Make( SI_power           , watt           , "W"      );
+    SI_Make( SI_radiance        , watts_per_m2sr , "W/m2sr" );
+    SI_Make( SI_irradiance      , watts_per_m2   , "W/m2"   );
+    SI_Make( SI_rad_inten       , watts_per_sr   , "W/sr"   );
+    SI_Make( SI_luminance       , candela_per_m2 , "cd/m2"  );
+    SI_Make( SI_illuminance     , lux            , "lx"     );
+    SI_Make( SI_lum_flux        , lumen          , "lm"     );
+    SI_Make( SI_lum_energy      , talbot         , "tb"     );
+    SI_Make( SI_time2           , second2        , "s2"     );  // Intermediate:
+    SI_Make( SI_sa_area         , meter2_sr      , "m2sr"   );
+    SI_Make( SI_inv_area        , inv_meter2     , "1/m2"   );
+    SI_Make( SI_inv_solid_angle , inv_steradian  , "1/sr"   );
+    SI_Make( SI_length_temp     , meters_kelvin  , "m K"    );
+    SI_Make( SI_power_area      , watts_m2       , "W m2"   );
+    SI_Make( SI_power_per_volume, watts_per_m3   , "W/m3"   );
+
+    // Relations between the quantities above; each line enables the
+    // corresponding multiplications and divisions and nothing else.
+
+    SI_Square( SI_length      , SI_area            );
+    SI_Square( SI_time        , SI_time2           );
+    SI_Recip ( SI_time        , SI_frequency       );
+    SI_Recip ( SI_area        , SI_inv_area        );
+    SI_Recip ( SI_solid_angle , SI_inv_solid_angle );
+
+    SI_Times( SI_area        , SI_length         , SI_volume      );
+    SI_Times( SI_force       , SI_length         , SI_energy      );
+    SI_Times( SI_power       , SI_time           , SI_energy      );
+    SI_Times( SI_lum_flux    , SI_time           , SI_lum_energy  );
+    SI_Times( SI_lum_inten   , SI_solid_angle    , SI_lum_flux    );
+    SI_Times( SI_radiance    , SI_solid_angle    , SI_irradiance  );
+    SI_Times( SI_rad_inten   , SI_solid_angle    , SI_power       );
+    SI_Times( SI_irradiance  , SI_area           , SI_power       );
+    SI_Times( SI_illuminance , SI_area           , SI_lum_flux    );
+    SI_Times( SI_solid_angle , SI_area           , SI_sa_area     );
+    SI_Times( SI_radiance    , SI_sa_area        , SI_power       );
+    SI_Times( SI_irradiance  , SI_inv_solid_angle, SI_radiance    );
+    SI_Times( SI_power       , SI_inv_solid_angle, SI_rad_inten   );
+    SI_Times( SI_length      , SI_temperature    , SI_length_temp );
+    SI_Times( SI_power       , SI_area           , SI_power_area  );
+
+    /*******************************************************************************
+     * Following are some useful non-SI units.  These units can be used in place of *
+     * the unit-initializers above.  Thus, a variable of type SI_length, for example *
+     * may be initialized in "meters", "inches", or "centimeters".  In all cases,   *
+     * however, the value is converted to the underlying SI unit (e.g. meters).     *
+     *******************************************************************************/
+
+// SI_Convert( SI, New, Old ) defines New(x), which returns x multiplied by
+// the quantity Old -- so Old must be the SI value of ONE of the new unit.
+#define SI_Convert( SI, New, Old ) inline SI New( float x ) { return x * Old; }
+
+    SI_Convert( SI_time        , minute     , second( 60.0 )            );
+    SI_Convert( SI_time        , hour       , minute( 60.0 )            );
+    SI_Convert( SI_force       , dyne       , newton( 1.0E-5 )          );
+    SI_Convert( SI_energy      , erg        , joule( 1.0E-7 )           );
+    SI_Convert( SI_power       , kilowatt   , watt( SI_kilo )           );
+    SI_Convert( SI_mass        , gram       , kilogram( SI_milli )      );
+    SI_Convert( SI_length      , inch       , meter( 2.54E-2 )          );
+    SI_Convert( SI_length      , foot       , inch( 12.0 )              );
+    SI_Convert( SI_length      , centimeter , meter( SI_centi )         );
+    SI_Convert( SI_length      , micron     , meter( SI_micro )         );
+    SI_Convert( SI_length      , angstrom   , meter( 1.0E-10 )          );
+    SI_Convert( SI_area        , barn       , meter2( 1.0E-28 )         );
+    // pi / 180, carried to full float precision.
+    SI_Convert( SI_angle       , degree     , radian( 0.01745329252 )   );
+    SI_Convert( SI_illuminance , phot       , lux( 1.0E+4 )             );
+    // One footcandle is 10.764 lux; 9.29E-2 is the inverse factor (lux to
+    // footcandles) and made footcandle(x) about 116 times too small.
+    SI_Convert( SI_illuminance , footcandle , lux( 10.764 )             );
+    SI_Convert( SI_luminance   , stilb      , candela_per_m2( 1.0E+4 )  );
+
+    /*******************************************************************************
+     * Often there are multiple names for a single quantity.
Below are some * + * synonyms for the quantities defined above. These can be used in place of * + * the original quantities and may be clearer in some contexts. * + *******************************************************************************/ + + typedef SI_power SI_radiant_flux; + typedef SI_irradiance SI_radiant_flux_density; + typedef SI_irradiance SI_radiant_exitance; + typedef SI_radiance SI_intensity; + typedef SI_irradiance SI_radiosity; +}; +#endif \ No newline at end of file diff --git a/src/nvtt/bc6h/arvo/SVD.cpp b/src/nvtt/bc6h/arvo/SVD.cpp new file mode 100755 index 0000000..36f0ea6 --- /dev/null +++ b/src/nvtt/bc6h/arvo/SVD.cpp @@ -0,0 +1,398 @@ +/*************************************************************************** +* SVD.C * +* * +* Singular Value Decomposition. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 08/22/2000 Copied to CIT library. * +* arvo 06/28/1993 Rewritten from "Numerical Recipes" C-code. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 2000, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. 
*
+*                                                                          *
+***************************************************************************/
+// NOTE(review): the two system header names were lost from the patch text.
+// <math.h> is required for sqrt below; <stdio.h> is presumed -- confirm.
+#include <stdio.h>
+#include <math.h>
+#include "ArvoMath.h"
+#include "Vector.h"
+#include "Matrix.h"
+#include "SVD.h"
+
+namespace ArvoMath {
+    // Sweep limit per singular value in Decompose before it reports failure.
+    static const int MaxIterations = 30;
+
+    // Returns sqrt( a*a + b*b ), factoring out the larger magnitude so the
+    // intermediate squares stay small.  Returns 0 when both are zero.
+    static double svd_pythag( double a, double b )
+    {
+        double at = Abs(a);
+        double bt = Abs(b);
+        if( at > bt )
+            return at * sqrt( 1.0 + Sqr( bt / at ) );
+        else if( bt > 0.0 )
+            return bt * sqrt( 1.0 + Sqr( at / bt ) );
+        else return 0.0;
+    }
+
+    // Returns the magnitude of a with the sign of b (b == 0 counts as positive).
+    static inline double SameSign( double a, double b )
+    {
+        double t;
+        if( b >= 0.0 ) t =  Abs( a );
+        else           t = -Abs( a );
+        return t;
+    }
+
+    // Counts the diagonal entries of D whose magnitude exceeds epsilon.
+    static int ComputeRank( const Matrix &D, double epsilon )
+    {
+        int rank = 0;
+        for( int i = 0; i < D.Rows(); i++ )
+            if( Abs(D(i,i)) > epsilon ) rank++;
+        return rank;
+    }
+
+    // Creates an empty decomposition; "error" starts cleared.
+    SVD::SVD( ) : Q_(0), D_(0), R_(0), error(0)
+    {
+    }
+
+    // Decomposes M immediately (see operator=).
+    SVD::SVD( const Matrix &M ) : Q_(0), D_(0), R_(0), error(0)
+    {
+        (*this) = M;
+    }
+
+    // Decomposes A, replacing any previous decomposition.  When A has fewer
+    // rows than columns it is first copied into a matrix built with
+    // Matrix( A.Cols() ).  The cached pseudo-inverse is discarded so that
+    // PseudoInverse() recomputes it for the new matrix, and "error" is set
+    // to 1 if Decompose reports failure, 0 otherwise.
+    void SVD::operator=( const Matrix &A )
+    {
+        if( A.Rows() >= A.Cols() ) Q_ = A;
+        else
+        {
+            Q_ = Matrix( A.Cols() );
+            for( int i = 0; i < A.Rows(); i++ )
+            for( int j = 0; j < A.Cols(); j++ ) Q_(i,j) = A(i,j);
+        }
+        R_ = Matrix( A.Cols() );
+        P_ = Matrix();
+        error = Decompose( Q_, D_, R_ ) ? 0 : 1;
+    }
+
+    // Accessors for the three factors.  The epsilon argument is accepted for
+    // interface compatibility but does not affect the matrix returned.
+    const Matrix &SVD::Q( double /* epsilon */ ) const
+    {
+        return Q_;
+    }
+
+    const Matrix &SVD::D( double /* epsilon */ ) const
+    {
+        return D_;
+    }
+
+    const Matrix &SVD::R( double /* epsilon */ ) const
+    {
+        return R_;
+    }
+
+    // Number of diagonal entries of D whose magnitude exceeds epsilon.
+    int SVD::Rank( double epsilon ) const
+    {
+        return ComputeRank( D_, epsilon );
+    }
+
+    // Computes the decomposition in place.  On entry Q holds the matrix to
+    // decompose and R must already have the required size; on exit Q and R
+    // hold the two outer factors and D is the diagonal matrix built from the
+    // singular values, sorted into descending order (with the matching
+    // columns of Q and rows of R swapped).  Returns 1 on success, or 0 if a
+    // singular value has not converged after MaxIterations sweeps.
+    int SVD::Decompose( Matrix &Q, Matrix &D, Matrix &R )
+    {
+        int    i, j, k, l, m, n, p, q, iter;
+        double c, f, h, s, x, y, z;
+        double norm  = 0.0;
+        double g     = 0.0;
+        double scale = 0.0;
+
+        m = Q.Rows();
+        n = Q.Cols();
+
+        Vector Temp( n );
+        Vector diag( n
); + + for( i = 0; i < n; i++ ) + { + + Temp(i) = scale * g; + scale = 0.0; + g = 0.0; + s = 0.0; + l = i + 1; + + if( i < m ) + { + for( k = i; k < m; k++ ) scale += Abs( Q(k,i) ); + if( scale != 0.0 ) + { + for( k = i; k < m; k++ ) + { + Q(k,i) /= scale; + s += Sqr( Q(k,i) ); + } + f = Q(i,i); + g = -SameSign( sqrt(s), f ); + h = f * g - s; + Q(i,i) = f - g; + if( i != n - 1 ) + { + for( j = l; j < n; j++ ) + { + s = 0.0; + for( k = i; k < m; k++ ) s += Q(k,i) * Q(k,j); + f = s / h; + for( k = i; k < m; k++ ) Q(k,j) += f * Q(k,i); + } + } + for( k = i; k < m; k++ ) Q(k,i) *= scale; + } + } + + diag(i) = scale * g; + g = 0.0; + s = 0.0; + scale = 0.0; + + if( i < m && i != n - 1 ) + { + for( k = l; k < n; k++ ) scale += Abs( Q(i,k) ); + if( scale != 0.0 ) + { + for( k = l; k < n; k++ ) + { + Q(i,k) /= scale; + s += Sqr( Q(i,k) ); + } + f = Q(i,l); + g = -SameSign( sqrt(s), f ); + h = f * g - s; + Q(i,l) = f - g; + for( k = l; k < n; k++ ) Temp(k) = Q(i,k) / h; + if( i != m - 1 ) + { + for( j = l; j < m; j++ ) + { + s = 0.0; + for( k = l; k < n; k++ ) s += Q(j,k) * Q(i,k); + for( k = l; k < n; k++ ) Q(j,k) += s * Temp(k); + } + } + for( k = l; k < n; k++ ) Q(i,k) *= scale; + } + } + norm = Max( norm, Abs( diag(i) ) + Abs( Temp(i) ) ); + } + + + for( i = n - 1; i >= 0; i-- ) + { + if( i < n - 1 ) + { + if( g != 0.0 ) + { + for( j = l; j < n; j++ ) R(i,j) = ( Q(i,j) / Q(i,l) ) / g; + for( j = l; j < n; j++ ) + { + s = 0.0; + for( k = l; k < n; k++ ) s += Q(i,k) * R(j,k); + for( k = l; k < n; k++ ) R(j,k) += s * R(i,k); + } + } + for( j = l; j < n; j++ ) + { + R(i,j) = 0.0; + R(j,i) = 0.0; + } + } + R(i,i) = 1.0; + g = Temp(i); + l = i; + } + + + for( i = n - 1; i >= 0; i-- ) + { + l = i + 1; + g = diag(i); + if( i < n - 1 ) for( j = l; j < n; j++ ) Q(i,j) = 0.0; + if( g != 0.0 ) + { + g = 1.0 / g; + if( i != n - 1 ) + { + for( j = l; j < n; j++ ) + { + s = 0.0; + for( k = l; k < m; k++ ) s += Q(k,i) * Q(k,j); + f = ( s / Q(i,i) ) * g; + for( k = i; k < m; k++ ) 
Q(k,j) += f * Q(k,i); + } + } + for( j = i; j < m; j++ ) Q(j,i) *= g; + } + else + { + for( j = i; j < m; j++ ) Q(j,i) = 0.0; + } + Q(i,i) += 1.0; + } + + + for( k = n - 1; k >= 0; k-- ) + { + for( iter = 1; iter <= MaxIterations; iter++ ) + { + int jump; + + for( l = k; l >= 0; l-- ) + { + q = l - 1; + if( Abs( Temp(l) ) + norm == norm ) { jump = 1; break; } + if( Abs( diag(q) ) + norm == norm ) { jump = 0; break; } + } + + if( !jump ) + { + c = 0.0; + s = 1.0; + for( i = l; i <= k; i++ ) + { + f = s * Temp(i); + Temp(i) *= c; + if( Abs( f ) + norm == norm ) break; + g = diag(i); + h = svd_pythag( f, g ); + diag(i) = h; + h = 1.0 / h; + c = g * h; + s = -f * h; + for( j = 0; j < m; j++ ) + { + y = Q(j,q); + z = Q(j,i); + Q(j,q) = y * c + z * s; + Q(j,i) = z * c - y * s; + } + } + } + + z = diag(k); + if( l == k ) + { + if( z < 0.0 ) + { + diag(k) = -z; + for( j = 0; j < n; j++ ) R(k,j) *= -1.0; + } + break; + } + if( iter >= MaxIterations ) return 0; + x = diag(l); + q = k - 1; + y = diag(q); + g = Temp(q); + h = Temp(k); + f = ( ( y - z ) * ( y + z ) + ( g - h ) * ( g + h ) ) / ( 2.0 * h * y ); + g = svd_pythag( f, 1.0 ); + f = ( ( x - z ) * ( x + z ) + h * ( ( y / ( f + SameSign( g, f ) ) ) - h ) ) / x; + c = 1.0; + s = 1.0; + for( j = l; j <= q; j++ ) + { + i = j + 1; + g = Temp(i); + y = diag(i); + h = s * g; + g = c * g; + z = svd_pythag( f, h ); + Temp(j) = z; + c = f / z; + s = h / z; + f = x * c + g * s; + g = g * c - x * s; + h = y * s; + y = y * c; + for( p = 0; p < n; p++ ) + { + x = R(j,p); + z = R(i,p); + R(j,p) = x * c + z * s; + R(i,p) = z * c - x * s; + } + z = svd_pythag( f, h ); + diag(j) = z; + if( z != 0.0 ) + { + z = 1.0 / z; + c = f * z; + s = h * z; + } + f = c * g + s * y; + x = c * y - s * g; + for( p = 0; p < m; p++ ) + { + y = Q(p,j); + z = Q(p,i); + Q(p,j) = y * c + z * s; + Q(p,i) = z * c - y * s; + } + } + Temp(l) = 0.0; + Temp(k) = f; + diag(k) = x; + } + } + + // Sort the singular values into descending order. 
+
+        for( i = 0; i < n - 1; i++ )
+        {
+            double biggest = diag(i);  // Biggest singular value so far.
+            int    bindex  = i;        // The row/col it occurred in.
+            for( j = i + 1; j < n; j++ )
+            {
+                if( diag(j) > biggest )
+                {
+                    biggest = diag(j);
+                    bindex  = j;
+                }
+            }
+            if( bindex != i )  // Need to swap rows and columns.
+            {
+                Q.SwapCols( i, bindex );  // Swap columns in Q.
+                R.SwapRows( i, bindex );  // Swap rows in R.
+                diag.Swap ( i, bindex );  // Swap elements in diag.
+            }
+        }
+
+        D = Diag( diag );
+        return 1;
+    }
+
+
+    // Returns the cached matrix P_, computing it on first use as
+    // Q_ * D_Inverse * R_, where D_Inverse is a copy of D_ whose diagonal
+    // entries are inverted when their magnitude exceeds epsilon and set to
+    // zero otherwise.
+    //
+    // The result is computed only while P_ is null, so the epsilon of the
+    // first call is the one that sticks; later calls with a different
+    // epsilon return the same cached matrix.
+    //
+    // NOTE(review): if the factors satisfy A = Q * D * R, the usual
+    // pseudo-inverse would be Transpose(R) * D_Inverse * Transpose(Q);
+    // confirm that the product order used here is intended.
+    const Matrix &SVD::PseudoInverse( double epsilon )
+    {
+        if( Null(P_) )
+        {
+            Matrix D_Inverse( D_ );
+            for( int i = 0; i < D_Inverse.Rows(); i++ )
+            {
+                if( Abs( D_Inverse(i,i) ) > epsilon )
+                    D_Inverse(i,i) = 1.0 / D_Inverse(i,i);
+                else D_Inverse(i,i) = 0.0;
+            }
+            P_ = Q_ * D_Inverse * R_;
+        }
+        return P_;
+    }
+};
diff --git a/src/nvtt/bc6h/arvo/SVD.h b/src/nvtt/bc6h/arvo/SVD.h
new file mode 100755
index 0000000..d6bf850
--- /dev/null
+++ b/src/nvtt/bc6h/arvo/SVD.h
@@ -0,0 +1,54 @@
+/***************************************************************************
+* SVD.h                                                                    *
+*                                                                          *
+* Singular Value Decomposition.                                            *
+*                                                                          *
+*   HISTORY                                                                *
+*      Name    Date        Description                                     *
+*                                                                          *
+*      arvo    08/22/2000  Split off from Matrix.h                         *
+*      arvo    06/28/1993  Rewritten from "Numerical Recipes" C-code.      *
+*                                                                          *
+*--------------------------------------------------------------------------*
+* Copyright (C) 2000, James Arvo                                           *
+*                                                                          *
+* This program is free software; you can redistribute it and/or modify it  *
+* under the terms of the GNU General Public License as published by the    *
+* Free Software Foundation.  See http://www.fsf.org/copyleft/gpl.html      *
+*                                                                          *
+* This program is distributed in the hope that it will be useful, but      *
+* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for    *
+* any particular purpose.  See the GNU General Public License for more     *
+* details.
// Singular value decomposition of a Matrix.  The factors are kept in Q_,
// D_ and R_; P_ is filled in by PseudoInverse().
class SVD {
public:
    SVD( );
    SVD( const SVD & );                // Copies the decomposition.
    SVD( const Matrix & );             // Performs the decomposition.
    ~SVD() {};
    // Accessors for the three factors.
    // NOTE(review): the role of "epsilon" in Q/D/R/Rank is not visible
    // here -- presumably a threshold on small singular values; confirm.
    const Matrix &Q( double epsilon = 0.0 ) const;
    const Matrix &D( double epsilon = 0.0 ) const;
    const Matrix &R( double epsilon = 0.0 ) const;
    // Non-const because it stores its result in P_.
    const Matrix &PseudoInverse( double epsilon = 0.0 );
    int Rank( double epsilon = 0.0 ) const;
    void operator=( const Matrix & );  // Performs the decomposition.
private:
    // Returns 1 on success and 0 when the iteration limit is reached.
    // On success the singular values in D are sorted into descending order.
    int Decompose( Matrix &Q, Matrix &D, Matrix &R );
    Matrix Q_;
    Matrix D_;
    Matrix R_;
    Matrix P_; // Pseudo inverse.
    int error; // NOTE(review): presumably records a failed Decompose -- confirm.
};
See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#include +#include +#include "SphTri.h" +#include "form.h" +namespace ArvoMath { + /*-------------------------------------------------------------------------* + * Constructor * + * * + * Construct a spherical triangle from three (non-zero) vectors. The * + * vectors needn't be of unit length. * + * * + *-------------------------------------------------------------------------*/ + SphericalTriangle::SphericalTriangle( const Vec3 &A0, const Vec3 &B0, const Vec3 &C0 ) + { + Init( A0, B0, C0 ); + } + + /*-------------------------------------------------------------------------* + * Init * + * * + * Construct the spherical triange from three vertices. Assume that the * + * sphere is centered at the origin. The vectors A, B, and C need not * + * be normalized. * + * * + *-------------------------------------------------------------------------*/ + void SphericalTriangle::Init( const Vec3 &A0, const Vec3 &B0, const Vec3 &C0 ) + { + // Normalize the three vectors -- these are the vertices. + + A_ = Unit( A0 ); + B_ = Unit( B0 ); + C_ = Unit( C0 ); + + // Compute and save the cosines of the edge lengths. + + cos_a = B_ * C_; + cos_b = A_ * C_; + cos_c = A_ * B_; + + // Compute and save the edge lengths. + + a_ = ArcCos( cos_a ); + b_ = ArcCos( cos_b ); + c_ = ArcCos( cos_c ); + + // Compute the cosines of the internal (i.e. dihedral) angles. + + cos_alpha = CosDihedralAngle( C_, A_, B_ ); + cos_beta = CosDihedralAngle( A_, B_, C_ ); + cos_gamma = CosDihedralAngle( A_, C_, B_ ); + + // Compute the (dihedral) angles. 
+ + alpha = ArcCos( cos_alpha ); + beta = ArcCos( cos_beta ); + gamma = ArcCos( cos_gamma ); + + // Compute the solid angle of the spherical triangle. + + area = alpha + beta + gamma - Pi; + + // Compute the orientation of the triangle. + + orient = Sign( A_ * ( B_ ^ C_ ) ); + + // Initialize three variables that are used for sampling the triangle. + + U = Unit( C_ / A_ ); // In plane of AC orthogonal to A. + sin_alpha = sin( alpha ); + product = sin_alpha * cos_c; + } + + /*-------------------------------------------------------------------------* + * Init * + * * + * Initialize all fields. Create a null spherical triangle. * + * * + *-------------------------------------------------------------------------*/ + void SphericalTriangle::Init() + { + a_ = 0; A_ = 0; cos_alpha = 0; cos_a = 0; alpha = 0; + b_ = 0; B_ = 0; cos_beta = 0; cos_b = 0; beta = 0; + c_ = 0; C_ = 0; cos_gamma = 0; cos_c = 0; gamma = 0; + area = 0; + orient = 0; + sin_alpha = 0; + product = 0; + U = 0; + } + + /*-------------------------------------------------------------------------* + * "( A, B, C )" operator. * + * * + * Construct the spherical triange from three vertices. Assume that the * + * sphere is centered at the origin. The vectors A, B, and C need not * + * be normalized. * + * * + *-------------------------------------------------------------------------*/ + SphericalTriangle & SphericalTriangle::operator()( + const Vec3 &A0, + const Vec3 &B0, + const Vec3 &C0 ) + { + Init( A0, B0, C0 ); + return *this; + } + + /*-------------------------------------------------------------------------* + * Inside * + * * + * Determine if the vector W is inside the triangle. 
W need not be a * + * unit vector * + * * + *-------------------------------------------------------------------------*/ + int SphericalTriangle::Inside( const Vec3 &W ) const + { + Vec3 Z = Orient() * W; + if( Z * ( A() ^ B() ) < 0.0 ) return 0; + if( Z * ( B() ^ C() ) < 0.0 ) return 0; + if( Z * ( C() ^ A() ) < 0.0 ) return 0; + return 1; + } + + /*-------------------------------------------------------------------------* + * Chart * + * * + * Generate samples from the current spherical triangle. If x1 and x2 are * + * random variables uniformly distributed over [0,1], then the returned * + * points are uniformly distributed over the solid angle. * + * * + *-------------------------------------------------------------------------*/ + Vec3 SphericalTriangle::Chart( float x1, float x2 ) const + { + // Use one random variable to select the area of the sub-triangle. + // Save the sine and cosine of the angle phi. + + register float phi = x1 * area - Alpha(); + register float s = sin( phi ); + register float t = cos( phi ); + + // Compute the pair (u,v) that determines the new angle beta. + + register float u = t - cos_alpha; + register float v = s + product ; // sin_alpha * cos_c + + // Compute the cosine of the new edge b. + + float q = ( cos_alpha * ( v * t - u * s ) - v ) / + ( sin_alpha * ( u * t + v * s ) ); + + // Compute the third vertex of the sub-triangle. + + Vec3 C_new = q * A() + Sqrt( 1.0 - q * q ) * U; + + // Use the other random variable to select the height z. + + float z = 1.0 - x2 * ( 1.0 - C_new * B() ); + + // Construct the corresponding point on the sphere. + + Vec3 D = C_new / B(); // Remove B component of C_new. + return z * B() + Sqrt( ( 1.0 - z * z ) / ( D * D ) ) * D; + } + + /*-------------------------------------------------------------------------* + * Coord * + * * + * Compute the two coordinates (x1,x2) corresponding to a point in the * + * spherical triangle. This is the inverse of "Chart". 
* + * * + *-------------------------------------------------------------------------*/ + Vec2 SphericalTriangle::Coord( const Vec3 &P1 ) const + { + Vec3 P = Unit( P1 ); + + // Compute the new C vertex, which lies on the arc defined by B-P + // and the arc defined by A-C. + + Vec3 C_new = Unit( ( B() ^ P ) ^ ( C() ^ A() ) ); + + // Adjust the sign of C_new. Make sure it's on the arc between A and C. + + if( C_new * ( A() + C() ) < 0.0 ) C_new = -C_new; + + // Compute x1, the area of the sub-triangle over the original area. + + float cos_beta = CosDihedralAngle( A(), B(), C_new ); + float cos_gamma = CosDihedralAngle( A(), C_new , B() ); + float sub_area = Alpha() + acos( cos_beta ) + acos( cos_gamma ) - Pi; + float x1 = sub_area / SolidAngle(); + + // Now compute the second coordinate using the new C vertex. + + float z = P * B(); + float x2 = ( 1.0 - z ) / ( 1.0 - C_new * B() ); + + if( x1 < 0.0 ) x1 = 0.0; if( x1 > 1.0 ) x1 = 1.0; + if( x2 < 0.0 ) x2 = 0.0; if( x2 > 1.0 ) x2 = 1.0; + return Vec2( x1, x2 ); + } + + /*-------------------------------------------------------------------------* + * Dual * + * * + * Construct the dual triangle of the current triangle, which is another * + * spherical triangle. * + * * + *-------------------------------------------------------------------------*/ + SphericalTriangle SphericalTriangle::Dual() const + { + Vec3 dual_A = B() ^ C(); if( dual_A * A() < 0.0 ) dual_A *= -1.0; + Vec3 dual_B = A() ^ C(); if( dual_B * B() < 0.0 ) dual_B *= -1.0; + Vec3 dual_C = A() ^ B(); if( dual_C * C() < 0.0 ) dual_C *= -1.0; + return SphericalTriangle( dual_A, dual_B, dual_C ); + } + + /*-------------------------------------------------------------------------* + * VecIrrad * + * * + * Return the "vector irradiance" due to a light source of unit brightness * + * whose spherical projection is this spherical triangle. The negative of * + * this vector dotted with the surface normal gives the (scalar) * + * irradiance at the origin. 
* + * * + *-------------------------------------------------------------------------*/ + Vec3 SphericalTriangle::VecIrrad() const + { + Vec3 Phi = + a() * Unit( B() ^ C() ) + + b() * Unit( C() ^ A() ) + + c() * Unit( A() ^ B() ) ; + if( Orient() ) Phi *= -1.0; + return Phi; + } + + /*-------------------------------------------------------------------------* + * New_Alpha * + * * + * Returns a new spherical triangle derived from the original one by * + * moving the "C" vertex along the edge "BC" until the new "alpha" angle * + * equals the given argument. * + * * + *-------------------------------------------------------------------------*/ + SphericalTriangle SphericalTriangle::New_Alpha( float alpha ) const + { + Vec3 V1( A() ), V2( B() ), V3( C() ); + Vec3 E1 = Unit( V2 ^ V1 ); + Vec3 E2 = E1 ^ V1; + Vec3 G = ( cos(alpha) * E1 ) + ( sin(alpha) * E2 ); + Vec3 D = Unit( V3 / V2 ); + Vec3 C2 = ((G * D) * V2) - ((G * V2) * D); + if( Triple( V1, V2, C2 ) > 0.0 ) C2 *= -1.0; + return SphericalTriangle( V1, V2, C2 ); + } + + std::ostream &operator<<( std::ostream &out, const SphericalTriangle &T ) + { + out << "SphericalTriangle:\n" + << " " << T.A() << "\n" + << " " << T.B() << "\n" + << " " << T.C() << std::endl; + return out; + } + +}; diff --git a/src/nvtt/bc6h/arvo/SphTri.h b/src/nvtt/bc6h/arvo/SphTri.h new file mode 100755 index 0000000..7336dc7 --- /dev/null +++ b/src/nvtt/bc6h/arvo/SphTri.h @@ -0,0 +1,124 @@ +/*************************************************************************** +* SphTri.h * +* * +* This file defines the SphericalTriangle class definition, which * +* supports member functions for Monte Carlo sampling, point containment, * +* and other basic operations on spherical triangles. * +* * +* Changes: * +* 01/01/2000 arvo Added New_{Alpha,Beta,Gamma} methods. * +* 12/30/1999 arvo Added VecIrrad method for "Vector Irradiance". * +* 04/08/1995 arvo Further optimized sampling algorithm. * +* 10/11/1994 arvo Added analytic sampling algorithm. 
* +* 06/14/1994 arvo Initial implementation. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1995, 2000, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#ifndef __SPHTRI_INCLUDED__ +#define __SPHTRI_INCLUDED__ + +#include "Vec3.h" +#include "Vec2.h" + +namespace ArvoMath { + + /* + * The (Oblique) Spherical Triangle ABC. Edge lengths (segments of great + * circles) are a, b, and c. The (dihedral) angles are Alpha, Beta, and Gamma. + * + * B + * o + * / \ + * / \ + * /Beta \ + * / \ + * c / \ a + * / \ + * / \ + * / \ + * / \ + * / \ + * /Alpha Gamma \ + * o-----------------------o + * A b C + * + */ + + class SphericalTriangle { + + public: // methods + SphericalTriangle() { Init(); } + SphericalTriangle( const SphericalTriangle &T ) { *this = T; } + SphericalTriangle( const Vec3 &, const Vec3 &, const Vec3 & ); + SphericalTriangle & operator()( const Vec3 &, const Vec3 &, const Vec3 & ); + ~SphericalTriangle( ) {} + void operator=( const SphericalTriangle &T ) { *this = T; } + Vec3 Chart ( float x, float y ) const; // Const-Jacobian map from square. + Vec2 Coord ( const Vec3 &P ) const; // Get 2D coords of a point. + int Orient( ) const { return orient; } + int Inside( const Vec3 & ) const; + float SolidAngle() const { return area; } + float SignedSolidAngle() const { return -orient * area; } // CC is pos. 
+ const Vec3 &A() const { return A_ ; } + const Vec3 &B() const { return B_ ; } + const Vec3 &C() const { return C_ ; } + float a() const { return a_ ; } + float b() const { return b_ ; } + float c() const { return c_ ; } + float Cos_a() const { return cos_a ; } + float Cos_b() const { return cos_b ; } + float Cos_c() const { return cos_c ; } + float Alpha() const { return alpha ; } + float Beta () const { return beta ; } + float Gamma() const { return gamma ; } + float CosAlpha() const { return cos_alpha; } + float CosBeta () const { return cos_beta ; } + float CosGamma() const { return cos_gamma; } + Vec3 VecIrrad() const; // Returns the vector irradiance. + SphericalTriangle Dual() const; + SphericalTriangle New_Alpha( float alpha ) const; + SphericalTriangle New_Beta ( float beta ) const; + SphericalTriangle New_Gamma( float gamma ) const; + + private: // methods + void Init( ); + void Init( const Vec3 &A, const Vec3 &B, const Vec3 &C ); + + private: // data + Vec3 A_, B_, C_, U; // The vertices (and a temp vector). + float a_, b_, c_; // The edge lengths. + float alpha, beta, gamma; // The angles. + float cos_a, cos_b, cos_c; + float cos_alpha, cos_beta, cos_gamma; + float area; + float sin_alpha, product; // Used in sampling algorithm. + int orient; // Orientation. 
+ }; + + inline double CosDihedralAngle( const Vec3 &A, const Vec3 &B, const Vec3 &C ) + { + float x = Unit( A ^ B ) * Unit( C ^ B ); + if( x < -1.0 ) x = -1.0; + if( x > 1.0 ) x = 1.0; + return x; + } + + inline double DihedralAngle( const Vec3 &A, const Vec3 &B, const Vec3 &C ) + { + return acos( CosDihedralAngle( A, B, C ) ); + } + + extern std::ostream &operator<<( std::ostream &out, const SphericalTriangle & ); +}; +#endif diff --git a/src/nvtt/bc6h/arvo/Token.cpp b/src/nvtt/bc6h/arvo/Token.cpp new file mode 100755 index 0000000..9575d92 --- /dev/null +++ b/src/nvtt/bc6h/arvo/Token.cpp @@ -0,0 +1,913 @@ +/*************************************************************************** +* Token.h * +* * +* The Token class ecapsulates a lexical analyzer for C++-like syntax. * +* A token instance is associated with one or more text files, and * +* grabs C++ tokens from them sequentially. There are many member * +* functions designed to make parsing easy, such as "==" operators for * +* strings and characters, and automatic conversion of numeric tokens * +* into numeric values. * +* * +* Files can be nested via #include directives, and both styles of C++ * +* comments are supported. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 10/05/99 Fixed bug in TokFrame string allocation. * +* arvo 01/15/95 Added ifdef, ifndef, else, and endif. * +* arvo 02/13/94 Added Debug() member function. * +* arvo 01/22/94 Several sections rewritten. * +* arvo 06/19/93 Converted to C++ * +* arvo 07/15/89 Rewritten for scene description parser. * +* arvo 01/22/89 Initial coding. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1999, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. 
See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#include +#include +#include "Token.h" +#include "Char.h" + +namespace ArvoMath { + + FILE* Token::debug = NULL; // Static data member of Token class. + int Token::argc = 0; + char** Token::argv = NULL; + + typedef TokMacro *TokMacroPtr; + + static const int True = 1; + static const int False = 0; + static const int HashConst = 217; // Size of hash-table for macros. + + + TokFrame::TokFrame() + { + next = NULL; + source = NULL; + fname = NULL; + line = 0; + column = 0; + } + + TokFrame::~TokFrame() + { + if( fname != NULL ) delete[] fname; + if( source != NULL ) fclose( source ); + } + + void TokFrame::operator=( const TokFrame &frame ) + { + next = frame.next; + source = frame.source; + fname = strdup( frame.fname ); + line = frame.line; + column = frame.column; + } + + static int HashName( const char *str ) + { + static int prime[5] = { 7, 11, 17, 23, 3 }; + int k = 0; + int h = 0; + while( *str != NullChar ) + { + h += (*str++) * prime[k++]; + if( k == 5 ) k = 0; + } + if( h < 0 ) h = 0; // Check for overflow. 
+ return h % HashConst; + } + + TokMacro *Token::MacroLookup( const char *str ) const + { + if( table == NULL ) return NULL; + int i = HashName( str ); + for( TokMacro *m = table[i]; m != NULL; m = m->next ) + { + if( strcmp( str, m->macro ) == 0 ) return m; + } + return NULL; + } + + int Token::MacroReplace( char *str, int &length, TokType &type ) const + { + TokMacro *m = MacroLookup( str ); + if( m == NULL ) return 0; + strcpy( str, m->repl ); + length = strlen( str ); + type = m->type; + return 1; + } + + /*-------------------------------------------------------------------------* + * D e b u g P r i n t * + * * + * This routine is used to record the entire token stream in a file to * + * use as a debugging aid. It does not affect the action of the lexer; * + * it merely records a "shadow" copy of all the tokens that are read by * + * ANY Token instance. The data that is written to the file is * + * * + * * + * * + *-------------------------------------------------------------------------*/ + static void DebugPrint( const Token &tok, FILE *fp ) + { + fprintf( fp, "%3d %3d ", tok.Line(), tok.Column() ); + fprintf( fp, "%s " , tok.FileName() ); + fprintf( fp, "%s\n" , tok.Spelling() ); + fflush ( fp ); + } + + /*-------------------------------------------------------------------------* + * T o k e n (Constructors) * + * * + *-------------------------------------------------------------------------*/ + Token::Token( const char *file_name ) + { + Init(); + Open( file_name ); + } + + Token::Token( FILE *fp ) + { + Init(); + Open( fp ); + } + + Token::Token( ) + { + Init(); + } + + /*-------------------------------------------------------------------------* + * T o k e n (Destructor) * + * * + * Close all files and deletes all frames and paths. * + * * + *-------------------------------------------------------------------------*/ + Token::~Token( ) + { + // Don't try to delete "frame" as its a member of this class, not + // something that we've allocated. 
+ TokFrame *f = frame.next; + while( f != NULL ) + { + TokFrame *n = f->next; + delete f; + f = n; + } + ClearPaths(); + } + + /*-------------------------------------------------------------------------* + * O p e n * + * * + * Establish a new file to read from, either by name, or by pointer. * + * * + *-------------------------------------------------------------------------*/ + void Token::Open( const char *file_name ) + { + FILE *fp = fopen( file_name, "r" ); + if( fp == NULL ) return; + Open( fp ); + frame.fname = strdup( file_name ); + } + + void Token::Open( FILE *fp ) + { + frame.source = fp; + frame.line = 1; + frame.column = 0; + pushed = NullChar; + } + + /*-------------------------------------------------------------------------* + * O p e r a t o r == * + * * + * A token can be compared with a string, a single character, or a type. * + * * + *-------------------------------------------------------------------------*/ + int Token::operator==( const char *s ) const + { + const char *t = spelling; + if( case_sensitive ) + { + do { if( *s != *t ) return False; } + while( *s++ && *t++ ); + } + else + { + do { if( ToUpper(*s) != ToUpper(*t) ) return False; } + while( *s++ && *t++ ); + } + return True; + } + + int Token::operator==( char c ) const + { + if( length != 1 ) return False; + if( case_sensitive ) return spelling[0] == c; + else return ToUpper(spelling[0]) == ToUpper(c); + } + + int Token::operator==( TokType _type_ ) const + { + int match = 0; + switch( _type_ ) + { + case T_char : match = ( type == T_string && Len() == 1 ); break; + case T_numeric: match = ( type == T_integer || type == T_float ); break; + default : match = ( type == _type_ ); break; + } + return match; + } + + /*-------------------------------------------------------------------------* + * O p e r a t o r != * + * * + * Define negations of the three types of "==" tests. 
* + * * + *-------------------------------------------------------------------------*/ + int Token::operator!=( const char *s ) const { return !( *this == s ); } + int Token::operator!=( char c ) const { return !( *this == c ); } + int Token::operator!=( TokType t ) const { return !( *this == t ); } + + /*-------------------------------------------------------------------------* + * E r r o r * + * * + * Print error message to "stderr" followed by optional "name". * + * * + *-------------------------------------------------------------------------*/ + void Token::Error( TokError error, const char *name ) + { + char *s; + switch( error ) + { + case T_malformed_float : s = "malformed real number "; break; + case T_unterm_string : s = "unterminated string "; break; + case T_unterm_comment : s = "unterminated comment "; break; + case T_file_not_found : s = "include file not found: "; break; + case T_unknown_directive : s = "unknown # directive "; break; + case T_string_expected : s = "string expected "; break; + case T_putback_error : s = "putback overflow "; break; + case T_name_too_long : s = "file name is too long "; break; + case T_no_endif : s = "#endif directive missing"; break; + case T_extra_endif : s = "#endif with no #ifdef "; break; + case T_extra_else : s = "#else with no #ifdef "; break; + default : s = "unknown error type "; break; + } + fprintf( stderr, "LEXICAL ERROR, line %d, column %d: %s", + frame.line, frame.column, s ); + if( name == NULL ) + fprintf( stderr, " \n" ); + else fprintf( stderr, "%s\n", name ); + exit( 1 ); + } + + /*-------------------------------------------------------------------------* + * G e t c * + * * + * This routine fetches one character at a time from the current file * + * being read. It is responsible for keeping track of the column number * + * and for handling single characters that have been "put back". 
* + * * + *-------------------------------------------------------------------------*/ + int Token::Getc( int &c ) + { + if( pushed != NullChar ) // Return the pushed character. + { + c = pushed; + pushed = NullChar; + } + else // Get a new character from the source file. + { + c = getc( frame.source ); + frame.column++; + } + return c; + } + + /*-------------------------------------------------------------------------* + * N o n W h i t e * + * * + * This routine implements a simple finite state machine that skips * + * white space and recognizes the two styles of comments used in C++. * + * It returns the first non-white character not part of a comment. * + * * + *-------------------------------------------------------------------------*/ + int Token::NonWhite( int &c ) + { +start_state: + Getc( c ); + if( c == Space ) goto start_state; + if( c == Tab ) goto start_state; + if( c == NewLine ) goto start_new_line; + if( c == Slash ) goto start_comment; + goto return_char; + +start_comment: + Getc( c ); + if( c == Star ) goto in_comment1; + if( c == Slash ) goto in_comment2; + Unget( c ); + c = Slash; + goto return_char; + +in_comment1: + Getc( c ); + if( c == Star ) goto end_comment1; + if( c == NewLine ) goto newline_in_comment; + if( c == EOF ) goto return_char; + goto in_comment1; + +end_comment1: + Getc( c ); + if( c == Slash ) goto start_state; + if( c == NewLine ) goto newline_in_comment; + if( c == EOF ) goto unterm_comment; + goto in_comment1; + +in_comment2: + Getc( c ); + if( c == NewLine ) goto start_new_line; + if( c == EOF ) goto return_char; + goto in_comment2; + +unterm_comment: + Error( T_unterm_comment ); + c = EOF; + goto return_char; + +start_new_line: + frame.line++; + frame.column = 0; + goto start_state; + +newline_in_comment: + frame.line++; + frame.column = 0; + goto in_comment1; + +return_char: + Tcolumn = frame.column; // This is where the token starts. 
+ return c; + } + + /*-------------------------------------------------------------------------* + * N e x t R a w T o k * + * * + *-------------------------------------------------------------------------*/ + int Token::NextRawTok( ) + { + static int Trans0[] = { 0, 1, 3, 3, 3 }; // Found a digit. + static int Trans1[] = { 5, 6, 4, 6, 7 }; // Found a sign. + static int Trans2[] = { 1, 6, 7, 6, 7 }; // Found decimal point. + static int Trans3[] = { 2, 2, 7, 6, 7 }; // Found an exponent. + static int Trans4[] = { 5, 6, 7, 6, 7 }; // Found something else. + char *tok = spelling; + int state; + int c; + + length = 0; + type = T_null; + + // Skip comments and whitespace. + + if( NonWhite( c ) == EOF ) goto endtok; + + // Is this the beginning of an identifier? If so, get the rest. + + if( isAlpha( c ) ) + { + type = T_ident; + do { + *tok++ = c; + length++; + if( Getc( c ) == EOF ) goto endtok; + } + while( isAlpha( c ) || isDigit( c ) || c == Underscore ); + Unget( c ); + goto endtok; + } + + // Is this the beginning of a number? + + else if( isDigit( c ) || c == Minus || c == Period ) + { + char c1 = c; + state = 0; + for(;;) + { + *tok++ = c; + length++; + switch( Getc( c ) ) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': state = Trans0[ state ]; break; + case '+': + case '-': state = Trans1[ state ]; break; + case '.': state = Trans2[ state ]; break; + case 'e': + case 'E': state = Trans3[ state ]; break; + default : state = Trans4[ state ]; break; + } + switch( state ) + { + case 5 : Unget( c ); + type = ( c1 == Period ) ? T_float : T_integer; + goto endtok; + case 6 : Unget( c ); type = T_float ; goto endtok; + case 7 : Error( T_malformed_float ) ; break; + default: continue; + } + } // for + } // if numeric + + // Is this the beginning of an operator? + + if( c == '*' || c == '>' || c == '<' || c == '+' || c == '-' || c == '!' 
) + { + char oldc = c; + type = T_other; + *tok++ = c; + length++; + if( Getc( c ) == EOF ) goto endtok; + if( c == oldc || c == EqualSign ) + { + *tok++ = c; + length++; + } + else Unget( c ); + goto endtok; + } + + // Is this the beginning of a string? + + else if( c == DoubleQuote ) + { + type = T_string; + while( Getc( c ) != EOF && length < MaxTokenLen ) + { + if( c == DoubleQuote ) goto endtok; + *tok++ = c; + length++; + } + Error( T_unterm_string ); + } + + // Is this the beginning of a "#" directive? + + else if( c == Hash ) + { + type = T_directive; + NonWhite( c ); + while( isAlpha( c ) ) + { + *tok++ = c; + length++; + Getc( c ); + } + Unget( c ); + goto endtok; + } + + // This must be a one-character token. + + else + { + *tok++ = c; + length = 1; + type = T_other; + } + +endtok: // Jump to here when token is completed. + + *tok = NullChar; // Terminate the string. + if( debug != NULL ) DebugPrint( *this, debug ); + + return length; + } + + /*-------------------------------------------------------------------------* + * N e x t T o k * + * * + *-------------------------------------------------------------------------*/ + int Token::NextTok( ) + { + NextRawTok(); + + // If the token is an identifier, see if it's a macro. + // If the macro substitution is null, get another token. + + if( type == T_ident ) + { + if( table != NULL ) + { + if( MacroReplace( spelling, length, type ) && debug != NULL ) + DebugPrint( *this, debug ); + } + if( type == T_nullmacro ) NextTok(); + } + return length; + } + + /*-------------------------------------------------------------------------* + * O p e r a t o r - - * + * * + * Puts back the last token found. Only one token can be put back. * + * * + *-------------------------------------------------------------------------*/ + Token & Token::operator--( ) // Put the last token back. + { + if( put_back ) Error( T_putback_error ); // Can only handle one putback. 
/*-------------------------------------------------------------------------*
 * H a n d l e D i r e c t i v e                                           *
 *                                                                         *
 * Directives beginning with "#" must be handled by the lexer, as they     *
 * determine the current source file via "#include", etc.  On entry the    *
 * current token is the directive name; the directives recognized are      *
 * define, include, ifdef, ifndef, else, endif and error.  Any other name   *
 * falls through and is ignored.                                           *
 *                                                                         *
 * Returns 1 if, after handling this directive, we now have the next       *
 * token (because one token too many had to be read); returns 0 when the   *
 * caller must fetch the next token itself.                                *
 *                                                                         *
 *-------------------------------------------------------------------------*/
int Token::HandleDirective( )
{
    FILE *fp;
    char name[128];                      // Holds the name of a <...> include.
    if( *this == "define" )
    {
        NextRawTok();
        // NOTE(review): unbounded copy; assumes tempbuff can hold any token.
        strcpy( tempbuff, Spelling() ); // This is the macro name.
        int line = Line();
        NextRawTok();
        // The replacement text is a single token on the same line.
        if( Line() == line )
            AddMacro( tempbuff, Spelling(), Type() );
        else
        {
            // If next token is on a different line; we went too far.
            AddMacro( tempbuff, "", T_nullmacro );
            return 1; // Signal that we already have the next token.
        }
    }
    else if( *this == "include" )
    {
        NextRawTok();
        if( *this == "<" )
        {
            // Bracketed form: read the name up to ">" and resolve it.
            GetName( name, sizeof(name) );
            // NOTE(review): the result of ResolveName is pushed unchecked;
            // confirm that it cannot be NULL.
            PushFrame( ResolveName( name ), name );
        }
        else if( type == T_string )
        {
            // Quoted form: the string is used as the path as-is.
            fp = fopen( spelling, "r" );
            if( fp == NULL ) Error( T_file_not_found, spelling );
            else PushFrame( fp, spelling );
        }
        else Error( T_string_expected );
    }
    else if( *this == "ifdef" )
    {
        NextRawTok();
        TokMacro *m = MacroLookup( Spelling() );
        if( m == NULL ) // Skip until else or endif.
        {
            // The first "endif" or "else" directive met ends the skip;
            // conditionals nested inside the skipped text are not counted.
            while( *this != T_null )
            {
                NextRawTok();
                if( *this != T_directive ) continue;
                if( *this == "endif" ) break;
                if( *this == "else" ) { if_nesting++; break; } // Like m != NULL.
            }
            if( *this == T_null ) Error( T_no_endif );
            return 0; // Ready to get the next token.
        }
        else if_nesting++;
    }
    else if( *this == "ifndef" )
    {
        NextRawTok();
        TokMacro *m = MacroLookup( Spelling() );
        if( m != NULL ) // Skip until else or endif.
        {
            // Same skipping rule as for "ifdef" above.
            while( *this != T_null )
            {
                NextRawTok();
                if( *this != T_directive ) continue;
                if( *this == "endif" ) break;
                if( *this == "else" ) { if_nesting++; break; } // Like m == NULL.
            }
            if( *this == T_null ) Error( T_no_endif );
            return 0; // Ready to get the next token.
        }
        else if_nesting++;
    }
    else if( *this == "else" ) // Skip until #endif.
    {
        // Reaching "else" here means the preceding branch was being read,
        // so this branch is the one to discard.
        if( if_nesting == 0 ) Error( T_extra_else );
        while( *this != T_null )
        {
            NextRawTok();
            if( *this == T_directive && *this == "endif" ) break;
        }
        if( *this == T_null ) Error( T_no_endif );
        if_nesting--;
        return 0; // Ready to get next token.
    }
    else if( *this == "endif" )
    {
        if( if_nesting == 0 ) Error( T_extra_endif );
        if_nesting--;
        return 0; // Ready to get next token.
    }
    else if( *this == "error" )
    {
        int line = Line();
        NextTok(); // Allow macro substitution.
        // Only the first token on the directive's line is printed.
        if( Line() == line )
        {
            fprintf( stderr, "(preprocessor, line %d) %s\n", line, Spelling() );
            return 0; // Ready to get next token.
        }
        else
        {
            // If next token is on a different line; we went too far.
            fprintf( stderr, "(null preprocessor message, line %d)\n", line );
            return 1; // Signal that we already have the next token.
        }
    }
    return 0;
}
+ + for(;;) + { + NextTok(); + if( type == T_directive ) + { + if( HandleDirective() ) break; + } + else if( type == T_null ) + { + fclose( frame.source ); + if( !PopFrame() ) break; + } + else break; // We have a real token. + } + + // Now fill in the value fields if the token is a number. + + switch( type ) + { + case T_integer : ivalue = atoi( spelling ); break; + case T_float : fvalue = atof( spelling ); break; + case T_null : if( if_nesting > 0 ) Error( T_no_endif ); break; + default : break; + } + + return *this; + } + + Token & Token::operator++( int ) + { + fprintf( stderr, "Postfix increment is not implemented for the Token class.\n" ); + return *this; + } + + /*-------------------------------------------------------------------------* + * T o k e n Push & Pop Frame * + * * + * These functions are used to create and destroy the context "frames" * + * that are used to handle nested files (via "include"). * + * * + *-------------------------------------------------------------------------*/ + void Token::PushFrame( FILE *fp, char *fname ) + { + // Create a copy of the current (top-level) frame. + + TokFrame *n = new TokFrame; + *n = frame; + + // Now overwrite the top-level frame with the new state. + + frame.next = n; + frame.source = fp; + frame.line = 1; + frame.column = 0; + frame.fname = strdup( fname ); + pushed = NullChar; + } + + int Token::PopFrame() + { + if( frame.next == NULL ) return 0; + TokFrame *old = frame.next; + frame = *old; + delete old; // Delete the frame that we just copied from. 
+ return 1; + } + + /*-------------------------------------------------------------------------* + * Miscellaneous Functions * + * * + *-------------------------------------------------------------------------*/ + void Token::Init() + { + case_sensitive = 1; + put_back = 0; + pushed = NullChar; + if_nesting = 0; + frame.source = NULL; + frame.next = NULL; + frame.fname = NULL; + first = NULL; + last = NULL; + table = NULL; + pushed = NullChar; + SearchArgs(); // Search command-line args for macro definitions. + } + + const char* Token::Spelling() const + { + return spelling; + } + + char Token::Char() const + { + return spelling[0]; + } + + const char* Token::FileName() const + { + static char *null_string = ""; + if( frame.fname == NULL ) return null_string; + else return frame.fname; + } + + float Token::Fvalue() const + { + float val = 0.0; + if( type == T_float ) val = fvalue; + if( type == T_integer ) val = ivalue; + return val; + } + + void Token::GetName( char *name, int max ) + { + int c; + for( int i = 1; i < max; i++ ) + { + if( NonWhite(c) == '>' ) + { + *name = NullChar; + return; + } + *name++ = c; + } + Error( T_name_too_long ); + } + + void Token::AddPath( const char *new_path ) + { + char *name = strdup( new_path ); + if( name == NULL ) return; + TokPath *p = new TokPath; + p->next = NULL; + p->path = name; + if( first == NULL ) first = p; + else last->next = p; + last = p; + } + + void Token::ClearPaths() + { + TokPath *p = first; + while( p != NULL ) + { + TokPath *q = p->next; + delete[] p->path; // delete the string. + delete p; // delete the path structure. 
+ p = q; + } + first = NULL; + last = NULL; + } + + FILE *Token::ResolveName( const char *name ) + { + char resolved[128]; + for( const TokPath *p = first; p != NULL; p = p->next ) + { + strcpy( resolved, p->path ); + strcat( resolved, "/" ); + strcat( resolved, name ); + FILE *fp = fopen( resolved, "r" ); + if( fp != NULL ) return fp; + } + Error( T_file_not_found, name ); + return NULL; + } + + void Token::CaseSensitive( int on_off = 1 ) + { + case_sensitive = on_off; + } + + void Token::Debug( FILE *fp ) + { + debug = fp; + } + + void Token::AddMacro( const char *macro, const char *repl, TokType t ) + { + if( table == NULL ) // Create and initialize the table. + { + table = new TokMacroPtr[ HashConst ]; + for( int j = 0; j < HashConst; j++ ) table[j] = NULL; + } + int i = HashName( macro ); + TokMacro *m = new TokMacro; + m->next = table[i]; + m->macro = strdup( macro ); + m->repl = strdup( repl ); + m->type = t; + table[i] = m; + } + + void Token::Args( int argc_, char *argv_[] ) + { + argc = argc_; // Set the static variables. + argv = argv_; + } + + void Token::SearchArgs( ) + { + TokType type = T_null; + for( int i = 1; i < argc; i++ ) + { + if( strcmp( argv[i], "-macro" ) == 0 ) + { + if( i+2 >= argc ) + { + fprintf( stderr, "(Token) ERROR macro argument(s) missing\n" ); + return; + } + char *macro = argv[i+1]; + char *repl = argv[i+2]; + if( isAlpha ( repl[0] ) ) type = T_ident ; else + if( isInteger( repl ) ) type = T_integer; else + type = T_float ; + AddMacro( macro, repl, type ); + i += 2; + } + } + } +}; diff --git a/src/nvtt/bc6h/arvo/Token.h b/src/nvtt/bc6h/arvo/Token.h new file mode 100755 index 0000000..eabdacc --- /dev/null +++ b/src/nvtt/bc6h/arvo/Token.h @@ -0,0 +1,203 @@ +/*************************************************************************** +* Token.h * +* * +* The Token class ecapsulates a lexical analyzer for C++-like syntax. 
+* A token instance is associated with one or more text files, and         *
+* grabs C++ tokens from them sequentially.  There are many member          *
+* functions designed to make parsing easy, such as "==" operators for      *
+* strings and characters, and automatic conversion of numeric tokens       *
+* into numeric values.                                                     *
+*                                                                          *
+* Files can be nested via #include directives, and both styles of C++      *
+* comments are supported.                                                  *
+*                                                                          *
+*                                                                          *
+* HISTORY                                                                  *
+*    Name    Date        Description                                       *
+*                                                                          *
+*    arvo    10/05/99    Fixed bug in TokFrame string allocation.          *
+*    arvo    01/15/95    Added ifdef, ifndef, else, and endif.             *
+*    arvo    02/13/94    Added Debug() member function.                    *
+*    arvo    01/22/94    Several sections rewritten.                       *
+*    arvo    06/19/93    Converted to C++                                  *
+*    arvo    07/15/89    Rewritten for scene description parser.           *
+*    arvo    01/22/89    Initial coding.                                   *
+*                                                                          *
+*--------------------------------------------------------------------------*
+* Copyright (C) 1999, James Arvo                                           *
+*                                                                          *
+* This program is free software; you can redistribute it and/or modify it  *
+* under the terms of the GNU General Public License as published by the    *
+* Free Software Foundation.  See http://www.fsf.org/copyleft/gpl.html      *
+*                                                                          *
+* This program is distributed in the hope that it will be useful, but      *
+* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for    *
+* any particular purpose.  See the GNU General Public License for more     *
+* details.                                                                 *
+*                                                                          *
+***************************************************************************/
+#ifndef __TOKEN_INCLUDED__
+#define __TOKEN_INCLUDED__
+
+// NOTE(review): both header names below are missing from this copy of the
+// patch.  FILE and NULL are used in this header, so one of them is presumably
+// <stdio.h> -- confirm against the original source.
+#include
+#include
+
+namespace ArvoMath {
+
+    // Size of the spelling and scratch buffers inside Token.
+    const int MaxTokenLen = 128;
+
+    // Classification attached to every token.
+    typedef enum {
+        T_null,
+        T_char,       // A string of length 1.
+        T_string,
+        T_integer,
+        T_float,
+        T_ident,
+        T_other,
+        T_numeric,    // Either T_float or T_int (use with == operator).
+        T_directive,  // Directives like #include are not returned to the user.
+        T_nullmacro
+    } TokType;
+
+    // Error codes passed to Token::Error().
+    typedef enum {
+        T_malformed_float,
+        T_unterm_string,
+        T_unterm_comment,
+        T_file_not_found,
+        T_unknown_directive,
+        T_string_expected,
+        T_putback_error,
+        T_name_too_long,
+        T_no_endif,
+        T_extra_endif,
+        T_extra_else
+    } TokError;
+
+    // One entry in the stack of nested source files.  The Token holds the
+    // innermost frame by value; enclosing frames hang off "next".
+    // NOTE(review): the copy constructor assigns into members that have not
+    // been initialized yet; that is only safe if operator= never reads the
+    // old fname -- confirm in Token.cpp.
+    class TokFrame {
+    public:
+        TokFrame();
+        TokFrame( const TokFrame &frame ) { *this = frame; }
+       ~TokFrame();
+        void operator=( const TokFrame & );
+    public:
+        TokFrame *next;    // Enclosing frame, or NULL for the outermost file.
+        FILE     *source;  // Stream being read.
+        char     *fname;   // Name of that stream; may be NULL.
+        int       line;
+        int       column;
+    };
+
+    // Node in the singly-linked list of search paths for <...> includes.
+    struct TokPath {
+        char    *path;
+        TokPath *next;
+    };
+
+    // Node in one bucket of the macro hash table.
+    struct TokMacro {
+        char     *macro;  // Macro name.
+        char     *repl;   // Replacement text.
+        TokType   type;   // Token type given to the replacement.
+        TokMacro *next;
+    };
+
+    class Token {
+
+    public:
+        // Constructors and destructor.
+
+        Token();
+        Token( const char *file_name    );
+        Token( FILE       *file_pointer );
+       ~Token();
+
+        // Const data members for querying token information.
+
+        TokType Type()   const { return type;       } // The type of token found.
+        int     Len()    const { return length;     } // The length of the token.
+        int     Line()   const { return frame.line; } // The line it was found on.
+        int     Column() const { return Tcolumn;    } // The column it began in.
+        long    Ivalue() const { return ivalue;     } // Token value if an integer.
+        float   Fvalue() const;                       // Token value if int or float.
+        char    Char()   const;                       // The token (if a Len() == 1).
+
+        // Operators.
+
+        int operator == ( const char* ) const;  // 1 if strings match.
+        int operator != ( const char* ) const;  // 0 if strings match.
+        int operator == ( char        ) const;  // 1 if token is this char.
+        int operator != ( char        ) const;  // 0 if token is this char.
+        int operator == ( TokType     ) const;  // 1 if token is of this type.
+        int operator != ( TokType     ) const;  // 0 if token is of this type.
+        Token & operator ++ (     );            // (prefix) Get the next token.
+        Token & operator -- (     );            // (prefix) Put back one token.
+        Token & operator ++ ( int );            // (postfix) Undefined.
+        Token & operator -- ( int );            // (postfix) Undefined.
+
+        // State-setting member functions.
+
+        void Open( FILE * );               // Read already opened file.
+        void Open( const char * );         // Open the named file.
+        void CaseSensitive( int on_off );  // Applies to == and != operators.
+        void AddPath( const char * );      // Adds path for <...> includes.
+        void ClearPaths();                 // Remove all search paths.
+
+        // Miscellaneous.
+
+        const char* Spelling() const;      // The token itself.
+        const char* FileName() const;      // Current file being lexed.
+        static void Debug( FILE * );       // Write all token streams to a file.
+        static void Args ( int argc, char *argv[] ); // Search args for macro settings.
+        void AddMacro( const char*, const char*, TokType type );
+        void SearchArgs();
+
+    private:
+
+        // Private member functions.
+
+        void  Init();
+        int   Getc ( int & );
+        void  Unget( int c ) { pushed = c; }
+        void  Error( TokError error, const char *name = NULL );
+        int   NonWhite( int & );
+        int   HandleDirective();
+        int   NextRawTok(); // No macro substitutions.
+        int   NextTok();
+        void  PushFrame( FILE *fp, char *fname = NULL );
+        int   PopFrame();
+        void  GetName( char *name, int max );
+        FILE *ResolveName( const char *name );
+        TokMacro *MacroLookup( const char *str ) const;
+        int   MacroReplace( char *str, int &length, TokType &type ) const;
+
+        // Private data members.
+
+        TokPath   *first;          // Head of the include search-path list.
+        TokPath   *last;           // Tail of the include search-path list.
+        TokMacro **table;          // Macro hash table; NULL until a macro is added.
+        TokFrame   frame;          // Innermost source file.
+        TokType    type;
+        long       ivalue;
+        float      fvalue;
+        int        length;
+        int        Tcolumn;
+        int        put_back;       // Nonzero while a token has been put back.
+        int        case_sensitive;
+        int        pushed;         // Character returned by Unget().
+        int        if_nesting;     // Conditional-directive nesting counter.
+        char       spelling[ MaxTokenLen ];
+        char       tempbuff[ MaxTokenLen ];
+
+        // Static data members.
+
+        static int    argc;
+        static char **argv;
+        static FILE  *debug;
+    };
+
+
+    // Predicate-style functions for testing token types.
+
+    // Null and StringP test the stored type directly.  Numeric goes through
+    // Token::operator==( TokType ) because T_numeric is documented in the
+    // TokType enum as a class covering T_float and T_integer, "use with ==
+    // operator".  Comparing Type() with T_numeric would miss numeric tokens.
+    // NOTE(review): assumes operator==( TokType ) implements that matching
+    // as the enum comment describes -- confirm in Token.cpp.
+    inline int Null   ( const Token &t ) { return t.Type() == T_null;   }
+    inline int Numeric( const Token &t ) { return t == T_numeric;       }
+    inline int StringP( const Token &t ) { return t.Type() == T_string; }
+};
+#endif
diff --git a/src/nvtt/bc6h/arvo/Vec2.cpp b/src/nvtt/bc6h/arvo/Vec2.cpp
new file mode 100755
index 0000000..cca6723
--- /dev/null
+++ b/src/nvtt/bc6h/arvo/Vec2.cpp
@@ -0,0 +1,94 @@
+/***************************************************************************
+* Vec2.C                                                                   *
+*                                                                          *
+* Basic operations on 2-dimensional vectors.  This special case is useful  *
+* because nearly all operations are performed inline.                      *
+*                                                                          *
+* HISTORY                                                                  *
+*    Name    Date        Description                                       *
+*                                                                          *
+*    arvo    05/22/98    Added TimedVec2, extending Vec2.                  *
+*    arvo    06/17/93    Initial coding.                                   *
+*                                                                          *
+*--------------------------------------------------------------------------*
+* Copyright (C) 1999, James Arvo                                           *
+*                                                                          *
+* This program is free software; you can redistribute it and/or modify it  *
+* under the terms of the GNU General Public License as published by the    *
+* Free Software Foundation.  See http://www.fsf.org/copyleft/gpl.html      *
+*                                                                          *
+* This program is distributed in the hope that it will be useful, but      *
+* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for    *
+* any particular purpose.  See the GNU General Public License for more     *
+* details.                                                                 *
+*                                                                          *
+***************************************************************************/
+#include <iostream>
+#include "ArvoMath.h"
+#include "Vec2.h"
+#include "form.h"
+
+namespace ArvoMath {
+
+    const Vec2 Vec2::Zero;
+    const Vec2 Vec2::Xaxis( 1, 0 );
+    const Vec2 Vec2::Yaxis( 0, 1 );
+
+    // Most routines are now inline.
+
+    // Scales A to unit length in place and returns its original length.
+    // A zero-length vector is left untouched.
+    float Normalize( Vec2 &A )
+    {
+        const float length = Len( A );
+        if( length == 0.0 ) return length;
+        A.X() /= length;
+        A.Y() /= length;
+        return length;
+    }
+
+    // Component-wise minimum of two vectors.
+    Vec2 Min( const Vec2 &A, const Vec2 &B )
+    {
+        const float lo_x = Min( A.X(), B.X() );
+        const float lo_y = Min( A.Y(), B.Y() );
+        return Vec2( lo_x, lo_y );
+    }
+
+    // Component-wise maximum of two vectors.
+    Vec2 Max( const Vec2 &A, const Vec2 &B )
+    {
+        const float hi_x = Max( A.X(), B.X() );
+        const float hi_y = Max( A.Y(), B.Y() );
+        return Vec2( hi_x, hi_y );
+    }
+
+    // Writes both components on one line, followed by a newline.
+    std::ostream &operator<<( std::ostream &out, const Vec2 &A )
+    {
+        return out << form( " %9.5f %9.5f\n", A.X(), A.Y() );
+    }
+
+    // Writes the matrix one row per line, then an extra line break and flush.
+    std::ostream &operator<<( std::ostream &out, const Mat2x2 &M )
+    {
+        out << form( " %9.5f %9.5f\n", M(0,0), M(0,1) );
+        out << form( " %9.5f %9.5f\n", M(1,0), M(1,1) );
+        out << std::endl;
+        return out;
+    }
+
+    // Builds the matrix whose first column is c1 and second column is c2.
+    Mat2x2::Mat2x2( const Vec2 &c1, const Vec2 &c2 )
+    {
+        Set( c1.X(), c2.X(),
+             c1.Y(), c2.Y() );
+    }
+
+    // Return solution x of the system Ax = b.
+    // A matrix judged singular relative to machine precision gives Vec2::Zero.
+    Vec2 Solve( const Mat2x2 &A, const Vec2 &b )
+    {
+        const float  eps         = MachineEpsilon();
+        const double determinant = det( A );
+        const double magnitude   = Norm1( A );
+        const bool   singular    = ( magnitude <= eps ) ||
+                                   ( Abs(determinant) <= eps * magnitude );
+        if( singular ) return Vec2::Zero;
+
+        // Cramer's rule: apply the adjugate to b, then divide by det( A ).
+        Vec2 adj_b(  A(1,1) * b.X() - A(0,1) * b.Y(),
+                    -A(1,0) * b.X() + A(0,0) * b.Y() );
+        return adj_b / determinant;
+    }
+};
diff --git a/src/nvtt/bc6h/arvo/Vec2.h b/src/nvtt/bc6h/arvo/Vec2.h
new file mode 100755
index 0000000..7aca458
--- /dev/null
+++ b/src/nvtt/bc6h/arvo/Vec2.h
@@ -0,0 +1,358 @@
+/***************************************************************************
+* Vec2.h                                                                   *
+*                                                                          *
+* Basic operations on 2-dimensional vectors.  This special case is useful  *
+* because nearly all operations are performed inline.                      *
+*                                                                          *
+* HISTORY                                                                  *
+*    Name    Date        Description                                       *
+*                                                                          *
+*    arvo    05/22/98    Added TimedVec2, extending Vec2.                  *
+*    arvo    06/17/93    Initial coding.                                   *
+*                                                                          *
+*--------------------------------------------------------------------------*
+* Copyright (C) 1999, James Arvo                                           *
+*                                                                          *
+* This program is free software; you can redistribute it and/or modify it  *
+* under the terms of the GNU General Public License as published by the    *
+* Free Software Foundation.  See http://www.fsf.org/copyleft/gpl.html      *
+*                                                                          *
+* This program is distributed in the hope that it will be useful, but      *
+* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for    *
+* any particular purpose.  See the GNU General Public License for more     *
+* details.                                                                 *
+*                                                                          *
+***************************************************************************/
+#ifndef __VEC2_INCLUDED__
+#define __VEC2_INCLUDED__
+
+#include <math.h>
+#include <iostream>
+#include "ArvoMath.h"
+
+namespace ArvoMath {
+
+    class Vec2;       // 2-D floating-point vector.
+    class TimedVec2;  // 2-D vector with a time stamp.
+    class Mat2x2;     // 2x2 floating-point matrix.
+
+    // A pair of floats with accessor-style component access.
+    class Vec2 {
+    public:
+        Vec2(                  ) : x( 0.0 ),   y( 0.0 )   {}
+        Vec2( float a, float b ) : x( a ),     y( b )     {}
+        Vec2( const Vec2 &A    ) : x( A.X() ), y( A.Y() ) {}
+       ~Vec2() {}
+        Vec2 &operator=( float s       ) { return Set( s, s ); }
+        Vec2 &operator=( const Vec2 &A ) { return Set( A.X(), A.Y() ); }
+        float  X() const { return x; }
+        float  Y() const { return y; }
+        float &X()       { return x; }
+        float &Y()       { return y; }
+        // Index 0 selects x; any other index selects y.  The members are
+        // picked explicitly because pointer arithmetic from &x into y is
+        // undefined behaviour.
+        float  operator[]( int i ) const { return i == 0 ? x : y; }
+        float &operator[]( int i )       { return i == 0 ? x : y; }
+        Vec2 &Set( float a, float b ) { x = a; y = b; return *this; }
+        Vec2 &Set( const Vec2 &A    ) { return Set( A.X(), A.Y() ); }
+    public:
+        static const Vec2 Zero;
+        static const Vec2 Xaxis;
+        static const Vec2 Yaxis;
+    protected:
+        float x, y;
+    };
+
+    // This class simply adds a time field to the Vec2 class so that time-stamped
+    // coordinates can be easily inserted into objects such as Polylines.
+
+    class TimedVec2 : public Vec2 {
+    public:
+        TimedVec2() { time = 0; }
+        TimedVec2( const Vec2 &p   , long u = 0 ) { Set( p ); time = u; }
+        TimedVec2( float x, float y, long u = 0 ) { Set(x,y); time = u; }
+       ~TimedVec2() {}
+        Vec2 &Coord()       { return *this; }
+        Vec2  Coord() const { return *this; }
+        long  Time () const { return time;  }
+        void  SetTime( long u ) { time = u; }
+    protected:
+        long time;
+    };
+
+    // 2x2 matrix indexed as ( row, column ).
+    class Mat2x2 {
+    public:
+        Mat2x2(                                    ) { Set( 0, 0, 0, 0 ); }
+        Mat2x2( float a, float b, float c, float d ) { Set( a, b, c, d ); }
+        Mat2x2( const Vec2 &c1, const Vec2 &c2 );
+       ~Mat2x2(         ) {}
+        Mat2x2 &operator*=( float scale );
+        Mat2x2  operator* ( float scale ) const;
+        void    Set( float a, float b, float c, float d )
+            { m[0][0] = a; m[0][1] = b; m[1][0] = c; m[1][1] = d; }
+        float  operator()( int i, int j ) const { return m[i][j]; }
+        float &operator()( int i, int j )       { return m[i][j]; }
+    private:
+        float m[2][2];
+    };
+
+
+    //==========================================
+    //===  Miscellaneous external functions  ===
+    //==========================================
+
+    extern float Normalize( Vec2 &A );
+    extern Vec2  Min      ( const Vec2 &A, const Vec2 &B );
+    extern Vec2  Max      ( const Vec2 &A, const Vec2 &B );
+
+
+    //==========================================
+    //===  Norm-related functions            ===
+    //==========================================
+
+    inline double LenSqr ( const Vec2 &A ) { return Sqr(A[0]) + Sqr(A[1]);     }
+    inline double Len    ( const Vec2 &A ) { return sqrt( LenSqr( A ) );       }
+    inline double OneNorm( const Vec2 &A ) { return Abs( A.X() ) + Abs( A.Y() ); }
+    inline double TwoNorm( const Vec2 &A ) { return Len(A);                    }
+    inline float  SupNorm( const Vec2 &A ) { return MaxAbs( A.X(), A.Y() );    }
+
+
+    //==========================================
+    //===  Addition                          ===
+    //==========================================
+
+    inline Vec2 operator+( const Vec2 &A, const Vec2 &B )
+    {
+        return Vec2( A.X() + B.X(), A.Y() + B.Y() );
+    }
+
+    inline Vec2& operator+=( Vec2 &A, const Vec2 &B )
+    {
+        A.X() += B.X();
+        A.Y() += B.Y();
+        return A;
+    }
+
+
+    //==========================================
+    //===  Subtraction                       ===
+    //==========================================
+
+    inline Vec2 operator-( const Vec2 &A, const Vec2 &B )
+    {
+        return Vec2( A.X() - B.X(), A.Y() - B.Y() );
+    }
+
+    inline Vec2 operator-( const Vec2 &A )
+    {
+        return Vec2( -A.X(), -A.Y() );
+    }
+
+    inline Vec2& operator-=( Vec2 &A, const Vec2 &B )
+    {
+        A.X() -= B.X();
+        A.Y() -= B.Y();
+        return A;
+    }
+
+
+    //==========================================
+    //===  Multiplication                    ===
+    //==========================================
+
+    inline Vec2 operator*( float c, const Vec2 &A )
+    {
+        return Vec2( c * A.X(), c * A.Y() );
+    }
+
+    inline Vec2 operator*( const Vec2 &A, float c )
+    {
+        return Vec2( c * A.X(), c * A.Y() );
+    }
+
+    inline float operator*( const Vec2 &A, const Vec2 &B ) // Inner product
+    {
+        return A.X() * B.X() + A.Y() * B.Y();
+    }
+
+    inline Vec2& operator*=( Vec2 &A, float c )
+    {
+        A.X() *= c;
+        A.Y() *= c;
+        return A;
+    }
+
+    //==========================================
+    //===  Division                          ===
+    //==========================================
+
+    inline Vec2 operator/( const Vec2 &A, float c )
+    {
+        return Vec2( A.X() / c, A.Y() / c );
+    }
+
+    // Removes from A its component along B.  A zero-length B has no direction
+    // to remove, so A is returned unchanged instead of a vector of NaNs.
+    inline Vec2 operator/( const Vec2 &A, const Vec2 &B )
+    {
+        const double len_sqr = LenSqr( B );
+        if( len_sqr == 0.0 ) return A;
+        return A - B * (( A * B ) / len_sqr);
+    }
+
+
+    //==========================================
+    //===  Comparison                        ===
+    //==========================================
+
+    // The ordering operators hold only when the relation holds for both
+    // components, so they do not form a total order.
+
+    inline int operator==( const Vec2 &A, const Vec2 &B )
+    {
+        return A.X() == B.X() && A.Y() == B.Y();
+    }
+
+    inline int operator!=( const Vec2 &A, const Vec2 &B )
+    {
+        return A.X() != B.X() || A.Y() != B.Y();
+    }
+
+    inline int operator<=( const Vec2 &A, const Vec2 &B )
+    {
+        return A.X() <= B.X() && A.Y() <= B.Y();
+    }
+
+    inline int operator<( const Vec2 &A, const Vec2 &B )
+    {
+        return A.X() < B.X() && A.Y() < B.Y();
+    }
+
+    inline int operator>=( const Vec2 &A, const Vec2 &B )
+    {
+        return A.X() >= B.X() && A.Y() >= B.Y();
+    }
+
+    inline int operator>( const Vec2 &A, const Vec2 &B )
+    {
+        return A.X() > B.X() && A.Y() > B.Y();
+    }
+
+    //==========================================
+    //===  Miscellaneous                     ===
+    //==========================================
+
+    inline float operator|( const Vec2 &A, const Vec2 &B ) // Inner product
+    {
+        return A * B;
+    }
+
+    // Unit vector along A; a zero vector is returned as zero.
+    inline Vec2 Unit( const Vec2 &A )
+    {
+        float c = LenSqr( A );
+        if( c > 0.0 ) c = 1.0 / sqrt( c );
+        return c * A;
+    }
+
+    // As above, and also reports the original length through "len".
+    inline Vec2 Unit( const Vec2 &A, float &len )
+    {
+        float c = LenSqr( A );
+        if( c > 0.0 )
+        {
+            len = sqrt( c );
+            return A / len;
+        }
+        len = 0.0;
+        return A;
+    }
+
+    inline Vec2 Unit( float x, float y )
+    {
+        return Unit( Vec2( x, y ) );
+    }
+
+    inline double dist( const Vec2 &A, const Vec2 &B )
+    {
+        return Len( A - B );
+    }
+
+    // Scalar 2-D cross product: A.x * B.y - A.y * B.x.
+    inline float operator^( const Vec2 &A, const Vec2 &B )
+    {
+        return A.X() * B.Y() - A.Y() * B.X();
+    }
+
+    // Quadrant number 1..4; zero coordinates count as non-negative.
+    inline int Quadrant( const Vec2 &A )
+    {
+        if( A.Y() >= 0.0 ) return A.X() >= 0.0 ? 1 : 2;
+        return A.X() >= 0.0 ? 4 : 3;
+    }
+
+    inline Vec2 OrthogonalTo( const Vec2 &A ) // A vector orthogonal to that given.
+    {
+        return Vec2( -A.Y(), A.X() );
+    }
+
+    inline Vec2 Interpolate( const Vec2 &A, const Vec2 &B, float t )
+    {
+        // Compute a point along the segment joining points A and B
+        // according to the normalized parameter t in [0,1].
+        return ( 1.0 - t ) * A + t * B;
+    }
+
+    //==========================================
+    //===  Operations involving Matrices     ===
+    //==========================================
+
+    inline Mat2x2 Outer( const Vec2 &A, const Vec2 &B ) // Outer product.
+    {
+        Mat2x2 C;
+        C(0,0) = A.X() * B.X();
+        C(0,1) = A.X() * B.Y();
+        C(1,0) = A.Y() * B.X();
+        C(1,1) = A.Y() * B.Y();
+        return C;
+    }
+
+    inline Vec2 operator*( const Mat2x2 &M, const Vec2 &A )
+    {
+        return Vec2(
+            M(0,0) * A.X() + M(0,1) * A.Y(),
+            M(1,0) * A.X() + M(1,1) * A.Y()
+            );
+    }
+
+    inline Mat2x2 &Mat2x2::operator*=( float scale )
+    {
+        m[0][0] *= scale;
+        m[0][1] *= scale;
+        m[1][0] *= scale;
+        m[1][1] *= scale;
+        return *this;
+    }
+
+    inline Mat2x2 Mat2x2::operator*( float scale ) const
+    {
+        return Mat2x2(
+            scale * m[0][0], scale * m[0][1],
+            scale * m[1][0], scale * m[1][1]
+            );
+    }
+
+    inline Mat2x2 operator*( float scale, const Mat2x2 &M )
+    {
+        return M * scale;
+    }
+
+    // NOTE(review): this returns the largest absolute ROW sum, which is the
+    // infinity norm; the matrix 1-norm is the largest absolute column sum.
+    // Left unchanged because Solve() uses it only as a magnitude estimate.
+    inline float Norm1( const Mat2x2 &A )
+    {
+        return Max( Abs(A(0,0)) + Abs(A(0,1)), Abs(A(1,0)) + Abs(A(1,1)) );
+    }
+
+    inline double det( const Mat2x2 &A )
+    {
+        return A(0,0) * A(1,1) - A(1,0) * A(0,1);
+    }
+
+    extern Vec2 Solve(    // Return solution x of the system Ax = b.
+        const Mat2x2 &A,
+        const Vec2   &b
+        );
+
+    //==========================================
+    //===  Output routines                   ===
+    //==========================================
+
+    extern std::ostream &operator<<( std::ostream &out, const Vec2   & );
+    extern std::ostream &operator<<( std::ostream &out, const Mat2x2 & );
+};
+#endif
diff --git a/src/nvtt/bc6h/arvo/Vec3.cpp b/src/nvtt/bc6h/arvo/Vec3.cpp
new file mode 100755
index 0000000..1033f84
--- /dev/null
+++ b/src/nvtt/bc6h/arvo/Vec3.cpp
@@ -0,0 +1,119 @@
+/***************************************************************************
+* Vec3.C                                                                   *
+*                                                                          *
+* Basic operations on 3-dimensional vectors.  This special case is useful  *
+* because many operations are performed inline.                            *
+*                                                                          *
+* HISTORY                                                                  *
+*    Name    Date        Description                                       *
+*                                                                          *
+*    arvo    10/27/94    Reorganized (removed Col & Row distinction).      *
+*    arvo    06/14/93    Initial coding.                                   *
+*                                                                          *
+*--------------------------------------------------------------------------*
+* Copyright (C) 1994, James Arvo                                           *
+*                                                                          *
+* This program is free software; you can redistribute it and/or modify it  *
+* under the terms of the GNU General Public License as published by the    *
+* Free Software Foundation.  See http://www.fsf.org/copyleft/gpl.html      *
+*                                                                          *
+* This program is distributed in the hope that it will be useful, but      *
+* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for    *
+* any particular purpose.  See the GNU General Public License for more     *
+* details.                                                                 *
+*                                                                          *
+***************************************************************************/
+#include <math.h>
+#include <iostream>
+#include "ArvoMath.h"
+#include "Vec3.h"
+#include "form.h"
+
+namespace ArvoMath {
+
+    // Scales A to unit length in place and returns its original length.
+    // A zero-length vector is left untouched.
+    float Normalize( Vec3 &A )
+    {
+        float d = Len( A );
+        if( d > 0.0 )
+        {
+            double c = 1.0 / d;
+            A.X() *= c;
+            A.Y() *= c;
+            A.Z() *= c;
+        }
+        return( d );
+    }
+
+    // Angle between A and B as computed by ArcCos; 0 when either vector has
+    // zero length.
+    double Angle( const Vec3 &A, const Vec3 &B )
+    {
+        double t = LenSqr(A) * LenSqr(B);
+        if( t <= 0.0 ) return 0.0;
+        return ArcCos( (A * B) / sqrt(t) );
+    }
+
+    /*-------------------------------------------------------------------------*
+     *  O R T H O N O R M A L                                                  *
+     *                                                                         *
+     *  On Input   A, B....: Two linearly independent 3-space vectors.         *
+     *                                                                         *
+     *  On Return  A.......: Unit vector pointing in original A direction.     *
+     *             B.......: Unit vector orthogonal to A and in subspace       *
+     *                       spanned by original A and B vectors.              *
+     *             C.......: Unit vector orthogonal to both A and B, chosen so *
+     *                       that A-B-C forms a right-handed coordinate system.*
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    // Returns 0 on success, or 1 when A, or B after removal of its component
+    // along A, has zero length.
+    int Orthonormal( Vec3 &A, Vec3 &B, Vec3 &C )
+    {
+        if( Normalize( A ) != 0.0 )
+        {
+            B /= A;
+            if( Normalize( B ) != 0.0 )
+            {
+                C = A ^ B;
+                return 0;
+            }
+        }
+        return 1;
+    }
+
+    // Two-vector form: same as above without producing the third axis.
+    int Orthonormal( Vec3 &A, Vec3 &B )
+    {
+        if( Normalize( A ) != 0.0 )
+        {
+            B /= A;
+            if( Normalize( B ) != 0.0 ) return 0;
+        }
+        return 1;
+    }
+
+    /*-------------------------------------------------------------------------*
+     *  O R T H O G O N A L  T O                                               *
+     *                                                                         *
+     *  Returns a vector that is orthogonal to A (but of arbitrary length).    *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    Vec3 OrthogonalTo( const Vec3 &A )
+    {
+        // Half the largest component magnitude.  The first branch whose
+        // component reaches it gives a result that cannot be zero.
+        const float half_sup = 0.5 * SupNorm( A );
+        if( half_sup == 0.0 )
+        {
+            return Vec3( 1.0, 0.0, 0.0 );
+        }
+        else if( half_sup <= Abs(A.X()) )
+        {
+            return Vec3( -A.Y(), A.X(), 0.0 );
+        }
+        else if( half_sup <= Abs(A.Y()) )
+        {
+            return Vec3( 0.0, -A.Z(), A.Y() );
+        }
+        return Vec3( A.Z(), 0.0, -A.X() );
+    }
+
+    // Component-wise minimum of two vectors.
+    Vec3 Min( const Vec3 &A, const Vec3 &B )
+    {
+        const float lo_x = Min( A.X(), B.X() );
+        const float lo_y = Min( A.Y(), B.Y() );
+        const float lo_z = Min( A.Z(), B.Z() );
+        return Vec3( lo_x, lo_y, lo_z );
+    }
+
+    // Component-wise maximum of two vectors.
+    Vec3 Max( const Vec3 &A, const Vec3 &B )
+    {
+        const float hi_x = Max( A.X(), B.X() );
+        const float hi_y = Max( A.Y(), B.Y() );
+        const float hi_z = Max( A.Z(), B.Z() );
+        return Vec3( hi_x, hi_y, hi_z );
+    }
+
+    // Writes the three components on one line, then a line break and flush.
+    std::ostream &operator<<( std::ostream &out, const Vec3 &A )
+    {
+        return out << form( " %9.5f %9.5f %9.5f", A.X(), A.Y(), A.Z() ) << std::endl;
+    }
+};
diff --git a/src/nvtt/bc6h/arvo/Vec3.h b/src/nvtt/bc6h/arvo/Vec3.h
new file mode 100755
index 0000000..b9d539f
--- /dev/null
+++ b/src/nvtt/bc6h/arvo/Vec3.h
@@ -0,0 +1,517 @@
+/***************************************************************************
+* Vec3.h                                                                   *
+*                                                                          *
+* Basic operations on 3-dimensional vectors.  This special case is useful  *
+* because many operations are performed inline.                            *
+*                                                                          *
+* HISTORY                                                                  *
+*    Name    Date        Description                                       *
+*                                                                          *
+*    arvo    10/27/94    Reorganized (removed Col & Row distinction).      *
+*    arvo    06/14/93    Initial coding.                                   *
+*                                                                          *
+*--------------------------------------------------------------------------*
+* Copyright (C) 1994, James Arvo                                           *
+*                                                                          *
+* This program is free software; you can redistribute it and/or modify it  *
+* under the terms of the GNU General Public License as published by the    *
+* Free Software Foundation.  See http://www.fsf.org/copyleft/gpl.html      *
+*                                                                          *
+* This program is distributed in the hope that it will be useful, but      *
+* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for    *
+* any particular purpose.  See the GNU General Public License for more     *
+* details.                                                                 *
+*                                                                          *
+***************************************************************************/
+#ifndef __VEC3_INCLUDED__
+#define __VEC3_INCLUDED__
+
+#include <math.h>
+#include <iostream>
+#include "Vec2.h"
+
+namespace ArvoMath {
+
+    // A triple of floats with accessor-style component access.
+    class Vec3 {
+    public:
+        Vec3( float c = 0.0 )             : x( c ),     y( c ),     z( c )     {}
+        Vec3( float a, float b, float c ) : x( a ),     y( b ),     z( c )     {}
+        Vec3( const Vec3 &A )             : x( A.X() ), y( A.Y() ), z( A.Z() ) {}
+        // The assignments return *this so they can be chained; callers that
+        // ignore the result are unaffected.
+        Vec3 &operator=( float c )       { x = c;     y = c;     z = c;     return *this; }
+        Vec3 &operator=( const Vec3 &A ) { x = A.X(); y = A.Y(); z = A.Z(); return *this; }
+        Vec3 &operator=( const Vec2 &A ) { x = A.X(); y = A.Y(); z = 0.0;   return *this; }
+       ~Vec3() {}
+        float   X() const { return x; }
+        float   Y() const { return y; }
+        float   Z() const { return z; }
+        float & X()       { return x; }
+        float & Y()       { return y; }
+        float & Z()       { return z; }
+        // Index 0 selects x, 1 selects y, and any other index selects z.  The
+        // members are picked explicitly because pointer arithmetic from &x
+        // into y or z is undefined behaviour.
+        float   operator[]( int i ) const { return i == 0 ? x : ( i == 1 ? y : z ); }
+        float & operator[]( int i )       { return i == 0 ? x : ( i == 1 ? y : z ); }
+    private:
+        float x, y, z;
+    };
+
+    //class Mat3x3 {
+    //public:
+    //    inline Mat3x3( );
+    //    Mat3x3( const Mat3x3 &M ) { *this = M; }
+    //    Mat3x3( const Vec3 &, const Vec3 &, const Vec3 & ); // Three columns.
+ // ~Mat3x3( ) {} + // float operator()( int i, int j ) const { return m[i][j]; } + // float & operator()( int i, int j ) { return m[i][j]; } + // Mat3x3 & operator=( float ); + // Mat3x3 & operator=( const Mat3x3 & ); + // inline void ScaleRows( float, float, float ); + // inline void ScaleCols( float, float, float ); + // void Col( int n, const Vec3 & ); + // const float *Base() const { return &(m[0][0]); } + //private: + // float m[3][3]; + //}; + + //class Mat4x4 { + //public: + // Mat4x4( ); + // Mat4x4( const Mat4x4 &M ) { *this = M; } + // Mat4x4( const Mat3x3 &M ) ; + // ~Mat4x4( ) {} + // float operator()( int i, int j ) const { return m[i][j]; } + // float & operator()( int i, int j ) { return m[i][j]; } + // Mat4x4 & operator=( float ); + // Mat4x4 & operator=( const Mat4x4 & ); + // void Row( int i, int j, const Vec3 & ); + // void Col( int i, int j, const Vec3 & ); + // void ScaleRows( float, float, float, float ); + // void ScaleCols( float, float, float, float ); + // const float *Base() const { return &(m[0][0]); } + //private: + // float m[4][4]; + //}; + + + //========================================== + //=== External operators === + //========================================== + + //extern Vec3 operator * ( const Mat4x4 &, const Vec3 & ); + //extern Vec3 operator * ( const Vec3 &, const Mat4x4 & ); + //extern Mat3x3 operator * ( float , const Mat3x3 & ); + //extern Mat3x3 operator * ( const Mat3x3 &, float ); + //extern Mat3x3 operator / ( const Mat3x3 &, double ); + //extern Mat3x3 & operator *=( Mat3x3 &, float ); + //extern Mat3x3 & operator *=( Mat3x3 &, const Mat3x3 & ); + //extern Mat3x3 operator * ( const Mat3x3 &, const Mat3x3 & ); + //extern Mat3x3 operator + ( const Mat3x3 &, const Mat3x3 & ); + //extern Mat3x3 & operator +=( Mat3x3 &, const Mat3x3 & ); + //extern Mat3x3 operator - ( const Mat3x3 &, const Mat3x3 & ); + //extern Mat3x3 & operator -=( Mat3x3 &, const Mat3x3 & ); + //extern Mat4x4 operator * ( float , const Mat4x4 & ); + 
//extern Mat4x4 operator * ( const Mat4x4 &, float ); + //extern Mat4x4 operator / ( const Mat4x4 &, float ); + //extern Mat4x4 & operator *=( Mat4x4 &, float ); + //extern Mat4x4 operator * ( const Mat4x4 &, const Mat4x4 & ); + //extern Mat4x4 operator + ( const Mat4x4 &, const Mat4x4 & ); + //extern Mat4x4 & operator +=( Mat4x4 &, const Mat4x4 & ); + //extern Mat4x4 operator - ( const Mat4x4 &, const Mat4x4 & ); + //extern Mat4x4 & operator -=( Mat4x4 &, const Mat4x4 & ); + + + //========================================== + //=== Miscellaneous external functions === + //========================================== + + //extern Vec3 OrthogonalTo( const Vec3 & ); // A vector orthogonal to that given. + //extern Vec3 Min ( const Vec3 &, const Vec3 & ); + //extern Vec3 Max ( const Vec3 &, const Vec3 & ); + //extern double Angle ( const Vec3 &, const Vec3 & ); + //extern int Orthonormal ( Vec3 &, Vec3 & ); + //extern int Orthonormal ( Vec3 &, Vec3 &, Vec3 & ); + //extern float Trace ( const Mat3x3 & ); + //extern float Normalize ( Vec3 & ); + //extern float Norm1 ( const Mat3x3 & ); + //extern float SupNorm ( const Mat3x3 & ); + //extern double Determinant ( const Mat3x3 & ); + //extern Mat3x3 Transp ( const Mat3x3 & ); + //extern Mat3x3 Householder ( const Vec3 &, const Vec3 & ); + //extern Mat3x3 Householder ( const Vec3 & ); + //extern Mat3x3 Rotation3x3 ( float, float, float ); // Values in [0,1]. 
+ //extern Mat3x3 Inverse ( const Mat3x3 & ); + //extern Mat3x3 Diag3x3 ( const Vec3 & ); + //extern Mat3x3 Diag3x3 ( float, float, float ); + //extern Mat3x3 Rotation3x3 ( const Vec3 &Axis, float angle ); + //extern Mat4x4 Rotation4x4 ( const Vec3 &Axis, const Vec3 &Origin, float angle ); + + + //========================================== + //=== Norm-related functions === + //========================================== + + inline double LenSqr ( const Vec3 &A ) { return Sqr(A[0]) + Sqr(A[1]) + Sqr(A[2]); } + inline double Len ( const Vec3 &A ) { return Sqrt( LenSqr( A ) ); } + inline double Norm1 ( const Vec3 &A ) { return Abs(A[0]) + Abs(A[1]) + Abs(A[2]); } + inline double Norm2 ( const Vec3 &A ) { return Len( A ); } + inline float SupNorm( const Vec3 &A ) { return MaxAbs( A[0], A[1], A[2] ); } + + + //========================================== + //=== Addition === + //========================================== + + inline Vec3 operator+( const Vec3 &A, const Vec3 &B ) + { + return Vec3( A.X() + B.X(), A.Y() + B.Y(), A.Z() + B.Z() ); + } + + inline Vec3& operator+=( Vec3 &A, const Vec3 &B ) + { + A.X() += B.X(); + A.Y() += B.Y(); + A.Z() += B.Z(); + return A; + } + + + //========================================== + //=== Subtraction === + //========================================== + + inline Vec3 operator-( const Vec3 &A, const Vec3 &B ) + { + return Vec3( A.X() - B.X(), A.Y() - B.Y(), A.Z() - B.Z() ); + } + + inline Vec3 operator-( const Vec3 &A ) + { + return Vec3( -A.X(), -A.Y(), -A.Z() ); + } + + inline Vec3& operator-=( Vec3 &A, const Vec3 &B ) + { + A.X() -= B.X(); + A.Y() -= B.Y(); + A.Z() -= B.Z(); + return A; + } + + + //========================================== + //=== Multiplication === + //========================================== + + inline Vec3 operator*( float a, const Vec3 &x ) + { + return Vec3( a * x.X(), a * x.Y(), a * x.Z() ); + } + + inline Vec3 operator*( const Vec3 &x, float a ) + { + return Vec3( a * x.X(), a * x.Y(), a * x.Z() ); + } 
+ + inline float operator*( const Vec3 &A, const Vec3 &B ) // Inner product. + { + return A.X() * B.X() + A.Y() * B.Y() + A.Z() * B.Z(); + } + + inline Vec3& operator*=( Vec3 &A, float a ) + { + A.X() *= a; + A.Y() *= a; + A.Z() *= a; + return A; + } + + //inline Vec3& operator*=( Vec3 &A, const Mat3x3 &M ) // A = M * A + //{ + // float x = M(0,0) * A.X() + M(0,1) * A.Y() + M(0,2) * A.Z(); + // float y = M(1,0) * A.X() + M(1,1) * A.Y() + M(1,2) * A.Z(); + // float z = M(2,0) * A.X() + M(2,1) * A.Y() + M(2,2) * A.Z(); + // A.X() = x; + // A.Y() = y; + // A.Z() = z; + // return A; + //} + + //inline Vec3& operator*=( Vec3 &A, const Mat4x4 &M ) // A = M * A + //{ + // float x = M(0,0) * A.X() + M(0,1) * A.Y() + M(0,2) * A.Z() + M(0,3); + // float y = M(1,0) * A.X() + M(1,1) * A.Y() + M(1,2) * A.Z() + M(1,3); + // float z = M(2,0) * A.X() + M(2,1) * A.Y() + M(2,2) * A.Z() + M(2,3); + // A.X() = x; + // A.Y() = y; + // A.Z() = z; + // return A; + //} + + + //========================================== + //=== Division === + //========================================== + + inline Vec3 operator/( const Vec3 &A, double c ) + { + double t = 1.0 / c; + return Vec3( A.X() * t, A.Y() * t, A.Z() * t ); + } + + inline Vec3& operator/=( Vec3 &A, double a ) + { + A.X() /= a; + A.Y() /= a; + A.Z() /= a; + return A; + } + + inline Vec3 operator/( const Vec3 &A, const Vec3 &B ) // Remove component parallel to B. + { + Vec3 C; // Cumbersome due to compiler falure. + double x = LenSqr( B ); + if( x > 0.0 ) C = A - B * (( A * B ) / x); else C = A; + return C; + } + + inline void operator/=( Vec3 &A, const Vec3 &B ) // Remove component parallel to B. + { + double x = LenSqr( B ); + if( x > 0.0 ) A -= B * (( A * B ) / x); + } + + + //========================================== + //=== Miscellaneous === + //========================================== + + inline float operator|( const Vec3 &A, const Vec3 &B ) // Inner product. 
+ { + return A * B; + } + + inline Vec3 Unit( const Vec3 &A ) + { + double d = LenSqr( A ); + return d > 0.0 ? A / sqrt(d) : Vec3(0,0,0); + } + + inline Vec3 Unit( float x, float y, float z ) + { + return Unit( Vec3( x, y, z ) ); + } + + inline Vec3 Ortho( const Vec3 &A, const Vec3 &B ) + { + return Unit( A / B ); + } + + inline int operator==( const Vec3 &A, float x ) + { + return (A[0] == x) && (A[1] == x) && (A[2] == x); + } + + inline Vec3 operator^( const Vec3 &A, const Vec3 &B ) + { + return Vec3( + A.Y() * B.Z() - A.Z() * B.Y(), + A.Z() * B.X() - A.X() * B.Z(), + A.X() * B.Y() - A.Y() * B.X() ); + } + + inline double dist( const Vec3 &A, const Vec3 &B ) + { + return Len( A - B ); + } + + inline double Dihedral( const Vec3 &A, const Vec3 &B, const Vec3 &C ) + { + return ArcCos( Unit( A ^ B ) * Unit( C ^ B ) ); + } + + inline Vec3 operator>>( const Vec3 &A, const Vec3 &B ) // Project A onto B. + { + Vec3 C; + double x = LenSqr( B ); + if( x > 0.0 ) C = B * (( A * B ) / x); + return C; + } + + inline Vec3 operator<<( const Vec3 &A, const Vec3 &B ) // Project B onto A. + { + return B >> A; + } + + inline double Triple( const Vec3 &A, const Vec3 &B, const Vec3 &C ) + { + return ( A ^ B ) * C; + } + + + //========================================== + //=== Operations involving Matrices === + //========================================== + + //inline Mat3x3 Outer( const Vec3 &A, const Vec3 &B ) // Outer product. 
+ //{ + // Mat3x3 C; + // C(0,0) = A.X() * B.X(); + // C(0,1) = A.X() * B.Y(); + // C(0,2) = A.X() * B.Z(); + // C(1,0) = A.Y() * B.X(); + // C(1,1) = A.Y() * B.Y(); + // C(1,2) = A.Y() * B.Z(); + // C(2,0) = A.Z() * B.X(); + // C(2,1) = A.Z() * B.Y(); + // C(2,2) = A.Z() * B.Z(); + // return C; + //} + + //inline Vec3 operator*( const Mat3x3 &M, const Vec3 &A ) + //{ + // return Vec3( + // M(0,0) * A[0] + M(0,1) * A[1] + M(0,2) * A[2], + // M(1,0) * A[0] + M(1,1) * A[1] + M(1,2) * A[2], + // M(2,0) * A[0] + M(2,1) * A[1] + M(2,2) * A[2]); + //} + + //inline Vec3 operator*( const Vec3 &A, const Mat3x3 &M ) + //{ + // return Vec3( + // A[0] * M(0,0) + A[1] * M(1,0) + A[2] * M(2,0), + // A[0] * M(0,1) + A[1] * M(1,1) + A[2] * M(2,1), + // A[0] * M(0,2) + A[1] * M(1,2) + A[2] * M(2,2)); + //} + + ////========================================== + ////=== Operations on Matrices === + ////========================================== + + //inline Mat3x3 operator+( const Mat3x3 &A, const Mat3x3 &B ) + //{ + // Mat3x3 C; + // C(0,0) = A(0,0) + B(0,0); C(0,1) = A(0,1) + B(0,1); C(0,2) = A(0,2) + B(0,2); + // C(1,0) = A(1,0) + B(1,0); C(1,1) = A(1,1) + B(1,1); C(1,2) = A(1,2) + B(1,2); + // C(2,0) = A(2,0) + B(2,0); C(2,1) = A(2,1) + B(2,1); C(2,2) = A(2,2) + B(2,2); + // return C; + //} + + //inline Mat3x3 operator-( const Mat3x3 &A, const Mat3x3 &B ) + //{ + // Mat3x3 C; + // C(0,0) = A(0,0) - B(0,0); C(0,1) = A(0,1) - B(0,1); C(0,2) = A(0,2) - B(0,2); + // C(1,0) = A(1,0) - B(1,0); C(1,1) = A(1,1) - B(1,1); C(1,2) = A(1,2) - B(1,2); + // C(2,0) = A(2,0) - B(2,0); C(2,1) = A(2,1) - B(2,1); C(2,2) = A(2,2) - B(2,2); + // return C; + //} + + //inline Mat3x3 operator*( const Mat3x3 &A, const Mat3x3 &B ) + //{ + // Mat3x3 C; + // C(0,0) = A(0,0) * B(0,0) + A(0,1) * B(1,0) + A(0,2) * B(2,0); + // C(0,1) = A(0,0) * B(0,1) + A(0,1) * B(1,1) + A(0,2) * B(2,1); + // C(0,2) = A(0,0) * B(0,2) + A(0,1) * B(1,2) + A(0,2) * B(2,2); + // C(1,0) = A(1,0) * B(0,0) + A(1,1) * B(1,0) + A(1,2) * 
B(2,0); + // C(1,1) = A(1,0) * B(0,1) + A(1,1) * B(1,1) + A(1,2) * B(2,1); + // C(1,2) = A(1,0) * B(0,2) + A(1,1) * B(1,2) + A(1,2) * B(2,2); + // C(2,0) = A(2,0) * B(0,0) + A(2,1) * B(1,0) + A(2,2) * B(2,0); + // C(2,1) = A(2,0) * B(0,1) + A(2,1) * B(1,1) + A(2,2) * B(2,1); + // C(2,2) = A(2,0) * B(0,2) + A(2,1) * B(1,2) + A(2,2) * B(2,2); + // return C; + //} + + //inline void Mat3x3::ScaleRows( float a, float b, float c ) + //{ + // m[0][0] *= a; m[0][1] *= a; m[0][2] *= a; + // m[1][0] *= b; m[1][1] *= b; m[1][2] *= b; + // m[2][0] *= c; m[2][1] *= c; m[2][2] *= c; + //} + + //inline void Mat3x3::ScaleCols( float a, float b, float c ) + //{ + // m[0][0] *= a; m[0][1] *= b; m[0][2] *= c; + // m[1][0] *= a; m[1][1] *= b; m[1][2] *= c; + // m[2][0] *= a; m[2][1] *= b; m[2][2] *= c; + //} + + + //========================================== + //=== Special Matrices === + //========================================== + + //inline Mat3x3::Mat3x3() + //{ + // m[0][0] = 0; m[0][1] = 0; m[0][2] = 0; + // m[1][0] = 0; m[1][1] = 0; m[1][2] = 0; + // m[2][0] = 0; m[2][1] = 0; m[2][2] = 0; + //} + + //inline Mat3x3 Ident3x3() + //{ + // Mat3x3 I; + // I(0,0) = 1.0; + // I(1,1) = 1.0; + // I(2,2) = 1.0; + // return I; + //} + + //inline Mat4x4 Ident4x4() + //{ + // Mat4x4 I; + // I(0,0) = 1.0; + // I(1,1) = 1.0; + // I(2,2) = 1.0; + // I(3,3) = 1.0; + // return I; + //} + + //inline void Adjoint( const Mat3x3 &M, Mat3x3 &A ) + //{ + // A(0,0) = M(1,1) * M(2,2) - M(1,2) * M(2,1); + // A(0,1) = M(1,2) * M(2,0) - M(1,0) * M(2,2); + // A(0,2) = M(1,0) * M(2,1) - M(1,1) * M(2,0); + + // A(1,0) = M(0,2) * M(2,1) - M(0,1) * M(2,2); + // A(1,1) = M(0,0) * M(2,2) - M(0,2) * M(2,0); + // A(1,2) = M(0,1) * M(2,0) - M(0,0) * M(2,1); + + // A(2,0) = M(0,1) * M(1,2) - M(0,2) * M(1,1); + // A(2,1) = M(0,2) * M(1,0) - M(0,0) * M(1,2); + // A(2,2) = M(0,0) * M(1,1) - M(0,1) * M(1,0); + //} + + //inline void TranspAdjoint( const Mat3x3 &M, Mat3x3 &A ) + //{ + // A(0,0) = M(1,1) * M(2,2) - M(1,2) 
* M(2,1); + // A(1,0) = M(1,2) * M(2,0) - M(1,0) * M(2,2); + // A(2,0) = M(1,0) * M(2,1) - M(1,1) * M(2,0); + + // A(0,1) = M(0,2) * M(2,1) - M(0,1) * M(2,2); + // A(1,1) = M(0,0) * M(2,2) - M(0,2) * M(2,0); + // A(2,1) = M(0,1) * M(2,0) - M(0,0) * M(2,1); + + // A(0,2) = M(0,1) * M(1,2) - M(0,2) * M(1,1); + // A(1,2) = M(0,2) * M(1,0) - M(0,0) * M(1,2); + // A(2,2) = M(0,0) * M(1,1) - M(0,1) * M(1,0); + //} + + //inline void Adjoint( const Mat3x3 &M, Mat3x3 &A, double &det ) + //{ + // Adjoint( M, A ); + // det = A(0,0) * M(0,0) + A(1,0) * M(1,0) + A(2,0) * M(2,0); + //} + + //inline void TranspAdjoint( const Mat3x3 &M, Mat3x3 &A, double &det ) + //{ + // TranspAdjoint( M, A ); + // det = A(0,0) * M(0,0) + A(0,1) * M(1,0) + A(0,2) * M(2,0); + //} + + + //========================================== + //=== Output routines === + //========================================== + + extern std::ostream &operator<<( std::ostream &out, const Vec3 & ); + //extern std::ostream &operator<<( std::ostream &out, const Mat3x3 & ); + //extern std::ostream &operator<<( std::ostream &out, const Mat4x4 & ); +}; +#endif diff --git a/src/nvtt/bc6h/arvo/Vector.cpp b/src/nvtt/bc6h/arvo/Vector.cpp new file mode 100755 index 0000000..af3bc11 --- /dev/null +++ b/src/nvtt/bc6h/arvo/Vector.cpp @@ -0,0 +1,366 @@ +/*************************************************************************** +* Vector.C * +* * +* General Vector and Matrix classes, with all the associated methods. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 08/16/2000 Revamped for CIT tools. * +* arvo 10/31/1994 Combined RowVec & ColVec into Vector. * +* arvo 06/30/1993 Added singular value decomposition class. * +* arvo 06/25/1993 Major revisions. * +* arvo 09/08/1991 Initial implementation. 
* +* * +*--------------------------------------------------------------------------* +* Copyright (C) 2000, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#include +#include +#include "ArvoMath.h" +#include "Vector.h" +#include "form.h" + +namespace ArvoMath { + + const Vector Vector::Null(0); + + /*-------------------------------------------------------------------------* + * * + * C O N S T R U C T O R S * + * * + *-------------------------------------------------------------------------*/ + Vector::Vector( const float *x, int n ) + { + Create( n ); + for( register int i = 0; i < size; i++ ) elem[i] = x[i]; + } + + Vector::Vector( const Vector &A ) + { + Create( A.Size() ); + for( register int i = 0; i < A.Size(); i++ ) elem[i] = A(i); + } + + Vector::Vector( int n ) + { + Create( n ); + for( register int i = 0; i < n; i++ ) elem[i] = 0.0; + } + + Vector::Vector( float x, float y ) + { + Create( 2 ); + elem[0] = x; + elem[1] = y; + } + + Vector::Vector( float x, float y, float z ) + { + Create( 3 ); + elem[0] = x; + elem[1] = y; + elem[2] = z; + } + + void Vector::SetSize( int new_size ) + { + if( size != new_size ) + { + delete[] elem; + Create( new_size ); + for( register int i = 0; i < new_size; i++ ) elem[i] = 0.0; + } + } + + Vector &Vector::Swap( int i, int j ) + { + float temp = elem[i]; + elem[i] = elem[j]; + elem[j] = temp; + return *this; + } + + Vector Vector::GetBlock( int i, int j ) const + { + assert( 0 <= i && i <= j && j < size ); + int n 
= j - i + 1; + Vector V( n ); + register float *v = V.Array(); + register float *e = elem + i; + for( register int k = 0; k < n; k++ ) *v++ = *e++; + return V; + } + + void Vector::SetBlock( int i, int j, const Vector &V ) // Copies V into elements i..j inclusive; V.Size() must equal j - i + 1. + { + assert( 0 <= i && i <= j && j < size ); + int n = j - i + 1; + assert( n == V.Size() ); + register float *v = V.Array(); + register float *e = elem + i; + for( register int k = 0; k < n; k++ ) *e++ = *v++; + } + + /*-------------------------------------------------------------------------* + * * + * O P E R A T O R S * + * * + *-------------------------------------------------------------------------*/ + double operator*( const Vector &A, const Vector &B ) // Inner product; sizes must match. Returns 0 for empty vectors. + { + assert( A.Size() == B.Size() ); + double sum = ( A.Size() > 0 ) ? A(0) * B(0) : 0.0; // An empty vector has no element 0 to seed the sum with. + for( register int i = 1; i < A.Size(); i++ ) sum += A(i) * B(i); + return sum; + } + + void Vector::operator=( float c ) // Sets every element to c; the size is unchanged. + { + for( register int i = 0; i < size; i++ ) elem[i] = c; + } + + Vector operator*( const Vector &A, float s ) // Returns a copy of A with every element multiplied by s. + { + Vector C( A.Size() ); + for( register int i = 0; i < A.Size(); i++ ) C(i) = A(i) * s; + return C; + } + + Vector operator*( float s, const Vector &A ) // Same as A * s. + { + Vector C( A.Size() ); + for( register int i = 0; i < A.Size(); i++ ) C(i) = A(i) * s; + return C; + } + + Vector operator/( const Vector &A, float s ) // Returns a copy of A with every element divided by s; s must be non-zero. + { + assert( s != 0.0 ); + Vector C( A.Size() ); + for( register int i = 0; i < A.Size(); i++ ) C(i) = A(i) / s; + return C; + } + + Vector& operator+=( Vector &A, const Vector &B ) // In-place element-wise sum; sizes must match. + { + assert( A.Size() == B.Size() ); + for( register int i = 0; i < A.Size(); i++ ) A(i) += B(i); + return A; + } + + Vector& operator*=( Vector &A, float scale ) // In-place multiplication of every element by scale. + { + for( register int i = 0; i < A.Size(); i++ ) A(i) *= scale; + return A; + } + + Vector& operator/=( Vector &A, float scale ) // In-place division; unlike operator/, scale is not checked against zero here. + { + for( register int i = 0; i < A.Size(); i++ ) A(i) /= scale; + return A; + } + + Vector& Vector::operator=( const Vector &A ) // Resizes to A.Size() via SetSize, then copies A's elements. + { + SetSize( A.Size() ); + for( register int i = 0; i < size; i++ ) elem[i] =
A(i); + return *this; + } + + Vector operator+( const Vector &A, const Vector &B ) + { + assert( A.Size() == B.Size() ); + Vector C( A.Size() ); + for( register int i = 0; i < A.Size(); i++ ) C(i) = A(i) + B(i); + return C; + } + + Vector operator-( const Vector &A, const Vector &B ) + { + assert( A.Size() == B.Size() ); + Vector C( A.Size() ); + for( register int i = 0; i < A.Size(); i++ ) C(i) = A(i) - B(i); + return C; + } + + Vector operator-( const Vector &A ) // Unary minus. + { + Vector B( A.Size() ); + for( register int i = 0; i < A.Size(); i++ ) B(i) = -A(i); + return B; + } + + Vector operator^( const Vector &A, const Vector &B ) + { + Vector C(3); + assert( A.Size() == B.Size() ); + if( A.Size() == 2 ) // Assume z components of A and B are zero. + { + C(0) = 0.0; + C(1) = 0.0; + C(2) = A(0) * B(1) - A(1) * B(0); + } + else + { + assert( A.Size() == 3 ); + C(0) = A(1) * B(2) - A(2) * B(1); + C(1) = A(2) * B(0) - A(0) * B(2); + C(2) = A(0) * B(1) - A(1) * B(0); + } + return C; + } + + /*-------------------------------------------------------------------------* + * * + * M I S C E L L A N E O U S F U N C T I O N S * + * * + *-------------------------------------------------------------------------*/ + Vector Min( const Vector &A, const Vector &B ) + { + assert( A.Size() == B.Size() ); + Vector C( A.Size() ); + for( register int i = 0; i < A.Size(); i++ ) C(i) = Min( A(i), B(i) ); + return C; + } + + Vector Max( const Vector &A, const Vector &B ) + { + assert( A.Size() == B.Size() ); + Vector C( A.Size() ); + for( register int i = 0; i < A.Size(); i++ ) C(i) = Max( A(i), B(i) ); + return C; + } + + Vector Unit( const Vector &A ) + { + double norm = TwoNorm( A ); + assert( norm > 0.0 ); + return A * ( 1.0 / norm ); + } + + double Normalize( Vector &A ) + { + double norm = TwoNorm( A ); + assert( norm > 0.0 ); + for( register int i = 0; i < A.Size(); i++ ) A(i) /= norm; + return norm; + } + + int Null( const Vector &A ) + { + return A.Size() == 0; + } + + 
double TwoNormSqr( const Vector &A ) + { + double sum = A(0) * A(0); + for( register int i = 1; i < A.Size(); i++ ) sum += A(i) * A(i); + return sum; + } + + double TwoNorm( const Vector &A ) + { + return sqrt( TwoNormSqr( A ) ); + } + + double dist( const Vector &A, const Vector &B ) + { + return TwoNorm( A - B ); + } + + double OneNorm( const Vector &A ) + { + double norm = Abs( A(0) ); + for( register int i = 1; i < A.Size(); i++ ) norm += Abs( A(i) ); + return norm; + } + + double SupNorm( const Vector &A ) + { + double norm = Abs( A(0) ); + for( register int i = 1; i < A.Size(); i++ ) + { + double a = Abs( A(i) ); + if( a > norm ) norm = a; + } + return norm; + } + + Vec2 ToVec2( const Vector &V ) + { + assert( V.Size() == 2 ); + return Vec2( V(0), V(1) ); + } + + Vec3 ToVec3( const Vector &V ) + { + assert( V.Size() == 3 ); + return Vec3( V(0), V(1), V(2) ); + } + + Vector ToVector( const Vec2 &V ) + { + return Vector( V.X(), V.Y() ); + } + + Vector ToVector( const Vec3 &V ) + { + return Vector( V.X(), V.Y(), V.Z() ); + } + + // + // Returns a vector that is orthogonal to A (but of arbitrary length). + // + Vector OrthogonalTo( const Vector &A ) + { + Vector B( A.Size() ); + double c = 0.5 * SupNorm( A ); + + if( A.Size() < 2 ) + { + // Just return the zero-vector. + } + else if( c == 0.0 ) + { + B(0) = 1.0; + } + else for( register int i = 0; i < A.Size(); i++ ) + { + if( Abs( A(i)) > c ) + { + int k = ( i > 0 ) ? 
i - 1 : i + 1; + B(k) = -A(i); + B(i) = A(k); + break; + } + } + return B; + } + + std::ostream &operator<<( std::ostream &out, const Vector &A ) + { + if( A.Size() == 0 ) + { + out << "NULL"; + } + else for( register int i = 0; i < A.Size(); i++ ) + { + out << form( "%3d: %10.5g\n", i, A(i) ); + } + out << std::endl; + return out; + } + + +}; diff --git a/src/nvtt/bc6h/arvo/Vector.h b/src/nvtt/bc6h/arvo/Vector.h new file mode 100755 index 0000000..01e66df --- /dev/null +++ b/src/nvtt/bc6h/arvo/Vector.h @@ -0,0 +1,103 @@ +/*************************************************************************** +* Vector.h * +* * +* General Vector and Matrix classes, with all the associated methods. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 08/16/2000 Revamped for CIT tools. * +* arvo 10/31/1994 Combined RowVec & ColVec into Vector. * +* arvo 06/30/1993 Added singular value decomposition class. * +* arvo 06/25/1993 Major revisions. * +* arvo 09/08/1991 Initial implementation. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 2000, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. 
* +* * +***************************************************************************/ +#ifndef __VECTOR_INCLUDED__ +#define __VECTOR_INCLUDED__ + +#include +#include "Vec2.h" +#include "Vec3.h" + +namespace ArvoMath { + class Vector { + public: + Vector( int size = 0 ); + Vector( const Vector & ); + Vector( float, float ); + Vector( float, float, float ); + Vector( const float *x, int n ); + Vector &operator=( const Vector & ); + void operator=( float ); + void SetSize( int ); + Vector &Swap( int i, int j ); + Vector GetBlock( int i, int j ) const; + void SetBlock( int i, int j, const Vector & ); + static const Vector Null; + + public: // Inlined functions. + inline float operator()( int i ) const { return elem[i]; } + inline float& operator()( int i ) { return elem[i]; } + inline float* Array() const { return elem; } + inline int Size () const { return size; } + inline ~Vector() { delete[] elem; } + + private: + void Create( int n = 0 ) { size = n; elem = new float[n]; } + int size; + float* elem; + }; + + extern Vector operator + ( const Vector &, const Vector & ); + extern Vector operator - ( const Vector &, const Vector & ); // Binary minus. + extern Vector operator - ( const Vector & ); // Unary minus. + extern Vector operator * ( const Vector &, float ); + extern Vector operator * ( float , const Vector & ); + extern Vector operator / ( const Vector &, float ); + extern Vector operator / ( const Vector &, const Vector & ); + extern Vector operator ^ ( const Vector &, const Vector & ); + extern Vector& operator += ( Vector &, const Vector & ); + extern Vector& operator *= ( Vector &, float ); + extern Vector& operator /= ( Vector &, float ); + extern Vector Min ( const Vector &, const Vector & ); + extern Vector Max ( const Vector &, const Vector & ); + extern double operator * ( const Vector &, const Vector & ); // Inner product. 
+ extern double dist ( const Vector &, const Vector & ); + extern Vector OrthogonalTo( const Vector & ); // Returns some orthogonal vector. + extern Vector Unit ( const Vector & ); + extern double Normalize ( Vector & ); + extern double OneNorm ( const Vector & ); + extern double TwoNorm ( const Vector & ); + extern double TwoNormSqr ( const Vector & ); + extern double SupNorm ( const Vector & ); + extern int Null ( const Vector & ); + extern Vec2 ToVec2 ( const Vector & ); + extern Vec3 ToVec3 ( const Vector & ); + extern Vector ToVector ( const Vec2 & ); + extern Vector ToVector ( const Vec3 & ); + + std::ostream &operator<<( + std::ostream &out, + const Vector & + ); +}; +#endif + + + + + + diff --git a/src/nvtt/bc6h/arvo/form.h b/src/nvtt/bc6h/arvo/form.h new file mode 100755 index 0000000..48aef94 --- /dev/null +++ b/src/nvtt/bc6h/arvo/form.h @@ -0,0 +1,26 @@ +#ifndef __FORM_INCLUDED__ +#define __FORM_INCLUDED__ + +#include +#include +#include +#include + +namespace ArvoMath { + + inline const char *form(char *fmt, ...) + { + static char printbfr[65536]; + va_list arglist; + + va_start(arglist,fmt); + int length = vsprintf(printbfr,fmt,arglist); + va_end(arglist); + + assert(length > 65536); + + return printbfr; + } +}; + +#endif diff --git a/src/nvtt/bc6h/bits.h b/src/nvtt/bc6h/bits.h new file mode 100755 index 0000000..bf177ca --- /dev/null +++ b/src/nvtt/bc6h/bits.h @@ -0,0 +1,73 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. 
+*/ + +#ifndef _BITS_H +#define _BITS_H + +// read/write a bitstream + +#include <assert.h> + +class Bits +{ +public: + + Bits(char *data, int maxdatabits) { assert (data && maxdatabits > 0); bptr = bend = 0; bits = data; maxbits = maxdatabits; readonly = 0;} + Bits(const char *data, int availdatabits) { assert (data && availdatabits > 0); bptr = 0; bend = availdatabits; cbits = data; maxbits = availdatabits; readonly = 1;} + + void write(int value, int nbits) { // writes the low nbits of value, least-significant bit first + assert (nbits >= 0 && nbits < 32); + assert (sizeof(int)>= 4); + for (int i=0; i<nbits; ++i) // NOTE(review): loop header restored across a text-extraction gap + writeone(value>>i); + } + int read(int nbits) { // reads nbits, assembling them least-significant bit first + assert (nbits >= 0 && nbits < 32); + assert (sizeof(int)>= 4); + int out = 0; + for (int i=0; i<nbits; ++i) // NOTE(review): loop restored across a text-extraction gap; mirrors write() + out |= readone() << i; + return out; + } + int getptr() { return bptr; } // NOTE(review): restored across an extraction gap; confirm the name against callers + void setptr(int ptr) { assert (ptr >= 0 && ptr < maxbits); bptr = ptr; } + int getsize() { return bend; } + +private: + int bptr; // next bit to read + int bend; // last written bit + 1 + char *bits; // ptr to user bit stream + const char *cbits; // ptr to const user bit stream + int maxbits; // max size of user bit stream + char readonly; // 1 if this is a read-only stream + + int readone() { + assert (bptr < bend); + if (bptr >= bend) return 0; + int bit = (readonly ? cbits[bptr>>3] : bits[bptr>>3]) & (1 << (bptr & 7)); + ++bptr; + return bit != 0; + } + void writeone(int bit) { + if (readonly) + throw "Writing a read-only bit stream"; + assert (bptr < maxbits); + if (bptr >= maxbits) return; + if (bit&1) + bits[bptr>>3] |= 1 << (bptr & 7); + else + bits[bptr>>3] &= ~(1 << (bptr & 7)); + if (bptr++ >= bend) bend = bptr; + } +}; + +#endif \ No newline at end of file diff --git a/src/nvtt/bc6h/exr.cpp b/src/nvtt/bc6h/exr.cpp new file mode 100755 index 0000000..9321734 --- /dev/null +++ b/src/nvtt/bc6h/exr.cpp @@ -0,0 +1,51 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
+ +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +// Simple .exr file reader/writer + +#include + +#include +#include + +#include "exr.h" + +using namespace std; +using namespace Imf; +using namespace Imath; + +void Exr::fileinfo(const string inf, int &width, int &height) +{ + RgbaInputFile file (inf.c_str()); + Box2i dw = file.dataWindow(); + + width = dw.max.x - dw.min.x + 1; + height = dw.max.y - dw.min.y + 1; +} + +void Exr::readRgba(const string inf, Array2D &pix, int &w, int &h) +{ + RgbaInputFile file (inf.c_str()); + Box2i dw = file.dataWindow(); + w = dw.max.x - dw.min.x + 1; + h = dw.max.y - dw.min.y + 1; + pix.resizeErase (h, w); + file.setFrameBuffer (&pix[0][0] - dw.min.x - dw.min.y * w, 1, w); + file.readPixels (dw.min.y, dw.max.y); +} + +void Exr::writeRgba(const string outf, const Array2D &pix, int w, int h) +{ + RgbaOutputFile file (outf.c_str(), w, h, WRITE_RGBA); + file.setFrameBuffer (&pix[0][0], 1, w); + file.writePixels (h); +} \ No newline at end of file diff --git a/src/nvtt/bc6h/exr.h b/src/nvtt/bc6h/exr.h new file mode 100755 index 0000000..b2568ee --- /dev/null +++ b/src/nvtt/bc6h/exr.h @@ -0,0 +1,37 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ +See the License for the specific language governing permissions and limitations under the License. +*/ + +#ifndef _EXR_H +#define _EXR_H + +// exr-friendly routines + +#include + +#include "ImfArray.h" +#include "ImfRgba.h" + +using namespace std; +using namespace Imf; + +class Exr +{ +public: + Exr() {}; + ~Exr() {}; + + static void fileinfo(const string inf, int &width, int &height); + static void readRgba(const string inf, Array2D &pix, int &w, int &h); + static void writeRgba(const string outf, const Array2D &pix, int w, int h); +}; + +#endif \ No newline at end of file diff --git a/src/nvtt/bc6h/shapes_two.h b/src/nvtt/bc6h/shapes_two.h new file mode 100755 index 0000000..d9a52ef --- /dev/null +++ b/src/nvtt/bc6h/shapes_two.h @@ -0,0 +1,133 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. 
+*/ + +#ifndef _SHAPES_TWO_H +#define _SHAPES_TWO_H + +// shapes for two regions + +#define NREGIONS 2 +#define NSHAPES 64 +#define SHAPEBITS 6 + +static int shapes[NSHAPES*16] = +{ +0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, +0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, +0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, +0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, + +0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, +0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, +0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, +0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, + +0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, +0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, +0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, + +0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + +0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, +1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, +1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, +1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, + +0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, +0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, +0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, +0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, + +0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, +0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, +0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, +0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, + +0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, +0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, +1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, +1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, + +0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, +0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, +0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, +0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, + +0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 
0, 0, 1, 0, 1, +1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, +0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, +1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, + +0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, +0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, +1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, +1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + +0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, +1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, +1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, +0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, + +0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, +1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, +0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, + +0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, +1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, +1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, +0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, + +0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, +1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, +1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, +1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, + +0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, +1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, +0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, +0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, + +}; + +#define REGION(x,y,si) shapes[((si)&3)*4+((si)>>2)*64+(x)+(y)*16] + +static int shapeindex_to_compressed_indices[NSHAPES*2] = +{ + 0,15, 0,15, 0,15, 0,15, + 0,15, 0,15, 0,15, 0,15, + 0,15, 0,15, 0,15, 0,15, + 0,15, 0,15, 0,15, 0,15, + + 0,15, 0, 2, 0, 8, 0, 2, + 0, 2, 0, 8, 0, 8, 0,15, + 0, 2, 0, 8, 0, 2, 0, 2, + 0, 8, 0, 8, 0, 2, 0, 2, + + 0,15, 0,15, 0, 6, 0, 8, + 0, 2, 0, 8, 0,15, 0,15, + 0, 2, 0, 8, 0, 2, 0, 2, + 0, 2, 0,15, 0,15, 0, 6, + + 0, 6, 0, 2, 0, 6, 0, 8, + 0,15, 0,15, 0, 2, 0, 2, + 0,15, 0,15, 0,15, 0,15, + 0,15, 0, 2, 0, 2, 0,15 + +}; +#define SHAPEINDEX_TO_COMPRESSED_INDICES(si,region) 
shapeindex_to_compressed_indices[(si)*2+(region)] + +#endif diff --git a/src/nvtt/bc6h/tile.h b/src/nvtt/bc6h/tile.h new file mode 100755 index 0000000..f3bd2d6 --- /dev/null +++ b/src/nvtt/bc6h/tile.h @@ -0,0 +1,115 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +#ifndef _TILE_H +#define _TILE_H + +#include +#include +#include +#include +#include "arvo/Vec3.h" + +#include "utils.h" + +#define DBL_MAX (1.0e37) // doesn't have to be really dblmax, just bigger than any possible squared error + +using namespace Imf; +using namespace ArvoMath; + +//#define USE_IMPORTANCE_MAP 1 // define this if you want to increase importance of some pixels in tile +class Tile +{ +private: + // NOTE: this returns the appropriately-clamped BIT PATTERN of the half as an INTEGRAL float value + static float half2float(half h) + { + return (float) Utils::ushort_to_format(h.bits()); + } + // NOTE: this is the inverse of the above operation + static half float2half(float f) + { + half h; + h.setBits(Utils::format_to_ushort((int)f)); + return h; + } + // look for adjacent pixels that are identical. 
if there are enough of them, increase their importance + void generate_importance_map() + { + // initialize + for (int y=0; y= size_x || yn < 0 || yn >= size_y) + return false; + return( (data[y][x].X() == data[yn][xn].X()) && + (data[y][x].Y() == data[yn][xn].Y()) && + (data[y][x].Z() == data[yn][xn].Z()) ); + } +#ifdef USE_IMPORTANCE_MAP + bool match_4_neighbor(int x, int y) + { + return is_equal(x,y,x-1,y) || is_equal(x,y,x+1,y) || is_equal(x,y,x,y-1) || is_equal(x,y,x,y+1); + } +#else + bool match_4_neighbor(int x, int y) + { + return false; + } +#endif + +public: + Tile() {}; + ~Tile(){}; + Tile(int xs, int ys) {size_x = xs; size_y = ys;} + + static const int TILE_H = 4; + static const int TILE_W = 4; + static const int TILE_TOTAL = TILE_H * TILE_W; + Vec3 data[TILE_H][TILE_W]; + float importance_map[TILE_H][TILE_W]; + int size_x, size_y; // actual size of tile + + // pixels -> tile + void inline insert(const Array2D &pixels, int x, int y) + { + for (int y0=0; y0 pixels + void inline extract(Array2D &pixels, int x, int y) + { + for (int y0=0; y0 +#include +#include + +static int denom7_weights_64[] = {0, 9, 18, 27, 37, 46, 55, 64}; // divided by 64 +static int denom15_weights_64[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}; // divided by 64 + +int Utils::lerp(int a, int b, int i, int denom) +{ + assert (denom == 3 || denom == 7 || denom == 15); + assert (i >= 0 && i <= denom); + + int round = 32, shift = 6, *weights; + + switch(denom) + { + case 3: denom *= 5; i *= 5; // fall through to case 15 + case 15: weights = denom15_weights_64; break; + case 7: weights = denom7_weights_64; break; + default: assert(0); + } + + return (a*weights[denom-i] +b*weights[i] + round) >> shift; +} + +Vec3 Utils::lerp(const Vec3& a, const Vec3 &b, int i, int denom) +{ + assert (denom == 3 || denom == 7 || denom == 15); + assert (i >= 0 && i <= denom); + + int shift = 6, *weights; + + switch(denom) + { + case 3: denom *= 5; i *= 5; // fall through to case 15 + 
case 15: weights = denom15_weights_64; break; + case 7: weights = denom7_weights_64; break; + default: assert(0); + } + + // no need to round these as this is an exact division + return (a*weights[denom-i] +b*weights[i]) / float(1 << shift); +} + + +/* + For unsigned f16, clamp the input to [0,F16MAX]. Thus u15. + For signed f16, clamp the input to [-F16MAX,F16MAX]. Thus s16. + + The conversions proceed as follows: + + unsigned f16: get bits. if high bit set, clamp to 0, else clamp to F16MAX. + signed f16: get bits. extract exp+mantissa and clamp to F16MAX. return -value if sign bit was set, else value + unsigned int: get bits. return as a positive value. + signed int. get bits. return as a value in -32768..32767. + + The inverse conversions are just the inverse of the above. +*/ + +// clamp the 3 channels of the input vector to the allowable range based on FORMAT +// note that each channel is a float storing the allowable range as a bit pattern converted to float +// that is, for unsigned f16 say, we would clamp each channel to the range [0, F16MAX] + +void Utils::clamp(Vec3 &v) +{ + for (int i=0; i<3; ++i) + { + switch(Utils::FORMAT) + { + case UNSIGNED_F16: + if (v[i] < 0.0) v[i] = 0; + else if (v[i] > F16MAX) v[i] = F16MAX; + break; + + case SIGNED_F16: + if (v[i] < -F16MAX) v[i] = -F16MAX; + else if (v[i] > F16MAX) v[i] = F16MAX; + break; + + default: + assert (0); + } + } +} + +// convert a u16 value to s17 (represented as an int) based on the format expected +int Utils::ushort_to_format(unsigned short input) +{ + int out, s; + + // clamp to the valid range we are expecting + switch (Utils::FORMAT) + { + case UNSIGNED_F16: + if (input & F16S_MASK) out = 0; + else if (input > F16MAX) out = F16MAX; + else out = input; + break; + + case SIGNED_F16: + s = input & F16S_MASK; + input &= F16EM_MASK; + if (input > F16MAX) out = F16MAX; + else out = input; + out = s ? 
-out : out; + break; + } + return out; +} + +// convert a s17 value to u16 based on the format expected +unsigned short Utils::format_to_ushort(int input) +{ + unsigned short out; + + // clamp to the valid range we are expecting + switch (Utils::FORMAT) + { + case UNSIGNED_F16: + assert (input >= 0 && input <= F16MAX); + out = input; + break; + + case SIGNED_F16: + assert (input >= -F16MAX && input <= F16MAX); + // convert to sign-magnitude + int s; + if (input < 0) { s = F16S_MASK; input = -input; } + else { s = 0; } + out = s | input; + break; + } + return out; +} + +// quantize the input range into equal-sized bins +int Utils::quantize(float value, int prec) +{ + int q, ivalue, s; + + assert (prec > 1); // didn't bother to make it work for 1 + + value = (float)floor(value + 0.5); + + int bias = (prec > 10) ? ((1<<(prec-1))-1) : 0; // bias precisions 11..16 to get a more accurate quantization + + switch (Utils::FORMAT) + { + case UNSIGNED_F16: + assert (value >= 0 && value <= F16MAX); + ivalue = (int)value; + q = ((ivalue << prec) + bias) / (F16MAX+1); + assert (q >= 0 && q < (1 << prec)); + break; + + case SIGNED_F16: + assert (value >= -F16MAX && value <= F16MAX); + // convert to sign-magnitude + ivalue = (int)value; + if (ivalue < 0) { s = 1; ivalue = -ivalue; } else s = 0; + + q = ((ivalue << (prec-1)) + bias) / (F16MAX+1); + if (s) + q = -q; + assert (q > -(1 << (prec-1)) && q < (1 << (prec-1))); + break; + } + + return q; +} + +int Utils::finish_unquantize(int q, int prec) +{ + if (Utils::FORMAT == UNSIGNED_F16) + return (q * 31) >> 6; // scale the magnitude by 31/64 + else if (Utils::FORMAT == SIGNED_F16) + return (q < 0) ? -(((-q) * 31) >> 5) : (q * 31) >> 5; // scale the magnitude by 31/32 + else + return q; +} + +// unquantize each bin to midpoint of original bin range, except +// for the end bins which we push to an endpoint of the bin range. +// we do this to ensure we can represent all possible original values. 
+// the asymmetric end bins do not affect PSNR for the test images. +// +// code this function assuming an arbitrary bit pattern as the encoded block +int Utils::unquantize(int q, int prec) +{ + int unq, s; + + assert (prec > 1); // not implemented for prec 1 + + switch (Utils::FORMAT) + { + // modify this case to move the multiplication by 31 after interpolation. + // Need to use finish_unquantize. + + // since we have 16 bits available, let's unquantize this to 16 bits unsigned + // thus the scale factor is [0-7c00)/[0-10000) = 31/64 + case UNSIGNED_F16: + if (prec >= 15) + unq = q; + else if (q == 0) + unq = 0; + else if (q == ((1<> prec; + break; + + // here, let's stick with S16 (no apparent quality benefit from going to S17) + // range is (-7c00..7c00)/(-8000..8000) = 31/32 + case SIGNED_F16: + // don't remove this test even though it appears equivalent to the code below + // as it isn't -- the code below can overflow for prec = 16 + if (prec >= 16) + unq = q; + else + { + if (q < 0) { s = 1; q = -q; } else s = 0; + + if (q == 0) + unq = 0; + else if (q >= ((1<<(prec-1))-1)) + unq = s ? -S16MAX : S16MAX; + else + { + unq = (q * (S16MAX+1) + (S16MAX+1)/2) >> (prec-1); + if (s) + unq = -unq; + } + } + break; + } + return unq; +} + +static int clamp(double r, double low, double high) +{ + if (r < low) return low; + else if (r > high) return high; + else return r; +} + +// match the tonemapping function used by exrdisplay +static void tonemap(const Vec3 &in, double exposure, Vec3 &out) +{ + double r,g,b; + half h; + + // convert from bit pattern back to half and then to double + h.setBits(in.X()); r = h; + h.setBits(in.Y()); g = h; + h.setBits(in.Z()); b = h; + + // 1) Compensate for fogging by subtracting defog + // from the raw pixel values. + // Response: We work with defog of 0.0, so this is a no-op + + // 2) Multiply the defogged pixel values by + // 2^(exposure + 2.47393). 
+ double exposure_scale = pow(2.0, exposure + 2.47393); + r *= exposure_scale; + g *= exposure_scale; + b *= exposure_scale; + + // 3) Values, which are now 1.0, are called "middle gray". + // If defog and exposure are both set to 0.0, then + // middle gray corresponds to a raw pixel value of 0.18. + // In step 6, middle gray values will be mapped to an + // intensity 3.5 f-stops below the display's maximum + // intensity. + // Response: no apparent content. + + // 4) Apply a knee function. The knee function has two + // parameters, kneeLow and kneeHigh. Pixel values + // below 2^kneeLow are not changed by the knee + // function. Pixel values above kneeLow are lowered + // according to a logarithmic curve, such that the + // value 2^kneeHigh is mapped to 2^3.5 (in step 6, + // this value will be mapped to the the display's + // maximum intensity). + // Response: kneeLow = 0.0 (2^0.0 => 1); kneeHigh = 5.0 (2^5 =>32) + if (r > 1.0) + r = 1.0 + log ((r-1.0) * 0.184874 + 1) / 0.184874; + if (g > 1.0) + g = 1.0 + log ((g-1.0) * 0.184874 + 1) / 0.184874; + if (b > 1.0) + b = 1.0 + log ((b-1.0) * 0.184874 + 1) / 0.184874; +// +// 5) Gamma-correct the pixel values, assuming that the +// screen's gamma is 0.4545 (or 1/2.2). + r = pow (r, 0.4545); + g = pow (g, 0.4545); + b = pow (b, 0.4545); + +// 6) Scale the values such that pixels middle gray +// pixels are mapped to 84.66 (or 3.5 f-stops below +// the display's maximum intensity). +// +// 7) Clamp the values to [0, 255]. 
+ r *= 84.66f; + g *= 84.66f; + b *= 84.66f; + + out.X() = clamp (r, 0, 255); + out.Y() = clamp (g, 0, 255); + out.Z() = clamp (b, 0, 255); +} + +static void mpsnrmap(const Vec3 &in, int exposure, Vec3 &out) +{ + double r,g,b; + half h; + + // convert from bit pattern back to half and then to double + h.setBits(in.X()); r = h; + h.setBits(in.Y()); g = h; + h.setBits(in.Z()); b = h; + + assert (exposure > -32 && exposure < 32); + if (exposure > 0) + { + r *= 1 << exposure; + g *= 1 << exposure; + b *= 1 << exposure; + } + else if (exposure < 0) + { + exposure = -exposure; + r /= 1 << exposure; + g /= 1 << exposure; + b /= 1 << exposure; + } + r = 255 * pow (r, 0.4545); + g = 255 * pow (g, 0.4545); + b = 255 * pow (b, 0.4545); + + out.X() = clamp (r, 0, 255); + out.Y() = clamp (g, 0, 255); + out.Z() = clamp (b, 0, 255); +} + +// pick a norm! +#define NORM_EUCLIDEAN 1 + +double Utils::norm(const Vec3 &a, const Vec3 &b) +{ +#ifdef NORM_EUCLIDEAN + Vec3 err = a - b; + return err * err; +#endif +#ifdef NORM_ABS + Vec3 err = a - b; + return fabs(err.X()) + fabs(err.Y()) + fabs(err.Z()); +#endif +#ifdef NORM_EUCLIDEAN_EXPOSURE_UNWEIGHED + double toterr = 0; + Vec3 mapa, mapb, err; + for (int i=-6; i <= 6; i += 3) // figure how many exposure samples needed. I'd argue if you take too many it's same as euclidean + { + tonemap(a, i, mapa); + tonemap(b, i, mapb); + err = mapa - mapb; + toterr += err * err; + } + return toterr; +#endif +#ifdef NORM_EUCLIDEAN_EXPOSURE_WEIGHED + double toterr = 0; + Vec3 mapa, mapb, err; + double rwt = 0.299; + double gwt = 0.587; + double bwt = 0.114; + for (int i=-6; i <= 6; i += 3) // figure how many exposure samples needed. 
I'd argue if you take too many it's same as euclidean + { + tonemap(a, i, mapa); + tonemap(b, i, mapb); + mapa.X() *= rwt; mapa.Y() *= gwt; mapa.Z() *= bwt; + mapb.X() *= rwt; mapb.Y() *= gwt; mapb.Z() *= bwt; + err = mapa - mapb; + toterr += err * err; + } + return toterr; +#endif +} + +double Utils::mpsnr_norm(const Vec3 &a, int exposure, const Vec3 &b) +{ + double toterr = 0; + Vec3 mapa, mapb, err; + + mpsnrmap(a, exposure, mapa); + mpsnrmap(b, exposure, mapb); + + err = mapa - mapb; + toterr += err * err; + + return toterr; +} + +// parse [{:}]{,} +// the pointer starts here ^ +// name is 1 or 2 chars and matches field names. start and end are decimal numbers +void Utils::parse(char *encoding, int &ptr, Field &field, int &endbit, int &len) +{ + if (ptr <= 0) return; + --ptr; + if (encoding[ptr] == ',') --ptr; + assert (encoding[ptr] == ']'); + --ptr; + endbit = 0; + int scale = 1; + while (encoding[ptr] != ':' && encoding[ptr] != '[') + { + assert(encoding[ptr] >= '0' && encoding[ptr] <= '9'); + endbit += (encoding[ptr--] - '0') * scale; + scale *= 10; + } + int startbit = 0; scale = 1; + if (encoding[ptr] == '[') + startbit = endbit; + else + { + ptr--; + while (encoding[ptr] != '[') + { + assert(encoding[ptr] >= '0' && encoding[ptr] <= '9'); + startbit += (encoding[ptr--] - '0') * scale; + scale *= 10; + } + } + len = startbit - endbit + 1; // startbit>=endbit note + --ptr; + if (encoding[ptr] == 'm') field = FIELD_M; + else if (encoding[ptr] == 'd') field = FIELD_D; + else { + // it's wxyz + assert (encoding[ptr] >= 'w' && encoding[ptr] <= 'z'); + int foo = encoding[ptr--] - 'w'; + // now it is r g or b + if (encoding[ptr] == 'r') foo += 10; + else if (encoding[ptr] == 'g') foo += 20; + else if (encoding[ptr] == 'b') foo += 30; + else assert(0); + field = (Field) foo; + } +} + + diff --git a/src/nvtt/bc6h/utils.h b/src/nvtt/bc6h/utils.h new file mode 100755 index 0000000..308f3e6 --- /dev/null +++ b/src/nvtt/bc6h/utils.h @@ -0,0 +1,79 @@ +/* +Copyright 2007 
nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +// utility class holding common routines +#ifndef _UTILS_H +#define _UTILS_H + +#include "arvo/Vec3.h" + +using namespace ArvoMath; + +#ifndef MIN +#define MIN(x,y) ((x)<(y)?(x):(y)) +#endif + +#ifndef MAX +#define MAX(x,y) ((x)>(y)?(x):(y)) +#endif + +#define PALETTE_LERP(a, b, i, denom) Utils::lerp(a, b, i, denom) + +#define SIGN_EXTEND(x,nb) ((((signed(x))&(1<<((nb)-1)))?((~0)<<(nb)):0)|(signed(x))) + +enum Field { FIELD_M = 1, // mode + FIELD_D = 2, // distribution/shape + FIELD_RW = 10+0, FIELD_RX = 10+1, FIELD_RY = 10+2, FIELD_RZ = 10+3, // red channel endpoints or deltas + FIELD_GW = 20+0, FIELD_GX = 20+1, FIELD_GY = 20+2, FIELD_GZ = 20+3, // green channel endpoints or deltas + FIELD_BW = 30+0, FIELD_BX = 30+1, FIELD_BY = 30+2, FIELD_BZ = 30+3, // blue channel endpoints or deltas +}; + +// some constants +#define F16S_MASK 0x8000 // f16 sign mask +#define F16EM_MASK 0x7fff // f16 exp & mantissa mask +#define U16MAX 0xffff +#define S16MIN (-0x8000) +#define S16MAX 0x7fff +#define INT16_MASK 0xffff +#define F16MAX (0x7bff) // MAXFLT bit pattern for halfs + +enum Format { UNSIGNED_F16, SIGNED_F16 }; + +class Utils +{ +public: + static Format FORMAT; // this is a global -- we're either handling unsigned or unsigned half values + + // error metrics + static double norm(const Vec3 &a, const Vec3 &b); + static double mpsnr_norm(const Vec3 &a, int exposure, const Vec3 &b); + + // conversion & clamp + static int 
ushort_to_format(unsigned short input); + static unsigned short format_to_ushort(int input); + + // clamp to format + static void Utils::clamp(Vec3 &v); + + // quantization and unquantization + static int finish_unquantize(int q, int prec); + static int unquantize(int q, int prec); + static int quantize(float value, int prec); + + static void parse(char *encoding, int &ptr, Field &field, int &endbit, int &len); + + // lerping + static int lerp(int a, int b, int i, int denom); + static Vec3 lerp(const Vec3& a, const Vec3 &b, int i, int denom); +}; + +#endif \ No newline at end of file diff --git a/src/nvtt/bc6h/zoh.cpp b/src/nvtt/bc6h/zoh.cpp new file mode 100755 index 0000000..224ee74 --- /dev/null +++ b/src/nvtt/bc6h/zoh.cpp @@ -0,0 +1,205 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. 
+*/ + +// the zoh compressor and decompressor + +#include +#include +#include +#include + +#include "ImfArray.h" +#include "ImfRgba.h" + +#include "tile.h" +#include "zoh.h" +#include "exr.h" + +#ifndef MIN +#define MIN(x,y) ((x)<(y)?(x):(y)) +#endif + +using namespace std; + +bool ZOH::isone(const char *block) +{ + char code = block[0] & 0x1F; + + return (code == 0x03 || code == 0x07 || code == 0x0b || code == 0x0f); +} + +void ZOH::compress(const Tile &t, char *block) +{ + char oneblock[ZOH::BLOCKSIZE], twoblock[ZOH::BLOCKSIZE]; + + double mseone = ZOH::compressone(t, oneblock); + double msetwo = ZOH::compresstwo(t, twoblock); + + if (mseone <= msetwo) + memcpy(block, oneblock, ZOH::BLOCKSIZE); + else + memcpy(block, twoblock, ZOH::BLOCKSIZE); +} + +void ZOH::decompress(const char *block, Tile &t) +{ + if (ZOH::isone(block)) + ZOH::decompressone(block, t); + else + ZOH::decompresstwo(block, t); +} + +void ZOH::compress(string inf, string zohf) +{ + Array2D pixels; + int w, h; + char block[ZOH::BLOCKSIZE]; + + Exr::readRgba(inf, pixels, w, h); + FILE *zohfile = fopen(zohf.c_str(), "wb"); + if (zohfile == NULL) throw "Unable to open .zoh file for write"; + + // stuff for progress bar O.o + int ntiles = ((h+Tile::TILE_H-1)/Tile::TILE_H)*((w+Tile::TILE_W-1)/Tile::TILE_W); + int tilecnt = 0; + int ndots = 25; + int dotcnt = 0; + printf("Progress ["); + for (int i=0; i (ntiles * dotcnt)/ndots) { printf("."); fflush(stdout); ++dotcnt; } + } + } + + printf("]\n"); // advance to next line finally + + if (fclose(zohfile)) throw "Close failed on .zoh file"; +} + +static int str2int(std::string s) +{ + int thing; + std::stringstream str (stringstream::in | stringstream::out); + str << s; + str >> thing; + return thing; +} + +// zoh file name is ...-w-h.zoh, extract width and height +static void extract(string zohf, int &w, int &h) +{ + size_t n = zohf.rfind('.', zohf.length()-1); + size_t n1 = zohf.rfind('-', n-1); + size_t n2 = zohf.rfind('-', n1-1); + string width = 
zohf.substr(n2+1, n1-n2-1); + w = str2int(width); + string height = zohf.substr(n1+1, n-n1-1); + h = str2int(height); +} + +static int mode_to_prec[] = { + 10,7,11,10, + 10,7,11,11, + 10,7,11,12, + 10,7,9,16, + 10,7,8,-1, + 10,7,8,-1, + 10,7,8,-1, + 10,7,6,-1, +}; + +static int shapeindexhist[32], modehist[32], prechistone[16], prechisttwo[16], oneregion, tworegions; + +static void stats(char block[ZOH::BLOCKSIZE]) +{ + char mode = block[0] & 0x1F; if ((mode & 0x3) == 0) mode = 0; if ((mode & 0x3) == 1) mode = 1; modehist[mode]++; + int prec = mode_to_prec[mode]; + assert (prec != -1); + if (!ZOH::isone(block)) + { + tworegions++; + prechisttwo[prec]++; + int shapeindex = ((block[0] & 0xe0) >> 5) | ((block[1] & 0x3) << 3); + shapeindexhist[shapeindex]++; + } + else + { + oneregion++; + prechistone[prec]++; + } +} + +static void printstats() +{ + printf("\nPrecision histogram 10b to 16b one region: "); for (int i=10; i<=16; ++i) printf("%d,", prechistone[i]); + printf("\nPrecision histogram 6b to 11b two regions: "); for (int i=6; i<=11; ++i) printf("%d,", prechisttwo[i]); + printf("\nMode histogram: "); for (int i=0; i<32; ++i) printf("%d,", modehist[i]); + printf("\nShape index histogram: "); for (int i=0; i<32; ++i) printf("%d,", shapeindexhist[i]); + printf("\nOne region %5.2f%% Two regions %5.2f%%", 100.0*oneregion/float(oneregion+tworegions), 100.0*tworegions/float(oneregion+tworegions)); + printf("\n"); +} + +void ZOH::decompress(string zohf, string outf) +{ + Array2D pixels; + int w, h; + char block[ZOH::BLOCKSIZE]; + + extract(zohf, w, h); + FILE *zohfile = fopen(zohf.c_str(), "rb"); + if (zohfile == NULL) throw "Unable to open .zoh file for read"; + pixels.resizeErase(h, w); + + // convert to tiles and decompress each tile + for (int y=0; y + +#include "tile.h" + +using namespace std; + +// UNUSED ZOH MODES are 0x13, 0x17, 0x1b, 0x1f + +#define EXTERNAL_RELEASE 1 // define this if we're releasing this code externally + +#define NREGIONS_TWO 2 +#define 
NREGIONS_ONE 1 +#define NCHANNELS 3 + +// Note: this code only reads OpenEXR files, which are only in F16 format. +// if unsigned is selected, the input is clamped to >= 0. +// if f16 is selected, the range is clamped to 0..0x7bff. + +struct FltEndpts +{ + Vec3 A; + Vec3 B; +}; + +struct IntEndpts +{ + int A[NCHANNELS]; + int B[NCHANNELS]; +}; + +struct ComprEndpts +{ + unsigned int A[NCHANNELS]; + unsigned int B[NCHANNELS]; +}; + +class ZOH +{ +public: + static const int BLOCKSIZE=16; + static const int BITSIZE=128; + static Format FORMAT; + + static void compress(string inf, string zohf); + static void decompress(string zohf, string outf); + static void compress(const Tile &t, char *block); + static void decompress(const char *block, Tile &t); + + static double compressone(const Tile &t, char *block); + static double compresstwo(const Tile &t, char *block); + static void decompressone(const char *block, Tile &t); + static void decompresstwo(const char *block, Tile &t); + + static double refinetwo(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_TWO], char *block); + static double roughtwo(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_TWO]); + + static double refineone(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_ONE], char *block); + static double roughone(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_ONE]); + + static bool isone(const char *block); +}; + +#endif \ No newline at end of file diff --git a/src/nvtt/bc6h/zoh.sln b/src/nvtt/bc6h/zoh.sln new file mode 100755 index 0000000..74b9fc2 --- /dev/null +++ b/src/nvtt/bc6h/zoh.sln @@ -0,0 +1,21 @@ +Microsoft Visual Studio Solution File, Format Version 8.00 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zoh", "zoh.vcproj", "{C6F6CD96-D0C0-4A35-B1BC-53E0A3CB712F}" + ProjectSection(ProjectDependencies) = postProject + EndProjectSection +EndProject +Global + GlobalSection(SolutionConfiguration) = preSolution + Debug = Debug + Release = Release 
+ EndGlobalSection + GlobalSection(ProjectConfiguration) = postSolution + {C6F6CD96-D0C0-4A35-B1BC-53E0A3CB712F}.Debug.ActiveCfg = Debug|Win32 + {C6F6CD96-D0C0-4A35-B1BC-53E0A3CB712F}.Debug.Build.0 = Debug|Win32 + {C6F6CD96-D0C0-4A35-B1BC-53E0A3CB712F}.Release.ActiveCfg = Release|Win32 + {C6F6CD96-D0C0-4A35-B1BC-53E0A3CB712F}.Release.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + EndGlobalSection + GlobalSection(ExtensibilityAddIns) = postSolution + EndGlobalSection +EndGlobal diff --git a/src/nvtt/bc6h/zoh.vcproj b/src/nvtt/bc6h/zoh.vcproj new file mode 100755 index 0000000..31a7f30 --- /dev/null +++ b/src/nvtt/bc6h/zoh.vcproj @@ -0,0 +1,281 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/nvtt/bc6h/zohc.cpp b/src/nvtt/bc6h/zohc.cpp new file mode 100755 index 0000000..79cbb30 --- /dev/null +++ b/src/nvtt/bc6h/zohc.cpp @@ -0,0 +1,301 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +// WORK: the widecolorgamut.exr image is the poster child for not doing independent compression of each 4x4 tile. (The image is available from www.openexr.org.) 
+// At the lower-left vertex, the companded image shows a visible artifact due to the vertex being compressed with 6 bit endpoint accuracy +// but the constant tile right next to it being compressed with 16 bit endpoint accuracy. It's an open problem to figure out how to deal with that in the best possible way. +// +// WORK: we removed 4 codes since we couldn't come up with anything to use them for that showed a worthwhile improvement in PSNR. Clearly the compression format can be improved since +// we're only using 7/8 of the available code space. But how? +// +// NOTE: HDR compression formats that compress luminance and chrominance separatey and multiply them together to get the final decompressed channels tend to do poorly at extreme values. + +#include +#include +#include +#include + +#include +#include "exr.h" +#include "zoh.h" +#include "utils.h" + +using namespace std; + +static int mpsnr_low = -10, mpsnr_high = 10; + +static void dump(char *tag, Array2D &in1, int x, int y) +{ + printf("\n%s\n", tag); + for (int y0=0; y0<4; ++y0) + { + for (int x0=0; x0<4; ++x0) + printf("%6d%6d%6d ", Utils::ushort_to_format((in1[y+y0][x+x0].r).bits()), Utils::ushort_to_format((in1[y+y0][x+x0].g).bits()), Utils::ushort_to_format((in1[y+y0][x+x0].b).bits())); + printf("\n"); + } +} + +static void analyze(string in1, string in2) +{ + Array2D pin1, pin2; + int w1, h1, w2, h2; + + Exr::readRgba(in1, pin1, w1, h1); + Exr::readRgba(in2, pin2, w2, h2); + + // choose the smaller of the two dimensions (since the old compressor would truncate to multiple-of-4 sizes) + int w = MIN(w1, w2); + int h = MIN(h1, h2); + + double nsamples = 0; + double mabse = 0, mse = 0, mpsnre = 0; + int errdist[17]; + int errs[3*16]; + + for (int i=0; i<17; ++i) + errdist[i] = 0; + + int psnrhist[100]; + for (int i=0; i<100; ++i) + psnrhist[i] = 0; + bool first = true; + + for (int y = 0; y < h; y+=4) + for (int x = 0; x < w; x+=4) + { + int xw = MIN(w-x, 4); + int yw = MIN(h-y, 4); + int np = 0; + + Vec3 
a, b; + + for (int y0=0; y0 0 ? err : -err; + mabse += (double)abse; + mse += (double)abse * abse; + msetile += (double)abse * abse; + + int lsb; + + for (lsb=0; abse>0; ++lsb, abse >>= 1) + ; + + errdist[lsb]++; + } + + double psnrtile, rmsetile; + + rmsetile = sqrt(msetile / double(np)); + psnrtile = (rmsetile == 0) ? 99.0 : 20.0 * log10(32767.0/rmsetile); + + int psnrquant = (int) floor (psnrtile); // 10 means [10,11) psnrs, e.g. + // clamp just in case + psnrquant = (psnrquant < 0) ? 0 : (psnrquant > 99) ? 99 : psnrquant; + psnrhist[psnrquant]++; + if (first && psnrquant < 20) + { + first = false; + printf("Tiles with PSNR's worse than 20dB\n"); + } + if (psnrquant < 20) + printf("X %4d Y %4d PSNR %7.2f\n", x, y, psnrtile); + } + + nsamples = w * h * 3; + + mabse /= nsamples; + mse /= nsamples; + + double rmse, psnr; + + rmse = sqrt(mse); + psnr = (rmse == 0) ? 999.0 : 20.0 * log10(32767.0/rmse); + + mpsnre /= (mpsnr_high-mpsnr_low+1) * w * h; + + double mpsnr = (mpsnre == 0) ? 999.0 : 10.0 * log10(3.0 * 255.0 * 255.0 / mpsnre); + + printf("Image size compared: %dw x %dh\n", w, h); + if (w != w1 || w != w2 || h != h1 || h != h2) + printf("--- NOTE: only the overlap between the 2 images (%d,%d) and (%d,%d) was compared\n", w1, h1, w2, h2); + printf("Total pixels: %12.0f\n", nsamples/3); + printf("Mean absolute error: %f\n", mabse); + printf("Root mean squared error: %f\n", rmse); + printf("Peak signal to noise ratio in dB: %f\n", psnr); + printf("mPSNR for exposure range %d..%d: %8.3f\n", mpsnr_low, mpsnr_high, mpsnr); + printf("Histogram of number of channels with indicated LSB error\n"); + for (int i = 0; i < 17; ++i) + if (errdist[i]) + printf("%2d LSB error: %10d\n", i, errdist[i]); +#if 0 + printf("Histogram of per-tile PSNR\n"); + for (int i = 0; i < 100; ++i) + if (psnrhist[i]) + printf("[%2d,%2d) %6d\n", i, i+1, psnrhist[i]); +#endif +} + +static bool ext(string inf, char *extension) +{ + size_t n = inf.rfind('.', inf.length()-1); + if (n != 
string::npos) + return inf.substr(n, inf.length()) == extension; + else if (*extension != '\0') + return false; + else + return true; // extension is null and we didn't find a . +} + +template +std::string toString(const T &thing) +{ + std::stringstream os; + os << thing; + return os.str(); +} + +static int str2int(std::string s) +{ + int thing; + std::stringstream str (stringstream::in | stringstream::out); + str << s; + str >> thing; + return thing; +} + +static void usage() +{ + cout << endl << + "Usage:" << endl << + "zohc infile.exr outroot generates outroot-w-h.bc6, outroot-bc6.exr" << endl << + "zohc foo-w-h.bc6 outroot generates outroot-bc6.exr" << endl << + "zohc infile.exr outfile.exr [e1 e2] compares the two images; optionally specify the mPSNR exposure range" << endl << endl << + "Flags:" << endl << + "-u treat the input as unsigned. negative values are clamped to zero. (default)" << endl << + "-s treat the input as signed." << endl; +} + +Format Utils::FORMAT = UNSIGNED_F16; + +int main(int argc, char* argv[]) +{ +#ifdef EXTERNAL_RELEASE + cout << "BC6H OpenEXR RGB Compressor/Decompressor version 1.61 (May 27 2010)." << endl << + "Bug reports, questions, and suggestions to wdonovan a t nvidia d o t com." << endl << endl; +#endif + try + { + char * args[4]; + int nargs = 0; + bool is_unsigned = true; + bool is_float = true; + + // process flags, copy any non flag arg to args[] + for (int i = 1; i < argc; ++i) + { + if ((argv[i])[0] == '-') + switch ((argv[i])[1]) { + case 'u': is_unsigned = true; break; + case 's': is_unsigned = false; break; + default: throw "bad flag arg"; + } + else + { + if (nargs >= 6) throw "Incorrect number of args"; + args[nargs++] = argv[i]; + } + } + + Utils::FORMAT = (!is_unsigned) ? SIGNED_F16 : UNSIGNED_F16; + + if (nargs < 2) throw "Incorrect number of args"; + + string inf(args[0]), outroot(args[1]); + + cout << "Input format is: " << (is_unsigned ? 
"UNSIGNED FLOAT_16" : "SIGNED FLOAT_16") << endl; + + if (ext(outroot, "")) + { + if (ext(inf, ".exr")) + { + int width, height; + Exr::fileinfo(inf, width, height); + string outf, zohf; + outf = outroot + "-bc6.exr"; + zohf = outroot + "-" + toString(width) + "-" + toString(height) + ".bc6"; + cout << "Compressing " << inf << " to " << zohf << endl; + ZOH::compress(inf, zohf); + cout << "Decompressing " << zohf << " to " << outf << endl; + ZOH::decompress(zohf, outf); + analyze(inf, outf); + } + else if (ext(inf, ".bc6")) + { + string outf; + outf = outroot + "-bc6.exr"; + cout << "Decompressing " << inf << " to " << outf << endl; + ZOH::decompress(inf, outf); + } + else throw "Invalid file args"; + } + else if (ext(inf, ".exr") && ext(outroot, ".exr")) + { + if (nargs == 4) + { + string low(args[2]), high(args[3]); + mpsnr_low = str2int(low); + mpsnr_high = str2int(high); + if (mpsnr_low > mpsnr_high) throw "Invalid exposure range"; + } + analyze(inf, outroot); + } + else throw "Invalid file args"; + } + catch(const exception& e) + { + // Print error message and usage instructions + cerr << e.what() << endl; + usage(); + return 1; + } + catch(char * msg) + { + cerr << msg << endl; + usage(); + return 1; + } + return 0; +} diff --git a/src/nvtt/bc6h/zohone.cpp b/src/nvtt/bc6h/zohone.cpp new file mode 100755 index 0000000..e6d2b87 --- /dev/null +++ b/src/nvtt/bc6h/zohone.cpp @@ -0,0 +1,804 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. 
+*/ + +// one region zoh compress/decompress code +// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis + +#include "bits.h" +#include "tile.h" +#include "zoh.h" +#include "arvo/Vec3.h" +#include "arvo/Matrix.h" +#include "arvo/SVD.h" +#include "utils.h" + +#include + +using namespace ArvoMath; + +#define NINDICES 16 +#define INDEXBITS 4 +#define HIGH_INDEXBIT (1<<(INDEXBITS-1)) +#define DENOM (NINDICES-1) + +#define NSHAPES 1 + +static int shapes[NSHAPES] = +{ + 0x0000 +}; // only 1 shape + +#define REGION(x,y,shapeindex) ((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0) + +#define POS_TO_X(pos) ((pos)&3) +#define POS_TO_Y(pos) (((pos)>>2)&3) + +#define NDELTA 2 + +struct Chanpat +{ + int prec[NDELTA]; // precision pattern for one channel +}; + +struct Pattern +{ + Chanpat chan[NCHANNELS];// allow different bit patterns per channel -- but we still want constant precision per channel + int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed + int mode; // associated mode value + int modebits; // number of mode bits + char *encoding; // verilog description of encoding for this mode +}; + +#define MAXMODEBITS 5 +#define MAXMODES (1<> 2) & 3 and x = index & 3 +static void swap_indices(IntEndpts endpts[NREGIONS_ONE], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex) +{ + int index_positions[NREGIONS_ONE]; + + index_positions[0] = 0; // since WLOG we have the high bit of the shapes at 0 + + for (int region = 0; region < NREGIONS_ONE; ++region) + { + int x = index_positions[region] & 3; + int y = (index_positions[region] >> 2) & 3; + assert(REGION(x,y,shapeindex) == region); // double check the table + if (indices[y][x] & HIGH_INDEXBIT) + { + // high bit is set, swap the endpts and indices for this region + int t; + for (int i=0; i> endbit, len); break; + case FIELD_RW: out.write(rw >> endbit, len); break; + case FIELD_RX: out.write(rx >> endbit, 
len); break;
+		case FIELD_GW:	out.write(gw >> endbit, len); break;
+		case FIELD_GX:	out.write(gx >> endbit, len); break;
+		case FIELD_BW:	out.write(bw >> endbit, len); break;
+		case FIELD_BX:	out.write(bx >> endbit, len); break;
+
+		case FIELD_D:
+		case FIELD_RY:
+		case FIELD_RZ:
+		case FIELD_GY:
+		case FIELD_GZ:
+		case FIELD_BY:
+		case FIELD_BZ:
+		default: assert(0);
+		}
+	}
+}
+// Decode a one-region block header from 'in': the mode bits select a Pattern (returned in p), then the endpoint fields listed in p.encoding are read into endpts[0].
+static void read_header(Bits &in, ComprEndpts endpts[NREGIONS_ONE], Pattern &p)
+{
+	// reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode
+	int mode = in.read(2);
+	if (mode != 0x00 && mode != 0x01)	// any other 2-bit value marks a 5-bit mode
+		mode = (in.read(3) << 2) | mode;	// the 3 extra bits become the high part of the mode
+
+	int pat_index = mode_to_pat[mode];
+
+	assert (pat_index >= 0 && pat_index < NPATTERNS);
+	assert (in.getptr() == patterns[pat_index].modebits);
+
+	p = patterns[pat_index];
+
+	int d;	// never populated in this file: FIELD_D falls through to assert(0) below
+	int rw, rx;
+	int gw, gx;
+	int bw, bx;
+
+	d = 0;
+	rw = rx = 0;
+	gw = gx = 0;
+	bw = bx = 0;
+
+	int ptr = strlen(p.encoding);
+	// NOTE(review): Utils::parse presumably consumes one field descriptor per call and moves ptr toward 0 -- confirm in utils.cpp
+	while (ptr)
+	{
+		Field field;
+		int endbit, len;
+
+		Utils::parse(p.encoding, ptr, field, endbit, len);
+
+		switch(field)
+		{
+		case FIELD_M:	break;	// already processed so ignore
+		case FIELD_RW:	rw |= in.read(len) << endbit; break;
+		case FIELD_RX:	rx |= in.read(len) << endbit; break;
+		case FIELD_GW:	gw |= in.read(len) << endbit; break;
+		case FIELD_GX:	gx |= in.read(len) << endbit; break;
+		case FIELD_BW:	bw |= in.read(len) << endbit; break;
+		case FIELD_BX:	bx |= in.read(len) << endbit; break;
+
+		case FIELD_D:
+		case FIELD_RY:
+		case FIELD_RZ:
+		case FIELD_GY:
+		case FIELD_GZ:
+		case FIELD_BY:
+		case FIELD_BZ:
+		default: assert(0);
+		}
+	}
+
+	assert (in.getptr() == 128 - 63);	// 63 index bits remain: 15 positions * INDEXBITS(4) + 3 bits for position 0 (assuming a 16-pixel tile)
+
+	endpts[0].A[0] = rw; endpts[0].B[0] = rx;
+	endpts[0].A[1] = gw; endpts[0].B[1] = gx;
+	endpts[0].A[2] = bw; endpts[0].B[2] = bx;
+}
+
+// write the per-pixel indices; position 0 is stored with one bit less (INDEXBITS-1) than the others. shapeindex is not used here.
+static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
+{
+	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
+	{
+		int x = POS_TO_X(pos);
+		int y = POS_TO_Y(pos);
+
+		out.write(indices[y][x], INDEXBITS - ((pos == 0) ? 1 : 0));
+	}
+}
+// Serialize one block into 'block': header first, then indices; asserts that exactly ZOH::BITSIZE bits were written.
+static void emit_block(const ComprEndpts endpts[NREGIONS_ONE], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
+{
+	Bits out(block, ZOH::BITSIZE);
+
+	write_header(endpts, p, out);
+
+	write_indices(indices, shapeindex, out);
+
+	assert(out.getptr() == ZOH::BITSIZE);
+}
+// Fill palette[0..NINDICES-1] for one region: per channel, unquantize both endpoints, interpolate with PALETTE_LERP(a, b, i, DENOM) and pass the result through Utils::finish_unquantize.
+static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vec3 palette[NINDICES])
+{
+	// scale endpoints
+	int a, b;	// really need an IntVec3...
+
+	a = Utils::unquantize(endpts.A[0], prec);
+	b = Utils::unquantize(endpts.B[0], prec);
+
+	// interpolate (channel 0 -> X)
+	for (int i = 0; i < NINDICES; ++i)
+		palette[i].X() = Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec);
+
+	a = Utils::unquantize(endpts.A[1], prec);
+	b = Utils::unquantize(endpts.B[1], prec);
+
+	// interpolate (channel 1 -> Y)
+	for (int i = 0; i < NINDICES; ++i)
+		palette[i].Y() = Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec);
+
+	a = Utils::unquantize(endpts.A[2], prec);
+	b = Utils::unquantize(endpts.B[2], prec);
+
+	// interpolate (channel 2 -> Z)
+	for (int i = 0; i < NINDICES; ++i)
+		palette[i].Z() = Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec);
+}
+
+// read the per-pixel indices back; position 0 was stored with INDEXBITS-1 bits (mirror of write_indices). shapeindex is not used here.
+static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
+{
+	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
+	{
+		int x = POS_TO_X(pos);
+		int y = POS_TO_Y(pos);
+
+		indices[y][x]= in.read(INDEXBITS - ((pos == 0) ? 1 : 0));
+	}
+}
+// Decode a one-region block into tile t: header -> endpoints -> palette -> indices -> per-pixel palette lookup.
+void ZOH::decompressone(const char *block, Tile &t)
+{
+	Bits in(block, ZOH::BITSIZE);
+
+	Pattern p;
+	IntEndpts endpts[NREGIONS_ONE];
+	ComprEndpts compr_endpts[NREGIONS_ONE];
+
+	read_header(in, compr_endpts, p);
+	int shapeindex = 0;		// only one shape
+
+	decompress_endpts(compr_endpts, endpts, p);
+
+	Vec3 palette[NREGIONS_ONE][NINDICES];
+	for (int r = 0; r < NREGIONS_ONE; ++r)
+		generate_palette_quantized(endpts[r], p.chan[0].prec[0], &palette[r][0]);
+
+	// read indices
+	int indices[Tile::TILE_H][Tile::TILE_W];
+
+	read_indices(in, shapeindex, indices);
+
+	assert(in.getptr() == ZOH::BITSIZE);	// the whole block must have been consumed
+
+	// lookup
+	for (int y = 0; y < Tile::TILE_H; y++)
+	for (int x = 0; x < Tile::TILE_W; x++)
+		t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
+}
+
+// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr (sum of importance-weighted per-color errors)
+static double map_colors(const Vec3 colors[], const float importance[], int np, const IntEndpts &endpts, int prec)
+{
+	Vec3 palette[NINDICES];
+	double toterr = 0;
+	Vec3 err;	// unused: shadowed by the double err declared inside the loop
+
+	generate_palette_quantized(endpts, prec, palette);
+
+	for (int i = 0; i < np; ++i)
+	{
+		double err, besterr;
+
+		besterr = Utils::norm(colors[i], palette[0]) * importance[i];
+
+		for (int j = 1; j < NINDICES && besterr > 0; ++j)	// besterr == 0 is an exact match, nothing left to search
+		{
+			err = Utils::norm(colors[i], palette[j]) * importance[i];
+
+			if (err > besterr)	// error increased, so we're done searching
+				break;
+			if (err < besterr)
+				besterr = err;
+		}
+		toterr += besterr;
+	}
+	return toterr;
+}
+
+// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
+static void assign_indices(const Tile &tile, int shapeindex, IntEndpts endpts[NREGIONS_ONE], int prec,
+						   int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS_ONE])
+{
+	// build list of possibles
+	Vec3 palette[NREGIONS_ONE][NINDICES];
+
+	for (int region = 0; region < NREGIONS_ONE; ++region)
+	{
+
generate_palette_quantized(endpts[region], prec, &palette[region][0]); + toterr[region] = 0; + } + + Vec3 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + double err, besterr; + + besterr = Utils::norm(tile.data[y][x], palette[region][0]); + indices[y][x] = 0; + + for (int i = 1; i < NINDICES && besterr > 0; ++i) + { + err = Utils::norm(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[y][x] = i; + } + } + toterr[region] += besterr; + } +} + +static double perturb_one(const Vec3 colors[], const float importance[], int np, int ch, int prec, const IntEndpts &old_endpts, IntEndpts &new_endpts, + double old_err, int do_b) +{ + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts + + IntEndpts temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; + + // copy real endpoints so we can perturb them + for (int i=0; i>= 1) + { + bool improved = false; + for (int sign = -1; sign <= 1; sign += 2) + { + if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } + + float err = map_colors(colors, importance, np, temp_endpts, prec); + + if (err < min_err) + { + improved = true; + min_err = err; + beststep = sign * step; + } + } + // if this was an improvement, move the endpoint and continue search from there + if (improved) + { + if (do_b == 0) + new_endpts.A[ch] += beststep; + else + new_endpts.B[ch] += beststep; + } + } + return min_err; +} + +static void optimize_one(const Vec3 colors[], const float importance[], int np, 
double orig_err, const IntEndpts &orig_endpts, int prec, IntEndpts &opt_endpts) +{ + double opt_err = orig_err; + for (int ch = 0; ch < NCHANNELS; ++ch) + { + opt_endpts.A[ch] = orig_endpts.A[ch]; + opt_endpts.B[ch] = orig_endpts.B[ch]; + } + /* + err0 = perturb(rgb0, delta0) + err1 = perturb(rgb1, delta1) + if (err0 < err1) + if (err0 >= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? rgb1 : rgb0 += delta + initial_err = err + */ + IntEndpts new_a, new_b; + IntEndpts new_endpt; + int do_b; + + // now optimize each channel separately + for (int ch = 0; ch < NCHANNELS; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_a, opt_err, 0); // perturb endpt A + float err1 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_b, opt_err, 1); // perturb endpt B + + if (err0 < err1) + { + if (err0 >= opt_err) + continue; + + opt_endpts.A[ch] = new_a.A[ch]; + opt_err = err0; + do_b = 1; // do B next + } + else + { + if (err1 >= opt_err) + continue; + opt_endpts.B[ch] = new_b.B[ch]; + opt_err = err1; + do_b = 0; // do A next + } + + // now alternate endpoints and keep trying until there is no improvement + for (;;) + { + float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_endpt, opt_err, do_b); + if (err >= opt_err) + break; + if (do_b == 0) + opt_endpts.A[ch] = new_endpt.A[ch]; + else + opt_endpts.B[ch] = new_endpt.B[ch]; + opt_err = err; + do_b = 1 - do_b; // now move the other endpoint + } + } +} + +static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_err[NREGIONS_ONE], + const IntEndpts orig_endpts[NREGIONS_ONE], int prec, IntEndpts 
opt_endpts[NREGIONS_ONE]) +{ + Vec3 pixels[Tile::TILE_TOTAL]; + float importance[Tile::TILE_TOTAL]; + double err = 0; + int indices[Tile::TILE_TOTAL]; + + for (int region=0; region 0; ++i) + { + err = Utils::norm(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x]; + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; +} + +double ZOH::roughone(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_ONE]) +{ + for (int region=0; region maxp) maxp = dp; + } + + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + minp*direction; + endpts[region].B = mean + maxp*direction; + + // clamp endpoints + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped + Utils::clamp(endpts[region].A); + Utils::clamp(endpts[region].B); + } + + return map_colors(tile, shapeindex, endpts); +} + +double ZOH::compressone(const Tile &t, char *block) +{ + int shapeindex_best = 0; + FltEndpts endptsbest[NREGIONS_ONE], tempendpts[NREGIONS_ONE]; + double msebest = DBL_MAX; + + /* + collect the mse values that are within 5% of the best values + optimize each one and choose the best + */ + // hack for now -- just use the best value WORK + for (int i=0; i0.0; ++i) + { + double mse = roughone(t, i, tempendpts); + if (mse < msebest) + { + msebest = mse; + shapeindex_best = i; + memcpy(endptsbest, tempendpts, sizeof(endptsbest)); + } + + } + return refineone(t, shapeindex_best, endptsbest, block); +} diff --git a/src/nvtt/bc6h/zohtwo.cpp b/src/nvtt/bc6h/zohtwo.cpp new file mode 100755 index 0000000..8a692bd --- /dev/null +++ b/src/nvtt/bc6h/zohtwo.cpp @@ -0,0 +1,892 @@ +/* +Copyright 2007 nVidia, Inc. 
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +// two regions zoh compress/decompress code +// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis + +/* optimization algorithm + + get initial float endpoints + convert endpoints using 16 bit precision, transform, and get bit delta. choose likely endpoint compression candidates. + note that there will be 1 or 2 candidates; 2 will be chosen when the delta values are close to the max possible. + for each EC candidate in order from max precision to smaller precision + convert endpoints using the appropriate precision. + optimize the endpoints and minimize square error. save the error and index assignments. apply index compression as well. + (thus the endpoints and indices are in final form.) + transform and get bit delta. + if the bit delta fits, exit + if we ended up with no candidates somehow, choose the tail set of EC candidates and retry. this should happen hardly ever. + add a state variable to assert we only do this once. + convert to bit stream. + return the error. + + Global optimization + order all tiles based on their errors + do something special for high-error tiles + the goal here is to try to avoid tiling artifacts. but I think this is a research problem. let's just generate an error image... 
+ + display an image that shows partitioning and precision selected for each tile +*/ + +#include "bits.h" +#include "tile.h" +#include "zoh.h" +#include "arvo/Vec3.h" +#include "arvo/Matrix.h" +#include "arvo/SVD.h" +#include "utils.h" + +#include + +using namespace ArvoMath; + +#define NINDICES 8 +#define INDEXBITS 3 +#define HIGH_INDEXBIT (1<<(INDEXBITS-1)) +#define DENOM (NINDICES-1) + +// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like? +// i.e. can we search shapes in a particular order so we can see the global error minima easily and +// stop without having to touch all shapes? + +#include "shapes_two.h" +// use only the first 32 available shapes +#undef NSHAPES +#undef SHAPEBITS +#define NSHAPES 32 +#define SHAPEBITS 5 + +#define POS_TO_X(pos) ((pos)&3) +#define POS_TO_Y(pos) (((pos)>>2)&3) + +#define NDELTA 4 + +struct Chanpat +{ + int prec[NDELTA]; // precision pattern for one channel +}; + +struct Pattern +{ + Chanpat chan[NCHANNELS];// allow different bit patterns per channel -- but we still want constant precision per channel + int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed + int mode; // associated mode value + int modebits; // number of mode bits + char *encoding; // verilog description of encoding for this mode +}; + +#define MAXMODEBITS 5 +#define MAXMODES (1<> endbit, len); break; + case FIELD_D: out.write( d >> endbit, len); break; + case FIELD_RW: out.write(rw >> endbit, len); break; + case FIELD_RX: out.write(rx >> endbit, len); break; + case FIELD_RY: out.write(ry >> endbit, len); break; + case FIELD_RZ: out.write(rz >> endbit, len); break; + case FIELD_GW: out.write(gw >> endbit, len); break; + case FIELD_GX: out.write(gx >> endbit, len); break; + case FIELD_GY: out.write(gy >> endbit, len); break; + case FIELD_GZ: out.write(gz >> endbit, len); break; + case FIELD_BW: out.write(bw >> endbit, len); break; + case FIELD_BX: out.write(bx 
>> endbit, len); break;
+		case FIELD_BY:	out.write(by >> endbit, len); break;
+		case FIELD_BZ:	out.write(bz >> endbit, len); break;
+		default: assert(0);
+		}
+	}
+}
+// Decode a two-region block header from 'in'. Returns false when the mode maps to -2 (reserved); otherwise fills p, shapeindex and both regions' endpoints and returns true.
+static bool read_header(Bits &in, ComprEndpts endpts[NREGIONS_TWO], int &shapeindex, Pattern &p)
+{
+	// reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode
+	int mode = in.read(2);
+	if (mode != 0x00 && mode != 0x01)	// any other 2-bit value marks a 5-bit mode
+		mode = (in.read(3) << 2) | mode;	// the 3 extra bits become the high part of the mode
+
+	int pat_index = mode_to_pat[mode];
+
+	if (pat_index == -2)
+		return false;		// reserved mode found
+
+	assert (pat_index >= 0 && pat_index < NPATTERNS);
+	assert (in.getptr() == patterns[pat_index].modebits);
+
+	p = patterns[pat_index];
+
+	int d;	// accumulates the shape index (FIELD_D)
+	int rw, rx, ry, rz;
+	int gw, gx, gy, gz;
+	int bw, bx, by, bz;
+
+	d = 0;
+	rw = rx = ry = rz = 0;
+	gw = gx = gy = gz = 0;
+	bw = bx = by = bz = 0;
+
+	int ptr = strlen(p.encoding);
+	// NOTE(review): Utils::parse presumably consumes one field descriptor per call and moves ptr toward 0 -- confirm in utils.cpp
+	while (ptr)
+	{
+		Field field;
+		int endbit, len;
+
+		Utils::parse(p.encoding, ptr, field, endbit, len);
+
+		switch(field)
+		{
+		case FIELD_M:	break;	// already processed so ignore
+		case FIELD_D:	 d |= in.read(len) << endbit; break;
+		case FIELD_RW:	rw |= in.read(len) << endbit; break;
+		case FIELD_RX:	rx |= in.read(len) << endbit; break;
+		case FIELD_RY:	ry |= in.read(len) << endbit; break;
+		case FIELD_RZ:	rz |= in.read(len) << endbit; break;
+		case FIELD_GW:	gw |= in.read(len) << endbit; break;
+		case FIELD_GX:	gx |= in.read(len) << endbit; break;
+		case FIELD_GY:	gy |= in.read(len) << endbit; break;
+		case FIELD_GZ:	gz |= in.read(len) << endbit; break;
+		case FIELD_BW:	bw |= in.read(len) << endbit; break;
+		case FIELD_BX:	bx |= in.read(len) << endbit; break;
+		case FIELD_BY:	by |= in.read(len) << endbit; break;
+		case FIELD_BZ:	bz |= in.read(len) << endbit; break;
+		default: assert(0);
+		}
+	}
+
+	assert (in.getptr() == 128 - 46);	// 46 index bits remain: 16 positions * INDEXBITS(3) minus one bit per region (assuming 2 regions and a 16-pixel tile)
+
+	shapeindex = d;
+	endpts[0].A[0] = rw; endpts[0].B[0] = rx; endpts[1].A[0] = ry; endpts[1].B[0] = rz;
+	endpts[0].A[1] = gw; endpts[0].B[1] = gx; endpts[1].A[1] = gy; endpts[1].B[1] = gz;
+	endpts[0].A[2] = bw; endpts[0].B[2] = bx; endpts[1].A[2] = by; endpts[1].B[2] = bz;
+
+	return true;
+}
+// Write the per-pixel indices; each region's position from SHAPEINDEX_TO_COMPRESSED_INDICES is stored with INDEXBITS-1 bits, all others with INDEXBITS.
+static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
+{
+	int positions[NREGIONS_TWO];
+
+	for (int r = 0; r < NREGIONS_TWO; ++r)
+		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
+
+	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
+	{
+		int x = POS_TO_X(pos);
+		int y = POS_TO_Y(pos);
+
+		bool match = false;
+
+		for (int r = 0; r < NREGIONS_TWO; ++r)
+			if (positions[r] == pos) { match = true; break; }
+
+		out.write(indices[y][x], INDEXBITS - (match ? 1 : 0));
+	}
+}
+// Serialize one block into 'block': header first, then indices; asserts that exactly ZOH::BITSIZE bits were written.
+static void emit_block(const ComprEndpts compr_endpts[NREGIONS_TWO], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
+{
+	Bits out(block, ZOH::BITSIZE);
+
+	write_header(compr_endpts, shapeindex, p, out);
+
+	write_indices(indices, shapeindex, out);
+
+	assert(out.getptr() == ZOH::BITSIZE);
+}
+// Fill palette[0..NINDICES-1] for one region: per channel, unquantize both endpoints, interpolate with PALETTE_LERP(a, b, i, DENOM) and pass the result through Utils::finish_unquantize.
+static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vec3 palette[NINDICES])
+{
+	// scale endpoints
+	int a, b;	// really need an IntVec3...
+
+	a = Utils::unquantize(endpts.A[0], prec);
+	b = Utils::unquantize(endpts.B[0], prec);
+
+	// interpolate (channel 0 -> X)
+	for (int i = 0; i < NINDICES; ++i)
+		palette[i].X() = Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec);
+
+	a = Utils::unquantize(endpts.A[1], prec);
+	b = Utils::unquantize(endpts.B[1], prec);
+
+	// interpolate (channel 1 -> Y)
+	for (int i = 0; i < NINDICES; ++i)
+		palette[i].Y() = Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec);
+
+	a = Utils::unquantize(endpts.A[2], prec);
+	b = Utils::unquantize(endpts.B[2], prec);
+
+	// interpolate (channel 2 -> Z)
+	for (int i = 0; i < NINDICES; ++i)
+		palette[i].Z() = Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec);
+}
+// Read the per-pixel indices back, using the same per-position bit widths as write_indices.
+static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
+{
+	int positions[NREGIONS_TWO];
+
+	for (int r = 0; r < NREGIONS_TWO; ++r)
+		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
+
+	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
+	{
+		int x = POS_TO_X(pos);
+		int y = POS_TO_Y(pos);
+
+		bool match = false;
+
+		for (int r = 0; r < NREGIONS_TWO; ++r)
+			if (positions[r] == pos) { match = true; break; }
+
+		indices[y][x]= in.read(INDEXBITS - (match ? 1 : 0));
+	}
+}
+// Decode a two-region block into tile t; a reserved mode yields an all-zero tile.
+void ZOH::decompresstwo(const char *block, Tile &t)
+{
+	Bits in(block, ZOH::BITSIZE);
+
+	Pattern p;
+	IntEndpts endpts[NREGIONS_TWO];
+	ComprEndpts compr_endpts[NREGIONS_TWO];
+	int shapeindex;
+
+	if (!read_header(in, compr_endpts, shapeindex, p))
+	{
+		// reserved mode, return all zeroes
+		Vec3 zero(0);
+
+		for (int y = 0; y < Tile::TILE_H; y++)
+		for (int x = 0; x < Tile::TILE_W; x++)
+			t.data[y][x] = zero;
+
+		return;
+	}
+
+	decompress_endpts(compr_endpts, endpts, p);
+
+	Vec3 palette[NREGIONS_TWO][NINDICES];
+	for (int r = 0; r < NREGIONS_TWO; ++r)
+		generate_palette_quantized(endpts[r], p.chan[0].prec[0], &palette[r][0]);
+
+	int indices[Tile::TILE_H][Tile::TILE_W];
+
+	read_indices(in, shapeindex, indices);
+
+	assert(in.getptr() == ZOH::BITSIZE);	// the whole block must have been consumed
+
+	// lookup
+	for (int y = 0; y < Tile::TILE_H; y++)
+	for (int x = 0; x < Tile::TILE_W; x++)
+		t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
+}
+
+// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr (sum of importance-weighted per-color errors)
+static double map_colors(const Vec3 colors[], const float importance[], int np, const IntEndpts &endpts, int prec)
+{
+	Vec3 palette[NINDICES];
+	double toterr = 0;
+	Vec3 err;	// unused: shadowed by the double err declared inside the loop
+
+	generate_palette_quantized(endpts, prec, palette);
+
+	for (int i = 0; i < np; ++i)
+	{
+		double err, besterr;
+
+		besterr = Utils::norm(colors[i], palette[0]) * importance[i];
+
+		for (int j = 1; j < NINDICES && besterr > 0; ++j)	// besterr == 0 is an exact match, nothing left to search
+		{
+			err = Utils::norm(colors[i], palette[j]) * importance[i];
+
+			if (err > besterr)	// error increased, so we're done searching
+				break;
+			if (err < besterr)
+				besterr = err;
+		}
+		toterr += besterr;
+	}
+	return toterr;
+}
+
+// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
+static void assign_indices(const Tile &tile, int shapeindex, IntEndpts endpts[NREGIONS_TWO], int prec,
+						   int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS_TWO])
+{
+	//
build list of possibles + Vec3 palette[NREGIONS_TWO][NINDICES]; + + for (int region = 0; region < NREGIONS_TWO; ++region) + { + generate_palette_quantized(endpts[region], prec, &palette[region][0]); + toterr[region] = 0; + } + + Vec3 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + double err, besterr; + + besterr = Utils::norm(tile.data[y][x], palette[region][0]); + indices[y][x] = 0; + + for (int i = 1; i < NINDICES && besterr > 0; ++i) + { + err = Utils::norm(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[y][x] = i; + } + } + toterr[region] += besterr; + } +} + +static double perturb_one(const Vec3 colors[], const float importance[], int np, int ch, int prec, const IntEndpts &old_endpts, IntEndpts &new_endpts, + double old_err, int do_b) +{ + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts + + IntEndpts temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; + + // copy real endpoints so we can perturb them + for (int i=0; i>= 1) + { + bool improved = false; + for (int sign = -1; sign <= 1; sign += 2) + { + if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } + + float err = map_colors(colors, importance, np, temp_endpts, prec); + + if (err < min_err) + { + improved = true; + min_err = err; + beststep = sign * step; + } + } + // if this was an improvement, move the endpoint and continue search from there + if (improved) + { + if (do_b == 0) + new_endpts.A[ch] += beststep; + else + 
new_endpts.B[ch] += beststep; + } + } + return min_err; +} + +static void optimize_one(const Vec3 colors[], const float importance[], int np, double orig_err, const IntEndpts &orig_endpts, int prec, IntEndpts &opt_endpts) +{ + double opt_err = orig_err; + for (int ch = 0; ch < NCHANNELS; ++ch) + { + opt_endpts.A[ch] = orig_endpts.A[ch]; + opt_endpts.B[ch] = orig_endpts.B[ch]; + } + /* + err0 = perturb(rgb0, delta0) + err1 = perturb(rgb1, delta1) + if (err0 < err1) + if (err0 >= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? rgb1 : rgb0 += delta + initial_err = err + */ + IntEndpts new_a, new_b; + IntEndpts new_endpt; + int do_b; + + // now optimize each channel separately + for (int ch = 0; ch < NCHANNELS; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_a, opt_err, 0); // perturb endpt A + float err1 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_b, opt_err, 1); // perturb endpt B + + if (err0 < err1) + { + if (err0 >= opt_err) + continue; + + opt_endpts.A[ch] = new_a.A[ch]; + opt_err = err0; + do_b = 1; // do B next + } + else + { + if (err1 >= opt_err) + continue; + opt_endpts.B[ch] = new_b.B[ch]; + opt_err = err1; + do_b = 0; // do A next + } + + // now alternate endpoints and keep trying until there is no improvement + for (;;) + { + float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_endpt, opt_err, do_b); + if (err >= opt_err) + break; + if (do_b == 0) + opt_endpts.A[ch] = new_endpt.A[ch]; + else + opt_endpts.B[ch] = new_endpt.B[ch]; + opt_err = err; + do_b = 1 - do_b; // now move the other endpoint + } + } +} + +static void optimize_endpts(const 
Tile &tile, int shapeindex, const double orig_err[NREGIONS_TWO], + const IntEndpts orig_endpts[NREGIONS_TWO], int prec, IntEndpts opt_endpts[NREGIONS_TWO]) +{ + Vec3 pixels[Tile::TILE_TOTAL]; + float importance[Tile::TILE_TOTAL]; + double err = 0; + int indices[Tile::TILE_TOTAL]; + + for (int region=0; region 0; ++i) + { + err = Utils::norm(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x]; + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; +} + +double ZOH::roughtwo(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_TWO]) +{ + for (int region=0; region maxp) maxp = dp; + } + + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + minp*direction; + endpts[region].B = mean + maxp*direction; + + // clamp endpoints + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped + Utils::clamp(endpts[region].A); + Utils::clamp(endpts[region].B); + } + + return map_colors(tile, shapeindex, endpts); +} + +double ZOH::compresstwo(const Tile &t, char *block) +{ + int shapeindex_best = 0; + FltEndpts endptsbest[NREGIONS_TWO], tempendpts[NREGIONS_TWO]; + double msebest = DBL_MAX; + + /* + collect the mse values that are within 5% of the best values + optimize each one and choose the best + */ + // hack for now -- just use the best value WORK + for (int i=0; i0.0; ++i) + { + double mse = roughtwo(t, i, tempendpts); + if (mse < msebest) + { + msebest = mse; + shapeindex_best = i; + memcpy(endptsbest, tempendpts, sizeof(endptsbest)); + } + + } + return refinetwo(t, shapeindex_best, endptsbest, block); +} + diff --git a/src/nvtt/bc7/ImfArray.h b/src/nvtt/bc7/ImfArray.h new file mode 100644 index 0000000..5160fa4 --- /dev/null +++ b/src/nvtt/bc7/ImfArray.h @@ -0,0 
+1,261 @@ +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas +// Digital Ltd. LLC +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Industrial Light & Magic nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +/////////////////////////////////////////////////////////////////////////// + + + +#ifndef INCLUDED_IMF_ARRAY_H +#define INCLUDED_IMF_ARRAY_H + +//------------------------------------------------------------------------- +// +// class Array +// class Array2D +// +// "Arrays of T" whose sizes are not known at compile time. +// When an array goes out of scope, its elements are automatically +// deleted. +// +// Usage example: +// +// struct C +// { +// C () {std::cout << "C::C (" << this << ")\n";}; +// virtual ~C () {std::cout << "C::~C (" << this << ")\n";}; +// }; +// +// int +// main () +// { +// Array a(3); +// +// C &b = a[1]; +// const C &c = a[1]; +// C *d = a + 2; +// const C *e = a; +// +// return 0; +// } +// +//------------------------------------------------------------------------- + +namespace Imf { + + +template +class Array +{ + public: + + //----------------------------- + // Constructors and destructors + //----------------------------- + + Array () {_data = 0;} + Array (long size) {_data = new T[size];} + ~Array () {delete [] _data;} + + + //----------------------------- + // Access to the array elements + //----------------------------- + + operator T * () {return _data;} + operator const T * () const {return _data;} + + + //------------------------------------------------------ + // Resize and clear the array (the contents of the array + // are not preserved across the resize operation). + // + // resizeEraseUnsafe() is more memory efficient than + // resizeErase() because it deletes the old memory block + // before allocating a new one, but if allocating the + // new block throws an exception, resizeEraseUnsafe() + // leaves the array in an unusable state. 
+ // + //------------------------------------------------------ + + void resizeErase (long size); + void resizeEraseUnsafe (long size); + + + private: + + Array (const Array &); // Copying and assignment + Array & operator = (const Array &); // are not implemented + + T * _data; +}; + + +template +class Array2D +{ + public: + + //----------------------------- + // Constructors and destructors + //----------------------------- + + Array2D (); // empty array, 0 by 0 elements + Array2D (long sizeX, long sizeY); // sizeX by sizeY elements + ~Array2D (); + + + //----------------------------- + // Access to the array elements + //----------------------------- + + T * operator [] (long x); + const T * operator [] (long x) const; + + + //------------------------------------------------------ + // Resize and clear the array (the contents of the array + // are not preserved across the resize operation). + // + // resizeEraseUnsafe() is more memory efficient than + // resizeErase() because it deletes the old memory block + // before allocating a new one, but if allocating the + // new block throws an exception, resizeEraseUnsafe() + // leaves the array in an unusable state. 
+ // + //------------------------------------------------------ + + void resizeErase (long sizeX, long sizeY); + void resizeEraseUnsafe (long sizeX, long sizeY); + + + private: + + Array2D (const Array2D &); // Copying and assignment + Array2D & operator = (const Array2D &); // are not implemented + + long _sizeY; + T * _data; +}; + + +//--------------- +// Implementation +//--------------- + +template +inline void +Array::resizeErase (long size) +{ + T *tmp = new T[size]; + delete [] _data; + _data = tmp; +} + + +template +inline void +Array::resizeEraseUnsafe (long size) +{ + delete [] _data; + _data = 0; + _data = new T[size]; +} + + +template +inline +Array2D::Array2D (): + _sizeY (0), _data (0) +{ + // emtpy +} + + +template +inline +Array2D::Array2D (long sizeX, long sizeY): + _sizeY (sizeY), _data (new T[sizeX * sizeY]) +{ + // emtpy +} + + +template +inline +Array2D::~Array2D () +{ + delete [] _data; +} + + +template +inline T * +Array2D::operator [] (long x) +{ + return _data + x * _sizeY; +} + + +template +inline const T * +Array2D::operator [] (long x) const +{ + return _data + x * _sizeY; +} + + +template +inline void +Array2D::resizeErase (long sizeX, long sizeY) +{ + T *tmp = new T[sizeX * sizeY]; + delete [] _data; + _sizeY = sizeY; + _data = tmp; +} + + +template +inline void +Array2D::resizeEraseUnsafe (long sizeX, long sizeY) +{ + delete [] _data; + _data = 0; + _sizeY = 0; + _data = new T[sizeX * sizeY]; + _sizeY = sizeY; +} + + +} // namespace Imf + +#endif diff --git a/src/nvtt/bc7/arvo/ArvoMath.cpp b/src/nvtt/bc7/arvo/ArvoMath.cpp new file mode 100644 index 0000000..95d1a7d --- /dev/null +++ b/src/nvtt/bc7/arvo/ArvoMath.cpp @@ -0,0 +1,342 @@ +/*************************************************************************** +* Math.C * +* * +* Some basic math functions. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 06/21/93 Initial coding. 
* +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1999, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#include +#include +#include +#include +#include "ArvoMath.h" +#include "form.h" + +namespace ArvoMath { + static const float Epsilon = 1.0E-5; + static const double LogTwo = log( 2.0 ); + +#define BinCoeffMax 500 + + double RelErr( double x, double y ) + { + double z = x - y; + if( x < 0.0 ) x = -x; + if( y < 0.0 ) y = -y; + return z / ( x > y ? x : y ); + } + + /*************************************************************************** + * A R C Q U A D * + * * + * Returns the theta / ( 2*PI ) where the input variables x and y are * + * such that x == COS( theta ) and y == SIN( theta ). * + * * + ***************************************************************************/ + float ArcQuad( float x, float y ) + { + if( Abs( x ) > Epsilon ) + { + float temp = OverTwoPi * atan( Abs( y ) / Abs( x ) ); + if( x < 0.0 ) temp = 0.5 - temp; + if( y < 0.0 ) temp = 1.0 - temp; + return( temp ); + } + else if( y > Epsilon ) return( 0.25 ); + else if( y < -Epsilon ) return( 0.75 ); + else return( 0.0 ); + } + + /*************************************************************************** + * A R C T A N * + * * + * Returns the angle theta such that x = COS( theta ) & y = SIN( theta ). 
* + * * + ***************************************************************************/ + float ArcTan( float x, float y ) + { + if( Abs( x ) > Epsilon ) + { + float temp = atan( Abs( y ) / Abs( x ) ); + if( x < 0.0 ) temp = Pi - temp; + if( y < 0.0 ) temp = TwoPi - temp; + return( temp ); + } + else if( y > Epsilon ) return( PiOverTwo ); + else if( y < -Epsilon ) return( 3 * PiOverTwo ); + else return( 0.0 ); + } + + /*************************************************************************** + * M A C H I N E E P S I L O N * + * * + * Returns the machine epsilon. * + * * + ***************************************************************************/ + float MachineEpsilon() + { + float x = 1.0; + float y; + float z = 1.0 + x; + while( z > 1.0 ) + { + y = x; + x /= 2.0; + z = (float)( 1.0 + (float)x ); // Avoid double precision! + } + return (float)y; + } + + /*************************************************************************** + * L O G G A M M A * + * * + * Computes the natural log of the gamma function using the Lanczos * + * approximation formula. Gamma is defined by * + * * + * ( z - 1 ) -t * + * gamma( z ) = Integral[ t e dt ] * + * * + * * + * where the integral ranges from 0 to infinity. The gamma function * + * satisfies * + * gamma( n + 1 ) = n! * + * * + * This algorithm has been adapted from "Numerical Recipes", p. 157. 
* + * * + ***************************************************************************/ + double LogGamma( double x ) + { + static const double + coeff0 = 7.61800917300E+1, + coeff1 = -8.65053203300E+1, + coeff2 = 2.40140982200E+1, + coeff3 = -1.23173951600E+0, + coeff4 = 1.20858003000E-3, + coeff5 = -5.36382000000E-6, + stp = 2.50662827465E+0, + half = 5.00000000000E-1, + fourpf = 4.50000000000E+0, + one = 1.00000000000E+0, + two = 2.00000000000E+0, + three = 3.00000000000E+0, + four = 4.00000000000E+0, + five = 5.00000000000E+0; + double r = coeff0 / ( x ) + coeff1 / ( x + one ) + + coeff2 / ( x + two ) + coeff3 / ( x + three ) + + coeff4 / ( x + four ) + coeff5 / ( x + five ) ; + double s = x + fourpf; + double t = ( x - half ) * log( s ) - s; + return t + log( stp * ( r + one ) ); + } + + /*************************************************************************** + * L O G F A C T * + * * + * Returns the natural logarithm of n factorial. For efficiency, some * + * of the values are cached, so they need be computed only once. * + * * + ***************************************************************************/ + double LogFact( int n ) + { + static const int Cache_Size = 100; + static double c[ Cache_Size ] = { 0.0 }; // Cache some of the values. + if( n <= 1 ) return 0.0; + if( n < Cache_Size ) + { + if( c[n] == 0.0 ) c[n] = LogGamma((double)(n+1)); + return c[n]; + } + return LogGamma((double)(n+1)); // gamma(n+1) == n! + } + + /*************************************************************************** + * M U L T I N O M I A L C O E F F * + * * + * Returns the multinomial coefficient ( n; X1 X2 ... Xk ) which is * + * defined to be n! / ( X1! X2! ... Xk! ). This is done by computing * + * exp( log(n!) - log(X1!) - log(X2!) - ... - log(Xk!) ). The value of * + * n is obtained by summing the Xi's. 
* + * * + ***************************************************************************/ + double MultinomialCoeff( int k, int X[] ) + { + int i; + // Find n by summing the coefficients. + + int n = X[0]; + for( i = 1; i < k; i++ ) n += X[i]; + + // Compute log(n!) then subtract log(X!) for each X. + + double LogCoeff = LogFact( n ); + for( i = 0; i < k; i++ ) LogCoeff -= LogFact( X[i] ); + + // Round the exponential of the result to the nearest integer. + + return floor( exp( LogCoeff ) + 0.5 ); + } + + + double MultinomialCoeff( int i, int j, int k ) + { + int n = i + j + k; + double x = LogFact( n ) - LogFact( i ) - LogFact( j ) - LogFact( k ); + return floor( exp( x ) + 0.5 ); + } + + /*************************************************************************** + * B I N O M I A L C O E F F S * + * * + * Generate all n+1 binomial coefficents for a given n. This is done by * + * computing the n'th row of Pascal's triange, starting from the top. * + * No additional storage is required. * + * * + ***************************************************************************/ + void BinomialCoeffs( int n, long *coeff ) + { + coeff[0] = 1; + for( int i = 1; i <= n; i++ ) + { + long a = coeff[0]; + long b = coeff[1]; + for( int j = 1; j < i; j++ ) // Make next row of Pascal's triangle. + { + coeff[j] = a + b; // Overwrite the old row. + a = b; + b = coeff[j+1]; + } + coeff[i] = 1; // The last entry in any row is always 1. + } + } + + void BinomialCoeffs( int n, double *coeff ) + { + coeff[0] = 1.0; + for( int i = 1; i <= n; i++ ) + { + double a = coeff[0]; + double b = coeff[1]; + for( int j = 1; j < i; j++ ) // Make next row of Pascal's triangle. + { + coeff[j] = a + b; // Overwrite the old row. + a = b; + b = coeff[j+1]; + } + coeff[i] = 1.0; // The last entry in any row is always 1. 
+ } + } + + const double *BinomialCoeffs( int n ) + { + static double *coeff[ BinCoeffMax + 1 ] = { 0 }; + if( n > BinCoeffMax || n < 0 ) + { + std::cerr << form( "%d is outside of (0,%d) in BinomialCoeffs", n, BinCoeffMax ); + return NULL; + } + if( coeff[n] == NULL ) // Fill in this entry. + { + double *c = new double[ n + 1 ]; + if( c == NULL ) + { + std::cerr << form( "Could not allocate for BinomialCoeffs(%d)", n ); + return NULL; + } + BinomialCoeffs( n, c ); + coeff[n] = c; + } + return coeff[n]; + } + + /*************************************************************************** + * B I N O M I A L C O E F F * + * * + * Compute a given binomial coefficient. Several rows of Pascal's * + * triangle are stored for efficiently computing the small coefficients. * + * Higher-order terms are computed using LogFact. * + * * + ***************************************************************************/ + double BinomialCoeff( int n, int k ) + { + double b; + int p = n - k; + if( k <= 1 || p <= 1 ) // Check for errors and special cases. + { + if( k == 0 || p == 0 ) return 1; + if( k == 1 || p == 1 ) return n; + std::cerr << form( "BinomialCoeff(%d,%d) is undefined", n, k ); + return 0; + } + static const int // Store part of Pascal's triange for small coeffs. 
+ n0[] = { 1 }, + n1[] = { 1, 1 }, + n2[] = { 1, 2, 1 }, + n3[] = { 1, 3, 3, 1 }, + n4[] = { 1, 4, 6, 4, 1 }, + n5[] = { 1, 5, 10, 10, 5, 1 }, + n6[] = { 1, 6, 15, 20, 15, 6, 1 }, + n7[] = { 1, 7, 21, 35, 35, 21, 7, 1 }, + n8[] = { 1, 8, 28, 56, 70, 56, 28, 8, 1 }, + n9[] = { 1, 9, 36, 84, 126, 126, 84, 36, 9, 1 }; + switch( n ) + { + case 0 : b = n0[k]; break; + case 1 : b = n1[k]; break; + case 2 : b = n2[k]; break; + case 3 : b = n3[k]; break; + case 4 : b = n4[k]; break; + case 5 : b = n5[k]; break; + case 6 : b = n6[k]; break; + case 7 : b = n7[k]; break; + case 8 : b = n8[k]; break; + case 9 : b = n9[k]; break; + default: + { + double x = LogFact( n ) - LogFact( p ) - LogFact( k ); + b = floor( exp( x ) + 0.5 ); + } + } + return b; + } + + + /*************************************************************************** + * L O G D O U B L E F A C T (Log of double factorial) * + * * + * Return log( n!! ) where the double factorial is defined by * + * * + * (2 n + 1)!! = 1 * 3 * 5 * ... * (2n + 1) (Odd integers) * + * * + * (2 n)!! = 2 * 4 * 6 * ... * 2n (Even integers) * + * * + * and is related to the single factorial via * + * * + * (2 n + 1)!! = (2 n + 1)! / ( 2^n n! ) (Odd integers) * + * * + * (2 n)!! = 2^n n! (Even integers) * + * * + ***************************************************************************/ + double LogDoubleFact( int n ) // log( n!! ) + { + int k = n / 2; + double f = LogFact( k ) + k * LogTwo; + if( Odd(n) ) f = LogFact( n ) - f; + return f; + } +}; diff --git a/src/nvtt/bc7/arvo/ArvoMath.h b/src/nvtt/bc7/arvo/ArvoMath.h new file mode 100644 index 0000000..e9edd7e --- /dev/null +++ b/src/nvtt/bc7/arvo/ArvoMath.h @@ -0,0 +1,212 @@ +/*************************************************************************** +* Math.h * +* * +* Convenient constants, macros, and inline functions for basic math * +* functions. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 06/17/93 Initial coding. 
* +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1999, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#ifndef __MATH_INCLUDED__ +#define __MATH_INCLUDED__ + +#include +#include + +namespace ArvoMath { + +#ifndef MAXFLOAT +#define MAXFLOAT 1.0E+20 +#endif + + static const double + Pi = 3.14159265358979, + PiSquared = Pi * Pi, + TwoPi = 2.0 * Pi, + FourPi = 4.0 * Pi, + PiOverTwo = Pi / 2.0, + PiOverFour = Pi / 4.0, + OverPi = 1.0 / Pi, + OverTwoPi = 1.0 / TwoPi, + OverFourPi = 1.0 / FourPi, + Infinity = MAXFLOAT, + Tiny = 1.0 / MAXFLOAT, + DegreesToRad = Pi / 180.0, + RadToDegrees = 180.0 / Pi; + + inline int Odd ( int k ) { return k & 1; } + inline int Even ( int k ) { return !(k & 1); } + inline float Abs ( int x ) { return x > 0 ? x : -x; } + inline float Abs ( float x ) { return x > 0. ? x : -x; } + inline float Abs ( double x ) { return x > 0. ? x : -x; } + inline float Min ( float x, float y ) { return x < y ? x : y; } + inline float Max ( float x, float y ) { return x > y ? x : y; } + inline double dMin ( double x, double y ) { return x < y ? x : y; } + inline double dMax ( double x, double y ) { return x > y ? x : y; } + inline float Sqr ( int x ) { return x * x; } + inline float Sqr ( float x ) { return x * x; } + inline float Sqr ( double x ) { return x * x; } + inline float Sqrt ( double x ) { return x > 0. ? 
sqrt(x) : 0.; } + inline float Cubed ( float x ) { return x * x * x; } + inline int Sign ( float x ) { return x > 0. ? 1 : (x < 0. ? -1 : 0); } + inline void Swap ( float &a, float &b ) { float c = a; a = b; b = c; } + inline void Swap ( int &a, int &b ) { int c = a; a = b; b = c; } + inline double Sin ( double x, int n ) { return pow( sin(x), n ); } + inline double Cos ( double x, int n ) { return pow( cos(x), n ); } + inline float ToSin ( double x ) { return Sqrt( 1.0 - Sqr(x) ); } + inline float ToCos ( double x ) { return Sqrt( 1.0 - Sqr(x) ); } + inline float MaxAbs( float x, float y ) { return Max( Abs(x), Abs(y) ); } + inline float MinAbs( float x, float y ) { return Min( Abs(x), Abs(y) ); } + inline float Pythag( double x, double y ) { return Sqrt( x*x + y*y ); } + + inline double ArcCos( double x ) + { + double y; + if( -1.0 <= x && x <= 1.0 ) y = acos( x ); + else if( x > 1.0 ) y = 0.0; + else if( x < -1.0 ) y = Pi; + return y; + } + + inline double ArcSin( double x ) + { + if( x < -1.0 ) x = -1.0; + if( x > 1.0 ) x = 1.0; + return asin( x ); + } + + inline float Clamp( float min, float &x, float max ) + { + if( x < min ) x = min; else + if( x > max ) x = max; + return x; + } + + inline double Clamp( float min, double &x, float max ) + { + if( x < min ) x = min; else + if( x > max ) x = max; + return x; + } + + inline float Max( float x, float y, float z ) + { + float t; + if( x >= y && x >= z ) t = x; + else if( y >= z ) t = y; + else t = z; + return t; + } + + inline float Min( float x, float y, float z ) + { + float t; + if( x <= y && x <= z ) t = x; + else if( y <= z ) t = y; + else t = z; + return t; + } + + inline float Max( float x, float y, float z, float w ) + { + float t; + if( x >= y && x >= z && x >= w) t = x; + else if( y >= z && y >= w ) t = y; + else if (z >= w) t = z; + else t = w; + return t; + } + + inline float Min( float x, float y, float z, float w ) + { + float t; + if( x <= y && x <= z && x <= w) t = x; + else if( y <= z && y <= w ) 
t = y; + else if (z <= w) t = z; + else t = w; + return t; + } + + inline double dMax( double x, double y, double z ) + { + double t; + if( x >= y && x >= z ) t = x; + else if( y >= z ) t = y; + else t = z; + return t; + } + + inline double dMin( double x, double y, double z ) + { + double t; + if( x <= y && x <= z ) t = x; + else if( y <= z ) t = y; + else t = z; + return t; + } + + inline float MaxAbs( float x, float y, float z ) + { + return Max( Abs( x ), Abs( y ), Abs( z ) ); + } + + inline float MaxAbs( float x, float y, float z, float w ) + { + return Max( Abs( x ), Abs( y ), Abs( z ), Abs( w ) ); + } + + inline float Pythag( float x, float y, float z ) + { + return sqrt( x * x + y * y + z * z ); + } + + extern float ArcTan ( float x, float y ); + extern float ArcQuad ( float x, float y ); + extern float MachineEpsilon ( ); + extern double LogGamma ( double x ); + extern double LogFact ( int n ); + extern double LogDoubleFact ( int n ); // log( n!! ) + extern double BinomialCoeff ( int n, int k ); + extern void BinomialCoeffs ( int n, long *coeffs ); + extern void BinomialCoeffs ( int n, double *coeffs ); + extern double MultinomialCoeff( int i, int j, int k ); + extern double MultinomialCoeff( int k, int N[] ); + extern double RelErr ( double x, double y ); + +#ifndef ABS +#define ABS( x ) ((x) > 0 ? (x) : -(x)) +#endif + +#ifndef MAX +#define MAX( x, y ) ((x) > (y) ? (x) : (y)) +#endif + +#ifndef MIN +#define MIN( x, y ) ((x) < (y) ? (x) : (y)) +#endif + +}; + +#endif + + + + + + + diff --git a/src/nvtt/bc7/arvo/Char.cpp b/src/nvtt/bc7/arvo/Char.cpp new file mode 100644 index 0000000..cc450a5 --- /dev/null +++ b/src/nvtt/bc7/arvo/Char.cpp @@ -0,0 +1,420 @@ +/*************************************************************************** +* Char.h * +* * +* Convenient constants, macros, and inline functions for manipulation of * +* characters and strings. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 07/01/93 Initial coding. 
* +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1999, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#include +#include +#include +#include "Char.h" + +namespace ArvoMath { + + typedef char *charPtr; + + // Treat "str" as a file name, and return just the directory + // portion -- i.e. strip off the name of the leaf object (but + // leave the final "/". + const char *getPath( const char *str, char *buff ) + { + int k; + for( k = strlen( str ) - 1; k >= 0; k-- ) + { + if( str[k] == Slash ) break; + } + for( int i = 0; i <= k; i++ ) buff[i] = str[i]; + buff[k+1] = NullChar; + return buff; + } + + // Treat "str" as a file name, and return just the file name + // portion -- i.e. strip off everything up to and including + // the final "/". 
+ const char *getFile( const char *str, char *buff ) + { + int k; + int len = strlen( str ); + for( k = len - 1; k >= 0; k-- ) + { + if( str[k] == Slash ) break; + } + for( int i = 0; i < len - k; i++ ) buff[i] = str[ i + k + 1 ]; + return buff; + } + + int getPrefix( const char *str, char *buff ) + { + int len = 0; + while( *str != NullChar && *str != Period ) + { + *buff++ = *str++; + len++; + } + *buff = NullChar; + return len; + } + + int getSuffix( const char *str, char *buff ) + { + int n = strlen( str ); + int k = n - 1; + while( k >= 0 && str[k] != Period ) k--; + for( int i = k + 1; i < n; i++ ) *buff++ = str[i]; + *buff = NullChar; + return n - k - 1; + } + + const char* toString( int number, char *buff ) + { + static char local_buff[32]; + char *str = ( buff == NULL ) ? local_buff : buff; + sprintf( str, "%d", number ); + return str; + } + + const char* toString( float number, char *buff ) + { + static char local_buff[32]; + char *str = ( buff == NULL ) ? local_buff : buff; + sprintf( str, "%g", number ); + return str; + } + + int isInteger( const char *str ) + { + int n = strlen( str ); + for( int i = 0; i < n; i++ ) + { + char c = str[i]; + if( isDigit(c) ) continue; + if( c == Plus || c == Minus ) continue; + if( c == Space ) continue; + return 0; + } + return 1; + } + + // Test to see if a string has a given suffix. + int hasSuffix( const char *string, const char *suffix ) + { + if( suffix == NULL ) return 1; // The null suffix always matches. + if( string == NULL ) return 0; // The null string can only have a null suffix. + int m = strlen( string ); + int k = strlen( suffix ); + if( k <= 0 ) return 1; // Empty suffix always matches. + if( m < k + 1 ) return 0; // String is too short to have this suffix. + + // See if the file has the given suffix. + int s = m - k; // Beginning of suffix (if it matches). + for( int i = 0; i < k; i++ ) + if( string[ s + i ] != suffix[ i ] ) return 0; + return s; // Always > 0. 
+ } + + // Test to see if a string has a given prefix. + int hasPrefix( const char *string, const char *prefix ) + { + if( prefix == NULL ) return 1; // The null prefix always matches. + if( string == NULL ) return 0; // The null string can only have a null suffix. + while( *prefix ) + { + if( *prefix++ != *string++ ) return 0; + } + return 1; + } + + // Test to see if the string contains the given character. + int inString( char c, const char *str ) + { + if( str == NULL || str[0] == NullChar ) return 0; + while( *str != '\0' ) + if( *str++ == c ) return 1; + return 0; + } + + int nullString( const char *str ) + { + return str == NULL || str[0] == NullChar; + } + + const char *stripSuffix( const char *string, const char *suffix, char *buff ) + { + static char local_buff[256]; + if( buff == NULL ) buff = local_buff; + buff[0] = NullChar; + if( !hasSuffix( string, suffix ) ) return NULL; + int s = strlen( string ) - strlen( suffix ); + for( int i = 0; i < s; i++ ) + { + buff[i] = string[i]; + } + buff[s] = NullChar; + return buff; + } + + int getIndex( const char *pat, const char *str ) + { + int p_len = strlen( pat ); + int s_len = strlen( str ); + if( p_len == 0 || s_len == 0 ) return -1; + for( int i = 0; i <= s_len - p_len; i++ ) + { + int match = 1; + for( int j = 0; j < p_len; j++ ) + { + if( str[ i + j ] != pat[ j ] ) { match = 0; break; } + } + if( match ) return i; + } + return -1; + } + + int getSubstringAfter( const char *pat, const char *str, char *buff ) + { + int ind = getIndex( pat, str ); + if( ind < 0 ) return -1; + int p_len = strlen( pat ); + int k = 0; + for( int i = ind + p_len; ; i++ ) + { + buff[ k++ ] = str[ i ]; + if( str[ i ] == NullChar ) break; + } + return k; + } + + const char *SubstringAfter( const char *pat, const char *str, char *user_buff ) + { + static char temp[128]; + char *buff = ( user_buff != NULL ) ? 
user_buff : temp; + int k = getSubstringAfter( pat, str, buff ); + if( k > 0 ) return buff; + return str; + } + + const char *metaString( const char *str, char *user_buff ) + { + static char temp[128]; + char *buff = ( user_buff != NULL ) ? user_buff : temp; + sprintf( buff, "\"%s\"", str ); + return buff; + } + + // This is the opposite of metaString. + const char *stripQuotes( const char *str, char *user_buff ) + { + static char temp[128]; + char *buff = ( user_buff != NULL ) ? user_buff : temp; + char *b = buff; + for(;;) + { + if( *str != DoubleQuote ) *b++ = *str; + if( *str == NullChar ) break; + str++; + } + return buff; + } + + int getIntFlag( const char *flags, const char *flag, int &value ) + { + while( *flags ) + { + if( hasPrefix( flags, flag ) ) + { + int k = strlen( flag ); + if( flags[k] == '=' ) + { + value = atoi( flags + k + 1 ); + return 1; + } + } + flags++; + } + return 0; + } + + int getFloatFlag( const char *flags, const char *flag, float &value ) + { + while( *flags ) + { + if( hasPrefix( flags, flag ) ) + { + int k = strlen( flag ); + if( flags[k] == '=' ) + { + value = atof( flags + k + 1 ); + return 1; + } + } + flags++; + } + return 0; + } + + SortedList::SortedList( sort_type type_, int ascend_ ) + { + type = type_; + ascend = ascend_; + num_elements = 0; + max_elements = 0; + sorted = 1; + list = NULL; + } + + SortedList::~SortedList() + { + Clear(); + delete[] list; + } + + void SortedList::Clear() + { + // Delete all the private copies of the strings and re-initialize the + // list. Reuse the same list, expanding it when necessary. + for( int i = 0; i < num_elements; i++ ) + { + delete list[i]; + list[i] = NULL; + } + num_elements = 0; + sorted = 1; + } + + SortedList &SortedList::operator<<( const char *str ) + { + // Add a new string to the end of the list, expanding the list if necessary. + // Mark the list as unsorted, so that the next reference to an element will + // cause the list to be sorted again. 
+ if( num_elements == max_elements ) Expand(); + list[ num_elements++ ] = strdup( str ); + sorted = 0; + return *this; + } + + const char *SortedList::operator()( int i ) + { + // Return the i'th element of the list. Sort first if necessary. + static char *null = ""; + if( num_elements == 0 || i < 0 || i >= num_elements ) return null; + if( !sorted ) Sort(); + return list[i]; + } + + void SortedList::Expand() + { + // Create a new list of twice the size and copy the old list into it. + // This doubles "max_elements", but leaves "num_elements" unchanged. + if( max_elements == 0 ) max_elements = 1; + max_elements *= 2; + charPtr *new_list = new charPtr[ max_elements ]; + for( int i = 0; i < max_elements; i++ ) + new_list[i] = ( i < num_elements ) ? list[i] : NULL; + delete[] list; + list = new_list; + } + + void SortedList::Swap( int i, int j ) + { + char *temp = list[i]; + list[i] = list[j]; + list[j] = temp; + } + + int SortedList::inOrder( int p, int q ) const + { + int test; + if( type == sort_alphabetic ) + test = ( strcmp( list[p], list[q] ) <= 0 ); + else + { + int len_p = strlen( list[p] ); + int len_q = strlen( list[q] ); + test = ( len_p < len_q ) || + ( len_p == len_q && strcmp( list[p], list[q] ) <= 0 ); + } + if( ascend ) return test; + return !test; + } + + // This is an insertion sort that operates on subsets of the + // input defined by the step length. + void SortedList::InsertionSort( int start, int size, int step ) + { + for( int i = 0; i + step < size; i += step ) + { + for( int j = i; j >= 0; j -= step ) + { + int p = start + j; + int q = p + step; + if( inOrder( p, q ) ) break; + Swap( p, q ); + } + } + } + + // This is a Shell sort. 
+ void SortedList::Sort() + { + for( int step = num_elements / 2; step > 1; step /= 2 ) + for( int start = 0; start < step; start++ ) + InsertionSort( start, num_elements - start, step ); + InsertionSort( 0, num_elements, 1 ); + sorted = 1; + } + + void SortedList::SetOrder( sort_type type_, int ascend_ ) + { + if( type_ != type || ascend_ != ascend ) + { + type = type_; + ascend = ascend_; + sorted = 0; + } + } + + int getstring( std::istream &in, const char *str ) + { + char ch; + if( str == NULL ) return 1; + while( *str != NullChar ) + { + in >> ch; + if( *str != ch ) return 0; + str++; + } + return 1; + } + + std::istream &skipWhite( std::istream &in ) + { + char c; + while( in.get(c) ) + { + if( !isWhite( c ) ) + { + in.putback(c); + break; + } + } + return in; + } +}; diff --git a/src/nvtt/bc7/arvo/Char.h b/src/nvtt/bc7/arvo/Char.h new file mode 100644 index 0000000..2742c1d --- /dev/null +++ b/src/nvtt/bc7/arvo/Char.h @@ -0,0 +1,245 @@ +/*************************************************************************** +* Char.h * +* * +* Convenient constants, macros, and inline functions for manipulation of * +* characters and strings. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 07/01/93 Initial coding. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1999, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. 
*
+*                                                                          *
+***************************************************************************/
+#ifndef __CHAR_INCLUDED__
+#define __CHAR_INCLUDED__
+
+#include <string.h>
+#include <iostream>
+
+namespace ArvoMath {
+
+    static const char
+        Apostrophe  = '\'' ,
+        Asterisk    = '*'  ,
+        Atsign      = '@'  ,
+        Backslash   = '\\' ,
+        Bell        = '\7' ,
+        Colon       = ':'  ,
+        Comma       = ','  ,
+        Dash        = '-'  ,
+        DoubleQuote = '"'  ,
+        EqualSign   = '='  ,
+        Exclamation = '!'  ,
+        GreaterThan = '>'  ,
+        Hash        = '#'  ,
+        Lbrack      = '['  ,
+        Lcurley     = '{'  ,
+        LessThan    = '<'  ,
+        Lparen      = '('  ,
+        Minus       = '-'  ,
+        NewLine     = '\n' ,
+        NullChar    = '\0' ,
+        Percent     = '%'  ,
+        Period      = '.'  ,
+        Pound       = '#'  ,
+        Plus        = '+'  ,
+        Rbrack      = ']'  ,
+        Rcurley     = '}'  ,
+        Rparen      = ')'  ,
+        Semicolon   = ';'  ,
+        Space       = ' '  ,
+        Slash       = '/'  ,
+        Star        = '*'  ,
+        Tab         = '\t' ,
+        Tilde       = '~'  ,
+        Underscore  = '_'  ;
+
+    // ASCII character classification and case conversion.  Only space,
+    // newline and tab count as white space.
+    inline int isWhite( char c ) { return c == Space || c == NewLine || c == Tab; }
+    inline int isUcase( char c ) { return 'A' <= c && c <= 'Z'; }
+    inline int isLcase( char c ) { return 'a' <= c && c <= 'z'; }
+    inline int isAlpha( char c ) { return isUcase( c ) || isLcase( c ); }
+    inline int isDigit( char c ) { return '0' <= c && c <= '9'; }
+    inline char ToLower( char c ) { return isUcase( c ) ? c + ( 'a' - 'A' ) : c; }
+    inline char ToUpper( char c ) { return isLcase( c ) ? c + ( 'A' - 'a' ) : c; }
+
+    extern const char *getPath(
+        const char *str,
+        char *buff
+        );
+
+    extern const char *getFile(
+        const char *str,
+        char *buff
+        );
+
+    extern int getPrefix(
+        const char *str,
+        char *buff
+        );
+
+    extern int getSuffix(
+        const char *str,
+        char *buff
+        );
+
+    extern int isInteger(
+        const char *str
+        );
+
+    extern int hasSuffix(
+        const char *string,
+        const char *suffix
+        );
+
+    extern int hasPrefix(
+        const char *string,
+        const char *prefix
+        );
+
+    extern int inString(
+        char c,
+        const char *str
+        );
+
+    extern int nullString(
+        const char *str
+        );
+
+    extern const char *stripSuffix( // Return NULL if unsuccessful.
+        const char *string,         // The string to truncate.
+        const char *suffix,         // The suffix to remove.
+        char *buff = NULL           // Defaults to internal buffer.
+        );
+
+    extern const char* toString(
+        int n,                      // An integer to convert to a string.
+        char *buff = NULL           // Defaults to internal buffer.
+        );
+
+    extern const char* toString(
+        float x,                    // A float to convert to a string.
+        char *buff = NULL           // Defaults to internal buffer.
+        );
+
+    extern int getIndex(            // The index of the start of a pattern in a string.
+        const char *pat,            // The pattern to look for.
+        const char *str             // The string to search.
+        );
+
+    extern int getSubstringAfter(
+        const char *pat,
+        const char *str,
+        char *buff
+        );
+
+    extern const char *SubstringAfter(
+        const char *pat,
+        const char *str,
+        char *buff = NULL           // Defaults to internal buffer.
+        );
+
+    extern const char *metaString(
+        const char *str,            // Make this a string within a string.
+        char *buff = NULL           // Defaults to internal buffer.
+        );
+
+    extern const char *stripQuotes(
+        const char *str,            // This is the opposite of metaString.
+        char *buff = NULL           // Defaults to internal buffer.
+        );
+
+    extern int getIntFlag(
+        const char *flags,          // List of assignment statements.
+        const char *flag,           // A specific flag to look for.
+        int &value                  // The variable to assign the value to.
+        );
+
+    extern int getFloatFlag(
+        const char *flags,          // List of assignment statements.
+        const char *flag,           // A specific flag to look for.
+        float &value                // The variable to assign the value to.
+        );
+
+    extern int getstring(
+        std::istream &in,
+        const char *str
+        );
+
+    enum sort_type {
+        sort_alphabetic,            // Standard dictionary ordering.
+        sort_lexicographic          // Sort first by length, then alphabetically.
+        };
+
+    class SortedList {
+
+    public:
+        SortedList( sort_type = sort_alphabetic, int ascending = 1 );
+        ~SortedList();
+        SortedList &operator<<( const char * );
+        int Size() const { return num_elements; }
+        const char *operator()( int i );
+        void Clear();
+        void SetOrder( sort_type = sort_alphabetic, int ascending = 1 );
+
+    private:
+        void Sort();
+        void InsertionSort( int start, int size, int step );
+        void Swap( int i, int j );
+        void Expand();
+        int inOrder( int i, int j ) const;
+        int num_elements;
+        int max_elements;
+        int sorted;
+        int ascend;
+        sort_type type;
+        char **list;
+    };
+
+
+    // Returns nonzero when s and t are both non-NULL and compare equal.
+    inline int Match( const char *s, const char *t )
+    {
+        return s != NULL &&
+            (t != NULL && strcmp( s, t ) == 0);
+    }
+
+    // Returns nonzero when s is non-NULL and equals t1 or t2 (a NULL t1 or
+    // t2 never matches).
+    inline int Match( const char *s, const char *t1, const char *t2 )
+    {
+        return s != NULL && (
+            (t1 != NULL && strcmp( s, t1 ) == 0) ||
+            (t2 != NULL && strcmp( s, t2 ) == 0) );
+    }
+
+    // Kept for source compatibility.  The two conversion functions below no
+    // longer go through this union, because "long" is wider than "float" on
+    // LP64 platforms.
+    union long_union_float {
+        long i;
+        float f;
+    };
+
+    // Returns the bit pattern of x as an integer.  The bits are copied into
+    // an "int" with memcpy, so no bytes of the result are left indeterminate
+    // and the value does not depend on byte order.
+    // Assumes sizeof(int) == sizeof(float).
+    inline long float_as_long( float x )
+    {
+        int bits = 0;
+        memcpy( &bits, &x, sizeof( bits ) );
+        return bits;
+    }
+
+    // Inverse of float_as_long: reinterprets the low-order bits of i as a
+    // float.  Assumes sizeof(int) == sizeof(float).
+    inline float long_as_float( long i )
+    {
+        int bits = (int) i;
+        float f = 0;
+        memcpy( &f, &bits, sizeof( f ) );
+        return f;
+    }
+
+    extern std::istream &skipWhite( std::istream &in );
+};
+#endif
diff --git a/src/nvtt/bc7/arvo/Complex.cpp b/src/nvtt/bc7/arvo/Complex.cpp
new file mode 100644
index 0000000..468704f
--- /dev/null
+++ b/src/nvtt/bc7/arvo/Complex.cpp
@@ -0,0 +1,76 @@
+/***************************************************************************
+* Complex.C                                                                *
+*                                                                          *
+* Complex numbers, complex arithmetic, and functions of a complex          *
+* variable.                                                                *
+*                                                                          *
+* HISTORY                                                                  *
+*      Name    Date        Description                                     *
+*                                                                          *
+*      arvo    03/02/2000  Initial coding.                                 *
+*                                                                          *
+*--------------------------------------------------------------------------*
+* Copyright (C) 2000, James Arvo                                           *
+*                                                                          *
+* This program is free software; you can redistribute it and/or modify it  *
+* under the terms of the GNU General Public License as published by the    *
+* Free Software Foundation.
See http://www.fsf.org/copyleft/gpl.html *
+*                                                                          *
+* This program is distributed in the hope that it will be useful, but      *
+* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for    *
+* any particular purpose. See the GNU General Public License for more      *
+* details.                                                                 *
+*                                                                          *
+***************************************************************************/
+#include "Complex.h"
+#include "form.h"
+
+namespace ArvoMath {
+    const Complex Complex::i( 0.0, 1.0 );
+
+    // Writes z as "(real,imag) ", including the trailing space.
+    std::ostream &operator<<( std::ostream &out, const Complex &z )
+    {
+        out << form( "(%f,%f) ", z.Real(), z.Imag() );
+        return out;
+    }
+
+    // cos( x + iy ) = cos(x) cosh(y) - i sin(x) sinh(y)
+    Complex cos( const Complex &z )
+    {
+        return Complex(
+            ::cos( z.Real() ) * ::cosh( z.Imag() ),
+            -::sin( z.Real() ) * ::sinh( z.Imag() )
+            );
+    }
+
+    // sin( x + iy ) = sin(x) cosh(y) + i cos(x) sinh(y)
+    Complex sin( const Complex &z )
+    {
+        return Complex(
+            ::sin( z.Real() ) * ::cosh( z.Imag() ),
+            ::cos( z.Real() ) * ::sinh( z.Imag() )
+            );
+    }
+
+    // cosh( x + iy ) = cosh(x) cos(y) + i sinh(x) sin(y)
+    Complex cosh( const Complex &z )
+    {
+        return Complex(
+            ::cosh( z.Real() ) * ::cos( z.Imag() ),
+            ::sinh( z.Real() ) * ::sin( z.Imag() )
+            );
+    }
+
+    // sinh( x + iy ) = sinh(x) cos(y) + i cosh(x) sin(y)
+    Complex sinh( const Complex &z )
+    {
+        return Complex(
+            ::sinh( z.Real() ) * ::cos( z.Imag() ),
+            ::cosh( z.Real() ) * ::sin( z.Imag() )
+            );
+    }
+
+    // Complex logarithm: the real part is log|z| and the imaginary part is
+    // the angle of z, reported as a non-negative value (0 up to 2 Pi).
+    // The angle comes from atan2 rather than acos( Re / |z| ), so z == 0
+    // yields an angle of 0 instead of NaN, and rounding in the ratio can no
+    // longer push the acos argument outside [-1,1].
+    Complex log( const Complex &z )
+    {
+        float r = ::sqrt( z.Real() * z.Real() + z.Imag() * z.Imag() );
+        float t = ::atan2( z.Imag(), z.Real() );
+        if( t < 0.0 ) t += 2.0 * 3.14159265358979323846;
+        return Complex( ::log(r), t );
+    }
+};
diff --git a/src/nvtt/bc7/arvo/Complex.h b/src/nvtt/bc7/arvo/Complex.h
new file mode 100644
index 0000000..671fd57
--- /dev/null
+++ b/src/nvtt/bc7/arvo/Complex.h
@@ -0,0 +1,187 @@
+/***************************************************************************
+* Complex.h                                                                *
+*                                                                          *
+* Complex numbers, complex arithmetic, and functions of a complex          *
+* variable.                                                                *
+*                                                                          *
+* HISTORY                                                                  *
+*      Name    Date        Description                                     *
+*                                                                          *
+*      arvo    03/02/2000  Initial coding.
*
+*                                                                          *
+*--------------------------------------------------------------------------*
+* Copyright (C) 2000, James Arvo                                           *
+*                                                                          *
+* This program is free software; you can redistribute it and/or modify it  *
+* under the terms of the GNU General Public License as published by the    *
+* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html       *
+*                                                                          *
+* This program is distributed in the hope that it will be useful, but      *
+* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for    *
+* any particular purpose. See the GNU General Public License for more      *
+* details.                                                                 *
+*                                                                          *
+***************************************************************************/
+#ifndef __COMPLEX_INCLUDED__
+#define __COMPLEX_INCLUDED__
+
+#include <math.h>
+#include <iostream>
+
+namespace ArvoMath {
+
+    // A complex number stored as a pair of floats (real part, imaginary part).
+    class Complex {
+    public:
+        Complex() { x = 0; y = 0; }
+        Complex( float a ) { x = a; y = 0; }
+        Complex( float a, float b ) { x = a; y = b; }
+        Complex( const Complex &z ) { *this = z; }
+        float &Real() { return x; }
+        float &Imag() { return y; }
+        float Real() const { return x; }
+        float Imag() const { return y; }
+        inline Complex &operator=( const Complex &z );
+        static const Complex i;
+    private:
+        float x;
+        float y;
+    };
+
+    inline Complex &Complex::operator=( const Complex &z )
+    {
+        x = z.Real();
+        y = z.Imag();
+        return *this;
+    }
+
+    inline float Real( const Complex &z )
+    {
+        return z.Real();
+    }
+
+    inline float Imag( const Complex &z )
+    {
+        return z.Imag();
+    }
+
+    inline Complex conj( const Complex &z )
+    {
+        return Complex( z.Real(), -z.Imag() );
+    }
+
+    // Squared modulus, |z|^2.
+    inline double modsqr( const Complex &z )
+    {
+        return z.Real() * z.Real() + z.Imag() * z.Imag();
+    }
+
+    inline double modulus( const Complex &z )
+    {
+        return sqrt( z.Real() * z.Real() + z.Imag() * z.Imag() );
+    }
+
+    // Returns the angle of z measured from the positive real axis, as a
+    // non-negative value (0 up to 2 Pi).  The angle comes from atan2 rather
+    // than acos( Re / |z| ), so arg(0) is 0 instead of NaN and rounding in
+    // the ratio can no longer push the acos argument outside [-1,1].
+    inline double arg( const Complex &z )
+    {
+        float t = ::atan2( z.Imag(), z.Real() );
+        if( t < 0.0 ) t += 2.0 * 3.14159265358979323846;
+        return t;
+    }
+
+    inline Complex operator*( const Complex &z, float a )
+    {
+        return Complex( a * z.Real(), a * z.Imag() );
+    }
+
+    inline Complex operator*( float a, const Complex &z )
+    {
+        return Complex( a * z.Real(), a * z.Imag() );
+    }
+
+    inline Complex operator*( const Complex &z, const Complex &w )
+    {
+        return Complex(
+            z.Real() * w.Real() - z.Imag() * w.Imag(),
+            z.Real() * w.Imag() + z.Imag() * w.Real()
+            );
+    }
+
+    inline Complex operator+( const Complex &z, const Complex &w )
+    {
+        return Complex( z.Real() + w.Real(), z.Imag() + w.Imag() );
+    }
+
+    inline Complex operator-( const Complex &z, const Complex &w )
+    {
+        return Complex( z.Real() - w.Real(), z.Imag() - w.Imag() );
+    }
+
+    inline Complex operator-( const Complex &z )
+    {
+        return Complex( -z.Real(), -z.Imag() );
+    }
+
+    inline Complex operator/( const Complex &z, float w )
+    {
+        return Complex( z.Real() / w, z.Imag() / w );
+    }
+
+    // z / w computed as z * conj(w) / |w|^2.
+    inline Complex operator/( const Complex &z, const Complex &w )
+    {
+        return ( z * conj(w) ) / modsqr(w);
+    }
+
+    inline Complex operator/( float a, const Complex &w )
+    {
+        return conj(w) * ( a / modsqr(w) );
+    }
+
+    inline Complex &operator+=( Complex &z, const Complex &w )
+    {
+        z.Real() += w.Real();
+        z.Imag() += w.Imag();
+        return z;
+    }
+
+    inline Complex &operator*=( Complex &z, const Complex &w )
+    {
+        return z = ( z * w );
+    }
+
+    inline Complex &operator-=( Complex &z, const Complex &w )
+    {
+        z.Real() -= w.Real();
+        z.Imag() -= w.Imag();
+        return z;
+    }
+
+    inline Complex exp( const Complex &z )
+    {
+        float r = ::exp( z.Real() );
+        return Complex( r * cos( z.Imag() ), r * sin( z.Imag() ) );
+    }
+
+    // Integer power via the polar form: |z|^n at angle n * arg(z).
+    inline Complex pow( const Complex &z, int n )
+    {
+        float r = ::pow( modulus( z ), (double)n );
+        float t = arg( z );
+        return Complex( r * cos( n * t ), r * sin( n * t ) );
+    }
+
+    inline Complex polar( float r, float theta )
+    {
+        return Complex( r * cos( theta ), r * sin( theta ) );
+    }
+
+
+    extern Complex cos ( const Complex &z );
+    extern Complex sin ( const Complex &z );
+    extern Complex cosh( const Complex &z );
+    extern Complex sinh( const Complex &z );
+    extern Complex log ( const Complex &z );
+
+    extern std::ostream &operator<<(
+        std::ostream &out,
+        const Complex &
+        );
+};
+#endif
+
diff --git a/src/nvtt/bc7/arvo/Matrix.cpp b/src/nvtt/bc7/arvo/Matrix.cpp
new file mode 100644
index 0000000..d84b7ef
--- /dev/null
+++ b/src/nvtt/bc7/arvo/Matrix.cpp
@@ -0,0 +1,1201 @@
+/***************************************************************************
+* Matrix.C                                                                 *
+*                                                                          *
+* General Vector and Matrix classes, with all the associated methods.      *
+*                                                                          *
+* HISTORY                                                                  *
+*      Name    Date        Description                                     *
+*                                                                          *
+*      arvo    08/16/2000  Revamped for CIT tools.                         *
+*      arvo    10/31/1994  Combined RowVec & ColVec into Vector.           *
+*      arvo    06/30/1993  Added singular value decomposition class.       *
+*      arvo    06/25/1993  Major revisions.                                *
+*      arvo    09/08/1991  Initial implementation.                         *
+*                                                                          *
+*--------------------------------------------------------------------------*
+* Copyright (C) 2000, James Arvo                                           *
+*                                                                          *
+* This program is free software; you can redistribute it and/or modify it  *
+* under the terms of the GNU General Public License as published by the    *
+* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html       *
+*                                                                          *
+* This program is distributed in the hope that it will be useful, but      *
+* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for    *
+* any particular purpose. See the GNU General Public License for more      *
+* details.                                                                 *
+*                                                                          *
+***************************************************************************/
+#include <math.h>
+#include <assert.h>
+#include <iostream>
+#include "ArvoMath.h"
+#include "Vector.h"
+#include "Matrix.h"
+#include "form.h"
+
+namespace ArvoMath {
+    const Matrix Matrix::Null(0);
+
+    /*-------------------------------------------------------------------------*
+     *                                                                         *
+     *                         C O N S T R U C T O R S                         *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+
+    // Create a new matrix of the given size.
+    // If n_cols is zero (the default),
+    // it is assumed that the matrix is to be square; that is, n_rows x n_rows.
+    // The matrix is filled with "value", which defaults to zero.
+    Matrix::Matrix( int n_rows, int n_cols, float value )
+    {
+        assert( n_rows >= 0 && n_cols >= 0 );
+        rows = 0;
+        cols = 0;
+        elem = NULL;
+        SetSize( n_rows, n_cols );
+        float *e = elem;
+        for( int i = 0; i < rows * cols; i++ ) *e++ = value;
+    }
+
+    // Copy constructor.
+    Matrix::Matrix( const Matrix &M )
+    {
+        rows = 0;
+        cols = 0;
+        elem = NULL;
+        SetSize( M.Rows(), M.Cols() );
+        float *e = elem;
+        float *m = M.Array();
+        for( int i = 0; i < rows * cols; i++ ) *e++ = *m++;
+    }
+
+    // Releases the element array (SetSize(0,0) deletes it).
+    Matrix::~Matrix()
+    {
+        SetSize( 0, 0 );
+    }
+
+    /*-------------------------------------------------------------------------*
+     *                                                                         *
+     *                M I S C E L L A N E O U S   M E T H O D S                *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+
+    // Re-shape the matrix. If the number of elements in the new matrix is
+    // different from the original matrix, the original data is deleted and
+    // replaced with a new array. If new_cols is zero (the default), it is
+    // assumed to be the same as new_rows -- i.e. a square matrix.
+    // The new array is not initialized here; when the element count is
+    // unchanged the existing data is kept and simply re-interpreted.
+    void Matrix::SetSize( int new_rows, int new_cols )
+    {
+        if( new_cols == 0 ) new_cols = new_rows;
+        int n = new_rows * new_cols;
+        if( rows * cols != n )
+        {
+            if( elem != NULL ) delete[] elem;
+            elem = ( n == 0 ) ? NULL : new float[ n ];
+        }
+        rows = new_rows;
+        cols = new_cols;
+    }
+
+    // Returns a copy of column j.  The index is not range-checked.
+    Vector Matrix::GetCol( int j ) const
+    {
+        Vector C( rows );
+        float *e = elem + j;
+        float *c = C.Array();
+        for( int i = 0; i < rows; i++ )
+        {
+            *c++ = *e;
+            e += cols;
+        }
+        return C;
+    }
+
+    // Returns a copy of row i.  The index is not range-checked.
+    Vector Matrix::GetRow( int i ) const
+    {
+        Vector R( cols );
+        float *e = elem + ( i * cols );
+        float *r = R.Array();
+        for( int j = 0; j < cols; j++ ) *r++ = *e++;
+        return R;
+    }
+
+    void Matrix::SetCol( int j, const Vector &C )
+    {
+        assert( rows == C.Size() );
+        float *e = elem + j;
+        float *c = C.Array();
+        for( int i = 0; i < rows; i++ )
+        {
+            *e = *c++;
+            e += cols;
+        }
+    }
+
+    void Matrix::SetRow( int i, const Vector &R )
+    {
+        assert( cols == R.Size() );
+        float *e = elem + ( i * cols );
+        float *r = R.Array();
+        for( int j = 0; j < cols; j++ ) *e++ = *r++;
+    }
+
+    // Returns the sub-matrix of rows imin..imax and columns jmin..jmax
+    // (inclusive), or an empty matrix if either range is empty.
+    Matrix Matrix::GetBlock( int imin, int imax, int jmin, int jmax ) const
+    {
+        if( imax < imin || jmax < jmin ) return Matrix(0,0);
+        Matrix M( imax - imin + 1, jmax - jmin + 1 );
+        for( int i = imin; i <= imax; i++ )
+        for( int j = jmin; j <= jmax; j++ )
+        {
+            M( i - imin, j - jmin ) = (*this)( i, j );
+        }
+        return M;
+    }
+
+    // Overwrites rows imin..imax and columns jmin..jmax (inclusive) with B,
+    // whose shape must match the block exactly.
+    void Matrix::SetBlock( int imin, int imax, int jmin, int jmax, const Matrix &B )
+    {
+        int ni = imax - imin + 1;
+        int nj = jmax - jmin + 1;
+        assert( ni == B.Rows() );
+        assert( nj == B.Cols() );
+        int k = imin * cols + jmin;
+        for( int i = 0; i < ni; i++ )
+        for( int j = 0; j < nj; j++ )
+        {
+            elem[ k + i * cols + j ] = B(i,j);
+        }
+    }
+
+    // Overwrites part of a single row (imin == imax) or of a single column
+    // (jmin == jmax) with the vector V.
+    void Matrix::SetBlock( int imin, int imax, int jmin, int jmax, const Vector &V )
+    {
+        int k = imin * cols + jmin;
+        if( imin == imax )
+        {
+            int nj = jmax - jmin + 1;
+            assert( nj == V.Size() );
+            for( int j = 0; j < nj; j++ ) elem[ k + j ] = V(j);
+        }
+        else if( jmin == jmax )
+        {
+            int ni = imax - imin + 1;
+            assert( ni == V.Size() );
+            for( int i = 0; i < ni; i++ ) elem[ k + i * cols ] = V(i);
+        }
+        else
+        {
+            // This assertion will be false, and will signal an error.
+            assert( imin == imax || jmin == jmax );
+        }
+    }
+
+    Matrix &Matrix::SwapRows( int i1, int i2 )
+    {
+        float temp;
+        float *r1 = elem + ( i1 * cols );
+        float *r2 = elem + ( i2 * cols );
+        for( int j = 0; j < cols; j++ )
+        {
+            temp = *r1;
+            *r1 = *r2;
+            *r2 = temp;
+            r1++;
+            r2++;
+        }
+        return *this;
+    }
+
+    Matrix &Matrix::SwapCols( int j1, int j2 )
+    {
+        float temp;
+        float *c1 = elem + j1;
+        float *c2 = elem + j2;
+        for( int i = 0; i < rows; i++ )
+        {
+            temp = *c1;
+            *c1 = *c2;
+            *c2 = temp;
+            c1 += cols;
+            c2 += cols;
+        }
+        return *this;
+    }
+
+    /*-------------------------------------------------------------------------*
+     *                                                                         *
+     *                 A S S I G N M E N T   O P E R A T O R S                 *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    Matrix& Matrix::operator=( const Matrix &M )
+    {
+        SetSize( M.Rows(), M.Cols() );
+        float *e = elem;
+        float *m = M.Array();
+        for( int i = 0; i < rows * cols; i++ ) *e++ = *m++;
+        return *this;
+    }
+
+    // Sets every element to s; the shape is unchanged.
+    Matrix& Matrix::operator=( float s )
+    {
+        float *e = elem;
+        for( int i = 0; i < rows * cols; i++ ) *e++ = s;
+        return *this;
+    }
+
+    /*-------------------------------------------------------------------------*
+     *                                                                         *
+     *                            O P E R A T O R S                            *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    // Matrix times column vector, M * A.
+    Vector operator*( const Matrix &M, const Vector &A )
+    {
+        // Handle the special case with translation built in.
+        if( M.Cols() == 4 && M.Rows() == 4 && A.Size() == 3 )
+        {
+            Vector C(3);
+            C(0) = M(0,0) * A(0) + M(0,1) * A(1) + M(0,2) * A(2) + M(0,3);
+            C(1) = M(1,0) * A(0) + M(1,1) * A(1) + M(1,2) * A(2) + M(1,3);
+            C(2) = M(2,0) * A(0) + M(2,1) * A(1) + M(2,2) * A(2) + M(2,3);
+            return C;
+        }
+        assert( M.Cols() == A.Size() );
+        Vector C( M.Rows() );
+        float *m = M.Array();
+        for( int i = 0; i < M.Rows(); i++ )
+        {
+            // Start from zero so that a matrix with no columns is never
+            // dereferenced.
+            float *a = A.Array();
+            double sum = 0.0;
+            for( int j = 0; j < M.Cols(); j++ )
+                sum += (*m++) * (*a++);
+            C(i) = sum;
+        }
+        return C;
+    }
+
+    // Row vector times matrix, A * M.
+    Vector operator*( const Vector &A, const Matrix &M )
+    {
+        assert( A.Size() == M.Rows() );
+        Vector C( M.Cols() );
+        for( int j = 0; j < M.Cols(); j++ )
+        {
+            double sum = 0.0;
+            float *a = A.Array();
+            for( int i = 0; i < M.Rows(); i++ )
+                sum += (*a++) * M(i,j);
+            C(j) = sum;
+        }
+        return C;
+    }
+
+    // Replaces A with M * A.  Note that the matrix is applied on the left,
+    // unlike operator*( Vector, Matrix ) above.
+    Vector& operator*=( Vector &A, const Matrix &M )
+    {
+        // Handle the special case with translation built in.
+        if( M.Cols() == 4 && M.Rows() == 4 && A.Size() == 3 )
+        {
+            float x = M(0,0) * A(0) + M(0,1) * A(1) + M(0,2) * A(2) + M(0,3);
+            float y = M(1,0) * A(0) + M(1,1) * A(1) + M(1,2) * A(2) + M(1,3);
+            float z = M(2,0) * A(0) + M(2,1) * A(1) + M(2,2) * A(2) + M(2,3);
+            A(0) = x;
+            A(1) = y;
+            A(2) = z;
+            return A;
+        }
+        assert( M.Cols() == A.Size() );
+        Vector C( M.Rows() );
+        float *m = M.Array();
+        for( int i = 0; i < M.Rows(); i++ )
+        {
+            double sum = 0.0;
+            for( int j = 0; j < A.Size(); j++ )
+                sum += (*m++) * A(j);
+            C(i) = sum;
+        }
+        return A = C;
+    }
+
+    Matrix& operator*=( Matrix &M, float s )
+    {
+        float *m = M.Array();
+        for( int i = 0; i < M.Rows() * M.Cols(); i++ ) *m++ *= s;
+        return M;
+    }
+
+    Matrix& operator/=( Matrix &M, float s )
+    {
+        assert( s != 0.0 );
+        float *m = M.Array();
+        for( int i = 0; i < M.Rows() * M.Cols(); i++ ) *m++ /= s;
+        return M;
+    }
+
+    Matrix operator+( const Matrix &A, const Matrix &B )
+    {
+        assert( A.Rows() == B.Rows() );
+        assert( A.Cols() == B.Cols() );
+        Matrix C( A.Rows(), A.Cols() );
+        float *a = A.Array();
+        float *b = B.Array();
+        float *c = C.Array();
+        for( int i = 0; i < A.Rows() * A.Cols(); i++ ) (*c++) = (*a++) + (*b++);
+        return C;
+    }
+
+    Matrix operator-( const Matrix &A, const Matrix &B )
+    {
+        assert( A.Rows() == B.Rows() );
+        assert( A.Cols() == B.Cols() );
+        Matrix C( A.Rows(), A.Cols() );
+        float *a = A.Array();
+        float *b = B.Array();
+        float *c = C.Array();
+        for( int i = 0; i < A.Rows() * A.Cols(); i++ ) (*c++) = (*a++) - (*b++);
+        return C;
+    }
+
+    // Element-wise negation.  The result has the same shape as A; allocating
+    // it as Cols x Rows would mis-shape the result for non-square matrices.
+    Matrix operator-( const Matrix &A )
+    {
+        Matrix B( A.Rows(), A.Cols() );
+        float *a = A.Array();
+        float *b = B.Array();
+        for( int i = 0; i < A.Rows() * A.Cols(); i++ )
+        {
+            *b++ = -(*a++);
+        }
+        return B;
+    }
+
+    Matrix& operator+=( Matrix &A, const Matrix &B )
+    {
+        assert( A.Rows() == B.Rows() );
+        assert( A.Cols() == B.Cols() );
+        float *a = A.Array();
+        float *b = B.Array();
+        for( int i = 0; i < A.Rows() * A.Cols(); i++ ) (*a++) += (*b++);
+        return A;
+    }
+
+    Matrix operator*( const Matrix &A, const Matrix &B )
+    {
+        assert( A.Cols() == B.Rows() );
+        Matrix M( A.Rows(), B.Cols() );
+        for( int i = 0; i < A.Rows(); i++ )
+        for( int j = 0; j < B.Cols(); j++ )
+        {
+            double sum = 0.0;
+            for( int k = 0; k < A.Cols(); k++ ) sum += A(i,k) * B(k,j);
+            M(i,j) = sum;
+        }
+        return M;
+    }
+
+    // Scalar multiple of A.  The result has the same shape as A.
+    Matrix operator*( float s, const Matrix &A )
+    {
+        Matrix B( A.Rows(), A.Cols() );
+        float *a = A.Array();
+        float *b = B.Array();
+        for( int i = 0; i < A.Rows() * A.Cols(); i++ )
+        {
+            *b++ = s * (*a++);
+        }
+        return B;
+    }
+
+    // Scalar multiple of A.  The result has the same shape as A.
+    Matrix operator*( const Matrix &A, float s )
+    {
+        Matrix B( A.Rows(), A.Cols() );
+        float *a = A.Array();
+        float *b = B.Array();
+        for( int i = 0; i < A.Rows() * A.Cols(); i++ )
+        {
+            *b++ = s * (*a++);
+        }
+        return B;
+    }
+
+    // A divided element-wise by s.  The result has the same shape as A.
+    Matrix operator/( const Matrix &A, float s )
+    {
+        assert( s != 0.0 );
+        Matrix B( A.Rows(), A.Cols() );
+        float *a = A.Array();
+        float *b = B.Array();
+        for( int i = 0; i < A.Rows() * A.Cols(); i++ )
+        {
+            *b++ = (*a++) / s;
+        }
+        return B;
+    }
+
+    // Replaces A with A * B.
+    Matrix& operator*=( Matrix &A, const Matrix &B )
+    {
+        assert( A.Cols() == B.Rows() );
+
+        // The row-by-row update below overwrites A as it goes, so it is only
+        // valid when the product has the same shape as A (B is square) and
+        // B is not A itself.  Otherwise build the product in a temporary
+        // and assign it, which also gives A its new shape.
+        if( &A == &B || B.Cols() != A.Cols() ) return A = A * B;
+
+        Vector R( B.Cols() );
+        for( int i = 0; i < A.Rows(); i++ )
+        {
+            for( int j = 0; j < B.Cols(); j++ ) // Compute the ith row of A * B.
+            {
+                double sum = 0.0;
+                for( int k = 0; k < A.Cols(); k++ ) sum += A(i,k) * B(k,j);
+                R(j) = sum;
+            }
+            // Copy the new i'th row back into A.
+            for( int k = 0; k < A.Cols(); k++ ) A(i,k) = R(k);
+        }
+        return A;
+    }
+
+    /*-------------------------------------------------------------------------*
+     *                                                                         *
+     *              M I S C E L L A N E O U S   F U N C T I O N S              *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    Matrix Transp( const Matrix &M )
+    {
+        Matrix T( M.Cols(), M.Rows() );
+        float *m = M.Array();
+        for( int i = 0; i < M.Rows(); i++ )
+        for( int j = 0; j < M.Cols(); j++ ) T(j,i) = *m++;
+        return T;
+    }
+
+    // Computes A * Transp(A).
+    Matrix AATransp( const Matrix &A )
+    {
+        int n = A.Rows();
+        Matrix B( n, n );
+        for( int i = 0; i < n; i++ )
+        for( int j = 0; j < n; j++ )
+        {
+            double sum = 0.0;
+            for( int k = 0; k < A.Cols(); k++ )
+                sum += A(i,k) * A(j,k);
+            B(i,j) = sum;
+        }
+        return B;
+    }
+
+    // Computes Transp(A) * A.
+    Matrix ATranspA( const Matrix &A )
+    {
+        int n = A.Cols();
+        Matrix B( n, n );
+        for( int i = 0; i < n; i++ )
+        for( int j = 0; j < n; j++ )
+        {
+            double sum = 0.0;
+            for( int k = 0; k < A.Rows(); k++ )
+                sum += A(k,i) * A(k,j);
+            B(i,j) = sum;
+        }
+        return B;
+    }
+
+    // Computes the outer product of the vectors A and B.
+    Matrix Outer( const Vector &A, const Vector &B )
+    {
+        Matrix M( A.Size(), B.Size() );
+        for( int i = 0; i < A.Size(); i++ )
+        {
+            float c = A(i);
+            for( int j = 0; j < B.Size(); j++ ) M(i,j) = c * B(j);
+        }
+        return M;
+    }
+
+    // Computes the maximum absolute row sum of the matrix A.
+    // NOTE(review): among the induced matrix norms this quantity is the
+    // infinity-norm; the 1-norm is the maximum absolute column sum, which is
+    // what SupNorm below computes.  The two names therefore appear to be
+    // swapped.  The behavior is left unchanged because callers may rely on it.
+    double OneNorm( const Matrix &A )
+    {
+        double norm = 0.0;
+        for( int i = 0; i < A.Rows(); i++ )
+        {
+            double sum = 0.0;
+            for( int j = 0; j < A.Cols(); j++ ) sum += Abs( A(i,j) );
+            if( sum > norm ) norm = sum;
+        }
+        return norm;
+    }
+
+    // Computes the maximum absolute column sum of the matrix A (see the
+    // note on OneNorm above regarding the naming).
+    double SupNorm( const Matrix &A )
+    {
+        // Accumulate the absolute sum of each column and keep the largest.
+        double norm = 0.0;
+        for( int j = 0; j < A.Cols(); j++ )
+        {
+            double sum = 0.0;
+            for( int i = 0; i < A.Rows(); i++ ) sum += Abs( A(i,j) );
+            if( sum > norm ) norm = sum;
+        }
+        return norm;
+    }
+
+    // Returns the square matrix with the elements of the vector d along
+    // its diagonal.
+    Matrix Diag( const Vector &d )
+    {
+        Matrix D( d.Size() );
+        for( int i = 0; i < d.Size(); i++ ) D(i,i) = d(i);
+        return D;
+    }
+
+    // Returns the 3 x 3 diagonal matrix with x, y, and z as its diagonal
+    // elements.
+    Matrix Diag( float x, float y, float z )
+    {
+        Matrix D(3,3);
+        D(0,0) = x;
+        D(1,1) = y;
+        D(2,2) = z;
+        return D;
+    }
+
+    // Returns the vector consisting of the diagonal elements of the
+    // matrix M, which need not be square.
+    Vector Diag( const Matrix &M )
+    {
+        int m = Min( M.Rows(), M.Cols() );
+        Vector V(m);
+        for( int i = 0; i < m; i++ ) V(i) = M(i,i);
+        return V;
+    }
+
+    // Returns the n x n identity matrix.
+    Matrix Ident( int n )
+    {
+        Matrix I( n );
+        for( int i = 0; i < n; i++ ) I(i,i) = 1.0;
+        return I;
+    }
+
+    // Determines whether the matrix M is "Null" -- i.e. has zero rows
+    // or columns.
+    int Null( const Matrix &M )
+    {
+        return M.Rows() == 0 || M.Cols() == 0;
+    }
+
+    // Determines whether the matrix M has as many rows as columns.
+    int Square( const Matrix &M )
+    {
+        return M.Rows() == M.Cols();
+    }
+
+    // Convert a "vector-shaped" matrix to a vector. That is, represent a
+    // matrix with a single row or a single column as a vector.  Any other
+    // shape trips an assertion; with assertions disabled an empty Vector
+    // is returned.
+    Vector ToVector( const Matrix &M )
+    {
+        if( M.Rows() == 1 )
+        {
+            Vector V( M.Cols() );
+            for( int j = 0; j < M.Cols(); j++ ) V(j) = M(0,j);
+            return V;
+        }
+        else if( M.Cols() == 1 )
+        {
+            Vector V( M.Rows() );
+            for( int i = 0; i < M.Rows(); i++ ) V(i) = M(i,0);
+            return V;
+        }
+        else
+        {
+            // Report an error.
+            assert( M.Rows() == 1 || M.Cols() == 1 );
+        }
+        return Vector();
+    }
+
+    // Prints one matrix row per line, each prefixed by its row index, or
+    // "NULL" for a matrix with no rows or no columns.
+    std::ostream &operator<<( std::ostream &out, const Matrix &M )
+    {
+        if( M.Rows() == 0 || M.Cols() == 0 )
+        {
+            out << "NULL" << std::endl;
+        }
+        else for( int i = 0; i < M.Rows(); i++ )
+        {
+            out << form( "%3d: ", i );
+            for( int j = 0; j < M.Cols(); j++ )
+                out << form( " %10.5g", M(i,j) );
+            out << std::endl;
+        }
+        return out;
+    }
+
+    /*-------------------------------------------------------------------------*
+     * R O T A T I O N                                                         *
+     *                                                                         *
+     * Builds a 3x3 modeling matrix that performs a rotation about an          *
+     * arbitrary axis. The rotation is right-handed about this axis and        *
+     * "angle" is taken to be in radians. The only error that can occur is     *
+     * when "axis" is the zero-vector.                                         *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    Matrix Rotation( const Vector &Axis, float angle )
+    {
+        // Compute a unit quaternion (a,b,c,d) that performs the rotation.
+        // A zero-length axis yields a default-filled 3x3 matrix.
+
+        float t = TwoNormSqr( Axis );
+        if( t == 0.0 ) return Matrix(3,3);
+        t = sin( angle * 0.5 ) / sqrt( t );
+
+        // Fill in the entries of the quaternion.
+
+        float a = cos( angle * 0.5 );
+        float b = t * Axis(0);
+        float c = t * Axis(1);
+        float d = t * Axis(2);
+
+        // Compute all the double products of a, b, c, and d, except a * a.
+
+        float bb = b * b;
+        float cc = c * c;
+        float dd = d * d;
+        float ab = a * b;
+        float ac = a * c;
+        float ad = a * d;
+        float bc = b * c;
+        float bd = b * d;
+        float cd = c * d;
+
+        // Fill in the entries of the rotation matrix.
+
+        Matrix R(3,3);
+
+        R(0,0) = 1.0 - 2.0 * ( cc + dd );
+        R(0,1) = 2.0 * ( bc + ad );
+        R(0,2) = 2.0 * ( bd - ac );
+
+        R(1,0) = 2.0 * ( bc - ad );
+        R(1,1) = 1.0 - 2.0 * ( bb + dd );
+        R(1,2) = 2.0 * ( cd + ab );
+
+        R(2,0) = 2.0 * ( bd + ac );
+        R(2,1) = 2.0 * ( cd - ab );
+        R(2,2) = 1.0 - 2.0 * ( bb + cc );
+
+        return R;
+    }
+
+    /*-------------------------------------------------------------------------*
+     * R O T A T I O N                                                         *
+     *                                                                         *
+     * Builds a 4x4 modeling matrix that performs a rotation about an          *
+     * arbitrary axis through an arbitrary point. The rotation is              *
+     * right-handed about this axis and "angle" is taken to be in radians.     *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    Matrix Rotation( const Vector &Axis, const Vector &Origin, float angle )
+    {
+        Matrix R = Rotation( Axis, angle ); // A simple 3x3 rotation.
+        Matrix M = Ident(4);                // A 4x4 including translation.
+
+        // Compute the last column of the matrix (the translation) using the
+        // 3x3 rotation matrix. We need the last column of the 4x4 matrix
+        // that performs Translate( Origin ) * Rotate * Translate( -Origin ).
+        //
+        //    | I  p | | R  0 | | I -p |   | R  p - Rp |
+        //    |      | |      | |      | = |           |
+        //    | 0  1 | | 0  1 | | 0  1 |   | 0     1   |
+        //
+        // So, the desired column is p - R p.
+
+        Vector V( Origin - R * Origin );
+        for( int i = 0; i < 3; i++ )
+        {
+            M(i,3) = V(i);
+            for( int j = 0; j < 3; j++ )
+                M(i,j) = R(i,j);
+        }
+        return M;
+    }
+
+    /*-------------------------------------------------------------------------*
+     * X R O T A T I O N                                                       *
+     *                                                                         *
+     * Builds a 3x3 modeling matrix that performs a rotation about the X-axis.
*
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    Matrix Xrotation( float angle )
+    {
+        // Start from the identity and overwrite the 2x2 block that mixes
+        // the Y and Z coordinates.
+        const float sine   = sin( angle );
+        const float cosine = cos( angle );
+        Matrix R( Ident(3) );
+        R(1,1) = cosine;
+        R(1,2) = -sine;
+        R(2,1) = sine;
+        R(2,2) = cosine;
+        return R;
+    }
+
+    /*-------------------------------------------------------------------------*
+     * Y R O T A T I O N                                                       *
+     *                                                                         *
+     * Builds a 3x3 modeling matrix that performs a rotation about the Y-axis. *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    Matrix Yrotation( float angle )
+    {
+        // Start from the identity and overwrite the entries that mix the
+        // X and Z coordinates.
+        // NOTE(review): -sin sits in the upper-right entry, as in Xrotation
+        // and Zrotation.  For the Y axis that is the opposite sense of
+        // rotation under the convention those two follow; confirm the
+        // intended handedness before relying on it.
+        const float sine   = sin( angle );
+        const float cosine = cos( angle );
+        Matrix R( Ident(3) );
+        R(0,0) = cosine;
+        R(0,2) = -sine;
+        R(2,0) = sine;
+        R(2,2) = cosine;
+        return R;
+    }
+
+    /*-------------------------------------------------------------------------*
+     * Z R O T A T I O N                                                       *
+     *                                                                         *
+     * Builds a 3x3 modeling matrix that performs a rotation about the Z-axis. *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    Matrix Zrotation( float angle )
+    {
+        // Start from the identity and overwrite the 2x2 block that mixes
+        // the X and Y coordinates.
+        const float sine   = sin( angle );
+        const float cosine = cos( angle );
+        Matrix R( Ident(3) );
+        R(0,0) = cosine;
+        R(0,1) = -sine;
+        R(1,0) = sine;
+        R(1,1) = cosine;
+        return R;
+    }
+
+    /*-------------------------------------------------------------------------*
+     * H O U S E H O L D E R                                                   *
+     *                                                                         *
+     * Returns the Householder reflection matrix that reflects through the     *
+     * plane orthogonal to V. The vector V is not assumed to be normalized.    *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    Matrix Householder( const Vector &V )
+    {
+        // H = I - ( 2 / (V . V) ) V Transp(V).  V must not be the zero vector.
+        const float scale = 2.0 / ( V * V );
+        return Ident( V.Size() ) - Outer( scale * V, V );
+    }
+
+    /*=========================================================================*
+     * R O T A T I O N                                 Author: Jim Arvo, 1991  *
+     *                                                                         *
+     * This routine maps three values (x1, x2, x3) in the range [0,1] into     *
+     * a 3x3 rotation matrix, M.
Uniformly distributed random variables *
+     * x1, x2, and x3 create uniformly distributed random rotation matrices.   *
+     * To create small uniformly distributed "perturbations", supply           *
+     * samples in the following ranges                                         *
+     *                                                                         *
+     *     x1 in [ 0, d ]                                                      *
+     *     x2 in [ 0, 1 ]                                                      *
+     *     x3 in [ 0, d ]                                                      *
+     *                                                                         *
+     * where 0 < d < 1 controls the size of the perturbation. Any of the       *
+     * random variables may be stratified (or "jittered") for a slightly more  *
+     * even distribution.                                                      *
+     *                                                                         *
+     *=========================================================================*/
+    Matrix Rotation( float x1, float x2, float x3 )
+    {
+        Matrix M(3,3);
+        float theta = x1 * TwoPi; // Rotation about the pole (Z).
+        float phi   = x2 * TwoPi; // For direction of pole deflection.
+        float z     = x3 * 2.0;   // For magnitude of pole deflection.
+
+        // Compute a vector V used for distributing points over the sphere
+        // via the reflection I - V Transpose(V). This formulation of V
+        // will guarantee that if x1 and x2 are uniformly distributed,
+        // the reflected points will be uniform on the sphere. Note that V
+        // has length sqrt(2) to eliminate the 2 in the Householder matrix.
+
+        float r  = sqrt( z );
+        float Vx = sin( phi ) * r;
+        float Vy = cos( phi ) * r;
+        float Vz = sqrt( 2.0 - z );
+
+        // Compute the row vector S = Transpose(V) * R, where R is a simple
+        // rotation by theta about the z-axis. No need to compute Sz since
+        // it's just Vz.
+
+        float st = sin( theta );
+        float ct = cos( theta );
+        float Sx = Vx * ct - Vy * st;
+        float Sy = Vx * st + Vy * ct;
+
+        // Construct the rotation matrix ( V Transpose(V) - I ) R, which
+        // is equivalent to V S - R.
+
+        M(0,0) = Vx * Sx - ct;
+        M(0,1) = Vx * Sy - st;
+        M(0,2) = Vx * Vz;
+
+        M(1,0) = Vy * Sx + st;
+        M(1,1) = Vy * Sy - ct;
+        M(1,2) = Vy * Vz;
+
+        M(2,0) = Vz * Sx;
+        M(2,1) = Vz * Sy;
+        M(2,2) = 1.0 - z; // This equals Vz * Vz - 1.0
+
+        return M;
+    }
+
+    /*-------------------------------------------------------------------------*
+     * P A R T I A L   P I V O T                                               *
+     *                                                                         *
+     * Look for the element with the largest magnitude on or below the         *
+     * diagonal in column "col" of the matrix A. Bring this element to the     *
+     * diagonal by a row interchange. Perform the same row interchange on b.   *
+     *                                                                         *
+     * Returns 0 if every candidate element is zero (no usable pivot) and 1    *
+     * otherwise.                                                              *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    static int PartialPivot( int col, Matrix &A, Vector &b )
+    {
+        int n = A.Rows(); // "i" below is a row index, so bound it by the row count.
+        float a_max = Abs( A( col, col ) );
+        int i_max = col;
+        for( int i = col + 1; i < n; i++ )
+        {
+            float temp = Abs( A( i, col ) );
+            if( temp > a_max )
+            {
+                a_max = temp;
+                i_max = i;
+            }
+        }
+        if( a_max == 0.0 ) return 0;
+        if( i_max != col )
+        {
+            A.SwapRows( col, i_max );
+            b.Swap ( col, i_max );
+        }
+        return 1;
+    }
+
+    /*-------------------------------------------------------------------------*
+     * G A U S S I A N   E L I M I N A T I O N                                 *
+     *                                                                         *
+     * Solves the linear system A x = b using Gaussian elimination, with or    *
+     * without partial pivoting.                                               *
+     *                                                                         *
+     * Returns 1 on success. Returns 0, leaving the contents of x              *
+     * unspecified, if a zero pivot is met (a singular matrix or, without      *
+     * pivoting, a zero on the diagonal), instead of dividing by zero.         *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    int GaussElimination( const Matrix &A, const Vector &b, Vector &x, pivot_type pivot )
+    {
+        assert( Square( A ) );
+        assert( A.Rows() == b.Size() );
+        Matrix B( A );
+        Vector c( b );
+        x.SetSize( A.Cols() );
+        int m = B.Rows();
+        int i, j, k;
+
+        // Perform Gaussian elimination on the copies, B and c.
+
+        for( i = 0; i < m; i++ )
+        {
+            if( pivot == pivot_partial ) PartialPivot( i, B, c );
+
+            // Every diagonal element is used as a divisor, either just below
+            // or during back substitution, so a zero here is fatal.
+            if( B(i,i) == 0.0 ) return 0;
+
+            for( j = i + 1; j < m; j++ )
+            {
+                double scale = -B(j,i) / B(i,i);
+                for( k = i; k < m; k++ )
+                    B(j,k) += scale * B(i,k);
+                B(j,i) = 0.0;
+                c(j) += scale * c(i);
+            }
+        }
+
+        // Now solve by back substitution.
+
+        for( i = m - 1; i >= 0; i-- )
+        {
+            double a = 0.0;
+            for( j = i + 1; j < m; j++ ) a += B(i,j) * x(j);
+            x(i) = ( c(i) - a ) / B(i,i);
+        }
+
+        return 1;
+    }
+
+    /*-------------------------------------------------------------------------*
+     * L E A S T   S Q U A R E S                                               *
+     *                                                                         *
+     * Solves the normal equations associated with the system A x = b, which   *
+     * are given by Transp(A) A x = Transp(A) b.                               *
+     *                                                                         *
+     * Returns the status reported by GaussElimination.                        *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    int LeastSquares( const Matrix &A, const Vector &b, Vector &x )
+    {
+        //
+        // Set up and solve the normal equations Transp(A) A x = Transp(A) b.
+        // Note that Transp(A) * b is computed here as b * A.
+        //
+        return GaussElimination( ATranspA(A), b * A, x );
+    }
+
+    /*-------------------------------------------------------------------------*
+     * D E T E R M I N A N T                                                   *
+     *                                                                         *
+     * Computes the determinant of the n by n matrix M using Householder       *
+     * transformations.                                                        *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    double Determinant( const Matrix &M )
+    {
+        static const float MachEps = MachineEpsilon();
+        assert( Square(M) );
+
+        double dot;
+        int k;
+        Matrix A = M;     // Make a copy that we can destroy.
+        double det = 1.0; // Multiply diagonal elements as they are generated.
+        int sign = 1;     // Keep track of sign (each reflection has det -1).
+        int n = M.Cols();
+
+        // The determinant of the empty matrix is 1; returning here also keeps
+        // the final A(n-1,n-1) access in range.
+        if( n == 0 ) return 1.0;
+
+        for( int i = 0; i < n - 1; i++ )
+        {
+            // Compute the 2-norm of the first column of the (n-i)x(n-i) submatrix.
+
+            dot = 0.0;
+            for( k = i; k < n; k++ ) dot += Sqr( A(k,i) );
+
+            double Xnorm = sqrt( dot );
+            if( Xnorm == 0.0 ) return 0.0;
+
+            // This norm is another diagonal element of the upper triangular
+            // matrix, so we multiply it into the running product for det.
+
+            det *= Xnorm;
+
+            // If X is already of the right form we must not perform the
+            // processing because V will be zero.  "Right form" means
+            // X = ( +Xnorm, 0, 0, ... ), so the signed diagonal element is
+            // compared: a column ( -Xnorm, 0, 0, ... ) still needs the
+            // reflection, otherwise the sign of the determinant is lost.
+
+            float x1 = Abs( A(i,i) );
+            float diff = Abs( Xnorm - A(i,i) );
+            if( diff < MachEps * Max( Xnorm, x1 ) ) continue; // This column is okay as is.
+
+            // Each Householder transformation has a determinant of -1,
+            // so we must keep track of how many we apply.
+
+            sign *= -1;
+
+            // Compute the V vector, which will define the Householder
+            // transformation via H = I - V transp(V). Leave it in the
+            // i'th column of A. V = sqrt(2) * Normalized( X - ( Xnorm, 0, 0,... ) ).
+
+            float scale = 1.0 / sqrt( Xnorm * Abs( A(i,i) - Xnorm ) ); // sqrt(2) / || p ||
+            A(i,i) = ( A(i,i) - Xnorm ) * scale;
+            for( k = i + 1; k < n; k++ ) A(k,i) *= scale;
+
+            // Now apply the transformation I - V Transp(V) to all the remaining columns,
+            // except for the first row.
+
+            for( int j = i + 1; j < n; j++ )
+            {
+                // Compute Y dot V.
+
+                dot = 0.0;
+                for( k = i; k < n; k++ ) dot += A(k,i) * A(k,j);
+
+                // Subtract V ( V dot A(*,j) ) from A(*,j), ignoring the first row.
+
+                for( k = i + 1; k < n; k++ ) A(k,j) -= A(k,i) * dot;
+
+            } // for j
+
+        } // for i
+
+        // Now multiply in the very last element of the matrix and
+        // the accumulated sign.
+
+        return det * A(n-1,n-1) * sign;
+    }
+
+    /*-------------------------------------------------------------------------*
+     * C O F A C T O R                                                         *
+     *                                                                         *
+     * Computes the (i,j) cofactor of the n by n matrix M.
* + * * + *-------------------------------------------------------------------------*/ + double Matrix::Cofactor( int omit_i, int omit_j ) const + { + assert( Square( *this ) ); + assert( omit_i >= 0 && omit_j >= 0 ); + assert( omit_i < Rows() ); + assert( omit_j < Cols() ); + + // Create a new matrix that is smaller by one in both dimensions and + // copy the old matrix into it, omitting the specified row and column. + + Matrix A( Rows() - 1, Cols() - 1 ); + for( int i = 0; i < Rows() - 1; i++ ) + { + int ii = ( i < omit_i ) ? i : i + 1; + for( int j = 0; j < Cols() - 1; j++ ) + { + int jj = ( j < omit_j ) ? j : j + 1; + A( i, j ) = (*this)(ii,jj); + } + } + + // Return the determinant of the smaller matrix. + + return Determinant( A ); + } + + /*-------------------------------------------------------------------------* + * A D J O I N T * + * * + * Computes the adjoint of a matrix. * + * * + *-------------------------------------------------------------------------*/ + Matrix Adjoint( const Matrix &M ) + { + double det; + return Adjoint( M, det ); // Discard the determinant. 
+ } + + Matrix Adjoint( const Matrix &M, double &det ) + { + int n = M.Rows(); + det = 0.0; + Matrix A( n, n ); + assert( Square(M) ); + if( n == 3 ) + { + A(0,0) = M(1,1) * M(2,2) - M(1,2) * M(2,1); + A(0,1) = M(1,2) * M(2,0) - M(1,0) * M(2,2); + A(0,2) = M(1,0) * M(2,1) - M(1,1) * M(2,0); + + A(1,0) = M(0,2) * M(2,1) - M(0,1) * M(2,2); + A(1,1) = M(0,0) * M(2,2) - M(0,2) * M(2,0); + A(1,2) = M(0,1) * M(2,0) - M(0,0) * M(2,1); + + A(2,0) = M(0,1) * M(1,2) - M(0,2) * M(1,1); + A(2,1) = M(0,2) * M(1,0) - M(0,0) * M(1,2); + A(2,2) = M(0,0) * M(1,1) - M(0,1) * M(1,0); + + det = A(0,0) * M(0,0) + A(1,0) * M(1,0) + A(2,0) * M(2,0); + } + else + { + for( register int i = 0; i < n; i++ ) + { + for( register int j = 0; j < n; j++ ) + { + if( Odd( i + j ) ) + A(i,j) = -M.Cofactor(i,j); + else A(i,j) = M.Cofactor(i,j); + } + det += M(i,0) * A(i,0); + } + } + return A; + } + + /*-------------------------------------------------------------------------* + * I N V E R S E * + * * + * Computes the inverse of a square matrix. * + * * + *-------------------------------------------------------------------------*/ + Matrix Inverse( const Matrix &M ) + { + assert( Square( M ) ); + int n = M.Cols(); + Matrix Inv( n, n ); + Vector b( n ), x( n ); + + for( int i = 0; i < n; i++ ) + { + if( i > 0 ) b( i - 1 ) = 0.0; + b(i) = 1.0; + GaussElimination( M, b, x ); + Inv.SetCol( i, x ); + } + return Inv; + } + + /*-------------------------------------------------------------------------* + * T R A C E * + * * + * Computes the trace of a square matrix. * + * * + *-------------------------------------------------------------------------*/ + extern double Trace( const Matrix &M ) + { + assert( Square(M) ); + double trace = M(0,0); + for( int i = 1; i < M.Cols(); i++ ) trace += M(i,i); + return trace; + } +}; + + + +/* + +C +C Subroutine GAUSS solves the the system Ax = b by using Gaussian elimination. 
+C + +SUBROUTINE GAUSS( A, B, X, LDA, N, IFLAG ) +REAL A( LDA, N ), B( N ), X( N ) + +DO 300 I = 1 , N - 1 +I2 = I +CALL PIVOT( A, B, LDA, N, I2, IFLAG ) +IF ( IFLAG .LT. 0 ) RETURN +DO 200 J = I + 1 , N +TEMP = A( J , I ) / A( I , I ) +A( J , I ) = 0.0 +B( J ) = B( J ) - TEMP * B( I ) +DO 100 K = I + 1 , N +A( J , K ) = A( J , K ) - TEMP * A( I , K ) +100 CONTINUE +200 CONTINUE +300 CONTINUE + +X( N ) = B( N ) / A( N , N ) +DO 500 I = N - 1 , 1 , -1 +TEMP = 0.0 +DO 400 J = I + 1 , N +TEMP = TEMP + A( I , J ) * X( J ) +400 CONTINUE +X( I ) = ( B( I ) - TEMP ) / A( I , I ) +500 CONTINUE + +RETURN +END + + + +SUBROUTINE PIVOT( A, B, LDA, N, J, IFLAG ) +REAL A( LDA, N ), B( N ), AMAX, TEMP +DATA TOL / 1.0E-6 / + +IFLAG = -1 +IF ( J .GT. N ) RETURN +IF ( J .EQ. N .AND. ABS( A(N,N) ) .LT. TOL ) RETURN +IF ( J .EQ. N ) GO TO 40 + +AMAX = ABS( A( J , J ) ) +INDEX = J +10 DO 20 I = J + 1 , N +IF ( ABS( A( I , J ) ) .LE. AMAX ) GO TO 20 +AMAX = ABS( A( I , J ) ) +INDEX = I +20 CONTINUE + +IF ( AMAX .LT. TOL ) RETURN + +TEMP = B( J ) +B( J ) = B( INDEX ) +B( INDEX ) = TEMP + +DO 30 K = 1 , N +TEMP = A( J , K ) +A( J , K ) = A( INDEX , K ) +A( INDEX , K ) = TEMP +30 CONTINUE + +40 IFLAG = 1 +RETURN +END + + +*/ + + + + + diff --git a/src/nvtt/bc7/arvo/Matrix.h b/src/nvtt/bc7/arvo/Matrix.h new file mode 100644 index 0000000..1832c8f --- /dev/null +++ b/src/nvtt/bc7/arvo/Matrix.h @@ -0,0 +1,142 @@ +/*************************************************************************** +* Matrix.h * +* * +* General Vector and Matrix classes, with all the associated methods. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 08/16/2000 Revamped for CIT tools. * +* arvo 10/31/1994 Combined RowVec & ColVec into Vector. * +* arvo 06/30/1993 Added singular value decomposition class. * +* arvo 06/25/1993 Major revisions. * +* arvo 09/08/1991 Initial implementation. 
* +* * +*--------------------------------------------------------------------------* +* Copyright (C) 2000, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#ifndef __MATRIX_INCLUDED__ +#define __MATRIX_INCLUDED__ + +#include +#include "Vector.h" + +namespace ArvoMath { + + class Matrix { + public: + Matrix( const Matrix & ); + Matrix( int num_rows = 0, int num_cols = 0, float value = 0.0 ); + ~Matrix(); + Matrix &operator=( const Matrix &M ); + Matrix &operator=( float s ); + Vector GetCol( int col ) const; + Vector GetRow( int row ) const; + void SetCol( int col, const Vector & ); + void SetRow( int row, const Vector & ); + Matrix GetBlock( int imin, int imax, int jmin, int jmax ) const; + void SetBlock( int imin, int imax, int jmin, int jmax, const Matrix & ); + void SetBlock( int imin, int imax, int jmin, int jmax, const Vector & ); + Matrix &SwapRows( int i1, int i2 ); + Matrix &SwapCols( int j1, int j2 ); + void SetSize( int rows, int cols = 0 ); + double Cofactor( int i, int j ) const; + static const Matrix Null; + + public: // Inlined functions. + inline float operator()( int i, int j ) const { return elem[ i * cols + j ]; } + inline float &operator()( int i, int j ) { return elem[ i * cols + j ]; } + inline int Rows () const { return rows; } + inline int Cols () const { return cols; } + inline float *Array () const { return elem; } + + private: + int rows; // Number of rows in the matrix. + int cols; // Number of columns in the matrix. 
+ float *elem; // Pointer to the actual data. + }; + + + extern Vector operator * ( const Matrix &, const Vector & ); + extern Vector operator * ( const Vector &, const Matrix & ); + extern Vector& operator *= ( Vector &, const Matrix & ); + extern Matrix Outer ( const Vector &, const Vector & ); // Outer product. + extern Matrix operator + ( const Matrix &, const Matrix & ); + extern Matrix operator - ( const Matrix & ); + extern Matrix operator - ( const Matrix &, const Matrix & ); + extern Matrix operator * ( const Matrix &, const Matrix & ); + extern Matrix operator * ( const Matrix &, float ); + extern Matrix operator * ( float , const Matrix & ); + extern Matrix operator / ( const Matrix &, float ); + extern Matrix& operator += ( Matrix &, const Matrix & ); + extern Matrix& operator *= ( Matrix &, float ); + extern Matrix& operator *= ( Matrix &, const Matrix & ); + extern Matrix& operator /= ( Matrix &, float ); + extern Matrix Ident ( int n ); + extern Matrix Householder ( const Vector & ); + extern Matrix Rotation ( const Vector &Axis, float angle ); + extern Matrix Rotation ( const Vector &Axis, const Vector &Origin, float angle ); + extern Matrix Rotation ( float, float, float ); // For random 3D rotations. 
+ extern Matrix Xrotation ( float ); + extern Matrix Yrotation ( float ); + extern Matrix Zrotation ( float ); + extern Matrix Diag ( const Vector & ); + extern Vector Diag ( const Matrix & ); + extern Matrix Diag ( float, float, float ); + extern Matrix Adjoint ( const Matrix & ); + extern Matrix Adjoint ( const Matrix &, double &det ); + extern Matrix AATransp ( const Matrix & ); + extern Matrix ATranspA ( const Matrix & ); + extern double OneNorm ( const Matrix & ); + extern double SupNorm ( const Matrix & ); + extern double Determinant ( const Matrix & ); + extern double Trace ( const Matrix & ); + extern Matrix Transp ( const Matrix & ); + extern Matrix Inverse ( const Matrix & ); + extern int Null ( const Matrix & ); + extern int Square ( const Matrix & ); + extern Vector ToVector ( const Matrix & ); // Only for vector-shaped matrices. + + enum pivot_type { + pivot_off, + pivot_partial, + pivot_total + }; + + extern int GaussElimination( + const Matrix &A, + const Vector &b, // This is the right-hand side. + Vector &x, // This is the matrix we are solving for. + pivot_type = pivot_off + ); + + extern int LeastSquares( + const Matrix &A, + const Vector &b, + Vector &x + ); + + extern int WeightedLeastSquares( + const Matrix &A, + const Vector &b, + const Vector &w, + Vector &x + ); + + std::ostream &operator<<( + std::ostream &out, + const Matrix & + ); +}; + +#endif diff --git a/src/nvtt/bc7/arvo/Perm.cpp b/src/nvtt/bc7/arvo/Perm.cpp new file mode 100644 index 0000000..87e98e3 --- /dev/null +++ b/src/nvtt/bc7/arvo/Perm.cpp @@ -0,0 +1,503 @@ +/*************************************************************************** +* Perm.C * +* * +* This file defines permutation class: that is, a class for creating and * +* manipulating finite sequences of distinct integers. The main feature * +* of the class is the "++" operator that can be used to step through all * +* N! permutations of a sequence of N integers. 
As the set of permutations * +* forms a multiplicative group, a multiplication operator and an * +* exponentiation operator are also defined. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 07/01/93 Added the Partition class. * +* arvo 03/23/93 Initial coding. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1999, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#include +#include +#include "Perm.h" +#include "ArvoMath.h" +#include "Char.h" + +namespace ArvoMath { + + /*************************************************************************** + * + * L O C A L F U N C T I O N S + * + ***************************************************************************/ + + static void Reverse( int *p, int n ) + { + int k = n >> 1; + int m = n - 1; + for( int i = 0; i < k; i++ ) Swap( p[i], p[m-i] ); + } + + static void Error( char *msg ) + { + fprintf( stderr, "ERROR: Perm, %s.\n", msg ); + } + + /*************************************************************************** + ** + ** M E M B E R F U N C T I O N S + ** + ***************************************************************************/ + + Perm::Perm( int Left, int Right ) + { + a = ( Left < Right ) ? Left : Right; + b = ( Left > Right ) ? 
Left : Right; + p = new int[ Size() ]; + Reset( *this ); + } + + Perm::Perm( const Perm &Q ) + { + a = Q.Min(); + b = Q.Max(); + p = new int[ Q.Size() ]; + for( int i = 0; i < Size(); i++ ) p[i] = Q[i]; + } + + Perm::Perm( const char *str ) + { + (*this) = str; + } + + Perm &Perm::operator=( const char *str ) + { + int k, m = 0, n = 0; + char dig[10]; + char c; + if( p != NULL ) delete[] p; + p = new int[ strlen(str)/2 + 1 ]; + for(;;) + { + c = *str++; + if( isDigit(c) ) dig[m++] = c; + else if( m > 0 ) + { + dig[m] = NullChar; + sscanf( dig, "%d", &k ); + if( n == 0 ) a = k; else if( k < a ) a = k; + if( n == 0 ) b = k; else if( k > b ) b = k; + p[n++] = k; + m = 0; + } + if( c == NullChar ) break; + } + for( int i = 0; i < n; i++ ) + { + int N = i + a; + int okay = 0; + for( int j = 0; j < n; j++ ) + if( p[j] == N ) { okay = 1; break; } + if( !okay ) + { + Error( "string is not a valid permutation" ); + return *this; + } + } + return *this; + } + + void Perm::Get( char *str ) const + { + for( int i = 0; i < Size(); i++ ) + str += sprintf( str, "%d ", p[i] ); + *str = NullChar; + } + + int Perm::Next() + { + int i, m, k = 0; + int N, M = 0; + + // Look for the first element of p that is larger than its successor. + // If no such element exists, we are done. + + M = p[0]; // M is always the "previous" value. + for( i = 1; i < Size(); i++ ) // Now start with second element. + { + if( p[i] > M ) { k = i; break; } + M = p[i]; + } + if( k == 0 ) return 0; // Already in descending order. + m = k - 1; + + // Find the largest entry before k that is less than p[k]. + // One exists because p[k] is bigger than M, i.e. p[k-1]. + + N = p[k]; + for( i = 0; i < k - 1; i++ ) + { + if( p[i] < N && p[i] > M ) { M = p[i]; m = i; } + } + Swap( p[m], p[k] ); // Entries 0..k-1 are still decreasing. + Reverse( p, k ); // Make first k elements increasing. 
+ return 1; + } + + int Perm::Prev() + { + int i, m, k = 0; + int N, M = 0; + + // Look for the first element of p that is less than its successor. + // If no such element exists, we are done. + + M = p[0]; // M will always be the "previous" value. + for( i = 1; i < Size(); i++ ) // Start with the second element. + { + if( p[i] < M ) { k = i; break; } + M = p[i]; + } + if( k == 0 ) return 0; // Already in ascending order. + m = k - 1; + + // Find the smallest entry before k that is greater than p[k]. + // One exists because p[k] is less than M, i.e. p[k-1]. + + N = p[k]; + for( i = 0; i < k - 1; i++ ) + { + if( p[i] > N && p[i] < M ) { M = p[i]; m = i; } + } + Swap( p[m], p[k] ); // Entries 0..k-1 are still increasing. + Reverse( p, k ); // Make first k elements decreasing. + return 1; + } + + + /*************************************************************************** + ** + ** O P E R A T O R S + ** + ***************************************************************************/ + + int Perm::operator++() + { + return Next(); + } + + int Perm::operator--() + { + return Prev(); + } + + Perm &Perm::operator+=( int n ) + { + int i; + if( n > 0 ) for( i = 0; i < n; i++ ) if( !Next() ) break; + if( n < 0 ) for( i = n; i < 0; i++ ) if( !Prev() ) break; + return *this; + } + + Perm &Perm::operator-=( int n ) + { + int i; + if( n > 0 ) for( i = 0; i < n; i++ ) if( !Prev() ) break; + if( n < 0 ) for( i = n; i < 0; i++ ) if( !Next() ) break; + return *this; + } + + int Perm::operator[]( int n ) const + { + if( n < 0 || Size() <= n ) + { + Error( "permutation index[] out of range" ); + return 0; + } + return p[ n ]; + } + + int Perm::operator()( int n ) const + { + if( n < Min() || Max() < n ) + { + Error( "permutation index() out of range" ); + return 0; + } + return p[ n - Min() ]; + } + + Perm &Perm::operator=( const Perm &Q ) + { + if( Size() != Q.Size() ) + { + delete[] p; + p = new int[ Q.Size() ]; + } + a = Q.Min(); + b = Q.Max(); + for( int i = 0; i < Size(); i++ ) 
p[i] = Q[i]; + return *this; + } + + Perm Perm::operator*( const Perm &Q ) const + { + if( Min() != Q.Min() ) return Perm(0); + if( Max() != Q.Max() ) return Perm(0); + Perm A( Min(), Max() ); + for( int i = 0; i < Size(); i++ ) A.Elem(i) = p[ Q[i] - Min() ]; + return A; + } + + Perm Perm::operator^( int n ) const + { + Perm A( Min(), Max() ); + int pn = n; + if( n < 0 ) // First compute the inverse. + { + for( int i = 0; i < Size(); i++ ) + A.Elem( p[i] - Min() ) = i + Min(); + pn = -n; + } + for( int i = 0; i < Size(); i++ ) + { + int k = ( n < 0 ) ? A[i] : p[i]; + for( int j = 1; j < pn; j++ ) k = p[ k - Min() ]; + A.Elem(i) = k; + } + return A; + } + + Perm &Perm::operator()( int i, int j ) + { + Swap( p[ i - Min() ], p[ j - Min() ] ); + return *this; + } + + int Perm::operator==( const Perm &Q ) const + { + int i; + if( Min() != Q.Min() ) return 0; + if( Max() != Q.Max() ) return 0; + for( i = 0; i < Size(); i++ ) if( p[i] != Q[i] ) return 0; + return 1; + } + + int Perm::operator<=( const Perm &Q ) const + { + int i; + if( Min() != Q.Min() ) return 0; + if( Max() != Q.Max() ) return 0; + for( i = 0; i < Size(); i++ ) if( p[i] != Q[i] ) return p[i] < Q[i]; + return 1; + } + + void Reset( Perm &P ) + { + for( int i = 0; i < P.Size(); i++ ) P.Elem(i) = P.Min() + i; + } + + int End( const Perm &P ) + { + int c = P[0]; + for( int i = 1; i < P.Size(); i++ ) + { + if( c < P[i] ) return 0; + c = P[i]; + } + return 1; + } + + void Print( const Perm &P ) + { + if( P.Size() > 0 ) + { + printf( "%d", P[0] ); + for( int i = 1; i < P.Size(); i++ ) printf( " %d", P[i] ); + printf( "\n" ); + } + } + + int Even( const Perm &P ) + { + return !Odd( P ); + } + + int Odd( const Perm &P ) + { + int count = 0; + Perm Q( P ); + for( int i = P.Min(); i < P.Max(); i++ ) + { + if( Q(i) == i ) continue; + for( int j = P.Min(); j <= P.Max(); j++ ) + { + if( j == i ) continue; + if( Q(j) == i ) + { + Q(i,j); + count = ( j - i ) + ( count % 2 ); + } + } + } + return count % 2; + } + + + 
/*************************************************************************** + ** + ** P A R T I T I O N S + ** + ***************************************************************************/ + + Partition::Partition( ) + { + Bin = NULL; + bins = 0; + balls = 0; + } + + Partition::Partition( const Partition &Q ) + { + Bin = new int[ Q.Bins() ]; + bins = Q.Bins(); + balls = Q.Balls(); + for( int i = 0; i < bins; i++ ) Bin[i] = Q[i]; + } + + Partition::Partition( int bins_, int balls_ ) + { + bins = bins_; + balls = balls_; + Bin = new int[ bins ]; + Reset( *this ); + } + + void Partition::operator+=( int bin ) // Add a ball to this bin. + { + if( bin < 0 || bin >= bins ) fprintf( stderr, "ERROR -- bin number out of range.\n" ); + balls++; + Bin[ bin ]++; + } + + int Partition::operator==( const Partition &P ) const // Compare two partitions. + { + if( Balls() != P.Balls() ) return 0; + if( Bins () != P.Bins () ) return 0; + for( int i = 0; i < bins; i++ ) + { + if( Bin[i] != P[i] ) return 0; + } + return 1; + } + + void Partition::operator=( int n ) // Set to the n'th configuration. + { + Reset( *this ); + for( int i = 0; i < n; i++ ) ++(*this); + } + + int Partition::operator!=( const Partition &P ) const + { + return !( *this == P ); + } + + void Partition::operator=( const Partition &Q ) + { + if( bins != Q.Bins() ) + { + delete[] Bin; + Bin = new int[ Q.Bins() ]; + } + bins = Q.Bins(); + balls = Q.Balls(); + for( int i = 0; i < bins; i++ ) Bin[i] = Q[i]; + } + + void Partition::Get( char *str ) const + { + for( int i = 0; i < bins; i++ ) + str += sprintf( str, "%d ", Bin[i] ); + *str = NullChar; + } + + int Partition::operator[]( int i ) const + { + if( i < 0 || i >= bins ) return 0; + else return Bin[i]; + } + + long Partition::NumCombinations() const // How many distinct configurations. + { + // Think of the k "bins" as being k - 1 "partitions" mixed in with + // the n "balls". 
If the balls and partitions were each distinguishable + // objects, there would be (n + k - 1)! distinct configurations. + // But since both the balls and the partitions are indistinguishable, + // we simply divide by n! (k - 1)!. This is the binomial coefficient + // ( n + k - 1, n ). + // + if( balls == 0 ) return 0; + if( bins == 1 ) return 1; + return (long)floor( BinomialCoeff( balls + bins - 1, balls ) + 0.5 ); + } + + /*************************************************************************** + * O P E R A T O R + + (Next Partition) * + * * + * Rearranges the n "balls" in k "bins" into the next configuration. * + * The first config is assumed to be all balls in the first bin -- i.e. * + * Bin[0]. All possible groupings are generated, each exactly once. The * + * function returns 1 if successful, 0 if the last config has already been * + * reached. (Algorithm by Harold Zatz) * + * * + ***************************************************************************/ + int Partition::operator++() + { + int i; + if( Bin[0] > 0 ) + { + Bin[1] += 1; + Bin[0] -= 1; + } + else + { + for( i = 1; Bin[i] == 0; i++ ); + if( i == bins - 1 ) return 0; + Bin[i+1] += 1; + Bin[0] = Bin[i] - 1; + Bin[i] = 0; + } + return 1; + } + + void Reset( Partition &P ) + { + P.Bin[0] = P.Balls(); + for( int i = 1; i < P.Bins(); i++ ) P.Bin[i] = 0; + } + + int End( const Partition &P ) + { + return P[ P.Bins() - 1 ] == P.Balls(); + } + + void Print( const Partition &P ) + { + if( P.Bins() > 0 ) + { + printf( "%d", P[0] ); + for( int i = 1; i < P.Bins(); i++ ) printf( " %d", P[i] ); + printf( "\n" ); + } + } +}; diff --git a/src/nvtt/bc7/arvo/Perm.h b/src/nvtt/bc7/arvo/Perm.h new file mode 100644 index 0000000..2af4776 --- /dev/null +++ b/src/nvtt/bc7/arvo/Perm.h @@ -0,0 +1,111 @@ +/*************************************************************************** +* Perm.h * +* * +* This file defines permutation class: that is, a class for creating and * +* manipulating finite sequences of distinct 
integers. The main feature * +* of the class is the "++" operator that can be used to step through all * +* N! permutations of a sequence of N integers. As the set of permutations * +* forms a multiplicative group, a multiplication operator and an * +* exponentiation operator are also defined. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 07/01/93 Added the Partition class. * +* arvo 03/23/93 Initial coding. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1999, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#ifndef __PERM_INCLUDED__ +#define __PERM_INCLUDED__ + +namespace ArvoMath { + + class Perm { + public: + Perm( const Perm & ); // Initialize from a permutation. + Perm( int a = 0, int b = 0 ); // Create permutation of ints a...b. + Perm( const char * ); // Create from string of numbers. + ~Perm() { delete p; } // Destructor. + void Get( char * ) const; // Gets a string representation. + int Size() const { return b - a + 1;} // The number of elements. + int Min () const { return a; } // The smallest value. + int Max () const { return b; } // The largest value. + int operator++(); // Make "next" permutation. + int operator--(); // Make "previous" permutation. + Perm &operator+=( int n ); // Advances by n permutations. + Perm &operator-=( int n ); // Decrement by n permutations. + Perm &operator =( const char * ) ; // Resets from string of numbers. 
+ Perm &operator =( const Perm & ) ; // Copy from another permutation. + Perm &operator()( int i, int j ) ; // Swap entries i and j. + int operator()( int n ) const; // Index from Min() to Max(). + int operator[]( int n ) const; // Index from 0 to Size() - 1. + Perm operator ^( int n ) const; // Exponentiation: -1 means inverse. + Perm operator *( const Perm & ) const; // Multiplication means composition. + int operator==( const Perm & ) const; // True if all elements match. + int operator<=( const Perm & ) const; // Lexicographic order relation. + private: + int& Elem( int i ) { return p[i]; } + int Next(); + int Prev(); + int a, b; + int *p; + friend void Reset( Perm & ); + }; + + + // A "Partition" is a collection of k indistinguishable "balls" in n "bins". + // The Partition class encapsulates this notion and provides a convenient means + // of generating all possible partitions of k objects among n bins exactly once. + // Starting with all objects in bin zero, the ++ operator creates new and distinct + // distributions among the bins until all objects are in the last bin. + + class Partition { + public: + Partition( ); // Creates a null partition. + Partition( const Partition & ); // Initialize from another partition. + Partition( int bins, int balls ); // Specify # of bins & balls. + ~Partition() { delete Bin; } // Descructor. + void Get( char * ) const; // Gets a string representation. + int Bins () const { return bins; } // The number of bins. + int Balls() const { return balls; } // The number of balls. + void operator+=( int bin ); // Add a ball to this bin. + void operator =( int n ); // Set to the n'th configuration. + void operator =( const Partition& ); // Copy from another partition. + int operator==( const Partition& ) const; // Compare two partitions. + int operator!=( const Partition& ) const; // Compare two partitions. + int operator++(); // Make "next" partition. + int operator[]( int i ) const; // Return # of balls in bin i. 
+ long NumCombinations() const; // Number of distinct configurations. + private: + int bins; + int balls; + int* Bin; + friend void Reset( Partition & ); + }; + + + // Predicates for determining when a permutation or partition is the last of + // the sequence, functions for printing, resetting, and miscellaneous operations. + + extern int End ( const Partition & ); // True if all balls in last bin. + extern int End ( const Perm & ); // True if descending. + extern int Even ( const Perm & ); // True if even # of 2-cycles. + extern int Odd ( const Perm & ); // True if odd # of 2-cycles. + extern void Print( const Partition & ); // Write to standard out. + extern void Print( const Perm & ); // Write to standard out. + extern void Reset( Partition & ); // Reset to all balls in bin 0. + extern void Reset( Perm & ); // Reset to ascending order. +}; +#endif diff --git a/src/nvtt/bc7/arvo/Rand.cpp b/src/nvtt/bc7/arvo/Rand.cpp new file mode 100644 index 0000000..5f3025b --- /dev/null +++ b/src/nvtt/bc7/arvo/Rand.cpp @@ -0,0 +1,230 @@ +/*************************************************************************** +* Rand.C (Random Number Generators) * +* * +* Source file for pseudo-random number utilities. Rand is the * +* base class for several different algorithms for generating pseudo-random * +* numbers. Any method can generate individual samples or arrays of * +* samples using "Eval". The random seed can be reset at any time by * +* calling "Seed" with any integer. Random permutations of the integers * +* 0,1,...(n-1) are generated by "Perm(n,P)". * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 08/04/97 Changed to virtual functions. * +* arvo 06/06/93 Optimization, especially for array evaluators. * +* arvo 10/06/91 Converted to C++ * +* arvo 11/20/89 Added "gen_seed" function to handle. * +* arvo 10/30/89 "state" allocation now done in rand_alloc. * +* arvo 07/08/89 Initial coding. 
* +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1989, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#include +#include +#include "Rand.h" + +namespace ArvoMath { +#ifndef ABS +#define ABS( x ) ((x) > 0 ? (x) : -(x)) +#endif + + /*-------------------------------------------------------------------------* + * M E T H O D 1 * + * * + * From "Numerical Recipes," by William H. Press, Brian P. Flannery, * + * Saul A. Teukolsky, and William T. Vetterling, p. 197. 
*
+     * *
+     *-------------------------------------------------------------------------*/
+    // Parameters of the linear congruential step  x <- (IA * x + IC) % M1.
+    static const long   M1 = 714025;
+    static const long   IA = 1366;
+    static const long   IC = 150889;
+    static const double RM = 1.400512E-6;   // Scale factor, approximately 1/M1.
+
+    // Returns the next sample.  The previous output ("index") selects one of
+    // the shuffle-table slots 1..97; that slot's value is returned (scaled by
+    // RM) and the slot is refilled with the next congruential value.
+    float RandGen_1::Eval()
+    {
+        long slot = 1 + ( 97 * index ) / M1;
+        if( slot > 97 ) slot = 97;
+        if( slot < 1  ) slot = 1;
+        long *elem = shuffle + slot;
+        const float sample = ( index = *elem ) * RM;
+        *elem = ( seed = ( IA * seed + IC ) % M1 );
+        return sample;
+    }
+
+    // Fills array[0..n-1] with the next n samples; same sequence that n
+    // successive calls to Eval() would produce.
+    void RandGen_1::Eval( int n, float *array )
+    {
+        long *table = shuffle;
+        for( int i = 0; i < n; i++ )
+        {
+            long slot = 1 + ( 97 * index ) / M1;
+            if( slot > 97 ) slot = 97;
+            if( slot < 1  ) slot = 1;
+            long *elem = table + slot;
+            *array++ = ( index = *elem ) * RM;
+            *elem = ( seed = ( IA * seed + IC ) % M1 );
+        }
+    }
+
+    // Re-initializes the generator: fills shuffle[1..97] and sets the running
+    // state ("seed" and "index") from the given integer.
+    void RandGen_1::Seed( long seed )
+    {
+        long t = ( IC + ABS( seed ) + 1 ) % M1;
+        for( int k = 1; k <= 97; k++ )
+        {
+            t = ( IA * t + IC ) % M1;
+            shuffle[k] = ABS( t );
+        }
+        t = ( IA * t + IC ) % M1;
+        // The parameter shadows the data member of the same name; without
+        // "this->" the member was never written and Eval() consumed an
+        // uninitialized value.
+        this->seed = ABS( t );
+        index      = ABS( t );
+    }
+
+    /*-------------------------------------------------------------------------*
+     * M E T H O D 2 *
+     * *
+     * From "The Multiple Prime Random Number Generator," by Alexander Haas, *
+     * ACM Transactions on Mathematical Software, Vol. 13, No. 4, December *
+     * 1987, pp. 368-381.
*
+     * *
+     *-------------------------------------------------------------------------*/
+    // Advances the three counters m, i and j (each wraps back by a fixed
+    // amount once it reaches its limit), mixes them into r, and returns r
+    // scaled by 1.00010001E-4.
+    float RandGen_2::Eval()
+    {
+        if( (m += 7    ) >= 9973   ) m -= 9871;
+        if( (i += 1907 ) >= 99991  ) i -= 89989;
+        if( (j += 73939) >= 224729 ) j -= 96233;
+        r = ((r * m + i + j) % 100000) / 10;
+        return r * 1.00010001E-4;
+    }
+
+    // Fills array[0..n-1] with the next n samples; same sequence that n
+    // successive calls to Eval() would produce.
+    void RandGen_2::Eval( int n, float *array )
+    {
+        for( int k = 0; k < n; k++ )
+        {
+            if( (m += 7    ) >= 9973   ) m -= 9871;
+            if( (i += 1907 ) >= 99991  ) i -= 89989;
+            if( (j += 73939) >= 224729 ) j -= 96233;
+            r = ((r * m + i + j) % 100000) / 10;
+            *array++ = r * 1.00010001E-4;
+        }
+    }
+
+    // Derives the four state variables from the given integer and raises
+    // m, i and j by a fixed offset when they fall below a minimum.
+    void RandGen_2::Seed( long seed )
+    {
+        r = ABS( seed );
+        m = ABS( seed * 7 );
+        i = ABS( seed * 11 );
+        j = ABS( seed * 13 );
+        if( m < 100    ) m += 100;
+        if( i < 10000  ) i += 10000;
+        if( j < 128000 ) j += 128000;
+    }
+
+    /*-------------------------------------------------------------------------*
+     * M E T H O D 3 *
+     * *
+     * From "A More Portable Fortran Random Number Generator," by Linus *
+     * Schrage, ACM Transactions on Mathematical Software, Vol. 5, No, 2, *
+     * June 1979, pp. 132-138.
* + * * + *-------------------------------------------------------------------------*/ + static const long A3 = 16807; + static const long P3 = 2147483647; + + float RandGen_3::Eval() + { + long xhi = ix >> 16; + long xalo = ( ix & 0xFFFF ) * A3; + long leftlo = xalo >> 16; + long fhi = xhi * A3 + leftlo; + long k = fhi >> 15; + ix = ( ((xalo - (leftlo << 16)) - P3) + + ((fhi - (k << 15)) << 16) ) + k; + if( ix < 0 ) ix += P3; + return ix * 4.656612875E-10; + } + + void RandGen_3::Eval( int n, float *array ) + { + register long xhi, xalo, leftlo; + register long fhi, k; + for( register int i = 0; i < n; i++ ) + { + xhi = ix >> 16; + xalo = ( ix & 0xFFFF ) * A3; + leftlo = xalo >> 16; + fhi = xhi * A3 + leftlo; + k = fhi >> 15; + ix = ( ((xalo - (leftlo << 16)) - P3) + + ((fhi - (k << 15)) << 16) ) + k; + if( ix < 0 ) ix += P3; + *array++ = ix * 4.656612875E-10; + } + } + + void RandGen_3::Seed( long seed ) + { + ix = ABS( seed ); + } + + /*-------------------------------------------------------------------------* + * R A N D : : P E R M (Permutation) * + * * + * This routine fills an integer array of length "len" with a random * + * permutation of the integers 0, 1, 2, ... (len-1). * + * * + * For efficiency, the random numbers are generated in batches of up to * + * "Nmax" at a time. The constant Nmax can be set to any value >= 1. * + * * + *-------------------------------------------------------------------------*/ + static const int Nmax = 20; + + void RandGen::Perm( int len, int perm[] ) + { + float R[ Nmax ]; // A buffer for getting random numbers. + int L = len - 1; // Total number of random numbers needed. + int N = 0; // How many to generate when we call Eval. + int n = 0; // The array index into R. + + // First initialize the array "perm" to the identity permutation. + + for( int j = 0; j < len; j++ ) perm[j] = j; + + // Now swap a random element in the front with the i'th element. + // When i gets down to 0, we're done. 
+ + for( int i = len - 1; i > 0; i-- ) // Element i is a swap candidate. + { + if( n == N ) // Generate more random numbers. + { + N = ( L < Nmax ) ? L : Nmax; // Can't get more than "Nmax". + Eval( N, R ); // Generate N random numbers. + L -= N; // Decrement total counter. + n = 0; // Start index at beginning of R. + } + float r = ( i + 1 ) * R[ n++ ]; // Pick a float in [0,i+1]. + int k = (int)r; // Truncate r to an integer. + if( k < i ) // Disregard k == i and k == i+1. + { + int tmp = perm[i]; // Swap elements i and k. + perm[i] = perm[k]; + perm[k] = tmp; + } + } + } +}; diff --git a/src/nvtt/bc7/arvo/Rand.h b/src/nvtt/bc7/arvo/Rand.h new file mode 100644 index 0000000..a8ef5d9 --- /dev/null +++ b/src/nvtt/bc7/arvo/Rand.h @@ -0,0 +1,114 @@ +/*************************************************************************** +* Rand.h (Random Number Generators) * +* * +* Header file for Rand.C, pseudo-random number utilities. Rand is the * +* base class for several different algorithms for generating pseudo-random * +* numbers. Any method can generate individual samples or arrays of * +* samples using "Eval". The random seed can be reset at any time by * +* calling "Seed" with any integer. Random permutations of the integers * +* 0,1,...(n-1) are generated by "Perm(n,P)". * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 08/04/97 Changed to virtual functions. * +* arvo 06/06/93 Optimization, especially for array evaluators. * +* arvo 10/06/91 Converted to C++ * +* arvo 11/20/89 Added "gen_seed" function to handle. * +* arvo 10/30/89 "state" allocation now done in rand_alloc. * +* arvo 07/08/89 Initial coding. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1989, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. 
See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#ifndef __RAND_INCLUDED__ +#define __RAND_INCLUDED__ + +namespace ArvoMath { + + // Base class for random number generators. This class contains + // several pure virtual functions, so it cannot be instanced directly. + + class RandGen { + public: + RandGen() {} + virtual float Eval( ) = 0; + virtual void Eval( int n, float x[] ) = 0; + virtual void Seed( long seed ) = 0; + public: + void Perm( int n, int P[] ); + float Interval( float a, float b ); + void Eval( float &x ) { x = Eval(); } + }; + + + // Method 1: From "Numerical Recipes," by William H. Press, Brian P. + // Flannery, Saul A. Teukolsky, and William T. Vetterling, p. 197. + + class RandGen_1 : public RandGen { + public: + RandGen_1( ) { Seed( 1 ); } + RandGen_1( long seed ) { Seed( seed ); } + virtual float Eval( ); + virtual void Eval( int n, float x[] ); + virtual void Seed( long seed ); + private: + long index; + long seed; + long shuffle[ 98 ]; + }; + + + // Method 2: From "The Multiple Prime Random Number Generator," by + // Alexander Haas, ACM Transactions on Mathematical Software, + // Vol. 13, No. 4, December 1987, pp. 368-381. * + + class RandGen_2 : public RandGen { + public: + RandGen_2( ) { Seed( 1 ); } + RandGen_2( long seed ) { Seed( seed ); } + virtual float Eval( ); + virtual void Eval( int n, float x[] ); + virtual void Seed( long seed ); + private: + long r; + long m; + long i; + long j; + }; + + + // Method 3: From "A More Portable Fortran Random Number Generator," + // by Linus Schrage, ACM Transactions on Mathematical Software, + // Vol. 5, No, 2, June 1979, pp. 132-138. 
* + + class RandGen_3 : public RandGen { + public: + RandGen_3( ) { Seed( 1 ); } + RandGen_3( long seed ) { Seed( seed ); } + virtual float Eval( ); + virtual void Eval( int n, float x[] ); + virtual void Seed( long seed ); + private: + long ix; + }; + + + inline float RandGen::Interval( float a, float b ) + { + return ( a < b ) ? + a + Eval() * ( b - a ) : + b + Eval() * ( a - b ) ; + } +}; +#endif diff --git a/src/nvtt/bc7/arvo/SI_units.h b/src/nvtt/bc7/arvo/SI_units.h new file mode 100644 index 0000000..69cc8cc --- /dev/null +++ b/src/nvtt/bc7/arvo/SI_units.h @@ -0,0 +1,232 @@ +/***************************************************************************** +** +** MODULE NAME SI_units.h International System of Units (SI) +** +** DESCRIPTION +** The purpose of this header file is to provide a simple and efficient +** mechanism for associating physically meaningful units with floating +** point numbers. No extra space is required, and no runtime overhead +** is introduced; all type-checking occurs at compile time. +** +** +** HISTORY +** Name Date Description +** +** arvo 02/09/92 Replaced conversion macros with inline functions. +** arvo 10/16/91 Initial implementation. 
+** +** +** (c) Copyright 1991, 1992 +** Program of Computer Graphics, Cornell University, Ithaca, NY +** ALL RIGHTS RESERVED +** +*****************************************************************************/ + +#ifndef SI_UNITS_H +#define SI_UNITS_H + +#include + +namespace ArvoMath { + + const float + SI_deci = 1.0E-1, + SI_centi = 1.0E-2, + SI_milli = 1.0E-3, + SI_micro = 1.0E-6, + SI_nano = 1.0E-9, + SI_kilo = 1.0E+3, + SI_mega = 1.0E+6, + SI_giga = 1.0E+9, + SI_tera = 1.0E+12; + + /******************************************************************************* + * * + * I N T E R N A T I O N A L S Y S T E M O F U N I T S * + * * + ******************************************************************************** + * * + * DIMENSION CLASS INITIALIZER SYMBOL BASE UNITS * + * * + * length SI_length meter m m * + * time SI_time second s s * + * mass SI_mass kilogram kg kg * + * angle SI_angle radian rad rad * + * solid angle SI_solid_angle steradian sr sr * + * temperature SI_temperature kelvin K K * + * luminous intensity SI_lum_inten candela cd cd * + * area SI_area meter2 m2 m2 * + * volume SI_volume meter3 m3 m3 * + * frequency SI_frequency hertz Hz 1/s * + * force SI_force newton N m kg/s2 * + * energy SI_energy joule J m2 kg/s2 * + * power SI_power watt W m2 kg/s3 * + * radiance SI_radiance watts_per_m2sr W/m2sr kg/(s3 sr) * + * irradiance SI_irradiance watts_per_m2 W/m2 kg/s3 * + * radiant intensity SI_rad_inten watts_per_sr W/sr m2 kg/(s3 sr) * + * luminance SI_luminance candela_per_m2 cd/m2 cd/m2 * + * illuminance SI_illuminance lux lx cd sr/m2 * + * luminous flux SI_lum_flux lumen lm cd sr * + * luminous energy SI_lum_energy talbot tb cd sr s * + * * + *******************************************************************************/ + + class SI_dimensionless { + public: + float Value() const { return value; } + ostream& Put( ostream &s, char *a ) { return s << value << " " << a; } + protected: + SI_dimensionless() { value = 0; } + SI_dimensionless( float 
x ){ value = x; } + float value; + }; + + /******************************************************************************* + * The following macro is used for creating new quantity classes and their * + * corresponding initializing functions and abbreviations. This macro is * + * not intended to be used outside of this file -- it is a compact means of * + * defining generic operations for each quantity (e.g. scaling & comparing). * + *******************************************************************************/ + +#define SI_Make( C, Initializer, Symbol ) \ + struct C : SI_dimensionless { \ + C ( ) : SI_dimensionless( ) {}; \ + C ( float x ) : SI_dimensionless( x ) {}; \ + C operator * ( float x ) { return C( value * x ); } \ + C operator / ( float x ) { return C( value / x ); } \ + C operator /= ( float x ) { return C( value /= x ); } \ + C operator *= ( float x ) { return C( value *= x ); } \ + C operator + ( C x ) { return C( value + x.Value() ); } \ + C operator - ( ) { return C(-value ); } \ + C operator - ( C x ) { return C( value - x.Value() ); } \ + C operator += ( C x ) { return C( value += x.Value() ); } \ + C operator -= ( C x ) { return C( value -= x.Value() ); } \ + C operator = ( C x ) { return C( value = x.Value() ); } \ + int operator > ( C x ) { return ( value > x.Value() ); } \ + int operator < ( C x ) { return ( value < x.Value() ); } \ + int operator >= ( C x ) { return ( value >= x.Value() ); } \ + int operator <= ( C x ) { return ( value <= x.Value() ); } \ + float operator / ( C x ) { return ( value / x.Value() ); } \ + }; \ + inline ostream& operator<<(ostream &s, C x) {return x.Put(s,Symbol);} \ + inline C Initializer( float x ) { return C( x ); } \ + inline C operator * ( float x, C y ) { return C( x * y.Value() ); } + + /******************************************************************************* + * The following macros define permissible arithmetic operations among * + * variables with different physical meanings. 
This ensures that the * + * result of any such operation is ALWAYS another meaningful quantity. * + *******************************************************************************/ + +#define SI_Square( A, B ) \ + inline B operator*( A x, A y ) { return B( x.Value() * y.Value() ); } \ + inline A operator/( B x, A y ) { return A( x.Value() / y.Value() ); } + +#define SI_Recip( A, B ) \ + inline B operator/( float x, A y ) { return B( x / y.Value() ); } \ + inline A operator/( float x, B y ) { return A( x / y.Value() ); } \ + inline float operator*( A x, B y ) { return x.Value() * y.Value(); } \ + inline float operator*( B x, A y ) { return x.Value() * y.Value(); } + +#define SI_Times( A, B, C ) \ + inline C operator*( A x, B y ) { return C( x.Value() * y.Value() ); } \ + inline C operator*( B x, A y ) { return C( x.Value() * y.Value() ); } \ + inline A operator/( C x, B y ) { return A( x.Value() / y.Value() ); } \ + inline B operator/( C x, A y ) { return B( x.Value() / y.Value() ); } + + /******************************************************************************* + * The following macros create classes for a variety of quantities. These * + * include base qunatities such as "time" and "length" as well as derived * + * quantities such as "power" and "volume". Each quantity is provided with * + * an initialization function in SI units and an abbreviation for printing. 
* + *******************************************************************************/ + + SI_Make( SI_length , meter , "m" ); // Base Units: + SI_Make( SI_mass , kilogram , "kg" ); + SI_Make( SI_time , second , "s" ); + SI_Make( SI_lum_inten , candela , "cd" ); + SI_Make( SI_temperature , kelvin , "K" ); + SI_Make( SI_angle , radian , "rad" ); // Supplementary: + SI_Make( SI_solid_angle , steradian , "sr" ); + SI_Make( SI_area , meter2 , "m2" ); // Derived units: + SI_Make( SI_volume , meter3 , "m3" ); + SI_Make( SI_frequency , hertz , "Hz" ); + SI_Make( SI_force , newton , "N" ); + SI_Make( SI_energy , joule , "J" ); + SI_Make( SI_power , watt , "W" ); + SI_Make( SI_radiance , watts_per_m2sr , "W/m2sr" ); + SI_Make( SI_irradiance , watts_per_m2 , "W/m2" ); + SI_Make( SI_rad_inten , watts_per_sr , "W/sr" ); + SI_Make( SI_luminance , candela_per_m2 , "cd/m2" ); + SI_Make( SI_illuminance , lux , "lx" ); + SI_Make( SI_lum_flux , lumen , "lm" ); + SI_Make( SI_lum_energy , talbot , "tb" ); + SI_Make( SI_time2 , second2 , "s2" ); // Intermediate: + SI_Make( SI_sa_area , meter2_sr , "m2sr" ); + SI_Make( SI_inv_area , inv_meter2 , "1/m2" ); + SI_Make( SI_inv_solid_angle, inv_steradian , "1/sr" ); + SI_Make( SI_length_temp , meters_kelvin , "m K" ); + SI_Make( SI_power_area , watts_m2 , "W m2" ); + SI_Make( SI_power_per_volume, watts_per_m3 , "W/m3" ); + + SI_Square( SI_length , SI_area ); + SI_Square( SI_time , SI_time2 ); + SI_Recip ( SI_time , SI_frequency ); + SI_Recip ( SI_area , SI_inv_area ); + SI_Recip ( SI_solid_angle , SI_inv_solid_angle ); + + SI_Times( SI_area , SI_length , SI_volume ); + SI_Times( SI_force , SI_length , SI_energy ); + SI_Times( SI_power , SI_time , SI_energy ); + SI_Times( SI_lum_flux , SI_time , SI_lum_energy ); + SI_Times( SI_lum_inten , SI_solid_angle , SI_lum_flux ); + SI_Times( SI_radiance , SI_solid_angle , SI_irradiance ); + SI_Times( SI_rad_inten , SI_solid_angle , SI_power ); + SI_Times( SI_irradiance , SI_area , SI_power ); + SI_Times( 
SI_illuminance , SI_area , SI_lum_flux ); + SI_Times( SI_solid_angle , SI_area , SI_sa_area ); + SI_Times( SI_radiance , SI_sa_area , SI_power ); + SI_Times( SI_irradiance , SI_inv_solid_angle, SI_radiance ); + SI_Times( SI_power , SI_inv_solid_angle, SI_rad_inten ); + SI_Times( SI_length , SI_temperature , SI_length_temp ); + SI_Times( SI_power , SI_area , SI_power_area ); + + /******************************************************************************* + * Following are some useful non-SI units. These units can be used in place of * + * the unit-initializers above. Thus, a variable of type SI_length, for example* + * may be initialized in "meters", "inches", or "centimeters". In all cases, * + * however, the value is converted to the underlying SI unit (e.g. meters). * + *******************************************************************************/ + +#define SI_Convert( SI, New, Old ) inline SI New( float x ) { return x * Old; } + + SI_Convert( SI_time , minute , second( 60.0 ) ); + SI_Convert( SI_time , hour , minute( 60.0 ) ); + SI_Convert( SI_force , dyne , newton( 1.0E-5 ) ); + SI_Convert( SI_energy , erg , joule( 1.0E-7 ) ); + SI_Convert( SI_power , kilowatt , watt( SI_kilo ) ); + SI_Convert( SI_mass , gram , kilogram( SI_milli ) ); + SI_Convert( SI_length , inch , meter( 2.54E-2 ) ); + SI_Convert( SI_length , foot , inch( 12.0 ) ); + SI_Convert( SI_length , centimeter , meter( SI_centi ) ); + SI_Convert( SI_length , micron , meter( SI_micro ) ); + SI_Convert( SI_length , angstrom , meter( 1.0E-10 ) ); + SI_Convert( SI_area , barn , meter2( 1.0E-28 ) ); + SI_Convert( SI_angle , degree , radian( 0.017453 ) ); + SI_Convert( SI_illuminance , phot , lux( 1.0E+4 ) ); + SI_Convert( SI_illuminance , footcandle , lux( 9.29E-2 ) ); + SI_Convert( SI_luminance , stilb , candela_per_m2( 1.0E+4 ) ); + + /******************************************************************************* + * Often there are multiple names for a single quantity. 
Below are some * + * synonyms for the quantities defined above. These can be used in place of * + * the original quantities and may be clearer in some contexts. * + *******************************************************************************/ + + typedef SI_power SI_radiant_flux; + typedef SI_irradiance SI_radiant_flux_density; + typedef SI_irradiance SI_radiant_exitance; + typedef SI_radiance SI_intensity; + typedef SI_irradiance SI_radiosity; +}; +#endif \ No newline at end of file diff --git a/src/nvtt/bc7/arvo/SVD.cpp b/src/nvtt/bc7/arvo/SVD.cpp new file mode 100644 index 0000000..36f0ea6 --- /dev/null +++ b/src/nvtt/bc7/arvo/SVD.cpp @@ -0,0 +1,398 @@ +/*************************************************************************** +* SVD.C * +* * +* Singular Value Decomposition. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 08/22/2000 Copied to CIT library. * +* arvo 06/28/1993 Rewritten from "Numerical Recipes" C-code. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 2000, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. 
* +* * +***************************************************************************/ +#include +#include +#include "ArvoMath.h" +#include "Vector.h" +#include "Matrix.h" +#include "SVD.h" + +namespace ArvoMath { + static const int MaxIterations = 30; + + static double svd_pythag( double a, double b ) + { + double at = Abs(a); + double bt = Abs(b); + if( at > bt ) + return at * sqrt( 1.0 + Sqr( bt / at ) ); + else if( bt > 0.0 ) + return bt * sqrt( 1.0 + Sqr( at / bt ) ); + else return 0.0; + } + + static inline double SameSign( double a, double b ) + { + double t; + if( b >= 0.0 ) t = Abs( a ); + else t = -Abs( a ); + return t; + } + + static int ComputeRank( const Matrix &D, double epsilon ) + { + int rank = 0; + for( int i = 0; i < D.Rows(); i++ ) + if( Abs(D(i,i)) > epsilon ) rank++; + return rank; + } + + SVD::SVD( ) : Q_(0), D_(0), R_(0) + { + } + + SVD::SVD( const Matrix &M ) : Q_(0), D_(0), R_(0) + { + (*this) = M; + } + + void SVD::operator=( const Matrix &A ) + { + if( A.Rows() >= A.Cols() ) Q_ = A; + else + { + Q_ = Matrix( A.Cols() ); + for( int i = 0; i < A.Rows(); i++ ) + for( int j = 0; j < A.Cols(); j++ ) Q_(i,j) = A(i,j); + } + R_ = Matrix( A.Cols() ); + Decompose( Q_, D_, R_ ); + } + + const Matrix &SVD::Q( double epsilon ) const + { + int rank = 0; + if( epsilon != 0.0 ) rank = ComputeRank( D_, epsilon ); + return Q_; + } + + const Matrix &SVD::D( double epsilon ) const + { + int rank = 0; + if( epsilon != 0.0 ) rank = ComputeRank( D_, epsilon ); + return D_; + } + + const Matrix &SVD::R( double epsilon ) const + { + int rank = 0; + if( epsilon != 0.0 ) rank = ComputeRank( D_, epsilon ); + return R_; + } + + int SVD::Rank( double epsilon ) const + { + return ComputeRank( D_, epsilon ); + } + + int SVD::Decompose( Matrix &Q, Matrix &D, Matrix &R ) + { + int i, j, k, l, m, n, p, q, iter; + double c, f, h, s, x, y, z; + double norm = 0.0; + double g = 0.0; + double scale = 0.0; + + m = Q.Rows(); + n = Q.Cols(); + + Vector Temp( n ); + Vector diag( n 
); + + for( i = 0; i < n; i++ ) + { + + Temp(i) = scale * g; + scale = 0.0; + g = 0.0; + s = 0.0; + l = i + 1; + + if( i < m ) + { + for( k = i; k < m; k++ ) scale += Abs( Q(k,i) ); + if( scale != 0.0 ) + { + for( k = i; k < m; k++ ) + { + Q(k,i) /= scale; + s += Sqr( Q(k,i) ); + } + f = Q(i,i); + g = -SameSign( sqrt(s), f ); + h = f * g - s; + Q(i,i) = f - g; + if( i != n - 1 ) + { + for( j = l; j < n; j++ ) + { + s = 0.0; + for( k = i; k < m; k++ ) s += Q(k,i) * Q(k,j); + f = s / h; + for( k = i; k < m; k++ ) Q(k,j) += f * Q(k,i); + } + } + for( k = i; k < m; k++ ) Q(k,i) *= scale; + } + } + + diag(i) = scale * g; + g = 0.0; + s = 0.0; + scale = 0.0; + + if( i < m && i != n - 1 ) + { + for( k = l; k < n; k++ ) scale += Abs( Q(i,k) ); + if( scale != 0.0 ) + { + for( k = l; k < n; k++ ) + { + Q(i,k) /= scale; + s += Sqr( Q(i,k) ); + } + f = Q(i,l); + g = -SameSign( sqrt(s), f ); + h = f * g - s; + Q(i,l) = f - g; + for( k = l; k < n; k++ ) Temp(k) = Q(i,k) / h; + if( i != m - 1 ) + { + for( j = l; j < m; j++ ) + { + s = 0.0; + for( k = l; k < n; k++ ) s += Q(j,k) * Q(i,k); + for( k = l; k < n; k++ ) Q(j,k) += s * Temp(k); + } + } + for( k = l; k < n; k++ ) Q(i,k) *= scale; + } + } + norm = Max( norm, Abs( diag(i) ) + Abs( Temp(i) ) ); + } + + + for( i = n - 1; i >= 0; i-- ) + { + if( i < n - 1 ) + { + if( g != 0.0 ) + { + for( j = l; j < n; j++ ) R(i,j) = ( Q(i,j) / Q(i,l) ) / g; + for( j = l; j < n; j++ ) + { + s = 0.0; + for( k = l; k < n; k++ ) s += Q(i,k) * R(j,k); + for( k = l; k < n; k++ ) R(j,k) += s * R(i,k); + } + } + for( j = l; j < n; j++ ) + { + R(i,j) = 0.0; + R(j,i) = 0.0; + } + } + R(i,i) = 1.0; + g = Temp(i); + l = i; + } + + + for( i = n - 1; i >= 0; i-- ) + { + l = i + 1; + g = diag(i); + if( i < n - 1 ) for( j = l; j < n; j++ ) Q(i,j) = 0.0; + if( g != 0.0 ) + { + g = 1.0 / g; + if( i != n - 1 ) + { + for( j = l; j < n; j++ ) + { + s = 0.0; + for( k = l; k < m; k++ ) s += Q(k,i) * Q(k,j); + f = ( s / Q(i,i) ) * g; + for( k = i; k < m; k++ ) 
Q(k,j) += f * Q(k,i); + } + } + for( j = i; j < m; j++ ) Q(j,i) *= g; + } + else + { + for( j = i; j < m; j++ ) Q(j,i) = 0.0; + } + Q(i,i) += 1.0; + } + + + for( k = n - 1; k >= 0; k-- ) + { + for( iter = 1; iter <= MaxIterations; iter++ ) + { + int jump; + + for( l = k; l >= 0; l-- ) + { + q = l - 1; + if( Abs( Temp(l) ) + norm == norm ) { jump = 1; break; } + if( Abs( diag(q) ) + norm == norm ) { jump = 0; break; } + } + + if( !jump ) + { + c = 0.0; + s = 1.0; + for( i = l; i <= k; i++ ) + { + f = s * Temp(i); + Temp(i) *= c; + if( Abs( f ) + norm == norm ) break; + g = diag(i); + h = svd_pythag( f, g ); + diag(i) = h; + h = 1.0 / h; + c = g * h; + s = -f * h; + for( j = 0; j < m; j++ ) + { + y = Q(j,q); + z = Q(j,i); + Q(j,q) = y * c + z * s; + Q(j,i) = z * c - y * s; + } + } + } + + z = diag(k); + if( l == k ) + { + if( z < 0.0 ) + { + diag(k) = -z; + for( j = 0; j < n; j++ ) R(k,j) *= -1.0; + } + break; + } + if( iter >= MaxIterations ) return 0; + x = diag(l); + q = k - 1; + y = diag(q); + g = Temp(q); + h = Temp(k); + f = ( ( y - z ) * ( y + z ) + ( g - h ) * ( g + h ) ) / ( 2.0 * h * y ); + g = svd_pythag( f, 1.0 ); + f = ( ( x - z ) * ( x + z ) + h * ( ( y / ( f + SameSign( g, f ) ) ) - h ) ) / x; + c = 1.0; + s = 1.0; + for( j = l; j <= q; j++ ) + { + i = j + 1; + g = Temp(i); + y = diag(i); + h = s * g; + g = c * g; + z = svd_pythag( f, h ); + Temp(j) = z; + c = f / z; + s = h / z; + f = x * c + g * s; + g = g * c - x * s; + h = y * s; + y = y * c; + for( p = 0; p < n; p++ ) + { + x = R(j,p); + z = R(i,p); + R(j,p) = x * c + z * s; + R(i,p) = z * c - x * s; + } + z = svd_pythag( f, h ); + diag(j) = z; + if( z != 0.0 ) + { + z = 1.0 / z; + c = f * z; + s = h * z; + } + f = c * g + s * y; + x = c * y - s * g; + for( p = 0; p < m; p++ ) + { + y = Q(p,j); + z = Q(p,i); + Q(p,j) = y * c + z * s; + Q(p,i) = z * c - y * s; + } + } + Temp(l) = 0.0; + Temp(k) = f; + diag(k) = x; + } + } + + // Sort the singular values into descending order. 
+ + for( i = 0; i < n - 1; i++ ) + { + double biggest = diag(i); // Biggest singular value so far. + int bindex = i; // The row/col it occurred in. + for( j = i + 1; j < n; j++ ) + { + if( diag(j) > biggest ) + { + biggest = diag(j); + bindex = j; + } + } + if( bindex != i ) // Need to swap rows and columns. + { + Q.SwapCols( i, bindex ); // Swap columns in Q. + R.SwapRows( i, bindex ); // Swap rows in R. + diag.Swap ( i, bindex ); // Swap elements in diag. + } + } + + D = Diag( diag ); + return 1; + } + + + const Matrix &SVD::PseudoInverse( double epsilon ) + { + if( Null(P_) ) + { + Matrix D_Inverse( D_ ); + for( int i = 0; i < D_Inverse.Rows(); i++ ) + { + if( Abs( D_Inverse(i,i) ) > epsilon ) + D_Inverse(i,i) = 1.0 / D_Inverse(i,i); + else D_Inverse(i,i) = 0.0; + } + P_ = Q_ * D_Inverse * R_; + } + return P_; + } +}; diff --git a/src/nvtt/bc7/arvo/SVD.h b/src/nvtt/bc7/arvo/SVD.h new file mode 100644 index 0000000..d6bf850 --- /dev/null +++ b/src/nvtt/bc7/arvo/SVD.h @@ -0,0 +1,54 @@ +/*************************************************************************** +* SVD.h * +* * +* Singular Value Decomposition. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 08/22/2000 Split off from Matrix.h * +* arvo 06/28/1993 Rewritten from "Numerical Recipes" C-code. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 2000, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. 
* +* * +***************************************************************************/ +#ifndef __SVD_INCLUDED__ +#define __SVD_INCLUDED__ + +#include "Vector.h" +#include "Matrix.h" + +namespace ArvoMath { + + class SVD { + public: + SVD( ); + SVD( const SVD & ); // Copies the decomposition. + SVD( const Matrix & ); // Performs the decomposition. + ~SVD() {}; + const Matrix &Q( double epsilon = 0.0 ) const; + const Matrix &D( double epsilon = 0.0 ) const; + const Matrix &R( double epsilon = 0.0 ) const; + const Matrix &PseudoInverse( double epsilon = 0.0 ); + int Rank( double epsilon = 0.0 ) const; + void operator=( const Matrix & ); // Performs the decomposition. + private: + int Decompose( Matrix &Q, Matrix &D, Matrix &R ); + Matrix Q_; + Matrix D_; + Matrix R_; + Matrix P_; // Pseudo inverse. + int error; + }; +}; +#endif diff --git a/src/nvtt/bc7/arvo/SphTri.cpp b/src/nvtt/bc7/arvo/SphTri.cpp new file mode 100644 index 0000000..40de956 --- /dev/null +++ b/src/nvtt/bc7/arvo/SphTri.cpp @@ -0,0 +1,292 @@ +/*************************************************************************** +* SphTri.C * +* * +* This file defines the SphericalTriangle class definition, which * +* supports member functions for Monte Carlo sampling, point containment, * +* and other basic operations on spherical triangles. * +* * +* Changes: * +* 01/01/2000 arvo Added New_{Alpha,Beta,Gamma} methods. * +* 12/30/1999 arvo Added VecIrrad method for "Vector Irradiance". * +* 04/08/1995 arvo Further optimized sampling algorithm. * +* 10/11/1994 arvo Added analytic sampling algorithm. * +* 06/14/1994 arvo Initial implementation. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1995, 2000, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. 
See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#include +#include +#include "SphTri.h" +#include "form.h" +namespace ArvoMath { + /*-------------------------------------------------------------------------* + * Constructor * + * * + * Construct a spherical triangle from three (non-zero) vectors. The * + * vectors needn't be of unit length. * + * * + *-------------------------------------------------------------------------*/ + SphericalTriangle::SphericalTriangle( const Vec3 &A0, const Vec3 &B0, const Vec3 &C0 ) + { + Init( A0, B0, C0 ); + } + + /*-------------------------------------------------------------------------* + * Init * + * * + * Construct the spherical triange from three vertices. Assume that the * + * sphere is centered at the origin. The vectors A, B, and C need not * + * be normalized. * + * * + *-------------------------------------------------------------------------*/ + void SphericalTriangle::Init( const Vec3 &A0, const Vec3 &B0, const Vec3 &C0 ) + { + // Normalize the three vectors -- these are the vertices. + + A_ = Unit( A0 ); + B_ = Unit( B0 ); + C_ = Unit( C0 ); + + // Compute and save the cosines of the edge lengths. + + cos_a = B_ * C_; + cos_b = A_ * C_; + cos_c = A_ * B_; + + // Compute and save the edge lengths. + + a_ = ArcCos( cos_a ); + b_ = ArcCos( cos_b ); + c_ = ArcCos( cos_c ); + + // Compute the cosines of the internal (i.e. dihedral) angles. + + cos_alpha = CosDihedralAngle( C_, A_, B_ ); + cos_beta = CosDihedralAngle( A_, B_, C_ ); + cos_gamma = CosDihedralAngle( A_, C_, B_ ); + + // Compute the (dihedral) angles. 
+ + alpha = ArcCos( cos_alpha ); + beta = ArcCos( cos_beta ); + gamma = ArcCos( cos_gamma ); + + // Compute the solid angle of the spherical triangle. + + area = alpha + beta + gamma - Pi; + + // Compute the orientation of the triangle. + + orient = Sign( A_ * ( B_ ^ C_ ) ); + + // Initialize three variables that are used for sampling the triangle. + + U = Unit( C_ / A_ ); // In plane of AC orthogonal to A. + sin_alpha = sin( alpha ); + product = sin_alpha * cos_c; + } + + /*-------------------------------------------------------------------------* + * Init * + * * + * Initialize all fields. Create a null spherical triangle. * + * * + *-------------------------------------------------------------------------*/ + void SphericalTriangle::Init() + { + a_ = 0; A_ = 0; cos_alpha = 0; cos_a = 0; alpha = 0; + b_ = 0; B_ = 0; cos_beta = 0; cos_b = 0; beta = 0; + c_ = 0; C_ = 0; cos_gamma = 0; cos_c = 0; gamma = 0; + area = 0; + orient = 0; + sin_alpha = 0; + product = 0; + U = 0; + } + + /*-------------------------------------------------------------------------* + * "( A, B, C )" operator. * + * * + * Construct the spherical triange from three vertices. Assume that the * + * sphere is centered at the origin. The vectors A, B, and C need not * + * be normalized. * + * * + *-------------------------------------------------------------------------*/ + SphericalTriangle & SphericalTriangle::operator()( + const Vec3 &A0, + const Vec3 &B0, + const Vec3 &C0 ) + { + Init( A0, B0, C0 ); + return *this; + } + + /*-------------------------------------------------------------------------* + * Inside * + * * + * Determine if the vector W is inside the triangle. 
W need not be a * + * unit vector * + * * + *-------------------------------------------------------------------------*/ + int SphericalTriangle::Inside( const Vec3 &W ) const + { + Vec3 Z = Orient() * W; + if( Z * ( A() ^ B() ) < 0.0 ) return 0; + if( Z * ( B() ^ C() ) < 0.0 ) return 0; + if( Z * ( C() ^ A() ) < 0.0 ) return 0; + return 1; + } + + /*-------------------------------------------------------------------------* + * Chart * + * * + * Generate samples from the current spherical triangle. If x1 and x2 are * + * random variables uniformly distributed over [0,1], then the returned * + * points are uniformly distributed over the solid angle. * + * * + *-------------------------------------------------------------------------*/ + Vec3 SphericalTriangle::Chart( float x1, float x2 ) const + { + // Use one random variable to select the area of the sub-triangle. + // Save the sine and cosine of the angle phi. + + register float phi = x1 * area - Alpha(); + register float s = sin( phi ); + register float t = cos( phi ); + + // Compute the pair (u,v) that determines the new angle beta. + + register float u = t - cos_alpha; + register float v = s + product ; // sin_alpha * cos_c + + // Compute the cosine of the new edge b. + + float q = ( cos_alpha * ( v * t - u * s ) - v ) / + ( sin_alpha * ( u * t + v * s ) ); + + // Compute the third vertex of the sub-triangle. + + Vec3 C_new = q * A() + Sqrt( 1.0 - q * q ) * U; + + // Use the other random variable to select the height z. + + float z = 1.0 - x2 * ( 1.0 - C_new * B() ); + + // Construct the corresponding point on the sphere. + + Vec3 D = C_new / B(); // Remove B component of C_new. + return z * B() + Sqrt( ( 1.0 - z * z ) / ( D * D ) ) * D; + } + + /*-------------------------------------------------------------------------* + * Coord * + * * + * Compute the two coordinates (x1,x2) corresponding to a point in the * + * spherical triangle. This is the inverse of "Chart". 
* + * * + *-------------------------------------------------------------------------*/ + Vec2 SphericalTriangle::Coord( const Vec3 &P1 ) const + { + Vec3 P = Unit( P1 ); + + // Compute the new C vertex, which lies on the arc defined by B-P + // and the arc defined by A-C. + + Vec3 C_new = Unit( ( B() ^ P ) ^ ( C() ^ A() ) ); + + // Adjust the sign of C_new. Make sure it's on the arc between A and C. + + if( C_new * ( A() + C() ) < 0.0 ) C_new = -C_new; + + // Compute x1, the area of the sub-triangle over the original area. + + float cos_beta = CosDihedralAngle( A(), B(), C_new ); + float cos_gamma = CosDihedralAngle( A(), C_new , B() ); + float sub_area = Alpha() + acos( cos_beta ) + acos( cos_gamma ) - Pi; + float x1 = sub_area / SolidAngle(); + + // Now compute the second coordinate using the new C vertex. + + float z = P * B(); + float x2 = ( 1.0 - z ) / ( 1.0 - C_new * B() ); + + if( x1 < 0.0 ) x1 = 0.0; if( x1 > 1.0 ) x1 = 1.0; + if( x2 < 0.0 ) x2 = 0.0; if( x2 > 1.0 ) x2 = 1.0; + return Vec2( x1, x2 ); + } + + /*-------------------------------------------------------------------------* + * Dual * + * * + * Construct the dual triangle of the current triangle, which is another * + * spherical triangle. * + * * + *-------------------------------------------------------------------------*/ + SphericalTriangle SphericalTriangle::Dual() const + { + Vec3 dual_A = B() ^ C(); if( dual_A * A() < 0.0 ) dual_A *= -1.0; + Vec3 dual_B = A() ^ C(); if( dual_B * B() < 0.0 ) dual_B *= -1.0; + Vec3 dual_C = A() ^ B(); if( dual_C * C() < 0.0 ) dual_C *= -1.0; + return SphericalTriangle( dual_A, dual_B, dual_C ); + } + + /*-------------------------------------------------------------------------* + * VecIrrad * + * * + * Return the "vector irradiance" due to a light source of unit brightness * + * whose spherical projection is this spherical triangle. The negative of * + * this vector dotted with the surface normal gives the (scalar) * + * irradiance at the origin. 
*
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    Vec3 SphericalTriangle::VecIrrad() const
+    {
+        // Sum of the unit normals of the three edge planes, each weighted by
+        // the length of the corresponding edge.
+        Vec3 Phi =
+            a() * Unit( B() ^ C() ) +
+            b() * Unit( C() ^ A() ) +
+            c() * Unit( A() ^ B() ) ;
+        // NOTE(review): this negates Phi for every non-zero orientation.  If
+        // Sign() yields -1/0/+1 the flip does not depend on which way the
+        // triangle is wound -- confirm that this is the intended test.
+        if( Orient() ) Phi *= -1.0;
+        return Phi;
+    }
+
+    /*-------------------------------------------------------------------------*
+     * New_Alpha                                                               *
+     *                                                                         *
+     * Returns a new spherical triangle derived from the original one by       *
+     * moving the "C" vertex along the edge "BC" until the new "alpha" angle   *
+     * equals the given argument.                                              *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    SphericalTriangle SphericalTriangle::New_Alpha( float alpha ) const
+    {
+        // Note: the parameter "alpha" hides the data member of the same name
+        // inside this function.
+        Vec3 V1( A() ), V2( B() ), V3( C() );
+        Vec3 E1 = Unit( V2 ^ V1 );
+        Vec3 E2 = E1 ^ V1;
+        Vec3 G  = ( cos(alpha) * E1 ) + ( sin(alpha) * E2 );
+        Vec3 D  = Unit( V3 / V2 );
+        Vec3 C2 = ((G * D) * V2) - ((G * V2) * D);
+        // Flip the new vertex when the triple product of (A, B, C2) is
+        // positive.
+        if( Triple( V1, V2, C2 ) > 0.0 ) C2 *= -1.0;
+        return SphericalTriangle( V1, V2, C2 );
+    }
+
+    // Writes the three vertices of T to "out", one per line, under a
+    // "SphericalTriangle:" heading.
+    std::ostream &operator<<( std::ostream &out, const SphericalTriangle &T )
+    {
+        out << "SphericalTriangle:\n"
+            << " " << T.A() << "\n"
+            << " " << T.B() << "\n"
+            << " " << T.C() << std::endl;
+        return out;
+    }
+
+};
diff --git a/src/nvtt/bc7/arvo/SphTri.h b/src/nvtt/bc7/arvo/SphTri.h
new file mode 100644
index 0000000..7336dc7
--- /dev/null
+++ b/src/nvtt/bc7/arvo/SphTri.h
@@ -0,0 +1,124 @@
+/***************************************************************************
+* SphTri.h                                                                 *
+*                                                                          *
+* This file defines the SphericalTriangle class definition, which          *
+* supports member functions for Monte Carlo sampling, point containment,   *
+* and other basic operations on spherical triangles.                       *
+*                                                                          *
+* Changes:                                                                 *
+* 01/01/2000  arvo  Added New_{Alpha,Beta,Gamma} methods.                  *
+* 12/30/1999  arvo  Added VecIrrad method for "Vector Irradiance".         *
+* 04/08/1995  arvo  Further optimized sampling algorithm.                  *
+* 10/11/1994  arvo  Added analytic sampling algorithm.
* +* 06/14/1994 arvo Initial implementation. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1995, 2000, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#ifndef __SPHTRI_INCLUDED__ +#define __SPHTRI_INCLUDED__ + +#include "Vec3.h" +#include "Vec2.h" + +namespace ArvoMath { + + /* + * The (Oblique) Spherical Triangle ABC. Edge lengths (segments of great + * circles) are a, b, and c. The (dihedral) angles are Alpha, Beta, and Gamma. + * + * B + * o + * / \ + * / \ + * /Beta \ + * / \ + * c / \ a + * / \ + * / \ + * / \ + * / \ + * / \ + * /Alpha Gamma \ + * o-----------------------o + * A b C + * + */ + + class SphericalTriangle { + + public: // methods + SphericalTriangle() { Init(); } + SphericalTriangle( const SphericalTriangle &T ) { *this = T; } + SphericalTriangle( const Vec3 &, const Vec3 &, const Vec3 & ); + SphericalTriangle & operator()( const Vec3 &, const Vec3 &, const Vec3 & ); + ~SphericalTriangle( ) {} + void operator=( const SphericalTriangle &T ) { *this = T; } + Vec3 Chart ( float x, float y ) const; // Const-Jacobian map from square. + Vec2 Coord ( const Vec3 &P ) const; // Get 2D coords of a point. + int Orient( ) const { return orient; } + int Inside( const Vec3 & ) const; + float SolidAngle() const { return area; } + float SignedSolidAngle() const { return -orient * area; } // CC is pos. 
+ const Vec3 &A() const { return A_ ; } + const Vec3 &B() const { return B_ ; } + const Vec3 &C() const { return C_ ; } + float a() const { return a_ ; } + float b() const { return b_ ; } + float c() const { return c_ ; } + float Cos_a() const { return cos_a ; } + float Cos_b() const { return cos_b ; } + float Cos_c() const { return cos_c ; } + float Alpha() const { return alpha ; } + float Beta () const { return beta ; } + float Gamma() const { return gamma ; } + float CosAlpha() const { return cos_alpha; } + float CosBeta () const { return cos_beta ; } + float CosGamma() const { return cos_gamma; } + Vec3 VecIrrad() const; // Returns the vector irradiance. + SphericalTriangle Dual() const; + SphericalTriangle New_Alpha( float alpha ) const; + SphericalTriangle New_Beta ( float beta ) const; + SphericalTriangle New_Gamma( float gamma ) const; + + private: // methods + void Init( ); + void Init( const Vec3 &A, const Vec3 &B, const Vec3 &C ); + + private: // data + Vec3 A_, B_, C_, U; // The vertices (and a temp vector). + float a_, b_, c_; // The edge lengths. + float alpha, beta, gamma; // The angles. + float cos_a, cos_b, cos_c; + float cos_alpha, cos_beta, cos_gamma; + float area; + float sin_alpha, product; // Used in sampling algorithm. + int orient; // Orientation. 
+ }; + + inline double CosDihedralAngle( const Vec3 &A, const Vec3 &B, const Vec3 &C ) + { + float x = Unit( A ^ B ) * Unit( C ^ B ); + if( x < -1.0 ) x = -1.0; + if( x > 1.0 ) x = 1.0; + return x; + } + + inline double DihedralAngle( const Vec3 &A, const Vec3 &B, const Vec3 &C ) + { + return acos( CosDihedralAngle( A, B, C ) ); + } + + extern std::ostream &operator<<( std::ostream &out, const SphericalTriangle & ); +}; +#endif diff --git a/src/nvtt/bc7/arvo/Token.cpp b/src/nvtt/bc7/arvo/Token.cpp new file mode 100644 index 0000000..9575d92 --- /dev/null +++ b/src/nvtt/bc7/arvo/Token.cpp @@ -0,0 +1,913 @@ +/*************************************************************************** +* Token.h * +* * +* The Token class ecapsulates a lexical analyzer for C++-like syntax. * +* A token instance is associated with one or more text files, and * +* grabs C++ tokens from them sequentially. There are many member * +* functions designed to make parsing easy, such as "==" operators for * +* strings and characters, and automatic conversion of numeric tokens * +* into numeric values. * +* * +* Files can be nested via #include directives, and both styles of C++ * +* comments are supported. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 10/05/99 Fixed bug in TokFrame string allocation. * +* arvo 01/15/95 Added ifdef, ifndef, else, and endif. * +* arvo 02/13/94 Added Debug() member function. * +* arvo 01/22/94 Several sections rewritten. * +* arvo 06/19/93 Converted to C++ * +* arvo 07/15/89 Rewritten for scene description parser. * +* arvo 01/22/89 Initial coding. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1999, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. 
See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#include +#include +#include "Token.h" +#include "Char.h" + +namespace ArvoMath { + + FILE* Token::debug = NULL; // Static data member of Token class. + int Token::argc = 0; + char** Token::argv = NULL; + + typedef TokMacro *TokMacroPtr; + + static const int True = 1; + static const int False = 0; + static const int HashConst = 217; // Size of hash-table for macros. + + + TokFrame::TokFrame() + { + next = NULL; + source = NULL; + fname = NULL; + line = 0; + column = 0; + } + + TokFrame::~TokFrame() + { + if( fname != NULL ) delete[] fname; + if( source != NULL ) fclose( source ); + } + + void TokFrame::operator=( const TokFrame &frame ) + { + next = frame.next; + source = frame.source; + fname = strdup( frame.fname ); + line = frame.line; + column = frame.column; + } + + static int HashName( const char *str ) + { + static int prime[5] = { 7, 11, 17, 23, 3 }; + int k = 0; + int h = 0; + while( *str != NullChar ) + { + h += (*str++) * prime[k++]; + if( k == 5 ) k = 0; + } + if( h < 0 ) h = 0; // Check for overflow. 
+ return h % HashConst; + } + + TokMacro *Token::MacroLookup( const char *str ) const + { + if( table == NULL ) return NULL; + int i = HashName( str ); + for( TokMacro *m = table[i]; m != NULL; m = m->next ) + { + if( strcmp( str, m->macro ) == 0 ) return m; + } + return NULL; + } + + int Token::MacroReplace( char *str, int &length, TokType &type ) const + { + TokMacro *m = MacroLookup( str ); + if( m == NULL ) return 0; + strcpy( str, m->repl ); + length = strlen( str ); + type = m->type; + return 1; + } + + /*-------------------------------------------------------------------------* + * D e b u g P r i n t * + * * + * This routine is used to record the entire token stream in a file to * + * use as a debugging aid. It does not affect the action of the lexer; * + * it merely records a "shadow" copy of all the tokens that are read by * + * ANY Token instance. The data that is written to the file is * + * * + * * + * * + *-------------------------------------------------------------------------*/ + static void DebugPrint( const Token &tok, FILE *fp ) + { + fprintf( fp, "%3d %3d ", tok.Line(), tok.Column() ); + fprintf( fp, "%s " , tok.FileName() ); + fprintf( fp, "%s\n" , tok.Spelling() ); + fflush ( fp ); + } + + /*-------------------------------------------------------------------------* + * T o k e n (Constructors) * + * * + *-------------------------------------------------------------------------*/ + Token::Token( const char *file_name ) + { + Init(); + Open( file_name ); + } + + Token::Token( FILE *fp ) + { + Init(); + Open( fp ); + } + + Token::Token( ) + { + Init(); + } + + /*-------------------------------------------------------------------------* + * T o k e n (Destructor) * + * * + * Close all files and deletes all frames and paths. * + * * + *-------------------------------------------------------------------------*/ + Token::~Token( ) + { + // Don't try to delete "frame" as its a member of this class, not + // something that we've allocated. 
+ TokFrame *f = frame.next; + while( f != NULL ) + { + TokFrame *n = f->next; + delete f; + f = n; + } + ClearPaths(); + } + + /*-------------------------------------------------------------------------* + * O p e n * + * * + * Establish a new file to read from, either by name, or by pointer. * + * * + *-------------------------------------------------------------------------*/ + void Token::Open( const char *file_name ) + { + FILE *fp = fopen( file_name, "r" ); + if( fp == NULL ) return; + Open( fp ); + frame.fname = strdup( file_name ); + } + + void Token::Open( FILE *fp ) + { + frame.source = fp; + frame.line = 1; + frame.column = 0; + pushed = NullChar; + } + + /*-------------------------------------------------------------------------* + * O p e r a t o r == * + * * + * A token can be compared with a string, a single character, or a type. * + * * + *-------------------------------------------------------------------------*/ + int Token::operator==( const char *s ) const + { + const char *t = spelling; + if( case_sensitive ) + { + do { if( *s != *t ) return False; } + while( *s++ && *t++ ); + } + else + { + do { if( ToUpper(*s) != ToUpper(*t) ) return False; } + while( *s++ && *t++ ); + } + return True; + } + + int Token::operator==( char c ) const + { + if( length != 1 ) return False; + if( case_sensitive ) return spelling[0] == c; + else return ToUpper(spelling[0]) == ToUpper(c); + } + + int Token::operator==( TokType _type_ ) const + { + int match = 0; + switch( _type_ ) + { + case T_char : match = ( type == T_string && Len() == 1 ); break; + case T_numeric: match = ( type == T_integer || type == T_float ); break; + default : match = ( type == _type_ ); break; + } + return match; + } + + /*-------------------------------------------------------------------------* + * O p e r a t o r != * + * * + * Define negations of the three types of "==" tests. 
*
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    int Token::operator!=( const char *s ) const { return !( *this == s ); }
+    int Token::operator!=( char c ) const { return !( *this == c ); }
+    int Token::operator!=( TokType t ) const { return !( *this == t ); }
+
+    /*-------------------------------------------------------------------------*
+     * E r r o r                                                               *
+     *                                                                         *
+     * Print error message to "stderr" followed by optional "name".            *
+     * The message carries the current line and column.  This function         *
+     * does not return: it ends the program with exit status 1.                *
+     *                                                                         *
+     *-------------------------------------------------------------------------*/
+    void Token::Error( TokError error, const char *name )
+    {
+        // The messages are string literals, so the pointer has to be to
+        // const char; binding a literal to a plain "char *" has been
+        // ill-formed since C++11.
+        const char *s;
+        switch( error )
+        {
+            case T_malformed_float   : s = "malformed real number "; break;
+            case T_unterm_string     : s = "unterminated string "; break;
+            case T_unterm_comment    : s = "unterminated comment "; break;
+            case T_file_not_found    : s = "include file not found: "; break;
+            case T_unknown_directive : s = "unknown # directive "; break;
+            case T_string_expected   : s = "string expected "; break;
+            case T_putback_error     : s = "putback overflow "; break;
+            case T_name_too_long     : s = "file name is too long "; break;
+            case T_no_endif          : s = "#endif directive missing"; break;
+            case T_extra_endif       : s = "#endif with no #ifdef "; break;
+            case T_extra_else        : s = "#else with no #ifdef "; break;
+            default                  : s = "unknown error type "; break;
+        }
+        fprintf( stderr, "LEXICAL ERROR, line %d, column %d: %s",
+            frame.line, frame.column, s );
+        if( name == NULL )
+            fprintf( stderr, " \n" );
+        else fprintf( stderr, "%s\n", name );
+        exit( 1 );
+    }
+
+    /*-------------------------------------------------------------------------*
+     * G e t c                                                                 *
+     *                                                                         *
+     * This routine fetches one character at a time from the current file      *
+     * being read.  It is responsible for keeping track of the column number   *
+     * and for handling single characters that have been "put back".
* + * * + *-------------------------------------------------------------------------*/ + int Token::Getc( int &c ) + { + if( pushed != NullChar ) // Return the pushed character. + { + c = pushed; + pushed = NullChar; + } + else // Get a new character from the source file. + { + c = getc( frame.source ); + frame.column++; + } + return c; + } + + /*-------------------------------------------------------------------------* + * N o n W h i t e * + * * + * This routine implements a simple finite state machine that skips * + * white space and recognizes the two styles of comments used in C++. * + * It returns the first non-white character not part of a comment. * + * * + *-------------------------------------------------------------------------*/ + int Token::NonWhite( int &c ) + { +start_state: + Getc( c ); + if( c == Space ) goto start_state; + if( c == Tab ) goto start_state; + if( c == NewLine ) goto start_new_line; + if( c == Slash ) goto start_comment; + goto return_char; + +start_comment: + Getc( c ); + if( c == Star ) goto in_comment1; + if( c == Slash ) goto in_comment2; + Unget( c ); + c = Slash; + goto return_char; + +in_comment1: + Getc( c ); + if( c == Star ) goto end_comment1; + if( c == NewLine ) goto newline_in_comment; + if( c == EOF ) goto return_char; + goto in_comment1; + +end_comment1: + Getc( c ); + if( c == Slash ) goto start_state; + if( c == NewLine ) goto newline_in_comment; + if( c == EOF ) goto unterm_comment; + goto in_comment1; + +in_comment2: + Getc( c ); + if( c == NewLine ) goto start_new_line; + if( c == EOF ) goto return_char; + goto in_comment2; + +unterm_comment: + Error( T_unterm_comment ); + c = EOF; + goto return_char; + +start_new_line: + frame.line++; + frame.column = 0; + goto start_state; + +newline_in_comment: + frame.line++; + frame.column = 0; + goto in_comment1; + +return_char: + Tcolumn = frame.column; // This is where the token starts. 
+ return c; + } + + /*-------------------------------------------------------------------------* + * N e x t R a w T o k * + * * + *-------------------------------------------------------------------------*/ + int Token::NextRawTok( ) + { + static int Trans0[] = { 0, 1, 3, 3, 3 }; // Found a digit. + static int Trans1[] = { 5, 6, 4, 6, 7 }; // Found a sign. + static int Trans2[] = { 1, 6, 7, 6, 7 }; // Found decimal point. + static int Trans3[] = { 2, 2, 7, 6, 7 }; // Found an exponent. + static int Trans4[] = { 5, 6, 7, 6, 7 }; // Found something else. + char *tok = spelling; + int state; + int c; + + length = 0; + type = T_null; + + // Skip comments and whitespace. + + if( NonWhite( c ) == EOF ) goto endtok; + + // Is this the beginning of an identifier? If so, get the rest. + + if( isAlpha( c ) ) + { + type = T_ident; + do { + *tok++ = c; + length++; + if( Getc( c ) == EOF ) goto endtok; + } + while( isAlpha( c ) || isDigit( c ) || c == Underscore ); + Unget( c ); + goto endtok; + } + + // Is this the beginning of a number? + + else if( isDigit( c ) || c == Minus || c == Period ) + { + char c1 = c; + state = 0; + for(;;) + { + *tok++ = c; + length++; + switch( Getc( c ) ) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': state = Trans0[ state ]; break; + case '+': + case '-': state = Trans1[ state ]; break; + case '.': state = Trans2[ state ]; break; + case 'e': + case 'E': state = Trans3[ state ]; break; + default : state = Trans4[ state ]; break; + } + switch( state ) + { + case 5 : Unget( c ); + type = ( c1 == Period ) ? T_float : T_integer; + goto endtok; + case 6 : Unget( c ); type = T_float ; goto endtok; + case 7 : Error( T_malformed_float ) ; break; + default: continue; + } + } // for + } // if numeric + + // Is this the beginning of an operator? + + if( c == '*' || c == '>' || c == '<' || c == '+' || c == '-' || c == '!' 
) + { + char oldc = c; + type = T_other; + *tok++ = c; + length++; + if( Getc( c ) == EOF ) goto endtok; + if( c == oldc || c == EqualSign ) + { + *tok++ = c; + length++; + } + else Unget( c ); + goto endtok; + } + + // Is this the beginning of a string? + + else if( c == DoubleQuote ) + { + type = T_string; + while( Getc( c ) != EOF && length < MaxTokenLen ) + { + if( c == DoubleQuote ) goto endtok; + *tok++ = c; + length++; + } + Error( T_unterm_string ); + } + + // Is this the beginning of a "#" directive? + + else if( c == Hash ) + { + type = T_directive; + NonWhite( c ); + while( isAlpha( c ) ) + { + *tok++ = c; + length++; + Getc( c ); + } + Unget( c ); + goto endtok; + } + + // This must be a one-character token. + + else + { + *tok++ = c; + length = 1; + type = T_other; + } + +endtok: // Jump to here when token is completed. + + *tok = NullChar; // Terminate the string. + if( debug != NULL ) DebugPrint( *this, debug ); + + return length; + } + + /*-------------------------------------------------------------------------* + * N e x t T o k * + * * + *-------------------------------------------------------------------------*/ + int Token::NextTok( ) + { + NextRawTok(); + + // If the token is an identifier, see if it's a macro. + // If the macro substitution is null, get another token. + + if( type == T_ident ) + { + if( table != NULL ) + { + if( MacroReplace( spelling, length, type ) && debug != NULL ) + DebugPrint( *this, debug ); + } + if( type == T_nullmacro ) NextTok(); + } + return length; + } + + /*-------------------------------------------------------------------------* + * O p e r a t o r - - * + * * + * Puts back the last token found. Only one token can be put back. * + * * + *-------------------------------------------------------------------------*/ + Token & Token::operator--( ) // Put the last token back. + { + if( put_back ) Error( T_putback_error ); // Can only handle one putback. 
+ put_back = 1; + return *this; + } + + Token & Token::operator--( int ) // Postfix decrement. + { + fprintf( stderr, "Postfix decrement is not implemented for the Token class.\n" ); + return *this; + } + + /*-------------------------------------------------------------------------* + * H a n d l e D i r e c t i v e * + * * + * Directive beginning with "#" must be handled by the lexer, as they * + * determine the current source file via "#include", etc. * + * * + * Returns 1 if, after handling this directive, we now have the next * + * token. * + * * + *-------------------------------------------------------------------------*/ + int Token::HandleDirective( ) + { + FILE *fp; + char name[128]; + if( *this == "define" ) + { + NextRawTok(); + strcpy( tempbuff, Spelling() ); // This is the macro name. + int line = Line(); + NextRawTok(); + if( Line() == line ) + AddMacro( tempbuff, Spelling(), Type() ); + else + { + // If next token is on a different line; we went too far. + AddMacro( tempbuff, "", T_nullmacro ); + return 1; // Signal that we already have the next token. + } + } + else if( *this == "include" ) + { + NextRawTok(); + if( *this == "<" ) + { + GetName( name, sizeof(name) ); + PushFrame( ResolveName( name ), name ); + } + else if( type == T_string ) + { + fp = fopen( spelling, "r" ); + if( fp == NULL ) Error( T_file_not_found, spelling ); + else PushFrame( fp, spelling ); + } + else Error( T_string_expected ); + } + else if( *this == "ifdef" ) + { + NextRawTok(); + TokMacro *m = MacroLookup( Spelling() ); + if( m == NULL ) // Skip until else or endif. + { + while( *this != T_null ) + { + NextRawTok(); + if( *this != T_directive ) continue; + if( *this == "endif" ) break; + if( *this == "else" ) { if_nesting++; break; } // Like m != NULL. + } + if( *this == T_null ) Error( T_no_endif ); + return 0; // Ready to get the next token. 
+ } + else if_nesting++; + } + else if( *this == "ifndef" ) + { + NextRawTok(); + TokMacro *m = MacroLookup( Spelling() ); + if( m != NULL ) // Skip until else or endif. + { + while( *this != T_null ) + { + NextRawTok(); + if( *this != T_directive ) continue; + if( *this == "endif" ) break; + if( *this == "else" ) { if_nesting++; break; } // Like m == NULL. + } + if( *this == T_null ) Error( T_no_endif ); + return 0; // Ready to get the next token. + } + else if_nesting++; + } + else if( *this == "else" ) // Skip until #endif. + { + if( if_nesting == 0 ) Error( T_extra_else ); + while( *this != T_null ) + { + NextRawTok(); + if( *this == T_directive && *this == "endif" ) break; + } + if( *this == T_null ) Error( T_no_endif ); + if_nesting--; + return 0; // Ready to get next token. + } + else if( *this == "endif" ) + { + if( if_nesting == 0 ) Error( T_extra_endif ); + if_nesting--; + return 0; // Ready to get next token. + } + else if( *this == "error" ) + { + int line = Line(); + NextTok(); // Allow macro substitution. + if( Line() == line ) + { + fprintf( stderr, "(preprocessor, line %d) %s\n", line, Spelling() ); + return 0; // Ready to get next token. + } + else + { + // If next token is on a different line; we went too far. + fprintf( stderr, "(null preprocessor message, line %d)\n", line ); + return 1; // Signal that we already have the next token. + } + } + return 0; + } + + + /*-------------------------------------------------------------------------* + * O p e r a t o r + + * + * * + * Grab the next token from the current source file. If at end of file, * + * pick up where we left off in the previous file. If there is no * + * previous file, return "T_null". * + * * + *-------------------------------------------------------------------------*/ + Token & Token::operator++( ) + { + if( put_back ) + { + put_back = 0; + return *this; + } + + // If we've reached the end of an include file, pop the stack. 
+ + for(;;) + { + NextTok(); + if( type == T_directive ) + { + if( HandleDirective() ) break; + } + else if( type == T_null ) + { + fclose( frame.source ); + if( !PopFrame() ) break; + } + else break; // We have a real token. + } + + // Now fill in the value fields if the token is a number. + + switch( type ) + { + case T_integer : ivalue = atoi( spelling ); break; + case T_float : fvalue = atof( spelling ); break; + case T_null : if( if_nesting > 0 ) Error( T_no_endif ); break; + default : break; + } + + return *this; + } + + Token & Token::operator++( int ) + { + fprintf( stderr, "Postfix increment is not implemented for the Token class.\n" ); + return *this; + } + + /*-------------------------------------------------------------------------* + * T o k e n Push & Pop Frame * + * * + * These functions are used to create and destroy the context "frames" * + * that are used to handle nested files (via "include"). * + * * + *-------------------------------------------------------------------------*/ + void Token::PushFrame( FILE *fp, char *fname ) + { + // Create a copy of the current (top-level) frame. + + TokFrame *n = new TokFrame; + *n = frame; + + // Now overwrite the top-level frame with the new state. + + frame.next = n; + frame.source = fp; + frame.line = 1; + frame.column = 0; + frame.fname = strdup( fname ); + pushed = NullChar; + } + + int Token::PopFrame() + { + if( frame.next == NULL ) return 0; + TokFrame *old = frame.next; + frame = *old; + delete old; // Delete the frame that we just copied from. 
+ return 1; + } + + /*-------------------------------------------------------------------------* + * Miscellaneous Functions * + * * + *-------------------------------------------------------------------------*/ + void Token::Init() + { + case_sensitive = 1; + put_back = 0; + pushed = NullChar; + if_nesting = 0; + frame.source = NULL; + frame.next = NULL; + frame.fname = NULL; + first = NULL; + last = NULL; + table = NULL; + pushed = NullChar; + SearchArgs(); // Search command-line args for macro definitions. + } + + const char* Token::Spelling() const + { + return spelling; + } + + char Token::Char() const + { + return spelling[0]; + } + + const char* Token::FileName() const + { + static char *null_string = ""; + if( frame.fname == NULL ) return null_string; + else return frame.fname; + } + + float Token::Fvalue() const + { + float val = 0.0; + if( type == T_float ) val = fvalue; + if( type == T_integer ) val = ivalue; + return val; + } + + void Token::GetName( char *name, int max ) + { + int c; + for( int i = 1; i < max; i++ ) + { + if( NonWhite(c) == '>' ) + { + *name = NullChar; + return; + } + *name++ = c; + } + Error( T_name_too_long ); + } + + void Token::AddPath( const char *new_path ) + { + char *name = strdup( new_path ); + if( name == NULL ) return; + TokPath *p = new TokPath; + p->next = NULL; + p->path = name; + if( first == NULL ) first = p; + else last->next = p; + last = p; + } + + void Token::ClearPaths() + { + TokPath *p = first; + while( p != NULL ) + { + TokPath *q = p->next; + delete[] p->path; // delete the string. + delete p; // delete the path structure. 
+ p = q; + } + first = NULL; + last = NULL; + } + + FILE *Token::ResolveName( const char *name ) + { + char resolved[128]; + for( const TokPath *p = first; p != NULL; p = p->next ) + { + strcpy( resolved, p->path ); + strcat( resolved, "/" ); + strcat( resolved, name ); + FILE *fp = fopen( resolved, "r" ); + if( fp != NULL ) return fp; + } + Error( T_file_not_found, name ); + return NULL; + } + + void Token::CaseSensitive( int on_off = 1 ) + { + case_sensitive = on_off; + } + + void Token::Debug( FILE *fp ) + { + debug = fp; + } + + void Token::AddMacro( const char *macro, const char *repl, TokType t ) + { + if( table == NULL ) // Create and initialize the table. + { + table = new TokMacroPtr[ HashConst ]; + for( int j = 0; j < HashConst; j++ ) table[j] = NULL; + } + int i = HashName( macro ); + TokMacro *m = new TokMacro; + m->next = table[i]; + m->macro = strdup( macro ); + m->repl = strdup( repl ); + m->type = t; + table[i] = m; + } + + void Token::Args( int argc_, char *argv_[] ) + { + argc = argc_; // Set the static variables. + argv = argv_; + } + + void Token::SearchArgs( ) + { + TokType type = T_null; + for( int i = 1; i < argc; i++ ) + { + if( strcmp( argv[i], "-macro" ) == 0 ) + { + if( i+2 >= argc ) + { + fprintf( stderr, "(Token) ERROR macro argument(s) missing\n" ); + return; + } + char *macro = argv[i+1]; + char *repl = argv[i+2]; + if( isAlpha ( repl[0] ) ) type = T_ident ; else + if( isInteger( repl ) ) type = T_integer; else + type = T_float ; + AddMacro( macro, repl, type ); + i += 2; + } + } + } +}; diff --git a/src/nvtt/bc7/arvo/Token.h b/src/nvtt/bc7/arvo/Token.h new file mode 100644 index 0000000..eabdacc --- /dev/null +++ b/src/nvtt/bc7/arvo/Token.h @@ -0,0 +1,203 @@ +/*************************************************************************** +* Token.h * +* * +* The Token class ecapsulates a lexical analyzer for C++-like syntax. 
* +* A token instance is associated with one or more text files, and * +* grabs C++ tokens from them sequentially. There are many member * +* functions designed to make parsing easy, such as "==" operators for * +* strings and characters, and automatic conversion of numeric tokens * +* into numeric values. * +* * +* Files can be nested via #include directives, and both styles of C++ * +* comments are supported. * +* * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 10/05/99 Fixed bug in TokFrame string allocation. * +* arvo 01/15/95 Added ifdef, ifndef, else, and endif. * +* arvo 02/13/94 Added Debug() member function. * +* arvo 01/22/94 Several sections rewritten. * +* arvo 06/19/93 Converted to C++ * +* arvo 07/15/89 Rewritten for scene description parser. * +* arvo 01/22/89 Initial coding. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1999, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#ifndef __TOKEN_INCLUDED__ +#define __TOKEN_INCLUDED__ + +#include +#include + +namespace ArvoMath { + + const int MaxTokenLen = 128; + + typedef enum { + T_null, + T_char, // A string of length 1. + T_string, + T_integer, + T_float, + T_ident, + T_other, + T_numeric, // Either T_float or T_int (use with == operator). + T_directive, // Directives like #include are not returned to the user. 
// One lexing context: the stream being read, its name, and the current
// position in it.  Frames are chained through "next" so that nested
// sources can be stacked and later restored.
class TokFrame {
public:
    TokFrame();

    // Copying is delegated to operator=.  The members are given well-defined
    // values first so that the assignment never observes indeterminate data.
    // NOTE(review): operator= is defined outside this header; if it inspects
    // or releases the previous "fname", this initialization is what makes
    // the copy constructor safe -- confirm against its definition.
    TokFrame( const TokFrame &frame )
        : next( NULL ), source( NULL ), fname( NULL ), line( 0 ), column( 0 )
    {
        *this = frame;
    }

    ~TokFrame();
    void operator=( const TokFrame & );

public:
    TokFrame *next;     // Frame to return to, or NULL.
    FILE     *source;   // Stream this frame reads from.
    char     *fname;    // Name recorded for the stream; may be NULL.
    int       line;     // Current line number.
    int       column;   // Current column number.
};
+ + void Open( FILE * ); // Read already opened file. + void Open( const char * ); // Open the named file. + void CaseSensitive( int on_off ); // Applies to == and != operators. + void AddPath( const char * ); // Adds path for <...> includes. + void ClearPaths(); // Remove all search paths. + + // Miscellaneous. + + const char* Spelling() const; // The token itself. + const char* FileName() const; // Current file being lexed. + static void Debug( FILE * ); // Write all token streams to a file. + static void Args ( int argc, char *argv[] ); // Search args for macro settings. + void AddMacro( const char*, const char*, TokType type ); + void SearchArgs(); + + private: + + // Private member functions. + + void Init(); + int Getc ( int & ); + void Unget( int c ) { pushed = c; } + void Error( TokError error, const char *name = NULL ); + int NonWhite( int & ); + int HandleDirective(); + int NextRawTok(); // No macro substitutions. + int NextTok(); + void PushFrame( FILE *fp, char *fname = NULL ); + int PopFrame(); + void GetName( char *name, int max ); + FILE *ResolveName( const char *name ); + TokMacro *MacroLookup( const char *str ) const; + int MacroReplace( char *str, int &length, TokType &type ) const; + + // Private data members. + + TokPath *first; + TokPath *last; + TokMacro **table; + TokFrame frame; + TokType type; + long ivalue; + float fvalue; + int length; + int Tcolumn; + int put_back; + int case_sensitive; + int pushed; + int if_nesting; + char spelling[ MaxTokenLen ]; + char tempbuff[ MaxTokenLen ]; + + // Static data members. + + static int argc; + static char **argv; + static FILE *debug; + }; + + + // Predicate-style functions for testing token types. 
+ + inline int Null ( const Token &t ) { return t.Type() == T_null; } + inline int Numeric( const Token &t ) { return t.Type() == T_numeric; } + inline int StringP( const Token &t ) { return t.Type() == T_string; } +}; +#endif diff --git a/src/nvtt/bc7/arvo/Vec2.cpp b/src/nvtt/bc7/arvo/Vec2.cpp new file mode 100644 index 0000000..cca6723 --- /dev/null +++ b/src/nvtt/bc7/arvo/Vec2.cpp @@ -0,0 +1,94 @@ +/*************************************************************************** +* Vec2.C * +* * +* Basic operations on 2-dimensional vectors. This special case is useful * +* because nearly all operations are performed inline. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 05/22/98 Added TimedVec2, extending Vec2. * +* arvo 06/17/93 Initial coding. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1999, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#include +#include "ArvoMath.h" +#include "Vec2.h" +#include "form.h" + +namespace ArvoMath { + + const Vec2 Vec2::Zero; + const Vec2 Vec2::Xaxis( 1, 0 ); + const Vec2 Vec2::Yaxis( 0, 1 ); + + // Most routines are now inline. 
+ + float Normalize( Vec2 &A ) + { + float d = Len( A ); + if( d != 0.0 ) + { + A.X() /= d; + A.Y() /= d; + } + return d; + } + + Vec2 Min( const Vec2 &A, const Vec2 &B ) + { + return Vec2( Min( A.X(), B.X() ), Min( A.Y(), B.Y() ) ); + } + + Vec2 Max( const Vec2 &A, const Vec2 &B ) + { + return Vec2( Max( A.X(), B.X() ), Max( A.Y(), B.Y() ) ); + } + + std::ostream &operator<<( std::ostream &out, const Vec2 &A ) + { + out << form( " %9.5f %9.5f\n", A.X(), A.Y() ); + return out; + } + + std::ostream &operator<<( std::ostream &out, const Mat2x2 &M ) + { + out << form( " %9.5f %9.5f\n", M(0,0), M(0,1) ) + << form( " %9.5f %9.5f\n", M(1,0), M(1,1) ) + << std::endl; + return out; + } + + Mat2x2::Mat2x2( const Vec2 &c1, const Vec2 &c2 ) + { + m[0][0] = c1.X(); + m[1][0] = c1.Y(); + m[0][1] = c2.X(); + m[1][1] = c2.Y(); + } + + // Return solution x of the system Ax = b. + Vec2 Solve( const Mat2x2 &A, const Vec2 &b ) + { + float MachEps = MachineEpsilon(); + Vec2 x; + double d = det( A ); + double n = Norm1( A ); + if( n <= MachEps || Abs(d) <= MachEps * n ) return Vec2::Zero; + x.X() = A(1,1) * b.X() - A(0,1) * b.Y(); + x.Y() = -A(1,0) * b.X() + A(0,0) * b.Y(); + return x / d; + } +}; diff --git a/src/nvtt/bc7/arvo/Vec2.h b/src/nvtt/bc7/arvo/Vec2.h new file mode 100644 index 0000000..7aca458 --- /dev/null +++ b/src/nvtt/bc7/arvo/Vec2.h @@ -0,0 +1,358 @@ +/*************************************************************************** +* Vec2.h * +* * +* Basic operations on 2-dimensional vectors. This special case is useful * +* because nearly all operations are performed inline. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 05/22/98 Added TimedVec2, extending Vec2. * +* arvo 06/17/93 Initial coding. 
// A 2-D vector of floats with named (X, Y) and indexed ([0], [1]) access.
class Vec2 {
public:
    Vec2( ) : x( 0.0 ), y( 0.0 ) {}
    Vec2( float a, float b ) : x( a ), y( b ) {}
    Vec2( const Vec2 &A ) : x( A.X() ), y( A.Y() ) {}
    ~Vec2() {}

    // Assigning a scalar sets both components to it.
    Vec2 &operator=( float s ) { return Set( s, s ); }
    Vec2 &operator=( const Vec2 &A ) { return Set( A.X(), A.Y() ); }

    float  X() const { return x; }
    float  Y() const { return y; }
    float &X() { return x; }
    float &Y() { return y; }

    // Indexed access: 0 selects x, 1 selects y.  The component is chosen
    // explicitly instead of by pointer arithmetic from &x, which is not
    // defined across separate data members.  Only 0 and 1 are meaningful;
    // any other index is treated as 1 rather than reading outside the object.
    float  operator[]( int i ) const { return i == 0 ? x : y; }
    float &operator[]( int i ) { return i == 0 ? x : y; }

    Vec2 &Set( float a, float b ) { x = a; y = b; return *this; }
    Vec2 &Set( const Vec2 &A ) { return Set( A.X(), A.Y() ); }

public:
    static const Vec2 Zero;
    static const Vec2 Xaxis;
    static const Vec2 Yaxis;

protected:
    float x, y;
};
+ + class TimedVec2 : public Vec2 { + public: + TimedVec2() { time = 0; } + TimedVec2( const Vec2 &p , long u = 0 ) { Set( p ); time = u; } + TimedVec2( float x, float y, long u = 0 ) { Set(x,y); time = u; } + ~TimedVec2() {} + Vec2 &Coord() { return *this; } + Vec2 Coord() const { return *this; } + long Time () const { return time; } + void SetTime( long u ) { time = u; } + protected: + long time; + }; + + class Mat2x2 { + public: + Mat2x2( ) { Set( 0, 0, 0, 0 ); } + Mat2x2( float a, float b, float c, float d ) { Set( a, b, c, d ); } + Mat2x2( const Vec2 &c1, const Vec2 &c2 ); + ~Mat2x2( ) {} + Mat2x2 &operator*=( float scale ); + Mat2x2 operator* ( float scale ) const; + void Set( float a, float b, float c, float d ) + { m[0][0] = a; m[0][1] = b; m[1][0] = c; m[1][1] = d; } + float operator()( int i, int j ) const { return m[i][j]; } + float &operator()( int i, int j ) { return m[i][j]; } + private: + float m[2][2]; + }; + + + //========================================== + //=== Miscellaneous external functions === + //========================================== + + extern float Normalize( Vec2 &A ); + extern Vec2 Min ( const Vec2 &A, const Vec2 &B ); + extern Vec2 Max ( const Vec2 &A, const Vec2 &B ); + + + //========================================== + //=== Norm-related functions === + //========================================== + + inline double LenSqr ( const Vec2 &A ) { return Sqr(A[0]) + Sqr(A[1]); } + inline double Len ( const Vec2 &A ) { return sqrt( LenSqr( A ) ); } + inline double OneNorm( const Vec2 &A ) { return Abs( A.X() ) + Abs( A.Y() ); } + inline double TwoNorm( const Vec2 &A ) { return Len(A); } + inline float SupNorm( const Vec2 &A ) { return MaxAbs( A.X(), A.Y() ); } + + + //========================================== + //=== Addition === + //========================================== + + inline Vec2 operator+( const Vec2 &A, const Vec2 &B ) + { + return Vec2( A.X() + B.X(), A.Y() + B.Y() ); + } + + inline Vec2& operator+=( Vec2 &A, const Vec2 
&B ) + { + A.X() += B.X(); + A.Y() += B.Y(); + return A; + } + + + //========================================== + //=== Subtraction === + //========================================== + + inline Vec2 operator-( const Vec2 &A, const Vec2 &B ) + { + return Vec2( A.X() - B.X(), A.Y() - B.Y() ); + } + + inline Vec2 operator-( const Vec2 &A ) + { + return Vec2( -A.X(), -A.Y() ); + } + + inline Vec2& operator-=( Vec2 &A, const Vec2 &B ) + { + A.X() -= B.X(); + A.Y() -= B.Y(); + return A; + } + + + //========================================== + //=== Multiplication === + //========================================== + + inline Vec2 operator*( float c, const Vec2 &A ) + { + return Vec2( c * A.X(), c * A.Y() ); + } + + inline Vec2 operator*( const Vec2 &A, float c ) + { + return Vec2( c * A.X(), c * A.Y() ); + } + + inline float operator*( const Vec2 &A, const Vec2 &B ) // Inner product + { + return A.X() * B.X() + A.Y() * B.Y(); + } + + inline Vec2& operator*=( Vec2 &A, float c ) + { + A.X() *= c; + A.Y() *= c; + return A; + } + + //========================================== + //=== Division === + //========================================== + + inline Vec2 operator/( const Vec2 &A, float c ) + { + return Vec2( A.X() / c, A.Y() / c ); + } + + inline Vec2 operator/( const Vec2 &A, const Vec2 &B ) + { + return A - B * (( A * B ) / LenSqr( B )); + } + + + //========================================== + //=== Comparison === + //========================================== + + inline int operator==( const Vec2 &A, const Vec2 &B ) + { + return A.X() == B.X() && A.Y() == B.Y(); + } + + inline int operator!=( const Vec2 &A, const Vec2 &B ) + { + return A.X() != B.X() || A.Y() != B.Y(); + } + + inline int operator<=( const Vec2 &A, const Vec2 &B ) + { + return A.X() <= B.X() && A.Y() <= B.Y(); + } + + inline int operator<( const Vec2 &A, const Vec2 &B ) + { + return A.X() < B.X() && A.Y() < B.Y(); + } + + inline int operator>=( const Vec2 &A, const Vec2 &B ) + { + return A.X() >= B.X() 
&& A.Y() >= B.Y(); + } + + inline int operator>( const Vec2 &A, const Vec2 &B ) + { + return A.X() > B.X() && A.Y() > B.Y(); + } + + //========================================== + //=== Miscellaneous === + //========================================== + + inline float operator|( const Vec2 &A, const Vec2 &B ) // Inner product + { + return A * B; + } + + inline Vec2 Unit( const Vec2 &A ) + { + float c = LenSqr( A ); + if( c > 0.0 ) c = 1.0 / sqrt( c ); + return c * A; + } + + inline Vec2 Unit( const Vec2 &A, float &len ) + { + float c = LenSqr( A ); + if( c > 0.0 ) + { + len = sqrt( c ); + return A / len; + } + len = 0.0; + return A; + } + + inline Vec2 Unit( float x, float y ) + { + return Unit( Vec2( x, y ) ); + } + + inline double dist( const Vec2 &A, const Vec2 &B ) + { + return Len( A - B ); + } + + inline float operator^( const Vec2 &A, const Vec2 &B ) + { + return A.X() * B.Y() - A.Y() * B.X(); + } + + inline int Quadrant( const Vec2 &A ) + { + if( A.Y() >= 0.0 ) return A.X() >= 0.0 ? 1 : 2; + return A.X() >= 0.0 ? 4 : 3; + } + + inline Vec2 OrthogonalTo( const Vec2 &A ) // A vector orthogonal to that given. + { + return Vec2( -A.Y(), A.X() ); + } + + inline Vec2 Interpolate( const Vec2 &A, const Vec2 &B, float t ) + { + // Compute a point along the segment joining points A and B + // according to the normalized parameter t in [0,1]. + return ( 1.0 - t ) * A + t * B; + } + + //========================================== + //=== Operations involving Matrices === + //========================================== + + inline Mat2x2 Outer( const Vec2 &A, const Vec2 &B ) // Outer product. 
// Returns the larger of the two row sums of A, where each row sum adds the
// Abs() of the elements A(i,0) and A(i,1) of one row i.
// (Abs and Max are declared outside this header -- presumably the usual
// absolute value and maximum; confirm in ArvoMath.h.)
// NOTE(review): the induced matrix 1-norm is conventionally the maximum
// absolute *column* sum; what is computed here is the row-sum form, which
// is the infinity-norm.  Confirm that no caller relies on the distinction.
inline float Norm1( const Mat2x2 &A )
{
    return Max( Abs(A(0,0)) + Abs(A(0,1)), Abs(A(1,0)) + Abs(A(1,1)) );
}
* +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1994, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#include +#include +#include "ArvoMath.h" +#include "Vec3.h" +#include "form.h" + +namespace ArvoMath { + + float Normalize( Vec3 &A ) + { + float d = Len( A ); + if( d > 0.0 ) + { + double c = 1.0 / d; + A.X() *= c; + A.Y() *= c; + A.Z() *= c; + } + return( d ); + } + + double Angle( const Vec3 &A, const Vec3 &B ) + { + double t = LenSqr(A) * LenSqr(B); + if( t <= 0.0 ) return 0.0; + return ArcCos( (A * B) / sqrt(t) ); + } + + /*-------------------------------------------------------------------------* + * O R T H O N O R M A L * + * * + * On Input A, B....: Two linearly independent 3-space vectors. * + * * + * On Return A.......: Unit vector pointing in original A direction. * + * B.......: Unit vector orthogonal to A and in subspace spanned * + * by original A and B vectors. * + * C.......: Unit vector orthogonal to both A and B, chosen so * + * that A-B-C forms a right-handed coordinate system. 
* + * * + *-------------------------------------------------------------------------*/ + int Orthonormal( Vec3 &A, Vec3 &B, Vec3 &C ) + { + if( Normalize( A ) == 0.0 ) return 1; + B /= A; + if( Normalize( B ) == 0.0 ) return 1; + C = A ^ B; + return 0; + } + + int Orthonormal( Vec3 &A, Vec3 &B ) + { + if( Normalize( A ) == 0.0 ) return 1; + B /= A; + if( Normalize( B ) == 0.0 ) return 1; + return 0; + } + + /*-------------------------------------------------------------------------* + * O R T H O G O N A L T O * + * * + * Returns a vector that is orthogonal to A (but of arbitrary length). * + * * + *-------------------------------------------------------------------------*/ + Vec3 OrthogonalTo( const Vec3 &A ) + { + float c = 0.5 * SupNorm( A ); + if( c == 0.0 ) return Vec3( 1.0, 0.0, 0.0 ); + if( c <= Abs(A.X()) ) return Vec3( -A.Y(), A.X(), 0.0 ); + if( c <= Abs(A.Y()) ) return Vec3( 0.0, -A.Z(), A.Y() ); + return Vec3( A.Z(), 0.0, -A.X() ); + } + + Vec3 Min( const Vec3 &A, const Vec3 &B ) + { + return Vec3( + Min( A.X(), B.X() ), + Min( A.Y(), B.Y() ), + Min( A.Z(), B.Z() )); + } + + Vec3 Max( const Vec3 &A, const Vec3 &B ) + { + return Vec3( + Max( A.X(), B.X() ), + Max( A.Y(), B.Y() ), + Max( A.Z(), B.Z() )); + } + + std::ostream &operator<<( std::ostream &out, const Vec3 &A ) + { + out << form( " %9.5f %9.5f %9.5f", A.X(), A.Y(), A.Z() ) << std::endl; + return out; + } +}; diff --git a/src/nvtt/bc7/arvo/Vec3.h b/src/nvtt/bc7/arvo/Vec3.h new file mode 100644 index 0000000..b9d539f --- /dev/null +++ b/src/nvtt/bc7/arvo/Vec3.h @@ -0,0 +1,517 @@ +/*************************************************************************** +* Vec3.h * +* * +* Basic operations on 3-dimensional vectors. This special case is useful * +* because many operations are performed inline. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 10/27/94 Reorganized (removed Col & Row distinction). * +* arvo 06/14/93 Initial coding. 
* +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1994, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#ifndef __VEC3_INCLUDED__ +#define __VEC3_INCLUDED__ + +#include +#include +#include "Vec2.h" + +namespace ArvoMath { + + class Vec3 { + public: + Vec3( float c = 0.0 ) { x = c; y = c; z = c; } + Vec3( float a, float b, float c ) { x = a; y = b; z = c; } + Vec3( const Vec3 &A ) { x = A.X(); y = A.Y(); z = A.Z(); } + void operator=( float c ) { x = c; y = c; z = c; } + void operator=( const Vec3 &A ) { x = A.X(); y = A.Y(); z = A.Z(); } + void operator=( const Vec2 &A ) { x = A.X(); y = A.Y(); z = 0.0; } + ~Vec3() {} + float X() const { return x; } + float Y() const { return y; } + float Z() const { return z; } + float & X() { return x; } + float & Y() { return y; } + float & Z() { return z; } + float operator[]( int i ) const { return *( &x + i ); } + float & operator[]( int i ) { return *( &x + i ); } + private: + float x, y, z; + }; + + //class Mat3x3 { + //public: + // inline Mat3x3( ); + // Mat3x3( const Mat3x3 &M ) { *this = M; } + // Mat3x3( const Vec3 &, const Vec3 &, const Vec3 & ); // Three columns. 
+ // ~Mat3x3( ) {} + // float operator()( int i, int j ) const { return m[i][j]; } + // float & operator()( int i, int j ) { return m[i][j]; } + // Mat3x3 & operator=( float ); + // Mat3x3 & operator=( const Mat3x3 & ); + // inline void ScaleRows( float, float, float ); + // inline void ScaleCols( float, float, float ); + // void Col( int n, const Vec3 & ); + // const float *Base() const { return &(m[0][0]); } + //private: + // float m[3][3]; + //}; + + //class Mat4x4 { + //public: + // Mat4x4( ); + // Mat4x4( const Mat4x4 &M ) { *this = M; } + // Mat4x4( const Mat3x3 &M ) ; + // ~Mat4x4( ) {} + // float operator()( int i, int j ) const { return m[i][j]; } + // float & operator()( int i, int j ) { return m[i][j]; } + // Mat4x4 & operator=( float ); + // Mat4x4 & operator=( const Mat4x4 & ); + // void Row( int i, int j, const Vec3 & ); + // void Col( int i, int j, const Vec3 & ); + // void ScaleRows( float, float, float, float ); + // void ScaleCols( float, float, float, float ); + // const float *Base() const { return &(m[0][0]); } + //private: + // float m[4][4]; + //}; + + + //========================================== + //=== External operators === + //========================================== + + //extern Vec3 operator * ( const Mat4x4 &, const Vec3 & ); + //extern Vec3 operator * ( const Vec3 &, const Mat4x4 & ); + //extern Mat3x3 operator * ( float , const Mat3x3 & ); + //extern Mat3x3 operator * ( const Mat3x3 &, float ); + //extern Mat3x3 operator / ( const Mat3x3 &, double ); + //extern Mat3x3 & operator *=( Mat3x3 &, float ); + //extern Mat3x3 & operator *=( Mat3x3 &, const Mat3x3 & ); + //extern Mat3x3 operator * ( const Mat3x3 &, const Mat3x3 & ); + //extern Mat3x3 operator + ( const Mat3x3 &, const Mat3x3 & ); + //extern Mat3x3 & operator +=( Mat3x3 &, const Mat3x3 & ); + //extern Mat3x3 operator - ( const Mat3x3 &, const Mat3x3 & ); + //extern Mat3x3 & operator -=( Mat3x3 &, const Mat3x3 & ); + //extern Mat4x4 operator * ( float , const Mat4x4 & ); + 
//extern Mat4x4 operator * ( const Mat4x4 &, float ); + //extern Mat4x4 operator / ( const Mat4x4 &, float ); + //extern Mat4x4 & operator *=( Mat4x4 &, float ); + //extern Mat4x4 operator * ( const Mat4x4 &, const Mat4x4 & ); + //extern Mat4x4 operator + ( const Mat4x4 &, const Mat4x4 & ); + //extern Mat4x4 & operator +=( Mat4x4 &, const Mat4x4 & ); + //extern Mat4x4 operator - ( const Mat4x4 &, const Mat4x4 & ); + //extern Mat4x4 & operator -=( Mat4x4 &, const Mat4x4 & ); + + + //========================================== + //=== Miscellaneous external functions === + //========================================== + + //extern Vec3 OrthogonalTo( const Vec3 & ); // A vector orthogonal to that given. + //extern Vec3 Min ( const Vec3 &, const Vec3 & ); + //extern Vec3 Max ( const Vec3 &, const Vec3 & ); + //extern double Angle ( const Vec3 &, const Vec3 & ); + //extern int Orthonormal ( Vec3 &, Vec3 & ); + //extern int Orthonormal ( Vec3 &, Vec3 &, Vec3 & ); + //extern float Trace ( const Mat3x3 & ); + //extern float Normalize ( Vec3 & ); + //extern float Norm1 ( const Mat3x3 & ); + //extern float SupNorm ( const Mat3x3 & ); + //extern double Determinant ( const Mat3x3 & ); + //extern Mat3x3 Transp ( const Mat3x3 & ); + //extern Mat3x3 Householder ( const Vec3 &, const Vec3 & ); + //extern Mat3x3 Householder ( const Vec3 & ); + //extern Mat3x3 Rotation3x3 ( float, float, float ); // Values in [0,1]. 
+ //extern Mat3x3 Inverse ( const Mat3x3 & ); + //extern Mat3x3 Diag3x3 ( const Vec3 & ); + //extern Mat3x3 Diag3x3 ( float, float, float ); + //extern Mat3x3 Rotation3x3 ( const Vec3 &Axis, float angle ); + //extern Mat4x4 Rotation4x4 ( const Vec3 &Axis, const Vec3 &Origin, float angle ); + + + //========================================== + //=== Norm-related functions === + //========================================== + + inline double LenSqr ( const Vec3 &A ) { return Sqr(A[0]) + Sqr(A[1]) + Sqr(A[2]); } + inline double Len ( const Vec3 &A ) { return Sqrt( LenSqr( A ) ); } + inline double Norm1 ( const Vec3 &A ) { return Abs(A[0]) + Abs(A[1]) + Abs(A[2]); } + inline double Norm2 ( const Vec3 &A ) { return Len( A ); } + inline float SupNorm( const Vec3 &A ) { return MaxAbs( A[0], A[1], A[2] ); } + + + //========================================== + //=== Addition === + //========================================== + + inline Vec3 operator+( const Vec3 &A, const Vec3 &B ) + { + return Vec3( A.X() + B.X(), A.Y() + B.Y(), A.Z() + B.Z() ); + } + + inline Vec3& operator+=( Vec3 &A, const Vec3 &B ) + { + A.X() += B.X(); + A.Y() += B.Y(); + A.Z() += B.Z(); + return A; + } + + + //========================================== + //=== Subtraction === + //========================================== + + inline Vec3 operator-( const Vec3 &A, const Vec3 &B ) + { + return Vec3( A.X() - B.X(), A.Y() - B.Y(), A.Z() - B.Z() ); + } + + inline Vec3 operator-( const Vec3 &A ) + { + return Vec3( -A.X(), -A.Y(), -A.Z() ); + } + + inline Vec3& operator-=( Vec3 &A, const Vec3 &B ) + { + A.X() -= B.X(); + A.Y() -= B.Y(); + A.Z() -= B.Z(); + return A; + } + + + //========================================== + //=== Multiplication === + //========================================== + + inline Vec3 operator*( float a, const Vec3 &x ) + { + return Vec3( a * x.X(), a * x.Y(), a * x.Z() ); + } + + inline Vec3 operator*( const Vec3 &x, float a ) + { + return Vec3( a * x.X(), a * x.Y(), a * x.Z() ); + } 
+ + inline float operator*( const Vec3 &A, const Vec3 &B ) // Inner product. + { + return A.X() * B.X() + A.Y() * B.Y() + A.Z() * B.Z(); + } + + inline Vec3& operator*=( Vec3 &A, float a ) + { + A.X() *= a; + A.Y() *= a; + A.Z() *= a; + return A; + } + + //inline Vec3& operator*=( Vec3 &A, const Mat3x3 &M ) // A = M * A + //{ + // float x = M(0,0) * A.X() + M(0,1) * A.Y() + M(0,2) * A.Z(); + // float y = M(1,0) * A.X() + M(1,1) * A.Y() + M(1,2) * A.Z(); + // float z = M(2,0) * A.X() + M(2,1) * A.Y() + M(2,2) * A.Z(); + // A.X() = x; + // A.Y() = y; + // A.Z() = z; + // return A; + //} + + //inline Vec3& operator*=( Vec3 &A, const Mat4x4 &M ) // A = M * A + //{ + // float x = M(0,0) * A.X() + M(0,1) * A.Y() + M(0,2) * A.Z() + M(0,3); + // float y = M(1,0) * A.X() + M(1,1) * A.Y() + M(1,2) * A.Z() + M(1,3); + // float z = M(2,0) * A.X() + M(2,1) * A.Y() + M(2,2) * A.Z() + M(2,3); + // A.X() = x; + // A.Y() = y; + // A.Z() = z; + // return A; + //} + + + //========================================== + //=== Division === + //========================================== + + inline Vec3 operator/( const Vec3 &A, double c ) + { + double t = 1.0 / c; + return Vec3( A.X() * t, A.Y() * t, A.Z() * t ); + } + + inline Vec3& operator/=( Vec3 &A, double a ) + { + A.X() /= a; + A.Y() /= a; + A.Z() /= a; + return A; + } + + inline Vec3 operator/( const Vec3 &A, const Vec3 &B ) // Remove component parallel to B. + { + Vec3 C; // Cumbersome due to compiler failure. + double x = LenSqr( B ); + if( x > 0.0 ) C = A - B * (( A * B ) / x); else C = A; + return C; + } + + inline void operator/=( Vec3 &A, const Vec3 &B ) // Remove component parallel to B. + { + double x = LenSqr( B ); + if( x > 0.0 ) A -= B * (( A * B ) / x); + } + + + //========================================== + //=== Miscellaneous === + //========================================== + + inline float operator|( const Vec3 &A, const Vec3 &B ) // Inner product. 
+ { + return A * B; + } + + inline Vec3 Unit( const Vec3 &A ) + { + double d = LenSqr( A ); + return d > 0.0 ? A / sqrt(d) : Vec3(0,0,0); + } + + inline Vec3 Unit( float x, float y, float z ) + { + return Unit( Vec3( x, y, z ) ); + } + + inline Vec3 Ortho( const Vec3 &A, const Vec3 &B ) + { + return Unit( A / B ); + } + + inline int operator==( const Vec3 &A, float x ) + { + return (A[0] == x) && (A[1] == x) && (A[2] == x); + } + + inline Vec3 operator^( const Vec3 &A, const Vec3 &B ) + { + return Vec3( + A.Y() * B.Z() - A.Z() * B.Y(), + A.Z() * B.X() - A.X() * B.Z(), + A.X() * B.Y() - A.Y() * B.X() ); + } + + inline double dist( const Vec3 &A, const Vec3 &B ) + { + return Len( A - B ); + } + + inline double Dihedral( const Vec3 &A, const Vec3 &B, const Vec3 &C ) + { + return ArcCos( Unit( A ^ B ) * Unit( C ^ B ) ); + } + + inline Vec3 operator>>( const Vec3 &A, const Vec3 &B ) // Project A onto B. + { + Vec3 C; + double x = LenSqr( B ); + if( x > 0.0 ) C = B * (( A * B ) / x); + return C; + } + + inline Vec3 operator<<( const Vec3 &A, const Vec3 &B ) // Project B onto A. + { + return B >> A; + } + + inline double Triple( const Vec3 &A, const Vec3 &B, const Vec3 &C ) + { + return ( A ^ B ) * C; + } + + + //========================================== + //=== Operations involving Matrices === + //========================================== + + //inline Mat3x3 Outer( const Vec3 &A, const Vec3 &B ) // Outer product. 
+ //{ + // Mat3x3 C; + // C(0,0) = A.X() * B.X(); + // C(0,1) = A.X() * B.Y(); + // C(0,2) = A.X() * B.Z(); + // C(1,0) = A.Y() * B.X(); + // C(1,1) = A.Y() * B.Y(); + // C(1,2) = A.Y() * B.Z(); + // C(2,0) = A.Z() * B.X(); + // C(2,1) = A.Z() * B.Y(); + // C(2,2) = A.Z() * B.Z(); + // return C; + //} + + //inline Vec3 operator*( const Mat3x3 &M, const Vec3 &A ) + //{ + // return Vec3( + // M(0,0) * A[0] + M(0,1) * A[1] + M(0,2) * A[2], + // M(1,0) * A[0] + M(1,1) * A[1] + M(1,2) * A[2], + // M(2,0) * A[0] + M(2,1) * A[1] + M(2,2) * A[2]); + //} + + //inline Vec3 operator*( const Vec3 &A, const Mat3x3 &M ) + //{ + // return Vec3( + // A[0] * M(0,0) + A[1] * M(1,0) + A[2] * M(2,0), + // A[0] * M(0,1) + A[1] * M(1,1) + A[2] * M(2,1), + // A[0] * M(0,2) + A[1] * M(1,2) + A[2] * M(2,2)); + //} + + ////========================================== + ////=== Operations on Matrices === + ////========================================== + + //inline Mat3x3 operator+( const Mat3x3 &A, const Mat3x3 &B ) + //{ + // Mat3x3 C; + // C(0,0) = A(0,0) + B(0,0); C(0,1) = A(0,1) + B(0,1); C(0,2) = A(0,2) + B(0,2); + // C(1,0) = A(1,0) + B(1,0); C(1,1) = A(1,1) + B(1,1); C(1,2) = A(1,2) + B(1,2); + // C(2,0) = A(2,0) + B(2,0); C(2,1) = A(2,1) + B(2,1); C(2,2) = A(2,2) + B(2,2); + // return C; + //} + + //inline Mat3x3 operator-( const Mat3x3 &A, const Mat3x3 &B ) + //{ + // Mat3x3 C; + // C(0,0) = A(0,0) - B(0,0); C(0,1) = A(0,1) - B(0,1); C(0,2) = A(0,2) - B(0,2); + // C(1,0) = A(1,0) - B(1,0); C(1,1) = A(1,1) - B(1,1); C(1,2) = A(1,2) - B(1,2); + // C(2,0) = A(2,0) - B(2,0); C(2,1) = A(2,1) - B(2,1); C(2,2) = A(2,2) - B(2,2); + // return C; + //} + + //inline Mat3x3 operator*( const Mat3x3 &A, const Mat3x3 &B ) + //{ + // Mat3x3 C; + // C(0,0) = A(0,0) * B(0,0) + A(0,1) * B(1,0) + A(0,2) * B(2,0); + // C(0,1) = A(0,0) * B(0,1) + A(0,1) * B(1,1) + A(0,2) * B(2,1); + // C(0,2) = A(0,0) * B(0,2) + A(0,1) * B(1,2) + A(0,2) * B(2,2); + // C(1,0) = A(1,0) * B(0,0) + A(1,1) * B(1,0) + A(1,2) * 
B(2,0); + // C(1,1) = A(1,0) * B(0,1) + A(1,1) * B(1,1) + A(1,2) * B(2,1); + // C(1,2) = A(1,0) * B(0,2) + A(1,1) * B(1,2) + A(1,2) * B(2,2); + // C(2,0) = A(2,0) * B(0,0) + A(2,1) * B(1,0) + A(2,2) * B(2,0); + // C(2,1) = A(2,0) * B(0,1) + A(2,1) * B(1,1) + A(2,2) * B(2,1); + // C(2,2) = A(2,0) * B(0,2) + A(2,1) * B(1,2) + A(2,2) * B(2,2); + // return C; + //} + + //inline void Mat3x3::ScaleRows( float a, float b, float c ) + //{ + // m[0][0] *= a; m[0][1] *= a; m[0][2] *= a; + // m[1][0] *= b; m[1][1] *= b; m[1][2] *= b; + // m[2][0] *= c; m[2][1] *= c; m[2][2] *= c; + //} + + //inline void Mat3x3::ScaleCols( float a, float b, float c ) + //{ + // m[0][0] *= a; m[0][1] *= b; m[0][2] *= c; + // m[1][0] *= a; m[1][1] *= b; m[1][2] *= c; + // m[2][0] *= a; m[2][1] *= b; m[2][2] *= c; + //} + + + //========================================== + //=== Special Matrices === + //========================================== + + //inline Mat3x3::Mat3x3() + //{ + // m[0][0] = 0; m[0][1] = 0; m[0][2] = 0; + // m[1][0] = 0; m[1][1] = 0; m[1][2] = 0; + // m[2][0] = 0; m[2][1] = 0; m[2][2] = 0; + //} + + //inline Mat3x3 Ident3x3() + //{ + // Mat3x3 I; + // I(0,0) = 1.0; + // I(1,1) = 1.0; + // I(2,2) = 1.0; + // return I; + //} + + //inline Mat4x4 Ident4x4() + //{ + // Mat4x4 I; + // I(0,0) = 1.0; + // I(1,1) = 1.0; + // I(2,2) = 1.0; + // I(3,3) = 1.0; + // return I; + //} + + //inline void Adjoint( const Mat3x3 &M, Mat3x3 &A ) + //{ + // A(0,0) = M(1,1) * M(2,2) - M(1,2) * M(2,1); + // A(0,1) = M(1,2) * M(2,0) - M(1,0) * M(2,2); + // A(0,2) = M(1,0) * M(2,1) - M(1,1) * M(2,0); + + // A(1,0) = M(0,2) * M(2,1) - M(0,1) * M(2,2); + // A(1,1) = M(0,0) * M(2,2) - M(0,2) * M(2,0); + // A(1,2) = M(0,1) * M(2,0) - M(0,0) * M(2,1); + + // A(2,0) = M(0,1) * M(1,2) - M(0,2) * M(1,1); + // A(2,1) = M(0,2) * M(1,0) - M(0,0) * M(1,2); + // A(2,2) = M(0,0) * M(1,1) - M(0,1) * M(1,0); + //} + + //inline void TranspAdjoint( const Mat3x3 &M, Mat3x3 &A ) + //{ + // A(0,0) = M(1,1) * M(2,2) - M(1,2) 
* M(2,1); + // A(1,0) = M(1,2) * M(2,0) - M(1,0) * M(2,2); + // A(2,0) = M(1,0) * M(2,1) - M(1,1) * M(2,0); + + // A(0,1) = M(0,2) * M(2,1) - M(0,1) * M(2,2); + // A(1,1) = M(0,0) * M(2,2) - M(0,2) * M(2,0); + // A(2,1) = M(0,1) * M(2,0) - M(0,0) * M(2,1); + + // A(0,2) = M(0,1) * M(1,2) - M(0,2) * M(1,1); + // A(1,2) = M(0,2) * M(1,0) - M(0,0) * M(1,2); + // A(2,2) = M(0,0) * M(1,1) - M(0,1) * M(1,0); + //} + + //inline void Adjoint( const Mat3x3 &M, Mat3x3 &A, double &det ) + //{ + // Adjoint( M, A ); + // det = A(0,0) * M(0,0) + A(1,0) * M(1,0) + A(2,0) * M(2,0); + //} + + //inline void TranspAdjoint( const Mat3x3 &M, Mat3x3 &A, double &det ) + //{ + // TranspAdjoint( M, A ); + // det = A(0,0) * M(0,0) + A(0,1) * M(1,0) + A(0,2) * M(2,0); + //} + + + //========================================== + //=== Output routines === + //========================================== + + extern std::ostream &operator<<( std::ostream &out, const Vec3 & ); + //extern std::ostream &operator<<( std::ostream &out, const Mat3x3 & ); + //extern std::ostream &operator<<( std::ostream &out, const Mat4x4 & ); +}; +#endif diff --git a/src/nvtt/bc7/arvo/Vec4.cpp b/src/nvtt/bc7/arvo/Vec4.cpp new file mode 100644 index 0000000..286a203 --- /dev/null +++ b/src/nvtt/bc7/arvo/Vec4.cpp @@ -0,0 +1,79 @@ +/*************************************************************************** +* Vec4.C * +* * +* Basic operations on 4-dimensional vectors. This special case is useful * +* because many operations are performed inline. * +* * +* HISTORY * +* Name Date Description * +* * +* walt 6/26/07 Edited Vec3 to make this new class * +* arvo 10/27/94 Reorganized (removed Col & Row distinction). * +* arvo 06/14/93 Initial coding. 
* +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1994, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#include +#include +#include "ArvoMath.h" +#include "Vec4.h" +#include "form.h" + +namespace ArvoMath { + + float Normalize( Vec4 &A ) + { + float d = Len( A ); + if( d > 0.0 ) + { + double c = 1.0 / d; + A.X() *= c; + A.Y() *= c; + A.Z() *= c; + A.W() *= c; + } + return( d ); + } + + double Angle( const Vec4 &A, const Vec4 &B ) + { + double t = LenSqr(A) * LenSqr(B); + if( t <= 0.0 ) return 0.0; + return ArcCos( (A * B) / sqrt(t) ); + } + + Vec4 Min( const Vec4 &A, const Vec4 &B ) + { + return Vec4( + Min( A.X(), B.X() ), + Min( A.Y(), B.Y() ), + Min( A.Z(), B.Z() ), + Min( A.W(), B.W() ) ); + } + + Vec4 Max( const Vec4 &A, const Vec4 &B ) + { + return Vec4( + Max( A.X(), B.X() ), + Max( A.Y(), B.Y() ), + Max( A.Z(), B.Z() ), + Max( A.W(), B.W() ) ); + } + + std::ostream &operator<<( std::ostream &out, const Vec4 &A ) + { + out << form( " %9.5f %9.5f %9.5f %9.5f", A.X(), A.Y(), A.Z(), A.W() ) << std::endl; + return out; + } +}; diff --git a/src/nvtt/bc7/arvo/Vec4.h b/src/nvtt/bc7/arvo/Vec4.h new file mode 100644 index 0000000..efe1f3f --- /dev/null +++ b/src/nvtt/bc7/arvo/Vec4.h @@ -0,0 +1,238 @@ +/*************************************************************************** +* Vec4.h * +* * +* Basic operations on 4-dimensional vectors. 
This special case is useful * +* because many operations are performed inline. * +* * +* HISTORY * +* Name Date Description * +* * +* walt 6/26/07 Edited Vec3 to make this new class * +* arvo 10/27/94 Reorganized (removed Col & Row distinction). * +* arvo 06/14/93 Initial coding. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 1994, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#ifndef __Vec4_INCLUDED__ +#define __Vec4_INCLUDED__ + +#include +#include +#include "Vec2.h" +#include "Vec3.h" + +namespace ArvoMath { + + class Vec4 { + public: + Vec4( float c = 0.0 ) { x = c; y = c; z = c; w = c; } + Vec4( float a, float b, float c, float d ) { x = a; y = b; z = c; w = d; } + Vec4( const Vec4 &A ) { x = A.X(); y = A.Y(); z = A.Z(); w = A.W(); } + Vec4( const Vec3 &A, float d ) { x = A.X(); y = A.Y(); z = A.Z(); w = d; } + void operator=( float c ) { x = c; y = c; z = c; w = c; } + void operator=( const Vec4 &A ) { x = A.X(); y = A.Y(); z = A.Z(); w = A.W(); } + void operator=( const Vec3 &A ) { x = A.X(); y = A.Y(); z = A.Z(); w = 0.0; } + void operator=( const Vec2 &A ) { x = A.X(); y = A.Y(); z = 0.0; w = 0.0; } + ~Vec4() {} + float X() const { return x; } + float Y() const { return y; } + float Z() const { return z; } + float W() const { return w; } + float & X() { return x; } + float & Y() { return y; } + float & Z() { return z; } + float & W() { return w; } + float operator[]( int i ) const { return *( 
&x + i ); } + float & operator[]( int i ) { return *( &x + i ); } + private: + float x, y, z, w; + }; + + //========================================== + //=== Norm-related functions === + //========================================== + + inline double LenSqr ( const Vec4 &A ) { return Sqr(A[0]) + Sqr(A[1]) + Sqr(A[2]) + Sqr(A[3]); } + inline double Len ( const Vec4 &A ) { return Sqrt( LenSqr( A ) ); } + inline double Norm1 ( const Vec4 &A ) { return Abs(A[0]) + Abs(A[1]) + Abs(A[2]) + Abs(A[3]); } + inline double Norm2 ( const Vec4 &A ) { return Len( A ); } + inline float SupNorm( const Vec4 &A ) { return MaxAbs( A[0], A[1], A[2], A[3] ); } + + + //========================================== + //=== Addition === + //========================================== + + inline Vec4 operator+( const Vec4 &A, const Vec4 &B ) + { + return Vec4( A.X() + B.X(), A.Y() + B.Y(), A.Z() + B.Z(), A.W() + B.W() ); + } + + inline Vec4& operator+=( Vec4 &A, const Vec4 &B ) + { + A.X() += B.X(); + A.Y() += B.Y(); + A.Z() += B.Z(); + A.W() += B.W(); + return A; + } + + + //========================================== + //=== Subtraction === + //========================================== + + inline Vec4 operator-( const Vec4 &A, const Vec4 &B ) + { + return Vec4( A.X() - B.X(), A.Y() - B.Y(), A.Z() - B.Z(), A.W() - B.W()); + } + + inline Vec4 operator-( const Vec4 &A ) + { + return Vec4( -A.X(), -A.Y(), -A.Z(), -A.W() ); + } + + inline Vec4& operator-=( Vec4 &A, const Vec4 &B ) + { + A.X() -= B.X(); + A.Y() -= B.Y(); + A.Z() -= B.Z(); + A.W() -= B.W(); + return A; + } + + + //========================================== + //=== Multiplication === + //========================================== + + inline Vec4 operator*( float a, const Vec4 &x ) + { + return Vec4( a * x.X(), a * x.Y(), a * x.Z(), a * x.W() ); + } + + inline Vec4 operator*( const Vec4 &x, float a ) + { + return Vec4( a * x.X(), a * x.Y(), a * x.Z(), a * x.W() ); + } + + inline float operator*( const Vec4 &A, const Vec4 &B ) // 
Inner product. + { + return A.X() * B.X() + A.Y() * B.Y() + A.Z() * B.Z() + A.W() * B.W(); + } + + inline Vec4& operator*=( Vec4 &A, float a ) + { + A.X() *= a; + A.Y() *= a; + A.Z() *= a; + A.W() *= a; + return A; + } + + //========================================== + //=== Division === + //========================================== + + inline Vec4 operator/( const Vec4 &A, double c ) + { + double t = 1.0 / c; + return Vec4( A.X() * t, A.Y() * t, A.Z() * t, A.W() * t); + } + + inline Vec4& operator/=( Vec4 &A, double a ) + { + A.X() /= a; + A.Y() /= a; + A.Z() /= a; + A.W() /= a; + return A; + } + + inline Vec4 operator/( const Vec4 &A, const Vec4 &B ) // Remove component parallel to B. + { + Vec4 C; // Cumbersome due to compiler failure. + double x = LenSqr( B ); + if( x > 0.0 ) C = A - B * (( A * B ) / x); else C = A; + return C; + } + + inline void operator/=( Vec4 &A, const Vec4 &B ) // Remove component parallel to B. + { + double x = LenSqr( B ); + if( x > 0.0 ) A -= B * (( A * B ) / x); + } + + + //========================================== + //=== Miscellaneous === + //========================================== + + inline float operator|( const Vec4 &A, const Vec4 &B ) // Inner product. + { + return A * B; + } + + inline Vec4 Unit( const Vec4 &A ) + { + double d = LenSqr( A ); + return d > 0.0 ? 
A / sqrt(d) : Vec4(0,0,0,0); + } + + inline Vec4 Unit( float x, float y, float z, float w ) + { + return Unit( Vec4( x, y, z, w ) ); + } + + inline Vec4 Ortho( const Vec4 &A, const Vec4 &B ) + { + return Unit( A / B ); + } + + inline int operator==( const Vec4 &A, float x ) + { + return (A[0] == x) && (A[1] == x) && (A[2] == x) && (A[3] == x); + } + +// inline Vec4 operator^( const Vec4 &A, const Vec4 &B ) there is no 4D "cross product" of 2 4D vectors -- we need six dimensions + + inline double dist( const Vec4 &A, const Vec4 &B ) + { + return Len( A - B ); + } + +// inline double Dihedral( const Vec4 &A, const Vec4 &B, const Vec4 &C ) + + inline Vec4 operator>>( const Vec4 &A, const Vec4 &B ) // Project A onto B. + { + Vec4 C; + double x = LenSqr( B ); + if( x > 0.0 ) C = B * (( A * B ) / x); + return C; + } + + inline Vec4 operator<<( const Vec4 &A, const Vec4 &B ) // Project B onto A. + { + return B >> A; + } + +// inline double Triple( const Vec4 &A, const Vec4 &B, const Vec4 &C ) + + //========================================== + //=== Output routines === + //========================================== + + extern std::ostream &operator<<( std::ostream &out, const Vec4 & ); +}; +#endif diff --git a/src/nvtt/bc7/arvo/Vector.cpp b/src/nvtt/bc7/arvo/Vector.cpp new file mode 100644 index 0000000..af3bc11 --- /dev/null +++ b/src/nvtt/bc7/arvo/Vector.cpp @@ -0,0 +1,366 @@ +/*************************************************************************** +* Vector.C * +* * +* General Vector and Matrix classes, with all the associated methods. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 08/16/2000 Revamped for CIT tools. * +* arvo 10/31/1994 Combined RowVec & ColVec into Vector. * +* arvo 06/30/1993 Added singular value decomposition class. * +* arvo 06/25/1993 Major revisions. * +* arvo 09/08/1991 Initial implementation. 
* +* * +*--------------------------------------------------------------------------* +* Copyright (C) 2000, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. * +* * +***************************************************************************/ +#include +#include +#include "ArvoMath.h" +#include "Vector.h" +#include "form.h" + +namespace ArvoMath { + + const Vector Vector::Null(0); + + /*-------------------------------------------------------------------------* + * * + * C O N S T R U C T O R S * + * * + *-------------------------------------------------------------------------*/ + Vector::Vector( const float *x, int n ) + { + Create( n ); + for( register int i = 0; i < size; i++ ) elem[i] = x[i]; + } + + Vector::Vector( const Vector &A ) + { + Create( A.Size() ); + for( register int i = 0; i < A.Size(); i++ ) elem[i] = A(i); + } + + Vector::Vector( int n ) + { + Create( n ); + for( register int i = 0; i < n; i++ ) elem[i] = 0.0; + } + + Vector::Vector( float x, float y ) + { + Create( 2 ); + elem[0] = x; + elem[1] = y; + } + + Vector::Vector( float x, float y, float z ) + { + Create( 3 ); + elem[0] = x; + elem[1] = y; + elem[2] = z; + } + + void Vector::SetSize( int new_size ) + { + if( size != new_size ) + { + delete[] elem; + Create( new_size ); + for( register int i = 0; i < new_size; i++ ) elem[i] = 0.0; + } + } + + Vector &Vector::Swap( int i, int j ) + { + float temp = elem[i]; + elem[i] = elem[j]; + elem[j] = temp; + return *this; + } + + Vector Vector::GetBlock( int i, int j ) const + { + assert( 0 <= i && i <= j && j < size ); + int n 
= j - i + 1; + Vector V( n ); + register float *v = V.Array(); + register float *e = elem + i; + for( register int k = 0; k < n; k++ ) *v++ = *e++; + return V; + } + + void Vector::SetBlock( int i, int j, const Vector &V ) + { + assert( 0 <= i && i <= j && j < size ); + int n = j - i + 1; + assert( n == V.Size() ); + register float *v = V.Array(); + register float *e = elem + i; + for( register int k = 0; k < n; k++ ) *e++ = *v++; + } + + /*-------------------------------------------------------------------------* + * * + * O P E R A T O R S * + * * + *-------------------------------------------------------------------------*/ + double operator*( const Vector &A, const Vector &B ) + { + assert( A.Size() == B.Size() ); + double sum = A(0) * B(0); + for( register int i = 1; i < A.Size(); i++ ) sum += A(i) * B(i); + return sum; + } + + void Vector::operator=( float c ) + { + for( register int i = 0; i < size; i++ ) elem[i] = c; + } + + Vector operator*( const Vector &A, float s ) + { + Vector C( A.Size() ); + for( register int i = 0; i < A.Size(); i++ ) C(i) = A(i) * s; + return C; + } + + Vector operator*( float s, const Vector &A ) + { + Vector C( A.Size() ); + for( register int i = 0; i < A.Size(); i++ ) C(i) = A(i) * s; + return C; + } + + Vector operator/( const Vector &A, float s ) + { + assert( s != 0.0 ); + Vector C( A.Size() ); + for( register int i = 0; i < A.Size(); i++ ) C(i) = A(i) / s; + return C; + } + + Vector& operator+=( Vector &A, const Vector &B ) + { + assert( A.Size() == B.Size() ); + for( register int i = 0; i < A.Size(); i++ ) A(i) += B(i); + return A; + } + + Vector& operator*=( Vector &A, float scale ) + { + for( register int i = 0; i < A.Size(); i++ ) A(i) *= scale; + return A; + } + + Vector& operator/=( Vector &A, float scale ) + { + for( register int i = 0; i < A.Size(); i++ ) A(i) /= scale; + return A; + } + + Vector& Vector::operator=( const Vector &A ) + { + SetSize( A.Size() ); + for( register int i = 0; i < size; i++ ) elem[i] = 
A(i); + return *this; + } + + Vector operator+( const Vector &A, const Vector &B ) + { + assert( A.Size() == B.Size() ); + Vector C( A.Size() ); + for( register int i = 0; i < A.Size(); i++ ) C(i) = A(i) + B(i); + return C; + } + + Vector operator-( const Vector &A, const Vector &B ) + { + assert( A.Size() == B.Size() ); + Vector C( A.Size() ); + for( register int i = 0; i < A.Size(); i++ ) C(i) = A(i) - B(i); + return C; + } + + Vector operator-( const Vector &A ) // Unary minus. + { + Vector B( A.Size() ); + for( register int i = 0; i < A.Size(); i++ ) B(i) = -A(i); + return B; + } + + Vector operator^( const Vector &A, const Vector &B ) + { + Vector C(3); + assert( A.Size() == B.Size() ); + if( A.Size() == 2 ) // Assume z components of A and B are zero. + { + C(0) = 0.0; + C(1) = 0.0; + C(2) = A(0) * B(1) - A(1) * B(0); + } + else + { + assert( A.Size() == 3 ); + C(0) = A(1) * B(2) - A(2) * B(1); + C(1) = A(2) * B(0) - A(0) * B(2); + C(2) = A(0) * B(1) - A(1) * B(0); + } + return C; + } + + /*-------------------------------------------------------------------------* + * * + * M I S C E L L A N E O U S F U N C T I O N S * + * * + *-------------------------------------------------------------------------*/ + Vector Min( const Vector &A, const Vector &B ) + { + assert( A.Size() == B.Size() ); + Vector C( A.Size() ); + for( register int i = 0; i < A.Size(); i++ ) C(i) = Min( A(i), B(i) ); + return C; + } + + Vector Max( const Vector &A, const Vector &B ) + { + assert( A.Size() == B.Size() ); + Vector C( A.Size() ); + for( register int i = 0; i < A.Size(); i++ ) C(i) = Max( A(i), B(i) ); + return C; + } + + Vector Unit( const Vector &A ) + { + double norm = TwoNorm( A ); + assert( norm > 0.0 ); + return A * ( 1.0 / norm ); + } + + double Normalize( Vector &A ) + { + double norm = TwoNorm( A ); + assert( norm > 0.0 ); + for( register int i = 0; i < A.Size(); i++ ) A(i) /= norm; + return norm; + } + + int Null( const Vector &A ) + { + return A.Size() == 0; + } + + 
double TwoNormSqr( const Vector &A ) + { + double sum = ( A.Size() > 0 ) ? A(0) * A(0) : 0.0; // A zero-size Vector has no element 0; its squared norm is 0. + for( register int i = 1; i < A.Size(); i++ ) sum += A(i) * A(i); + return sum; + } + + double TwoNorm( const Vector &A ) + { + return sqrt( TwoNormSqr( A ) ); + } + + double dist( const Vector &A, const Vector &B ) + { + return TwoNorm( A - B ); + } + + double OneNorm( const Vector &A ) + { + double norm = ( A.Size() > 0 ) ? Abs( A(0) ) : 0.0; // Guard the seed read so a zero-size Vector yields 0. + for( register int i = 1; i < A.Size(); i++ ) norm += Abs( A(i) ); + return norm; + } + + double SupNorm( const Vector &A ) + { + double norm = ( A.Size() > 0 ) ? Abs( A(0) ) : 0.0; // OrthogonalTo calls this before checking Size(), so empty input must be safe. + for( register int i = 1; i < A.Size(); i++ ) + { + double a = Abs( A(i) ); + if( a > norm ) norm = a; + } + return norm; + } + + Vec2 ToVec2( const Vector &V ) + { + assert( V.Size() == 2 ); + return Vec2( V(0), V(1) ); + } + + Vec3 ToVec3( const Vector &V ) + { + assert( V.Size() == 3 ); + return Vec3( V(0), V(1), V(2) ); + } + + Vector ToVector( const Vec2 &V ) + { + return Vector( V.X(), V.Y() ); + } + + Vector ToVector( const Vec3 &V ) + { + return Vector( V.X(), V.Y(), V.Z() ); + } + + // + // Returns a vector that is orthogonal to A (but of arbitrary length). + // + Vector OrthogonalTo( const Vector &A ) + { + Vector B( A.Size() ); + double c = 0.5 * SupNorm( A ); + + if( A.Size() < 2 ) + { + // Just return the zero-vector. + } + else if( c == 0.0 ) + { + B(0) = 1.0; + } + else for( register int i = 0; i < A.Size(); i++ ) + { + if( Abs( A(i)) > c ) + { + int k = ( i > 0 ) ? 
i - 1 : i + 1; + B(k) = -A(i); + B(i) = A(k); + break; + } + } + return B; + } + + std::ostream &operator<<( std::ostream &out, const Vector &A ) + { + if( A.Size() == 0 ) + { + out << "NULL"; + } + else for( register int i = 0; i < A.Size(); i++ ) + { + out << form( "%3d: %10.5g\n", i, A(i) ); + } + out << std::endl; + return out; + } + + +}; diff --git a/src/nvtt/bc7/arvo/Vector.h b/src/nvtt/bc7/arvo/Vector.h new file mode 100644 index 0000000..01e66df --- /dev/null +++ b/src/nvtt/bc7/arvo/Vector.h @@ -0,0 +1,103 @@ +/*************************************************************************** +* Vector.h * +* * +* General Vector and Matrix classes, with all the associated methods. * +* * +* HISTORY * +* Name Date Description * +* * +* arvo 08/16/2000 Revamped for CIT tools. * +* arvo 10/31/1994 Combined RowVec & ColVec into Vector. * +* arvo 06/30/1993 Added singular value decomposition class. * +* arvo 06/25/1993 Major revisions. * +* arvo 09/08/1991 Initial implementation. * +* * +*--------------------------------------------------------------------------* +* Copyright (C) 2000, James Arvo * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU General Public License as published by the * +* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * +* * +* This program is distributed in the hope that it will be useful, but * +* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * +* any particular purpose. See the GNU General Public License for more * +* details. 
* +* * +***************************************************************************/ +#ifndef __VECTOR_INCLUDED__ +#define __VECTOR_INCLUDED__ + +#include +#include "Vec2.h" +#include "Vec3.h" + +namespace ArvoMath { + class Vector { + public: + Vector( int size = 0 ); + Vector( const Vector & ); + Vector( float, float ); + Vector( float, float, float ); + Vector( const float *x, int n ); + Vector &operator=( const Vector & ); + void operator=( float ); + void SetSize( int ); + Vector &Swap( int i, int j ); + Vector GetBlock( int i, int j ) const; + void SetBlock( int i, int j, const Vector & ); + static const Vector Null; + + public: // Inlined functions. + inline float operator()( int i ) const { return elem[i]; } + inline float& operator()( int i ) { return elem[i]; } + inline float* Array() const { return elem; } + inline int Size () const { return size; } + inline ~Vector() { delete[] elem; } + + private: + void Create( int n = 0 ) { size = n; elem = new float[n]; } + int size; + float* elem; + }; + + extern Vector operator + ( const Vector &, const Vector & ); + extern Vector operator - ( const Vector &, const Vector & ); // Binary minus. + extern Vector operator - ( const Vector & ); // Unary minus. + extern Vector operator * ( const Vector &, float ); + extern Vector operator * ( float , const Vector & ); + extern Vector operator / ( const Vector &, float ); + extern Vector operator / ( const Vector &, const Vector & ); + extern Vector operator ^ ( const Vector &, const Vector & ); + extern Vector& operator += ( Vector &, const Vector & ); + extern Vector& operator *= ( Vector &, float ); + extern Vector& operator /= ( Vector &, float ); + extern Vector Min ( const Vector &, const Vector & ); + extern Vector Max ( const Vector &, const Vector & ); + extern double operator * ( const Vector &, const Vector & ); // Inner product. 
+ extern double dist ( const Vector &, const Vector & ); + extern Vector OrthogonalTo( const Vector & ); // Returns some orthogonal vector. + extern Vector Unit ( const Vector & ); + extern double Normalize ( Vector & ); + extern double OneNorm ( const Vector & ); + extern double TwoNorm ( const Vector & ); + extern double TwoNormSqr ( const Vector & ); + extern double SupNorm ( const Vector & ); + extern int Null ( const Vector & ); + extern Vec2 ToVec2 ( const Vector & ); + extern Vec3 ToVec3 ( const Vector & ); + extern Vector ToVector ( const Vec2 & ); + extern Vector ToVector ( const Vec3 & ); + + std::ostream &operator<<( + std::ostream &out, + const Vector & + ); +}; +#endif + + + + + + diff --git a/src/nvtt/bc7/arvo/form.h b/src/nvtt/bc7/arvo/form.h new file mode 100644 index 0000000..48aef94 --- /dev/null +++ b/src/nvtt/bc7/arvo/form.h @@ -0,0 +1,26 @@ +#ifndef __FORM_INCLUDED__ +#define __FORM_INCLUDED__ + +#include +#include +#include +#include + +namespace ArvoMath { + + inline const char *form(char *fmt, ...) + { + static char printbfr[65536]; + va_list arglist; + + va_start(arglist,fmt); + int length = vsprintf(printbfr,fmt,arglist); + va_end(arglist); + + assert(length >= 0 && length < 65536); // vsprintf must have succeeded and the text plus its terminator must fit in printbfr + + return printbfr; + } +}; + +#endif diff --git a/src/nvtt/bc7/avpcl.cpp b/src/nvtt/bc7/avpcl.cpp new file mode 100644 index 0000000..6cbb972 --- /dev/null +++ b/src/nvtt/bc7/avpcl.cpp @@ -0,0 +1,263 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. 
+*/ + +// the avpcl compressor and decompressor + +#include +#include +#include +#include +#include + +#include "ImfArray.h" +#include "RGBA.h" +#include "tile.h" +#include "avpcl.h" +#include "targa.h" + +#ifndef MIN +#define MIN(x,y) ((x)<(y)?(x):(y)) +#endif + +using namespace std; + +void AVPCL::compress(const Tile &t, char *block, FILE *errfile) +{ + char tempblock[AVPCL::BLOCKSIZE]; + double msebest = DBL_MAX; + + double mse_mode0 = AVPCL::compress_mode0(t, tempblock); if(mse_mode0 < msebest) { msebest = mse_mode0; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + double mse_mode1 = AVPCL::compress_mode1(t, tempblock); if(mse_mode1 < msebest) { msebest = mse_mode1; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + double mse_mode2 = AVPCL::compress_mode2(t, tempblock); if(mse_mode2 < msebest) { msebest = mse_mode2; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + double mse_mode3 = AVPCL::compress_mode3(t, tempblock); if(mse_mode3 < msebest) { msebest = mse_mode3; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + double mse_mode4 = AVPCL::compress_mode4(t, tempblock); if(mse_mode4 < msebest) { msebest = mse_mode4; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + double mse_mode5 = AVPCL::compress_mode5(t, tempblock); if(mse_mode5 < msebest) { msebest = mse_mode5; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + double mse_mode6 = AVPCL::compress_mode6(t, tempblock); if(mse_mode6 < msebest) { msebest = mse_mode6; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + double mse_mode7 = AVPCL::compress_mode7(t, tempblock); if(mse_mode7 < msebest) { msebest = mse_mode7; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + + if (errfile) + { + double errs[21]; + int nerrs = 8; + errs[0] = mse_mode0; + errs[1] = mse_mode1; + errs[2] = mse_mode2; + errs[3] = mse_mode3; + errs[4] = mse_mode4; + errs[5] = mse_mode5; + errs[6] = mse_mode6; + errs[7] = mse_mode7; + if (fwrite(errs, sizeof(double), nerrs, errfile) != nerrs) + throw "Write error on error file"; + } +} + +static int 
getbit(char *b, int start) +{ + if (start < 0 || start >= 128) return 0; // out of range + + int ix = start >> 3; + return (b[ix] & (1 << (start & 7))) != 0; +} + +static int getbits(char *b, int start, int len) +{ + int out = 0; + for (int i=0; i= 128) return; // out of range + + int ix = start >> 3; + + if (bit & 1) + b[ix] |= (1 << (start & 7)); + else + b[ix] &= ~(1 << (start & 7)); +} + +static void setbits(char *b, int start, int len, int bits) +{ + for (int i=0; i> i); +} + +void AVPCL::decompress(const char *cblock, Tile &t) +{ + Vec4 zero(0); + + char block[16]; + + for (int i=0; i<16; ++i) block[i] = cblock[i]; + + switch(getmode(block)) + { + case 0: AVPCL::decompress_mode0(block, t); break; + case 1: AVPCL::decompress_mode1(block, t); break; + case 2: AVPCL::decompress_mode2(block, t); break; + case 3: AVPCL::decompress_mode3(block, t); break; + case 4: AVPCL::decompress_mode4(block, t); break; + case 5: AVPCL::decompress_mode5(block, t); break; + case 6: AVPCL::decompress_mode6(block, t); break; + case 7: AVPCL::decompress_mode7(block, t); break; + case 8: // return a black tile if you get a reserved mode + for (int y=0; y pixels; + int w, h; + char block[AVPCL::BLOCKSIZE]; + + Targa::read(inf, pixels, w, h); + FILE *avpclfile = fopen(avpclf.c_str(), "wb"); + if (avpclfile == NULL) throw "Unable to open .avpcl file for write"; + FILE *errfile = NULL; + if (errf != "") + { + errfile = fopen(errf.c_str(), "wb"); + if (errfile == NULL) throw "Unable to open error file for write"; + } + + // Look at alpha channel and override the premult flag if alpha is constant (but only if premult is set) + if (AVPCL::flag_premult) + { + if (AVPCL::mode_rgb) + { + AVPCL::flag_premult = false; + cout << endl << "NOTE: Source image alpha is constant 255, turning off premultiplied-alpha error metric." 
<< endl << endl; + } + } + + // stuff for progress bar O.o + int ntiles = ((h+Tile::TILE_H-1)/Tile::TILE_H)*((w+Tile::TILE_W-1)/Tile::TILE_W); + int tilecnt = 0; + clock_t start, prev, cur; + + start = prev = clock(); + + // convert to tiles and compress each tile + for (int y=0; y> thing; + return thing; +} + +// avpcl file name is ...-w-h-RGB[A].avpcl, extract width and height +static void extract(string avpclf, int &w, int &h, bool &mode_rgb) +{ + size_t n = avpclf.rfind('.', avpclf.length()-1); + size_t n1 = avpclf.rfind('-', n-1); + size_t n2 = avpclf.rfind('-', n1-1); + size_t n3 = avpclf.rfind('-', n2-1); + // ...-wwww-hhhh-RGB[A].avpcl + // ^ ^ ^ ^ + // n3 n2 n1 n n3 pixels; + int w, h; + char block[AVPCL::BLOCKSIZE]; + + extract(avpclf, w, h, AVPCL::mode_rgb); + FILE *avpclfile = fopen(avpclf.c_str(), "rb"); + if (avpclfile == NULL) throw "Unable to open .avpcl file for read"; + pixels.resizeErase(h, w); + + // convert to tiles and decompress each tile + for (int y=0; y +#include + +#include "tile.h" +#include "bits.h" + +using namespace std; + +#define EXTERNAL_RELEASE 1 // define this if we're releasing this code externally +#define DISABLE_EXHAUSTIVE 1 // define this if you don't want to spend a lot of time on exhaustive compression +#define USE_ZOH_INTERP 1 // use zoh interpolator, otherwise use exact avpcl interpolators +#define USE_ZOH_INTERP_ROUNDED 1 // use the rounded versions! 
+ +#define NREGIONS_TWO 2 +#define NREGIONS_THREE 3 +#define DBL_MAX (1.0e37) // doesn't have to be really dblmax, just bigger than any possible squared error + +class AVPCL +{ +public: + static const int BLOCKSIZE=16; + static const int BITSIZE=128; + + // global flags + static bool flag_premult; + static bool flag_nonuniform; + static bool flag_nonuniform_ati; + + // global mode + static bool mode_rgb; // true if image had constant alpha = 255 + + static void compress(string inf, string zohf, string errf); + static void decompress(string zohf, string outf); + static void compress(const Tile &t, char *block, FILE *errfile); + static void decompress(const char *block, Tile &t); + + static double compress_mode0(const Tile &t, char *block); + static void decompress_mode0(const char *block, Tile &t); + + static double compress_mode1(const Tile &t, char *block); + static void decompress_mode1(const char *block, Tile &t); + + static double compress_mode2(const Tile &t, char *block); + static void decompress_mode2(const char *block, Tile &t); + + static double compress_mode3(const Tile &t, char *block); + static void decompress_mode3(const char *block, Tile &t); + + static double compress_mode4(const Tile &t, char *block); + static void decompress_mode4(const char *block, Tile &t); + + static double compress_mode5(const Tile &t, char *block); + static void decompress_mode5(const char *block, Tile &t); + + static double compress_mode6(const Tile &t, char *block); + static void decompress_mode6(const char *block, Tile &t); + + static double compress_mode7(const Tile &t, char *block); + static void decompress_mode7(const char *block, Tile &t); + + static int getmode(Bits &in) + { + int mode = 0; + + if (in.read(1)) mode = 0; + else if (in.read(1)) mode = 1; + else if (in.read(1)) mode = 2; + else if (in.read(1)) mode = 3; + else if (in.read(1)) mode = 4; + else if (in.read(1)) mode = 5; + else if (in.read(1)) mode = 6; + else if (in.read(1)) mode = 7; + else mode = 8; // 
reserved + return mode; + } + static int getmode(const char *block) + { + int bits = block[0], mode = 0; + + if (bits & 1) mode = 0; + else if ((bits&3) == 2) mode = 1; + else if ((bits&7) == 4) mode = 2; + else if ((bits & 0xF) == 8) mode = 3; + else if ((bits & 0x1F) == 16) mode = 4; + else if ((bits & 0x3F) == 32) mode = 5; + else if ((bits & 0x7F) == 64) mode = 6; + else if ((bits & 0xFF) == 128) mode = 7; + else mode = 8; // reserved + return mode; + } +}; +#endif \ No newline at end of file diff --git a/src/nvtt/bc7/avpcl.sln b/src/nvtt/bc7/avpcl.sln new file mode 100644 index 0000000..395b1ce --- /dev/null +++ b/src/nvtt/bc7/avpcl.sln @@ -0,0 +1,21 @@ +Microsoft Visual Studio Solution File, Format Version 8.00 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "avpcl", "avpcl.vcproj", "{C6F6CD96-D0C0-4A35-B1BC-53E0A3CB712F}" + ProjectSection(ProjectDependencies) = postProject + EndProjectSection +EndProject +Global + GlobalSection(SolutionConfiguration) = preSolution + Debug = Debug + Release = Release + EndGlobalSection + GlobalSection(ProjectConfiguration) = postSolution + {C6F6CD96-D0C0-4A35-B1BC-53E0A3CB712F}.Debug.ActiveCfg = Debug|Win32 + {C6F6CD96-D0C0-4A35-B1BC-53E0A3CB712F}.Debug.Build.0 = Debug|Win32 + {C6F6CD96-D0C0-4A35-B1BC-53E0A3CB712F}.Release.ActiveCfg = Release|Win32 + {C6F6CD96-D0C0-4A35-B1BC-53E0A3CB712F}.Release.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + EndGlobalSection + GlobalSection(ExtensibilityAddIns) = postSolution + EndGlobalSection +EndGlobal diff --git a/src/nvtt/bc7/avpcl.vcproj b/src/nvtt/bc7/avpcl.vcproj new file mode 100644 index 0000000..4857f78 --- /dev/null +++ b/src/nvtt/bc7/avpcl.vcproj @@ -0,0 +1,314 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + diff --git a/src/nvtt/bc7/avpcl_mode0.cpp b/src/nvtt/bc7/avpcl_mode0.cpp new file mode 100644 index 0000000..7583b70 --- /dev/null +++ b/src/nvtt/bc7/avpcl_mode0.cpp @@ -0,0 +1,1068 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis + +// x1 444.1x6 16p 45b (3bi) + +#include "bits.h" +#include "tile.h" +#include "avpcl.h" +#include "arvo/Vec4.h" +#include "arvo/Matrix.h" +#include "arvo/SVD.h" +#include "utils.h" +#include "endpts.h" + +#include + +#include "shapes_three.h" + +// use only the first 16 available shapes +#undef NSHAPES +#undef SHAPEBITS +#define NSHAPES 16 +#define SHAPEBITS 4 + +using namespace ArvoMath; + +#define NLSBMODES 4 // number of different lsb modes per region. since we have two .1 per region, that can have 4 values + +#define NINDICES 8 +#define INDEXBITS 3 +#define HIGH_INDEXBIT (1<<(INDEXBITS-1)) +#define DENOM (NINDICES-1) +#define BIAS (DENOM/2) + +// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like? +// i.e. can we search shapes in a particular order so we can see the global error minima easily and +// stop without having to touch all shapes? 
+ +#define POS_TO_X(pos) ((pos)&3) +#define POS_TO_Y(pos) (((pos)>>2)&3) + +#define NBITSIZES (NREGIONS*2) +#define ABITINDEX(region) (2*(region)+0) +#define BBITINDEX(region) (2*(region)+1) + +struct ChanBits +{ + int nbitsizes[NBITSIZES]; // bitsizes for one channel +}; + +struct Pattern +{ + ChanBits chan[NCHANNELS_RGB];// bit patterns used per channel + int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed + int mode; // associated mode value + int modebits; // number of mode bits + char *encoding; // verilog description of encoding for this mode +}; + +#define NPATTERNS 1 + +static Pattern patterns[NPATTERNS] = +{ + // red green blue xfm mode mb + 4,4,4,4,4,4, 4,4,4,4,4,4, 4,4,4,4,4,4, 0, 0x1, 1, "", // really 444.1 x 6 +}; + +struct RegionPrec +{ + int endpt_a_prec[NCHANNELS_RGB]; + int endpt_b_prec[NCHANNELS_RGB]; +}; + +struct PatternPrec +{ + RegionPrec region_precs[NREGIONS]; +}; + +// this is the precision for each channel and region +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! +static PatternPrec pattern_precs[NPATTERNS] = +{ + 4,4,4, 4,4,4, 4,4,4, 4,4,4, 4,4,4, 4,4,4, +}; + +// return # of bits needed to store n. 
handle signed or unsigned cases properly +static int nbits(int n, bool issigned) +{ + int nb; + if (n==0) + return 0; // no bits needed for 0, signed or not + else if (n > 0) + { + for (nb=0; n; ++nb, n>>=1) ; + return nb + (issigned?1:0); + } + else + { + assert (issigned); + for (nb=0; n<-1; ++nb, n>>=1) ; + return nb + 1; + } +} + +static void transform_forward(IntEndptsRGB_2 ep[NREGIONS]) +{ + assert(0); +} + +static void transform_inverse(IntEndptsRGB_2 ep[NREGIONS]) +{ + assert(0); +} + +// endpoints are 555,555; reduce to 444,444 and put the lsb bit majority in compr_bits +static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpts) +{ + int onescnt; + + onescnt = 0; + for (int j=0; j> 1; + assert (compr_endpts.A[j] < 16); + } + compr_endpts.a_lsb = onescnt >= 2; + + onescnt = 0; + for (int j=0; j> 1; + assert (compr_endpts.B[j] < 16); + } + compr_endpts.b_lsb = onescnt >= 2; +} + +static void uncompress_one(const IntEndptsRGB_2& compr_endpts, IntEndptsRGB& endpts) +{ + for (int j=0; j= 0 && pat_index < NPATTERNS); + assert (in.getptr() == patterns[pat_index].modebits); + + shapeindex = in.read(SHAPEBITS); + p = patterns[pat_index]; + + for (int j=0; j 0; ++j) + { + err = Utils::metric4(colors[i], palette[j]); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[i] = j; + } + } + toterr += besterr; + + // check for early exit + if (toterr > current_err) + { + // fill out bogus index values so it's initialized at least + for (int k = i; k < np; ++k) + indices[k] = -1; + + return DBL_MAX; + } + } + return toterr; +} + +// assign indices given a tile, shape, and quantized endpoints, return toterr for each region +static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endpts[NREGIONS], const PatternPrec &pattern_prec, + int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS]) +{ + // build list of possibles + Vec4 
palette[NREGIONS][NINDICES]; + + for (int region = 0; region < NREGIONS; ++region) + { + generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]); + toterr[region] = 0; + } + + Vec4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + double err, besterr = DBL_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + err = Utils::metric4(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[y][x] = i; + } + } + toterr[region] += besterr; + } +} + +// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's +// this function returns either old_err or a value smaller (if it was successful in improving the error) +static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts, + double old_err, int do_b, int indices[Tile::TILE_TOTAL]) +{ + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts + + IntEndptsRGB_2 temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i>= 1) + { + bool improved = false; + for (int sign = -1; sign <= 1; sign += 2) + { + if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } + + float err = map_colors(colors, np, temp_endpts, region_prec, min_err, temp_indices); + + if (err < min_err) + { + improved = true; + min_err = err; + beststep = sign * step; + for 
(int i=0; i 5000 perturb endpoints 50% of precision +// if err > 1000 25% +// if err > 200 12.5% +// if err > 40 6.25% +// for np = 16 -- adjust error thresholds as a function of np +// always ensure endpoint ordering is preserved (no need to overlap the scan) +// if orig_err returned from this is less than its input value, then indices[] will contain valid indices +static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, double &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL]) +{ + IntEndptsRGB_2 temp_endpts; + double best_err = orig_err; + int aprec = region_prec.endpt_a_prec[ch]; + int bprec = region_prec.endpt_b_prec[ch]; + int good_indices[Tile::TILE_TOTAL]; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i 5000.0*thr_scale) { adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; } + else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } + else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } + else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } + adelta = MAX(adelta, 3); + bdelta = MAX(bdelta, 3); + +#ifdef DISABLE_EXHAUSTIVE + adelta = bdelta = 3; +#endif + + temp_endpts = opt_endpts; + + // ok figure out the range of A and B + int alow = MAX(0, opt_endpts.A[ch] - adelta); + int ahigh = MIN((1<= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? rgb1 : rgb0 += delta + initial_err = err + */ + IntEndptsRGB_2 new_a, new_b; + IntEndptsRGB_2 new_endpt; + int do_b; + int orig_indices[Tile::TILE_TOTAL]; + int new_indices[Tile::TILE_TOTAL]; + int temp_indices0[Tile::TILE_TOTAL]; + int temp_indices1[Tile::TILE_TOTAL]; + + // now optimize each channel separately + // for the first error improvement, we save the indices. 
then, for any later improvement, we compare the indices + // if they differ, we restart the loop (which then falls back to looking for a first improvement.) + for (int ch = 0; ch < NCHANNELS_RGB; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A + float err1 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B + + if (err0 < err1) + { + if (err0 >= opt_err) + continue; + + for (int i=0; i= opt_err) + continue; + + for (int i=0; i= opt_err) + break; + + for (int i=0; i> 1) & 1; + + // make sure we have a valid error for temp_in + // we use DBL_MAX here because we want an accurate temp_in_err, no shortcuts + // (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the DBL_MAX position) + double temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], DBL_MAX, temp_indices); + + // now try to optimize these endpoints + double temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); + + // if we find an improvement, update the best so far and correct the output endpoints and errors + if (temp_out_err < best_err) + { + best_err = temp_out_err; + opt_err[region] = temp_out_err; + opt_endpts[region] = temp_out; + } + } + } +} + +/* optimization algorithm + for each pattern + convert endpoints using pattern precision + assign indices and get initial error + compress indices (and possibly reorder endpoints) + transform endpoints + if transformed endpoints fit pattern + get original endpoints back + optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better + compress new indices + transform new endpoints + if new endpoints fit 
pattern AND if error is improved + emit compressed block with new data + else + emit compressed block with original data // to try to preserve maximum endpoint precision +*/ + +static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block) +{ + double orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; + IntEndptsRGB_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS]; + int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W]; + + for (int sp = 0; sp < NPATTERNS; ++sp) + { + quantize_endpts(endpts, pattern_precs[sp], orig_endpts); + assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err); + swap_indices(orig_endpts, orig_indices, shapeindex_best); + if (patterns[sp].transformed) + transform_forward(orig_endpts); + // apply a heuristic here -- we check if the endpoints fit before we try to optimize them. + // the assumption made is that if they don't fit now, they won't fit after optimizing. 
+ if (endpts_fit(orig_endpts, patterns[sp])) + { + if (patterns[sp].transformed) + transform_inverse(orig_endpts); + optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts); + assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err); + for (int i=0; i RGBA_MAX) v.X() = RGBA_MAX; + if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; + if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; + if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; + if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; + v.W() = RGBA_MAX; +} + +static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vec4 palette[NREGIONS][NINDICES]) +{ + for (int region = 0; region < NREGIONS; ++region) + for (int i = 0; i < NINDICES; ++i) + palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, 0.0, float(DENOM)); +} + +// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined +static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) +{ + // build list of possibles + Vec4 palette[NREGIONS][NINDICES]; + + generate_palette_unquantized(endpts, palette); + + double toterr = 0; + Vec4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + double err, besterr = DBL_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + err = Utils::metric4(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error increased, so we're done searching. this works for most norms. + break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; +} + +// for this mode, we assume alpha = 255 constant and compress only the RGB portion. +// however, we do the error check against the actual alpha values supplied for the tile. 
+static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) +{ + for (int region=0; region maxp) maxp = dp; + } + + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + minp*direction; + endpts[region].B = mean + maxp*direction; + + // clamp endpoints + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped + clamp(endpts[region].A); + clamp(endpts[region].B); + } + + return map_colors(tile, shapeindex, endpts); +} + +static void swap(double *list1, int *list2, int i, int j) +{ + double t = list1[i]; list1[i] = list1[j]; list1[j] = t; + int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; +} + +double AVPCL::compress_mode0(const Tile &t, char *block) +{ + // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES + // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out + const int NITEMS=NSHAPES/4; + + // pick the best NITEMS shapes and refine these. + struct { + FltEndpts endpts[NREGIONS]; + } all[NSHAPES]; + double roughmse[NSHAPES]; + int index[NSHAPES]; + char tempblock[AVPCL::BLOCKSIZE]; + double msebest = DBL_MAX; + + for (int i=0; i roughmse[j]) + swap(roughmse, index, i, j); + + for (int i=0; i0; ++i) + { + int shape = index[i]; + double mse = refine(t, shape, &all[shape].endpts[0], tempblock); + if (mse < msebest) + { + memcpy(block, tempblock, sizeof(tempblock)); + msebest = mse; + } + } + return msebest; +} + diff --git a/src/nvtt/bc7/avpcl_mode1.cpp b/src/nvtt/bc7/avpcl_mode1.cpp new file mode 100644 index 0000000..bee9daa --- /dev/null +++ b/src/nvtt/bc7/avpcl_mode1.cpp @@ -0,0 +1,1049 @@ +/* +Copyright 2007 nVidia, Inc. 
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis + +// x10 (666x2).1 (666x2).1 64p 3bi + +#include "bits.h" +#include "tile.h" +#include "avpcl.h" +#include "arvo/Vec4.h" +#include "arvo/Matrix.h" +#include "arvo/SVD.h" +#include "utils.h" +#include "endpts.h" + +#include + +#include "shapes_two.h" + +using namespace ArvoMath; + +#define NLSBMODES 2 // number of different lsb modes per region. since we have one .1 per region, that can have 2 values + +#define NINDICES 8 +#define INDEXBITS 3 +#define HIGH_INDEXBIT (1<<(INDEXBITS-1)) +#define DENOM (NINDICES-1) +#define BIAS (DENOM/2) + +// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like? +// i.e. can we search shapes in a particular order so we can see the global error minima easily and +// stop without having to touch all shapes? 
+ +#define POS_TO_X(pos) ((pos)&3) +#define POS_TO_Y(pos) (((pos)>>2)&3) + +#define NBITSIZES (NREGIONS*2) +#define ABITINDEX(region) (2*(region)+0) +#define BBITINDEX(region) (2*(region)+1) + +struct ChanBits +{ + int nbitsizes[NBITSIZES]; // bitsizes for one channel +}; + +struct Pattern +{ + ChanBits chan[NCHANNELS_RGB];// bit patterns used per channel + int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed + int mode; // associated mode value + int modebits; // number of mode bits + char *encoding; // verilog description of encoding for this mode +}; + +#define NPATTERNS 1 + +static Pattern patterns[NPATTERNS] = +{ + // red green blue xfm mode mb + 6,6,6,6, 6,6,6,6, 6,6,6,6, 0, 0x2, 2, "", +}; + +struct RegionPrec +{ + int endpt_a_prec[NCHANNELS_RGB]; + int endpt_b_prec[NCHANNELS_RGB]; +}; + +struct PatternPrec +{ + RegionPrec region_precs[NREGIONS]; +}; + + +// this is the precision for each channel and region +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! +static PatternPrec pattern_precs[NPATTERNS] = +{ + 6,6,6, 6,6,6, 6,6,6, 6,6,6, +}; + +// return # of bits needed to store n. 
handle signed or unsigned cases properly +static int nbits(int n, bool issigned) +{ + int nb; + if (n==0) + return 0; // no bits needed for 0, signed or not + else if (n > 0) + { + for (nb=0; n; ++nb, n>>=1) ; + return nb + (issigned?1:0); + } + else + { + assert (issigned); + for (nb=0; n<-1; ++nb, n>>=1) ; + return nb + 1; + } +} + + +static void transform_forward(IntEndptsRGB_1 ep[NREGIONS]) +{ + assert(0); +} + +static void transform_inverse(IntEndptsRGB_1 ep[NREGIONS]) +{ + assert(0); +} + +// endpoints are 777,777; reduce to 666,666 and put the lsb bit majority in compr_bits +static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_1& compr_endpts) +{ + int onescnt; + + onescnt = 0; + for (int j=0; j> 1; + onescnt += endpts.B[j] & 1; + compr_endpts.B[j] = endpts.B[j] >> 1; + assert (compr_endpts.A[j] < 64); + assert (compr_endpts.B[j] < 64); + } + compr_endpts.lsb = onescnt >= 3; +} + +static void uncompress_one(const IntEndptsRGB_1& compr_endpts, IntEndptsRGB& endpts) +{ + for (int j=0; j= 0 && pat_index < NPATTERNS); + assert (in.getptr() == patterns[pat_index].modebits); + + shapeindex = in.read(SHAPEBITS); + p = patterns[pat_index]; + + for (int j=0; j 0; ++j) + { + err = Utils::metric4(colors[i], palette[j]); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[i] = j; + } + } + toterr += besterr; + + // check for early exit + if (toterr > current_err) + { + // fill out bogus index values so it's initialized at least + for (int k = i; k < np; ++k) + indices[k] = -1; + + return DBL_MAX; + } + } + return toterr; +} + +// assign indices given a tile, shape, and quantized endpoints, return toterr for each region +static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_1 endpts[NREGIONS], const PatternPrec &pattern_prec, + int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS]) +{ + // build list of possibles + Vec4 palette[NREGIONS][NINDICES]; + + 
for (int region = 0; region < NREGIONS; ++region) + { + generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]); + toterr[region] = 0; + } + + Vec4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + double err, besterr = DBL_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + err = Utils::metric4(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[y][x] = i; + } + } + toterr[region] += besterr; + } +} + +// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's +// this function returns either old_err or a value smaller (if it was successful in improving the error) +static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGB_1 &old_endpts, IntEndptsRGB_1 &new_endpts, + double old_err, int do_b, int indices[Tile::TILE_TOTAL]) +{ + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts + + IntEndptsRGB_1 temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i>= 1) + { + bool improved = false; + for (int sign = -1; sign <= 1; sign += 2) + { + if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } + + float err = map_colors(colors, np, temp_endpts, region_prec, min_err, temp_indices); + + if (err < min_err) + { + improved = true; + min_err = err; + beststep = sign * step; + for (int i=0; i 5000 perturb endpoints 
50% of precision +// if err > 1000 25% +// if err > 200 12.5% +// if err > 40 6.25% +// for np = 16 -- adjust error thresholds as a function of np +// always ensure endpoint ordering is preserved (no need to overlap the scan) +// if orig_err returned from this is less than its input value, then indices[] will contain valid indices +static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, double orig_err, IntEndptsRGB_1 &opt_endpts, int indices[Tile::TILE_TOTAL]) +{ + IntEndptsRGB_1 temp_endpts; + double best_err = orig_err; + int aprec = region_prec.endpt_a_prec[ch]; + int bprec = region_prec.endpt_b_prec[ch]; + int good_indices[Tile::TILE_TOTAL]; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i 5000.0*thr_scale) { adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; } + else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } + else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } + else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } + adelta = MAX(adelta, 3); + bdelta = MAX(bdelta, 3); + +#ifdef DISABLE_EXHAUSTIVE + adelta = bdelta = 3; +#endif + + temp_endpts = opt_endpts; + + // ok figure out the range of A and B + int alow = MAX(0, opt_endpts.A[ch] - adelta); + int ahigh = MIN((1<= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? rgb1 : rgb0 += delta + initial_err = err + */ + IntEndptsRGB_1 new_a, new_b; + IntEndptsRGB_1 new_endpt; + int do_b; + int orig_indices[Tile::TILE_TOTAL]; + int new_indices[Tile::TILE_TOTAL]; + int temp_indices0[Tile::TILE_TOTAL]; + int temp_indices1[Tile::TILE_TOTAL]; + + // now optimize each channel separately + // for the first error improvement, we save the indices. 
then, for any later improvement, we compare the indices + // if they differ, we restart the loop (which then falls back to looking for a first improvement.) + for (int ch = 0; ch < NCHANNELS_RGB; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A + float err1 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B + + if (err0 < err1) + { + if (err0 >= opt_err) + continue; + + for (int i=0; i= opt_err) + continue; + + for (int i=0; i= opt_err) + break; + + for (int i=0; i RGBA_MAX) v.X() = RGBA_MAX; + if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; + if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; + if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; + if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; + v.W() = RGBA_MAX; +} + +static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vec4 palette[NREGIONS][NINDICES]) +{ + for (int region = 0; region < NREGIONS; ++region) + for (int i = 0; i < NINDICES; ++i) + palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, 0.0, float(DENOM)); +} + +// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined +static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) +{ + // build list of possibles + Vec4 palette[NREGIONS][NINDICES]; + + generate_palette_unquantized(endpts, palette); + + double toterr = 0; + Vec4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + double err, besterr = DBL_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + err = Utils::metric4(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error 
increased, so we're done searching. this works for most norms. + break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; +} + +static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) +{ + for (int region=0; region maxp) maxp = dp; + } + + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + minp*direction; + endpts[region].B = mean + maxp*direction; + + // clamp endpoints + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped + clamp(endpts[region].A); + clamp(endpts[region].B); + } + + return map_colors(tile, shapeindex, endpts); +} + +static void swap(double *list1, int *list2, int i, int j) +{ + double t = list1[i]; list1[i] = list1[j]; list1[j] = t; + int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; +} + +double AVPCL::compress_mode1(const Tile &t, char *block) +{ + // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES + // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out + const int NITEMS=NSHAPES/4; + + // pick the best NITEMS shapes and refine these. 
+ struct { + FltEndpts endpts[NREGIONS]; + } all[NSHAPES]; + double roughmse[NSHAPES]; + int index[NSHAPES]; + char tempblock[AVPCL::BLOCKSIZE]; + double msebest = DBL_MAX; + + for (int i=0; i roughmse[j]) + swap(roughmse, index, i, j); + + for (int i=0; i0; ++i) + { + int shape = index[i]; + double mse = refine(t, shape, &all[shape].endpts[0], tempblock); + if (mse < msebest) + { + memcpy(block, tempblock, sizeof(tempblock)); + msebest = mse; + } + } + return msebest; +} + diff --git a/src/nvtt/bc7/avpcl_mode2.cpp b/src/nvtt/bc7/avpcl_mode2.cpp new file mode 100644 index 0000000..ef37dbe --- /dev/null +++ b/src/nvtt/bc7/avpcl_mode2.cpp @@ -0,0 +1,1005 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis + +// x100 555x6 64p 2bi + +#include "bits.h" +#include "tile.h" +#include "avpcl.h" +#include "arvo/Vec4.h" +#include "arvo/Matrix.h" +#include "arvo/SVD.h" +#include "utils.h" +#include "endpts.h" + +#include + +#include "shapes_three.h" + +using namespace ArvoMath; + +#define NINDICES 4 +#define INDEXBITS 2 +#define HIGH_INDEXBIT (1<<(INDEXBITS-1)) +#define DENOM (NINDICES-1) +#define BIAS (DENOM/2) + +// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like? +// i.e. 
can we search shapes in a particular order so we can see the global error minima easily and +// stop without having to touch all shapes? + +#define POS_TO_X(pos) ((pos)&3) +#define POS_TO_Y(pos) (((pos)>>2)&3) + +#define NBITSIZES 6 + +struct ChanBits +{ + int nbitsizes[NBITSIZES]; // bitsizes for one channel +}; + +struct Pattern +{ + ChanBits chan[NCHANNELS_RGB];// bit patterns used per channel + int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed + int mode; // associated mode value + int modebits; // number of mode bits + char *encoding; // verilog description of encoding for this mode +}; + +#define NPATTERNS 1 + +static Pattern patterns[NPATTERNS] = +{ + // red green blue xfm mode mb + 5,5,5,5,5,5, 5,5,5,5,5,5, 5,5,5,5,5,5, 0, 0x4, 3, "", +}; + + +struct RegionPrec +{ + int endpt_a_prec[NCHANNELS_RGB]; + int endpt_b_prec[NCHANNELS_RGB]; +}; + +struct PatternPrec +{ + RegionPrec region_precs[NREGIONS_THREE]; +}; + + +// this is the precision for each channel and region +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! + +static PatternPrec pattern_precs[NPATTERNS] = +{ + 5,5,5, 5,5,5, 5,5,5, 5,5,5, 5,5,5, 5,5,5, +}; + +// return # of bits needed to store n. 
handle signed or unsigned cases properly +static int nbits(int n, bool issigned) +{ + int nb; + if (n==0) + return 0; // no bits needed for 0, signed or not + else if (n > 0) + { + for (nb=0; n; ++nb, n>>=1) ; + return nb + (issigned?1:0); + } + else + { + assert (issigned); + for (nb=0; n<-1; ++nb, n>>=1) ; + return nb + 1; + } +} + +#define R_0 ep[0].A[i] +#define R_1 ep[0].B[i] +#define R_2 ep[1].A[i] +#define R_3 ep[1].B[i] + +static void transform_forward(IntEndptsRGB ep[NREGIONS]) +{ + for (int i=0; i= 0 && pat_index < NPATTERNS); + assert (in.getptr() == patterns[pat_index].modebits); + + shapeindex = in.read(SHAPEBITS); + + p = patterns[pat_index]; + + for (int j=0; j 0; ++j) + { + err = Utils::metric4(colors[i], palette[j]); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[i] = j; + } + } + toterr += besterr; + + // check for early exit + if (toterr > current_err) + { + // fill out bogus index values so it's initialized at least + for (int k = i; k < np; ++k) + indices[k] = -1; + + return DBL_MAX; + } + } + return toterr; +} + +// assign indices given a tile, shape, and quantized endpoints, return toterr for each region +static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB endpts[NREGIONS_THREE], const PatternPrec &pattern_prec, + int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS_THREE]) +{ + // build list of possibles + Vec4 palette[NREGIONS_THREE][NINDICES]; + + for (int region = 0; region < NREGIONS_THREE; ++region) + { + generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]); + toterr[region] = 0; + } + + Vec4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + double err, besterr = DBL_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + err = Utils::metric4(tile.data[y][x], palette[region][i]); + + if 
(err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[y][x] = i; + } + } + toterr[region] += besterr; + } +} + +// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's +// this function returns either old_err or a value smaller (if it was successful in improving the error) +static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGB &old_endpts, IntEndptsRGB &new_endpts, + double old_err, int do_b, int indices[Tile::TILE_TOTAL]) +{ + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts + + IntEndptsRGB temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i>= 1) + { + bool improved = false; + for (int sign = -1; sign <= 1; sign += 2) + { + if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } + + float err = map_colors(colors, np, temp_endpts, region_prec, min_err, temp_indices); + + if (err < min_err) + { + improved = true; + min_err = err; + beststep = sign * step; + for (int i=0; i 5000 perturb endpoints 50% of precision +// if err > 1000 25% +// if err > 200 12.5% +// if err > 40 6.25% +// for np = 16 -- adjust error thresholds as a function of np +// always ensure endpoint ordering is preserved (no need to overlap the scan) +// if orig_err returned from this is less than its input value, then indices[] will contain valid indices +static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, double orig_err, IntEndptsRGB &opt_endpts, int 
indices[Tile::TILE_TOTAL]) +{ + IntEndptsRGB temp_endpts; + double best_err = orig_err; + int aprec = region_prec.endpt_a_prec[ch]; + int bprec = region_prec.endpt_b_prec[ch]; + int good_indices[Tile::TILE_TOTAL]; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i 5000.0*thr_scale) { adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; } + else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } + else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } + else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } + adelta = MAX(adelta, 3); + bdelta = MAX(bdelta, 3); + +#ifdef DISABLE_EXHAUSTIVE + adelta = bdelta = 3; +#endif + + temp_endpts = opt_endpts; + + // ok figure out the range of A and B + int alow = MAX(0, opt_endpts.A[ch] - adelta); + int ahigh = MIN((1<= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? rgb1 : rgb0 += delta + initial_err = err + */ + IntEndptsRGB new_a, new_b; + IntEndptsRGB new_endpt; + int do_b; + int orig_indices[Tile::TILE_TOTAL]; + int new_indices[Tile::TILE_TOTAL]; + int temp_indices0[Tile::TILE_TOTAL]; + int temp_indices1[Tile::TILE_TOTAL]; + + // now optimize each channel separately + // for the first error improvement, we save the indices. then, for any later improvement, we compare the indices + // if they differ, we restart the loop (which then falls back to looking for a first improvement.) 
+ for (int ch = 0; ch < NCHANNELS_RGB; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A + float err1 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B + + if (err0 < err1) + { + if (err0 >= opt_err) + continue; + + for (int i=0; i= opt_err) + continue; + + for (int i=0; i= opt_err) + break; + + for (int i=0; i RGBA_MAX) v.X() = RGBA_MAX; + if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; + if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; + if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; + if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; + v.W() = RGBA_MAX; +} + +static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_THREE], Vec4 palette[NREGIONS_THREE][NINDICES]) +{ + for (int region = 0; region < NREGIONS_THREE; ++region) + for (int i = 0; i < NINDICES; ++i) + palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, 0.0, float(DENOM)); +} + +// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined +static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_THREE]) +{ + // build list of possibles + Vec4 palette[NREGIONS_THREE][NINDICES]; + + generate_palette_unquantized(endpts, palette); + + double toterr = 0; + Vec4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + double err, besterr = DBL_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + err = Utils::metric4(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error increased, so we're done searching. this works for most norms. 
+ break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; +} + +static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_THREE]) +{ + for (int region=0; region maxp) maxp = dp; + } + + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + minp*direction; + endpts[region].B = mean + maxp*direction; + + // clamp endpoints + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped + clamp(endpts[region].A); + clamp(endpts[region].B); + } + + return map_colors(tile, shapeindex, endpts); +} + +static void swap(double *list1, int *list2, int i, int j) +{ + double t = list1[i]; list1[i] = list1[j]; list1[j] = t; + int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; +} + +double AVPCL::compress_mode2(const Tile &t, char *block) +{ + // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES + // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out + const int NITEMS=NSHAPES/4; + + // pick the best NITEMS shapes and refine these. + struct { + FltEndpts endpts[NREGIONS_THREE]; + } all[NSHAPES]; + double roughmse[NSHAPES]; + int index[NSHAPES]; + char tempblock[AVPCL::BLOCKSIZE]; + double msebest = DBL_MAX; + + for (int i=0; i roughmse[j]) + swap(roughmse, index, i, j); + + for (int i=0; i0; ++i) + { + int shape = index[i]; + double mse = refine(t, shape, &all[shape].endpts[0], tempblock); + if (mse < msebest) + { + memcpy(block, tempblock, sizeof(tempblock)); + msebest = mse; + } + } + return msebest; +} + diff --git a/src/nvtt/bc7/avpcl_mode3.cpp b/src/nvtt/bc7/avpcl_mode3.cpp new file mode 100644 index 0000000..cf19759 --- /dev/null +++ b/src/nvtt/bc7/avpcl_mode3.cpp @@ -0,0 +1,1061 @@ +/* +Copyright 2007 nVidia, Inc. 
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis + +// x1000 777.1x4 64p 2bi (30b) + +#include "bits.h" +#include "tile.h" +#include "avpcl.h" +#include "arvo/Vec4.h" +#include "arvo/Matrix.h" +#include "arvo/SVD.h" +#include "utils.h" +#include "endpts.h" + +#include + +#include "shapes_two.h" + +using namespace ArvoMath; + +#define NLSBMODES 4 // number of different lsb modes per region. since we have two .1 per region, that can have 4 values + +#define NINDICES 4 +#define INDEXBITS 2 +#define HIGH_INDEXBIT (1<<(INDEXBITS-1)) +#define DENOM (NINDICES-1) +#define BIAS (DENOM/2) + +// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like? +// i.e. can we search shapes in a particular order so we can see the global error minima easily and +// stop without having to touch all shapes? 
+ +#define POS_TO_X(pos) ((pos)&3) +#define POS_TO_Y(pos) (((pos)>>2)&3) + +#define NBITSIZES (NREGIONS*2) +#define ABITINDEX(region) (2*(region)+0) +#define BBITINDEX(region) (2*(region)+1) + +struct ChanBits +{ + int nbitsizes[NBITSIZES]; // bitsizes for one channel +}; + +struct Pattern +{ + ChanBits chan[NCHANNELS_RGB];// bit patterns used per channel + int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed + int mode; // associated mode value + int modebits; // number of mode bits + char *encoding; // verilog description of encoding for this mode +}; + +#define NPATTERNS 1 +#define NREGIONS 2 + +static Pattern patterns[NPATTERNS] = +{ + // red green blue xfm mode mb + 7,7,7,7, 7,7,7,7, 7,7,7,7, 0, 0x8, 4, "", +}; + +struct RegionPrec +{ + int endpt_a_prec[NCHANNELS_RGB]; + int endpt_b_prec[NCHANNELS_RGB]; +}; + +struct PatternPrec +{ + RegionPrec region_precs[NREGIONS]; +}; + + +// this is the precision for each channel and region +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! +static PatternPrec pattern_precs[NPATTERNS] = +{ + 7,7,7, 7,7,7, 7,7,7, 7,7,7, +}; + +// return # of bits needed to store n. 
handle signed or unsigned cases properly +static int nbits(int n, bool issigned) +{ + int nb; + if (n==0) + return 0; // no bits needed for 0, signed or not + else if (n > 0) + { + for (nb=0; n; ++nb, n>>=1) ; + return nb + (issigned?1:0); + } + else + { + assert (issigned); + for (nb=0; n<-1; ++nb, n>>=1) ; + return nb + 1; + } +} + +static void transform_forward(IntEndptsRGB_2 ep[NREGIONS]) +{ + assert(0); +} + +static void transform_inverse(IntEndptsRGB_2 ep[NREGIONS]) +{ + assert(0); +} + +// endpoints are 888,888; reduce to 777,777 and put the lsb bit majority in compr_bits +static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpts) +{ + int onescnt; + + onescnt = 0; + for (int j=0; j> 1; + assert (compr_endpts.A[j] < 128); + } + compr_endpts.a_lsb = onescnt >= 2; + + onescnt = 0; + for (int j=0; j> 1; + assert (compr_endpts.B[j] < 128); + } + compr_endpts.b_lsb = onescnt >= 2; +} + +static void uncompress_one(const IntEndptsRGB_2& compr_endpts, IntEndptsRGB& endpts) +{ + for (int j=0; j= 0 && pat_index < NPATTERNS); + assert (in.getptr() == patterns[pat_index].modebits); + + shapeindex = in.read(SHAPEBITS); + p = patterns[pat_index]; + + for (int j=0; j 0; ++j) + { + err = Utils::metric4(colors[i], palette[j]); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[i] = j; + } + } + toterr += besterr; + + // check for early exit + if (toterr > current_err) + { + // fill out bogus index values so it's initialized at least + for (int k = i; k < np; ++k) + indices[k] = -1; + + return DBL_MAX; + } + } + return toterr; +} + +static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endpts[NREGIONS], const PatternPrec &pattern_prec, + int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS]) +{ + // build list of possibles + Vec4 palette[NREGIONS][NINDICES]; + + for (int region = 0; region < NREGIONS; ++region) + { + 
generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]); + toterr[region] = 0; + } + + Vec4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + double err, besterr = DBL_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + err = Utils::metric4(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[y][x] = i; + } + } + toterr[region] += besterr; + } +} + +// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's +// this function returns either old_err or a value smaller (if it was successful in improving the error) +static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts, + double old_err, int do_b, int indices[Tile::TILE_TOTAL]) +{ + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts + + IntEndptsRGB_2 temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i>= 1) + { + bool improved = false; + for (int sign = -1; sign <= 1; sign += 2) + { + if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } + + float err = map_colors(colors, np, temp_endpts, region_prec, min_err, temp_indices); + + if (err < min_err) + { + improved = true; + min_err = err; + beststep = sign * step; + for (int i=0; i 5000 perturb endpoints 50% of precision +// if err > 1000 25% +// if err > 200 
12.5% +// if err > 40 6.25% +// for np = 16 -- adjust error thresholds as a function of np +// always ensure endpoint ordering is preserved (no need to overlap the scan) +// if orig_err returned from this is less than its input value, then indices[] will contain valid indices +static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, double &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL]) +{ + IntEndptsRGB_2 temp_endpts; + double best_err = orig_err; + int aprec = region_prec.endpt_a_prec[ch]; + int bprec = region_prec.endpt_b_prec[ch]; + int good_indices[Tile::TILE_TOTAL]; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i 5000.0*thr_scale) { adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; } + else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } + else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } + else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } + adelta = MAX(adelta, 3); + bdelta = MAX(bdelta, 3); + +#ifdef DISABLE_EXHAUSTIVE + adelta = bdelta = 3; +#endif + + temp_endpts = opt_endpts; + + // ok figure out the range of A and B + int alow = MAX(0, opt_endpts.A[ch] - adelta); + int ahigh = MIN((1<= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? rgb1 : rgb0 += delta + initial_err = err + */ + IntEndptsRGB_2 new_a, new_b; + IntEndptsRGB_2 new_endpt; + int do_b; + int orig_indices[Tile::TILE_TOTAL]; + int new_indices[Tile::TILE_TOTAL]; + int temp_indices0[Tile::TILE_TOTAL]; + int temp_indices1[Tile::TILE_TOTAL]; + + // now optimize each channel separately + // for the first error improvement, we save the indices. 
then, for any later improvement, we compare the indices + // if they differ, we restart the loop (which then falls back to looking for a first improvement.) + for (int ch = 0; ch < NCHANNELS_RGB; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A + float err1 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B + + if (err0 < err1) + { + if (err0 >= opt_err) + continue; + + for (int i=0; i= opt_err) + continue; + + for (int i=0; i= opt_err) + break; + + for (int i=0; i> 1) & 1; + + // make sure we have a valid error for temp_in + // we use DBL_MAX here because we want an accurate temp_in_err, no shortcuts + // (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the DBL_MAX position) + double temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], DBL_MAX, temp_indices); + + // now try to optimize these endpoints + double temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); + + // if we find an improvement, update the best so far and correct the output endpoints and errors + if (temp_out_err < best_err) + { + best_err = temp_out_err; + opt_err[region] = temp_out_err; + opt_endpts[region] = temp_out; + } + } + } +} + +/* optimization algorithm + for each pattern + convert endpoints using pattern precision + assign indices and get initial error + compress indices (and possibly reorder endpoints) + transform endpoints + if transformed endpoints fit pattern + get original endpoints back + optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better + compress new indices + transform new endpoints + if new endpoints fit 
pattern AND if error is improved + emit compressed block with new data + else + emit compressed block with original data // to try to preserve maximum endpoint precision +*/ + +static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block) +{ + double orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; + IntEndptsRGB_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS]; + int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W]; + + for (int sp = 0; sp < NPATTERNS; ++sp) + { + quantize_endpts(endpts, pattern_precs[sp], orig_endpts); + assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err); + swap_indices(orig_endpts, orig_indices, shapeindex_best); + if (patterns[sp].transformed) + transform_forward(orig_endpts); + // apply a heuristic here -- we check if the endpoints fit before we try to optimize them. + // the assumption made is that if they don't fit now, they won't fit after optimizing. 
+ if (endpts_fit(orig_endpts, patterns[sp])) + { + if (patterns[sp].transformed) + transform_inverse(orig_endpts); + optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts); + assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err); + for (int i=0; i RGBA_MAX) v.X() = RGBA_MAX; + if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; + if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; + if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; + if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; + v.W() = RGBA_MAX; +} + +static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vec4 palette[NREGIONS][NINDICES]) +{ + for (int region = 0; region < NREGIONS; ++region) + for (int i = 0; i < NINDICES; ++i) + palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, 0.0, float(DENOM)); +} + +// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined +static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) +{ + // build list of possibles + Vec4 palette[NREGIONS][NINDICES]; + + generate_palette_unquantized(endpts, palette); + + double toterr = 0; + Vec4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + double err, besterr = DBL_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + err = Utils::metric4(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error increased, so we're done searching. this works for most norms. 
+ break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; +} + +static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) +{ + for (int region=0; region maxp) maxp = dp; + } + + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + minp*direction; + endpts[region].B = mean + maxp*direction; + + // clamp endpoints + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped + clamp(endpts[region].A); + clamp(endpts[region].B); + } + + return map_colors(tile, shapeindex, endpts); +} + +static void swap(double *list1, int *list2, int i, int j) +{ + double t = list1[i]; list1[i] = list1[j]; list1[j] = t; + int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; +} + +double AVPCL::compress_mode3(const Tile &t, char *block) +{ + // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES + // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out + const int NITEMS=NSHAPES/4; + + // pick the best NITEMS shapes and refine these. + struct { + FltEndpts endpts[NREGIONS]; + } all[NSHAPES]; + double roughmse[NSHAPES]; + int index[NSHAPES]; + char tempblock[AVPCL::BLOCKSIZE]; + double msebest = DBL_MAX; + + for (int i=0; i roughmse[j]) + swap(roughmse, index, i, j); + + for (int i=0; i0; ++i) + { + int shape = index[i]; + double mse = refine(t, shape, &all[shape].endpts[0], tempblock); + if (mse < msebest) + { + memcpy(block, tempblock, sizeof(tempblock)); + msebest = mse; + } + } + return msebest; +} + diff --git a/src/nvtt/bc7/avpcl_mode4.cpp b/src/nvtt/bc7/avpcl_mode4.cpp new file mode 100644 index 0000000..cd6a5e5 --- /dev/null +++ b/src/nvtt/bc7/avpcl_mode4.cpp @@ -0,0 +1,1220 @@ +/* +Copyright 2007 nVidia, Inc. 
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis + +// x10000 2r 1i 555x2 6x2 2bi 3bi + +#include "bits.h" +#include "tile.h" +#include "avpcl.h" +#include "arvo/Vec4.h" +#include "arvo/Matrix.h" +#include "arvo/SVD.h" +#include "utils.h" +#include "endpts.h" + +#include + +using namespace ArvoMath; + +// there are 2 index arrays. INDEXMODE selects between the arrays being 2 & 3 bits or 3 & 2 bits +// array 0 is always the RGB array and array 1 is always the A array +#define NINDEXARRAYS 2 +#define INDEXARRAY_RGB 0 +#define INDEXARRAY_A 1 +#define INDEXARRAY_2BITS(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXARRAY_A : INDEXARRAY_RGB) +#define INDEXARRAY_3BITS(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_3BITS) ? INDEXARRAY_A : INDEXARRAY_RGB) + +#define NINDICES3 8 +#define INDEXBITS3 3 +#define HIGH_INDEXBIT3 (1<<(INDEXBITS3-1)) +#define DENOM3 (NINDICES3-1) +#define BIAS3 (DENOM3/2) + +#define NINDICES2 4 +#define INDEXBITS2 2 +#define HIGH_INDEXBIT2 (1<<(INDEXBITS2-1)) +#define DENOM2 (NINDICES2-1) +#define BIAS2 (DENOM2/2) + +#define NINDICES_RGB(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES3 : NINDICES2) +#define INDEXBITS_RGB(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS3 : INDEXBITS2) +#define HIGH_INDEXBIT_RGB(indexmode)((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? 
HIGH_INDEXBIT3 : HIGH_INDEXBIT2) +#define DENOM_RGB(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM3 : DENOM2) +#define BIAS_RGB(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS3 : BIAS2) + +#define NINDICES_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES2 : NINDICES3) +#define INDEXBITS_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS2 : INDEXBITS3) +#define HIGH_INDEXBIT_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT2 : HIGH_INDEXBIT3) +#define DENOM_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM2 : DENOM3) +#define BIAS_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS2 : BIAS3) + +#define NSHAPES 1 + +static int shapes[NSHAPES] = +{ + 0x0000, +}; + +#define REGION(x,y,shapeindex) ((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0) + +#define NREGIONS 1 // keep the region stuff in just in case... + +// encoded index compression location: region 0 is always at 0,0. + +#define NBITSIZES 2 // one endpoint pair + +struct ChanBits +{ + int nbitsizes[NBITSIZES]; // bitsizes for one channel +}; + +struct Pattern +{ + ChanBits chan[NCHANNELS_RGBA];// bit patterns used per channel + int transform_mode; // x0 means alpha channel not transformed, x1 otherwise. 0x rgb not transformed, 1x otherwise. 
+ int mode; // associated mode value + int modebits; // number of mode bits + char *encoding; // verilog description of encoding for this mode +}; + +#define TRANSFORM_MODE_ALPHA 1 +#define TRANSFORM_MODE_RGB 2 + +#define NPATTERNS 1 + +static Pattern patterns[NPATTERNS] = +{ + // red green blue alpha xfm mode mb encoding + 5,5, 5,5, 5,5, 6,6, 0x0, 0x10, 5, "", +}; + +struct RegionPrec +{ + int endpt_a_prec[NCHANNELS_RGBA]; + int endpt_b_prec[NCHANNELS_RGBA]; +}; + +struct PatternPrec +{ + RegionPrec region_precs[NREGIONS]; +}; + +// this is the precision for each channel and region +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! +static PatternPrec pattern_precs[NPATTERNS] = +{ + 5,5,5,6, 5,5,5,6, +}; + + +// return # of bits needed to store n. handle signed or unsigned cases properly +static int nbits(int n, bool issigned) +{ + int nb; + if (n==0) + return 0; // no bits needed for 0, signed or not + else if (n > 0) + { + for (nb=0; n; ++nb, n>>=1) ; + return nb + (issigned?1:0); + } + else + { + assert (issigned); + for (nb=0; n<-1; ++nb, n>>=1) ; + return nb + 1; + } +} + +#define R_0 ep[0].A[i] +#define R_1 ep[0].B[i] + +static void transform_forward(int transform_mode, IntEndptsRGBA ep[NREGIONS]) +{ + int i; + + if (transform_mode & TRANSFORM_MODE_RGB) + for (i=CHANNEL_R; i> 2) & 3 and x = index & 3 +static void swap_indices(int shapeindex, int indexmode, IntEndptsRGBA endpts[NREGIONS], int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W]) +{ + int index_positions[NREGIONS]; + + index_positions[0] = 0; // since WLOG we have the high bit of the shapes at 0 + + for (int region = 0; region < NREGIONS; ++region) + { + int x = index_positions[region] & 3; + int y = (index_positions[region] >> 2) & 3; + assert(REGION(x,y,shapeindex) == region); // double check the table + + // swap RGB + if (indices[INDEXARRAY_RGB][y][x] & HIGH_INDEXBIT_RGB(indexmode)) + { + // high bit is set, swap the endpts and 
indices for this region + int t; + for (int i=CHANNEL_R; i<=CHANNEL_B; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; } + + for (int y = 0; y < Tile::TILE_H; y++) + for (int x = 0; x < Tile::TILE_W; x++) + if (REGION(x,y,shapeindex) == region) + indices[INDEXARRAY_RGB][y][x] = NINDICES_RGB(indexmode) - 1 - indices[INDEXARRAY_RGB][y][x]; + } + + // swap A + if (indices[INDEXARRAY_A][y][x] & HIGH_INDEXBIT_A(indexmode)) + { + // high bit is set, swap the endpts and indices for this region + int t; + for (int i=CHANNEL_A; i<=CHANNEL_A; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; } + + for (int y = 0; y < Tile::TILE_H; y++) + for (int x = 0; x < Tile::TILE_W; x++) + if (REGION(x,y,shapeindex) == region) + indices[INDEXARRAY_A][y][x] = NINDICES_A(indexmode) - 1 - indices[INDEXARRAY_A][y][x]; + } + } +} + +static bool endpts_fit(IntEndptsRGBA endpts[NREGIONS], const Pattern &p) +{ + return true; +} + +static void write_header(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, int rotatemode, int indexmode, Bits &out) +{ + // ignore shapeindex + out.write(p.mode, p.modebits); + out.write(rotatemode, ROTATEMODE_BITS); + out.write(indexmode, INDEXMODE_BITS); + for (int i=0; i= 0 && pat_index < NPATTERNS); + assert (in.getptr() == patterns[pat_index].modebits); + + p = patterns[pat_index]; + + shapeindex = 0; // we don't have any + + rotatemode = in.read(ROTATEMODE_BITS); + indexmode = in.read(INDEXMODE_BITS); + for (int i=0; i>2][i&3], INDEXBITS2 - (i==0?1:0)); // write i..[1:0] or i..[0] + + // then the 3 bit indices + assert ((indices[INDEXARRAY_3BITS(indexmode)][0][0] & HIGH_INDEXBIT3) == 0); + for (int i = 0; i < Tile::TILE_TOTAL; ++i) + out.write(indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3], INDEXBITS3 - (i==0?1:0)); // write i..[2:0] or i..[1:0] +} + +static void read_indices(Bits &in, int shapeindex, int indexmode, int 
indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W]) +{ + // the indices we shorten is always index 0 + + // do the 2 bit indices first + for (int i = 0; i < Tile::TILE_TOTAL; ++i) + indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS2 - (i==0?1:0)); // read i..[1:0] or i..[0] + + // then the 3 bit indices + for (int i = 0; i < Tile::TILE_TOTAL; ++i) + indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS3 - (i==0?1:0)); // read i..[1:0] or i..[0] +} + +static void emit_block(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int rotatemode, int indexmode, char *block) +{ + Bits out(block, AVPCL::BITSIZE); + + write_header(endpts, shapeindex, p, rotatemode, indexmode, out); + + write_indices(indices, shapeindex, indexmode, out); + + assert(out.getptr() == AVPCL::BITSIZE); +} + +static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const RegionPrec ®ion_prec, int indexmode, Vec3 palette_rgb[NINDICES3], float palette_a[NINDICES3]) +{ + // scale endpoints for RGB + int a, b; + + a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]); + b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]); + + // interpolate R + for (int i = 0; i < NINDICES_RGB(indexmode); ++i) + palette_rgb[i].X() = PALETTE_LERP(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)); + + a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]); + b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]); + + // interpolate G + for (int i = 0; i < NINDICES_RGB(indexmode); ++i) + palette_rgb[i].Y() = PALETTE_LERP(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)); + + a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]); + b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]); + + // interpolate B + for (int i = 0; i < NINDICES_RGB(indexmode); ++i) + palette_rgb[i].Z() = PALETTE_LERP(a, b, i, BIAS_RGB(indexmode), 
DENOM_RGB(indexmode)); + + a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]); + b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]); + + // interpolate A + for (int i = 0; i < NINDICES_A(indexmode); ++i) + palette_a[i] = PALETTE_LERP(a, b, i, BIAS_A(indexmode), DENOM_A(indexmode)); + +} + +static void sign_extend(Pattern &p, IntEndptsRGBA endpts[NREGIONS]) +{ + for (int i=0; i 0; ++j) + { + err = Utils::metric1(a, palette_a[j], rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + palette_alpha = palette_a[j]; + indices[INDEXARRAY_A][i] = j; + } + } + toterr += besterr; // squared-error norms are additive since we don't do the square root + + // do RGB index + besterr = DBL_MAX; + for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j) + { + err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) : + Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[j], palette_alpha); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_RGB][i] = j; + } + } + toterr += besterr; + if (toterr > current_besterr) + { + // fill out bogus index values so it's initialized at least + for (int k = i; k < np; ++k) + { + indices[INDEXARRAY_RGB][k] = -1; + indices[INDEXARRAY_A][k] = -1; + } + return DBL_MAX; + } + } + else + { + // do RGB index + besterr = DBL_MAX; + int bestindex; + for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j) + { + err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) : + Utils::metric3premult_alphain(rgb, palette_rgb[j], rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + bestindex = j; + indices[INDEXARRAY_RGB][i] = j; + } + } + palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? 
(palette_rgb[bestindex]).X() : + (rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).Y() : + (rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).Z() : (assert(0),0); + toterr += besterr; + + // do A index + besterr = DBL_MAX; + for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j) + { + err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[j], rotatemode) : + Utils::metric1premult(a, tile_alpha, palette_a[j], palette_alpha, rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_A][i] = j; + } + } + toterr += besterr; // squared-error norms are additive since we don't do the square root + if (toterr > current_besterr) + { + // fill out bogus index values so it's initialized at least + for (int k = i; k < np; ++k) + { + indices[INDEXARRAY_RGB][k] = -1; + indices[INDEXARRAY_A][k] = -1; + } + return DBL_MAX; + } + } + } + return toterr; +} + +// assign indices given a tile, shape, and quantized endpoints, return toterr for each region +static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int indexmode, IntEndptsRGBA endpts[NREGIONS], const PatternPrec &pattern_prec, + int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS]) +{ + Vec3 palette_rgb[NREGIONS][NINDICES3]; // could be nindices2 + float palette_a[NREGIONS][NINDICES3]; // could be nindices2 + + for (int region = 0; region < NREGIONS; ++region) + { + generate_palette_quantized_rgb_a(endpts[region], pattern_prec.region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]); + toterr[region] = 0; + } + + Vec3 rgb; + float a; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + double err, besterr; + float palette_alpha = 0, tile_alpha = 0; + + rgb.X() = (tile.data[y][x]).X(); + rgb.Y() = (tile.data[y][x]).Y(); + rgb.Z() = (tile.data[y][x]).Z(); 
+ a = (tile.data[y][x]).W(); + + if(AVPCL::flag_premult) + tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (tile.data[y][x]).X() : + (rotatemode == ROTATEMODE_RGBA_RABG) ? (tile.data[y][x]).Y() : + (rotatemode == ROTATEMODE_RGBA_RGAB) ? (tile.data[y][x]).Z() : (tile.data[y][x]).W(); + + // compute the two indices separately + // if we're doing premultiplied alpha, we need to choose first the index that + // determines the alpha value, and then do the other index + + if (rotatemode == ROTATEMODE_RGBA_RGBA) + { + // do A index first as it has the alpha + besterr = DBL_MAX; + for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i) + { + err = Utils::metric1(a, palette_a[region][i], rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_A][y][x] = i; + palette_alpha = palette_a[region][i]; + } + } + toterr[region] += besterr; // squared-error norms are additive since we don't do the square root + + // do RGB index + besterr = DBL_MAX; + for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i) + { + err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) : + Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[region][i], palette_alpha); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_RGB][y][x] = i; + } + } + toterr[region] += besterr; + } + else + { + // do RGB index first as it has the alpha + besterr = DBL_MAX; + int bestindex; + for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i) + { + err = !AVPCL::flag_premult ? 
Utils::metric3(rgb, palette_rgb[region][i], rotatemode) : + Utils::metric3premult_alphain(rgb, palette_rgb[region][i], rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_RGB][y][x] = i; + bestindex = i; + } + } + palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).X() : + (rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).Y() : + (rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).Z() : (assert(0),0); + toterr[region] += besterr; + + // do A index + besterr = DBL_MAX; + for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i) + { + err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[region][i], rotatemode) : + Utils::metric1premult(a, tile_alpha, palette_a[region][i], palette_alpha, rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_A][y][x] = i; + } + } + toterr[region] += besterr; // squared-error norms are additive since we don't do the square root + } + } +} + +// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's +// this function returns either old_err or a value smaller (if it was successful in improving the error) +static double perturb_one(const Vec4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec ®ion_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts, + double old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) +{ + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts + + IntEndptsRGBA temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; + int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; + + for (int j=0; j>= 1) + { + bool improved = false; + 
for (int sign = -1; sign <= 1; sign += 2) + { + if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } + + float err = map_colors(colors, np, rotatemode, indexmode, temp_endpts, region_prec, min_err, temp_indices); + + if (err < min_err) + { + improved = true; + min_err = err; + beststep = sign * step; + for (int j=0; j 5000 perturb endpoints 50% of precision +// if err > 1000 25% +// if err > 200 12.5% +// if err > 40 6.25% +// for np = 16 -- adjust error thresholds as a function of np +// always ensure endpoint ordering is preserved (no need to overlap the scan) +static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec ®ion_prec, double orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) +{ + IntEndptsRGBA temp_endpts; + double best_err = orig_err; + int aprec = region_prec.endpt_a_prec[ch]; + int bprec = region_prec.endpt_b_prec[ch]; + int good_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; + int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; + + for (int j=0; j 5000.0*thr_scale) { adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; } + else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } + else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } + else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } + adelta = MAX(adelta, 3); + bdelta = MAX(bdelta, 3); + +#ifdef DISABLE_EXHAUSTIVE + adelta = bdelta = 3; +#endif + + temp_endpts = opt_endpts; + + // ok figure out the range of A and B + int alow = MAX(0, opt_endpts.A[ch] - adelta); + int ahigh = MIN((1<= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) 
break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? rgb1 : rgb0 += delta + initial_err = err + */ + IntEndptsRGBA new_a, new_b; + IntEndptsRGBA new_endpt; + int do_b; + int orig_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; + int new_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; + int temp_indices0[NINDEXARRAYS][Tile::TILE_TOTAL]; + int temp_indices1[NINDEXARRAYS][Tile::TILE_TOTAL]; + + // now optimize each channel separately + for (int ch = 0; ch < NCHANNELS_RGBA; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A + float err1 = perturb_one(colors, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B + + if (err0 < err1) + { + if (err0 >= opt_err) + continue; + + for (int j=0; j= opt_err) + continue; + + for (int j=0; j= opt_err) + break; + + for (int j=0; j RGBA_MAX) v.X() = RGBA_MAX; + if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; + if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; + if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; + if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; + if (v.W() < RGBA_MIN) v.W() = RGBA_MIN; + if (v.W() > RGBA_MAX) v.W() = RGBA_MAX; +} + +// compute initial endpoints for the "RGB" portion and the "A" portion. +// Note these channels may have been rotated. 
+static void rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) +{ + for (int region=0; region maxp) maxp = dp; + + dp = alpha[i]; + if (dp < mina) mina = dp; + if (dp > maxa) maxa = dp; + } + + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + minp*direction; + endpts[region].B = mean + maxp*direction; + endpts[region].A.W() = mean.W() + mina; + endpts[region].B.W() = mean.W() + maxa; + + // clamp endpoints + // WORK: is [0,255] the right range, or should it be [0,255.5) or even [0,256) ? + clamp(endpts[region].A); + clamp(endpts[region].B); + } +} + +double AVPCL::compress_mode4(const Tile &t, char *block) +{ + FltEndpts endpts[NREGIONS]; + char tempblock[AVPCL::BLOCKSIZE]; + double msebest = DBL_MAX; + int shape = 0; + Tile t1; + + // try all rotations. refine tries the 2 different indexings. + for (int r = 0; r < NROTATEMODES && msebest > 0; ++r) + { + rotate_tile(t, r, t1); + rough(t1, shape, endpts); + for (int i = 0; i < NINDEXMODES && msebest > 0; ++i) + { + double mse = refine(t1, shape, r, i, endpts, tempblock); + if (mse < msebest) + { + memcpy(block, tempblock, sizeof(tempblock)); + msebest = mse; + } + } + } + return msebest; +} diff --git a/src/nvtt/bc7/avpcl_mode5.cpp b/src/nvtt/bc7/avpcl_mode5.cpp new file mode 100644 index 0000000..d7f04da --- /dev/null +++ b/src/nvtt/bc7/avpcl_mode5.cpp @@ -0,0 +1,1222 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ +See the License for the specific language governing permissions and limitations under the License. +*/ + +// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis + +// x100000 2r 777x2 8x2 2bi 2bi + +#include "bits.h" +#include "tile.h" +#include "avpcl.h" +#include "arvo/Vec4.h" +#include "arvo/Matrix.h" +#include "arvo/SVD.h" +#include "utils.h" +#include "endpts.h" + +#include + +using namespace ArvoMath; + +// there are 2 index arrays. INDEXMODE selects between the arrays being 2 & 3 bits or 3 & 2 bits +// array 0 is always the RGB array and array 1 is always the A array +#define NINDEXARRAYS 2 +#define INDEXARRAY_RGB 0 +#define INDEXARRAY_A 1 +#define INDEXARRAY_2BITS(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXARRAY_A : INDEXARRAY_RGB) +#define INDEXARRAY_3BITS(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_3BITS) ? INDEXARRAY_A : INDEXARRAY_RGB) + +#define NINDICES3 4 +#define INDEXBITS3 2 +#define HIGH_INDEXBIT3 (1<<(INDEXBITS3-1)) +#define DENOM3 (NINDICES3-1) +#define BIAS3 (DENOM3/2) + +#define NINDICES2 4 +#define INDEXBITS2 2 +#define HIGH_INDEXBIT2 (1<<(INDEXBITS2-1)) +#define DENOM2 (NINDICES2-1) +#define BIAS2 (DENOM2/2) + +#define NINDICES_RGB(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES3 : NINDICES2) +#define INDEXBITS_RGB(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS3 : INDEXBITS2) +#define HIGH_INDEXBIT_RGB(indexmode)((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT3 : HIGH_INDEXBIT2) +#define DENOM_RGB(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM3 : DENOM2) +#define BIAS_RGB(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS3 : BIAS2) + +#define NINDICES_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES2 : NINDICES3) +#define INDEXBITS_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? 
INDEXBITS2 : INDEXBITS3) +#define HIGH_INDEXBIT_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT2 : HIGH_INDEXBIT3) +#define DENOM_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM2 : DENOM3) +#define BIAS_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS2 : BIAS3) + +#define NSHAPES 1 + +static int shapes[NSHAPES] = +{ + 0x0000, +}; + +#define REGION(x,y,shapeindex) ((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0) + +#define NREGIONS 1 // keep the region stuff in just in case... + +// encoded index compression location: region 0 is always at 0,0. + +#define NBITSIZES 2 // one endpoint pair + +struct ChanBits +{ + int nbitsizes[NBITSIZES]; // bitsizes for one channel +}; + +struct Pattern +{ + ChanBits chan[NCHANNELS_RGBA];// bit patterns used per channel + int transform_mode; // x0 means alpha channel not transformed, x1 otherwise. 0x rgb not transformed, 1x otherwise. + int mode; // associated mode value + int modebits; // number of mode bits + char *encoding; // verilog description of encoding for this mode +}; + +#define TRANSFORM_MODE_ALPHA 1 +#define TRANSFORM_MODE_RGB 2 + +#define NPATTERNS 1 + +static Pattern patterns[NPATTERNS] = +{ + // red green blue alpha xfm mode mb encoding + 7,7, 7,7, 7,7, 8,8, 0x0, 0x20, 6, "", +}; + +struct RegionPrec +{ + int endpt_a_prec[NCHANNELS_RGBA]; + int endpt_b_prec[NCHANNELS_RGBA]; +}; + +struct PatternPrec +{ + RegionPrec region_precs[NREGIONS]; +}; + +// this is the precision for each channel and region +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! +static PatternPrec pattern_precs[NPATTERNS] = +{ + 7,7,7,8, 7,7,7,8, +}; + + +// return # of bits needed to store n. 
handle signed or unsigned cases properly +static int nbits(int n, bool issigned) +{ + int nb; + if (n==0) + return 0; // no bits needed for 0, signed or not + else if (n > 0) + { + for (nb=0; n; ++nb, n>>=1) ; + return nb + (issigned?1:0); + } + else + { + assert (issigned); + for (nb=0; n<-1; ++nb, n>>=1) ; + return nb + 1; + } +} + +#define R_0 ep[0].A[i] +#define R_1 ep[0].B[i] + +static void transform_forward(int transform_mode, IntEndptsRGBA ep[NREGIONS]) +{ + int i; + + if (transform_mode & TRANSFORM_MODE_RGB) + for (i=CHANNEL_R; i> 2) & 3 and x = index & 3 +static void swap_indices(int shapeindex, int indexmode, IntEndptsRGBA endpts[NREGIONS], int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W]) +{ + int index_positions[NREGIONS]; + + index_positions[0] = 0; // since WLOG we have the high bit of the shapes at 0 + + for (int region = 0; region < NREGIONS; ++region) + { + int x = index_positions[region] & 3; + int y = (index_positions[region] >> 2) & 3; + assert(REGION(x,y,shapeindex) == region); // double check the table + + // swap RGB + if (indices[INDEXARRAY_RGB][y][x] & HIGH_INDEXBIT_RGB(indexmode)) + { + // high bit is set, swap the endpts and indices for this region + int t; + for (int i=CHANNEL_R; i<=CHANNEL_B; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; } + + for (int y = 0; y < Tile::TILE_H; y++) + for (int x = 0; x < Tile::TILE_W; x++) + if (REGION(x,y,shapeindex) == region) + indices[INDEXARRAY_RGB][y][x] = NINDICES_RGB(indexmode) - 1 - indices[INDEXARRAY_RGB][y][x]; + } + + // swap A + if (indices[INDEXARRAY_A][y][x] & HIGH_INDEXBIT_A(indexmode)) + { + // high bit is set, swap the endpts and indices for this region + int t; + for (int i=CHANNEL_A; i<=CHANNEL_A; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; } + + for (int y = 0; y < Tile::TILE_H; y++) + for (int x = 0; x < Tile::TILE_W; x++) + if (REGION(x,y,shapeindex) == region) + 
indices[INDEXARRAY_A][y][x] = NINDICES_A(indexmode) - 1 - indices[INDEXARRAY_A][y][x]; + } + } +} + +static bool endpts_fit(IntEndptsRGBA endpts[NREGIONS], const Pattern &p) +{ + return true; +} + +static void write_header(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, int rotatemode, int indexmode, Bits &out) +{ + // ignore shapeindex + out.write(p.mode, p.modebits); + out.write(rotatemode, ROTATEMODE_BITS); +// out.write(indexmode, INDEXMODE_BITS); + for (int i=0; i= 0 && pat_index < NPATTERNS); + assert (in.getptr() == patterns[pat_index].modebits); + + p = patterns[pat_index]; + + shapeindex = 0; // we don't have any + + rotatemode = in.read(ROTATEMODE_BITS); + + indexmode = 0; // we don't have any + + for (int i=0; i>2][i&3], INDEXBITS2 - (i==0?1:0)); // write i..[1:0] or i..[0] + + // then the 3 bit indices + assert ((indices[INDEXARRAY_3BITS(indexmode)][0][0] & HIGH_INDEXBIT3) == 0); + for (int i = 0; i < Tile::TILE_TOTAL; ++i) + out.write(indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3], INDEXBITS3 - (i==0?1:0)); // write i..[2:0] or i..[1:0] +} + +static void read_indices(Bits &in, int shapeindex, int indexmode, int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W]) +{ + // the indices we shorten is always index 0 + + // do the 2 bit indices first + for (int i = 0; i < Tile::TILE_TOTAL; ++i) + indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS2 - (i==0?1:0)); // read i..[1:0] or i..[0] + + // then the 3 bit indices + for (int i = 0; i < Tile::TILE_TOTAL; ++i) + indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS3 - (i==0?1:0)); // read i..[1:0] or i..[0] +} + +static void emit_block(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int rotatemode, int indexmode, char *block) +{ + Bits out(block, AVPCL::BITSIZE); + + write_header(endpts, shapeindex, p, rotatemode, indexmode, out); + + write_indices(indices, shapeindex, 
indexmode, out); + + assert(out.getptr() == AVPCL::BITSIZE); +} + +static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const RegionPrec ®ion_prec, int indexmode, Vec3 palette_rgb[NINDICES3], float palette_a[NINDICES3]) +{ + // scale endpoints for RGB + int a, b; + + a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]); + b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]); + + // interpolate R + for (int i = 0; i < NINDICES_RGB(indexmode); ++i) + palette_rgb[i].X() = PALETTE_LERP(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)); + + a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]); + b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]); + + // interpolate G + for (int i = 0; i < NINDICES_RGB(indexmode); ++i) + palette_rgb[i].Y() = PALETTE_LERP(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)); + + a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]); + b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]); + + // interpolate B + for (int i = 0; i < NINDICES_RGB(indexmode); ++i) + palette_rgb[i].Z() = PALETTE_LERP(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)); + + a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]); + b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]); + + // interpolate A + for (int i = 0; i < NINDICES_A(indexmode); ++i) + palette_a[i] = PALETTE_LERP(a, b, i, BIAS_A(indexmode), DENOM_A(indexmode)); +} + +static void sign_extend(Pattern &p, IntEndptsRGBA endpts[NREGIONS]) +{ + for (int i=0; i 0; ++j) + { + err = Utils::metric1(a, palette_a[j], rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + palette_alpha = palette_a[j]; + indices[INDEXARRAY_A][i] = j; + } + } + toterr += besterr; // squared-error norms are additive since we don't do the square root + + // do RGB index + besterr = DBL_MAX; + for (int j = 0; j < NINDICES_RGB(indexmode) && 
besterr > 0; ++j) + { + err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) : + Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[j], palette_alpha); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_RGB][i] = j; + } + } + toterr += besterr; + if (toterr > current_besterr) + { + // fill out bogus index values so it's initialized at least + for (int k = i; k < np; ++k) + { + indices[INDEXARRAY_RGB][k] = -1; + indices[INDEXARRAY_A][k] = -1; + } + return DBL_MAX; + } + } + else + { + // do RGB index + besterr = DBL_MAX; + int bestindex; + for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j) + { + err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) : + Utils::metric3premult_alphain(rgb, palette_rgb[j], rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + bestindex = j; + indices[INDEXARRAY_RGB][i] = j; + } + } + palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).X() : + (rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).Y() : + (rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).Z() : (assert(0),0); + toterr += besterr; + + // do A index + besterr = DBL_MAX; + for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j) + { + err = !AVPCL::flag_premult ? 
Utils::metric1(a, palette_a[j], rotatemode) : + Utils::metric1premult(a, tile_alpha, palette_a[j], palette_alpha, rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_A][i] = j; + } + } + toterr += besterr; // squared-error norms are additive since we don't do the square root + if (toterr > current_besterr) + { + // fill out bogus index values so it's initialized at least + for (int k = i; k < np; ++k) + { + indices[INDEXARRAY_RGB][k] = -1; + indices[INDEXARRAY_A][k] = -1; + } + return DBL_MAX; + } + } + } + return toterr; +} + +// assign indices given a tile, shape, and quantized endpoints, return toterr for each region +static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int indexmode, IntEndptsRGBA endpts[NREGIONS], const PatternPrec &pattern_prec, + int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS]) +{ + Vec3 palette_rgb[NREGIONS][NINDICES3]; // could be nindices2 + float palette_a[NREGIONS][NINDICES3]; // could be nindices2 + + for (int region = 0; region < NREGIONS; ++region) + { + generate_palette_quantized_rgb_a(endpts[region], pattern_prec.region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]); + toterr[region] = 0; + } + + Vec3 rgb; + float a; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + double err, besterr; + float palette_alpha = 0, tile_alpha = 0; + + rgb.X() = (tile.data[y][x]).X(); + rgb.Y() = (tile.data[y][x]).Y(); + rgb.Z() = (tile.data[y][x]).Z(); + a = (tile.data[y][x]).W(); + + if(AVPCL::flag_premult) + tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (tile.data[y][x]).X() : + (rotatemode == ROTATEMODE_RGBA_RABG) ? (tile.data[y][x]).Y() : + (rotatemode == ROTATEMODE_RGBA_RGAB) ? 
(tile.data[y][x]).Z() : (tile.data[y][x]).W(); + + // compute the two indices separately + // if we're doing premultiplied alpha, we need to choose first the index that + // determines the alpha value, and then do the other index + + if (rotatemode == ROTATEMODE_RGBA_RGBA) + { + // do A index first as it has the alpha + besterr = DBL_MAX; + for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i) + { + err = Utils::metric1(a, palette_a[region][i], rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_A][y][x] = i; + palette_alpha = palette_a[region][i]; + } + } + toterr[region] += besterr; // squared-error norms are additive since we don't do the square root + + // do RGB index + besterr = DBL_MAX; + for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i) + { + err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) : + Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[region][i], palette_alpha); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_RGB][y][x] = i; + } + } + toterr[region] += besterr; + } + else + { + // do RGB index first as it has the alpha + besterr = DBL_MAX; + int bestindex; + for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i) + { + err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) : + Utils::metric3premult_alphain(rgb, palette_rgb[region][i], rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_RGB][y][x] = i; + bestindex = i; + } + } + palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).X() : + (rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).Y() : + (rotatemode == ROTATEMODE_RGBA_RGAB) ? 
(palette_rgb[region][bestindex]).Z() : (assert(0),0); + toterr[region] += besterr; + + // do A index + besterr = DBL_MAX; + for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i) + { + err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[region][i], rotatemode) : + Utils::metric1premult(a, tile_alpha, palette_a[region][i], palette_alpha, rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_A][y][x] = i; + } + } + toterr[region] += besterr; // squared-error norms are additive since we don't do the square root + } + } +} + +// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's +// this function returns either old_err or a value smaller (if it was successful in improving the error) +static double perturb_one(const Vec4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec ®ion_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts, + double old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) +{ + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts + + IntEndptsRGBA temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; + int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; + + for (int j=0; j>= 1) + { + bool improved = false; + for (int sign = -1; sign <= 1; sign += 2) + { + if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } + + float err = map_colors(colors, np, rotatemode, indexmode, temp_endpts, region_prec, min_err, temp_indices); + + if (err < min_err) + { + improved = true; + min_err = err; + 
beststep = sign * step; + for (int j=0; j 5000 perturb endpoints 50% of precision +// if err > 1000 25% +// if err > 200 12.5% +// if err > 40 6.25% +// for np = 16 -- adjust error thresholds as a function of np +// always ensure endpoint ordering is preserved (no need to overlap the scan) +static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec ®ion_prec, double orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) +{ + IntEndptsRGBA temp_endpts; + double best_err = orig_err; + int aprec = region_prec.endpt_a_prec[ch]; + int bprec = region_prec.endpt_b_prec[ch]; + int good_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; + int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; + + for (int j=0; j 5000.0*thr_scale) { adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; } + else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } + else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } + else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } + adelta = MAX(adelta, 3); + bdelta = MAX(bdelta, 3); + +#ifdef DISABLE_EXHAUSTIVE + adelta = bdelta = 3; +#endif + + temp_endpts = opt_endpts; + + // ok figure out the range of A and B + int alow = MAX(0, opt_endpts.A[ch] - adelta); + int ahigh = MIN((1<= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? 
rgb1 : rgb0 += delta + initial_err = err + */ + IntEndptsRGBA new_a, new_b; + IntEndptsRGBA new_endpt; + int do_b; + int orig_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; + int new_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; + int temp_indices0[NINDEXARRAYS][Tile::TILE_TOTAL]; + int temp_indices1[NINDEXARRAYS][Tile::TILE_TOTAL]; + + // now optimize each channel separately + for (int ch = 0; ch < NCHANNELS_RGBA; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A + float err1 = perturb_one(colors, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B + + if (err0 < err1) + { + if (err0 >= opt_err) + continue; + + for (int j=0; j= opt_err) + continue; + + for (int j=0; j= opt_err) + break; + + for (int j=0; j RGBA_MAX) v.X() = RGBA_MAX; + if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; + if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; + if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; + if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; + if (v.W() < RGBA_MIN) v.W() = RGBA_MIN; + if (v.W() > RGBA_MAX) v.W() = RGBA_MAX; +} + +// compute initial endpoints for the "RGB" portion and the "A" portion. +// Note these channels may have been rotated. 
+static void rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) +{ + for (int region=0; region maxp) maxp = dp; + + dp = alpha[i]; + if (dp < mina) mina = dp; + if (dp > maxa) maxa = dp; + } + + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + minp*direction; + endpts[region].B = mean + maxp*direction; + endpts[region].A.W() = mean.W() + mina; + endpts[region].B.W() = mean.W() + maxa; + + // clamp endpoints + // WORK: is [0,255] the right range, or should it be [0,255.5) or even [0,256) ? + clamp(endpts[region].A); + clamp(endpts[region].B); + } +} + +double AVPCL::compress_mode5(const Tile &t, char *block) +{ + FltEndpts endpts[NREGIONS]; + char tempblock[AVPCL::BLOCKSIZE]; + double msebest = DBL_MAX; + int shape = 0; + Tile t1; + + // try all rotations. refine tries the 2 different indexings. + for (int r = 0; r < NROTATEMODES && msebest > 0; ++r) + { + rotate_tile(t, r, t1); + rough(t1, shape, endpts); +// for (int i = 0; i < NINDEXMODES && msebest > 0; ++i) + for (int i = 0; i < 1 && msebest > 0; ++i) + { + double mse = refine(t1, shape, r, i, endpts, tempblock); + if (mse < msebest) + { + memcpy(block, tempblock, sizeof(tempblock)); + msebest = mse; + } + } + } + return msebest; +} diff --git a/src/nvtt/bc7/avpcl_mode6.cpp b/src/nvtt/bc7/avpcl_mode6.cpp new file mode 100644 index 0000000..13e07fb --- /dev/null +++ b/src/nvtt/bc7/avpcl_mode6.cpp @@ -0,0 +1,1059 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ +See the License for the specific language governing permissions and limitations under the License. +*/ + +// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis + +// x1000000 7777.1x2 4bi + +#include "bits.h" +#include "tile.h" +#include "avpcl.h" +#include "arvo/Vec4.h" +#include "arvo/Matrix.h" +#include "arvo/SVD.h" +#include "utils.h" +#include "endpts.h" + +#include + +using namespace ArvoMath; + +#define NLSBMODES 4 // number of different lsb modes per region. since we have two .1 per region, that can have 4 values + +#define NINDICES 16 +#define INDEXBITS 4 +#define HIGH_INDEXBIT (1<<(INDEXBITS-1)) +#define DENOM (NINDICES-1) +#define BIAS (DENOM/2) + +#define NSHAPES 1 + +static int shapes[NSHAPES] = +{ + 0x0000, +}; + +#define REGION(x,y,shapeindex) ((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0) + +#define NREGIONS 1 + +#define NBITSIZES (NREGIONS*2) +#define ABITINDEX(region) (2*(region)+0) +#define BBITINDEX(region) (2*(region)+1) + +struct ChanBits +{ + int nbitsizes[NBITSIZES]; // bitsizes for one channel +}; + +struct Pattern +{ + ChanBits chan[NCHANNELS_RGBA];// bit patterns used per channel + int mode; // associated mode value + int modebits; // number of mode bits + char *encoding; // verilog description of encoding for this mode +}; + +#define NPATTERNS 1 + +static Pattern patterns[NPATTERNS] = +{ + // red green blue alpha mode mb verilog + 7,7, 7,7, 7,7, 7,7, 0x40, 7, "", +}; + +struct RegionPrec +{ + int endpt_a_prec[NCHANNELS_RGBA]; + int endpt_b_prec[NCHANNELS_RGBA]; +}; + +struct PatternPrec +{ + RegionPrec region_precs[NREGIONS]; +}; + +// this is the precision for each channel and region +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! +static PatternPrec pattern_precs[NPATTERNS] = +{ + 7,7,7,7, 7,7,7,7, +}; + +// return # of bits needed to store n. 
handle signed or unsigned cases properly +static int nbits(int n, bool issigned) +{ + int nb; + if (n==0) + return 0; // no bits needed for 0, signed or not + else if (n > 0) + { + for (nb=0; n; ++nb, n>>=1) ; + return nb + (issigned?1:0); + } + else + { + assert (issigned); + for (nb=0; n<-1; ++nb, n>>=1) ; + return nb + 1; + } +} + +/* +we're using this table to assign lsbs +abgr >=2 correct +0000 0 0 +0001 0 0 +0010 0 0 +0011 1 x1 +0100 0 0 +0101 1 x1 +0110 1 x1 +0111 1 1 +1000 0 0 +1001 1 x0 +1010 1 x0 +1011 1 1 +1100 1 x0 +1101 1 1 +1110 1 1 +1111 1 1 + +we need 8 0's and 8 1's. the x's can be either 0 or 1 as long as you get 8/8. +I choose to assign the lsbs so that the rgb channels are as good as possible. +*/ + +// 8888 ->7777.1, use the "correct" column above to assign the lsb +static void compress_one(const IntEndptsRGBA& endpts, IntEndptsRGBA_2& compr_endpts) +{ + int onescnt; + + onescnt = 0; + for (int j=0; j> 1; + assert (compr_endpts.A[j] < 128); + } + compr_endpts.a_lsb = onescnt >= 2; + + onescnt = 0; + for (int j=0; j> 1; + assert (compr_endpts.B[j] < 128); + } + compr_endpts.b_lsb = onescnt >= 2; +} + +static void uncompress_one(const IntEndptsRGBA_2& compr_endpts, IntEndptsRGBA& endpts) +{ + for (int j=0; j> 2) & 3 and x = index & 3 +static void swap_indices(IntEndptsRGBA_2 endpts[NREGIONS], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex) +{ + int index_positions[NREGIONS]; + + index_positions[0] = 0; // since WLOG we have the high bit of the shapes at 0 + + for (int region = 0; region < NREGIONS; ++region) + { + int x = index_positions[region] & 3; + int y = (index_positions[region] >> 2) & 3; + assert(REGION(x,y,shapeindex) == region); // double check the table + if (indices[y][x] & HIGH_INDEXBIT) + { + // high bit is set, swap the endpts and indices for this region + int t; + for (int i=0; i= 0 && pat_index < NPATTERNS); + assert (in.getptr() == patterns[pat_index].modebits); + + p = patterns[pat_index]; + + shapeindex = 0; // we 
don't have any + + for (int j=0; j>2][i&3], INDEXBITS-1); // write i..[2:0] + else + out.write(indices[i>>2][i&3], INDEXBITS); // write i..[3:0] + } + +} + +static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W]) +{ + // the index we shorten is always index 0 + for (int i = 0; i < Tile::TILE_TOTAL; ++i) + { + if (i==0) + indices[i>>2][i&3] = in.read(INDEXBITS-1); // read i..[1:0] + else + indices[i>>2][i&3] = in.read(INDEXBITS); // read i..[2:0] + } +} + +static void emit_block(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block) +{ + Bits out(block, AVPCL::BITSIZE); + + write_header(endpts, shapeindex, p, out); + + write_indices(indices, shapeindex, out); + + assert(out.getptr() == AVPCL::BITSIZE); +} + +static void generate_palette_quantized(const IntEndptsRGBA_2 &endpts_2, const RegionPrec ®ion_prec, Vec4 palette[NINDICES]) +{ + IntEndptsRGBA endpts; + + uncompress_one(endpts_2, endpts); + + // scale endpoints + int a, b; // really need a IntVec4... 
+ + a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]+1); // +1 since we are in uncompressed space + b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]+1); + + // interpolate + for (int i = 0; i < NINDICES; ++i) + palette[i].X() = PALETTE_LERP(a, b, i, BIAS, DENOM); + + a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); + b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1); + + // interpolate + for (int i = 0; i < NINDICES; ++i) + palette[i].Y() = PALETTE_LERP(a, b, i, BIAS, DENOM); + + a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); + b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1); + + // interpolate + for (int i = 0; i < NINDICES; ++i) + palette[i].Z() = PALETTE_LERP(a, b, i, BIAS, DENOM); + + a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]+1); + b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]+1); + + // interpolate + for (int i = 0; i < NINDICES; ++i) + palette[i].W() = PALETTE_LERP(a, b, i, BIAS, DENOM); +} + +void AVPCL::decompress_mode6(const char *block, Tile &t) +{ + Bits in(block, AVPCL::BITSIZE); + + Pattern p; + IntEndptsRGBA_2 endpts[NREGIONS]; + int shapeindex, pat_index; + + read_header(in, endpts, shapeindex, p, pat_index); + + Vec4 palette[NREGIONS][NINDICES]; + for (int r = 0; r < NREGIONS; ++r) + generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]); + + int indices[Tile::TILE_H][Tile::TILE_W]; + + read_indices(in, shapeindex, indices); + + assert(in.getptr() == AVPCL::BITSIZE); + + // lookup + for (int y = 0; y < Tile::TILE_H; y++) + for (int x = 0; x < Tile::TILE_W; x++) + t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]]; +} + +// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr +static double map_colors(const Vec4 colors[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec ®ion_prec, double 
current_err, int indices[Tile::TILE_TOTAL]) +{ + Vec4 palette[NINDICES]; + double toterr = 0; + Vec4 err; + + generate_palette_quantized(endpts, region_prec, palette); + + for (int i = 0; i < np; ++i) + { + double err, besterr = DBL_MAX; + + for (int j = 0; j < NINDICES && besterr > 0; ++j) + { + err = !AVPCL::flag_premult ? Utils::metric4(colors[i], palette[j]) : + Utils::metric4premult(colors[i], palette[j]) ; + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[i] = j; + } + } + toterr += besterr; + + // check for early exit + if (toterr > current_err) + { + // fill out bogus index values so it's initialized at least + for (int k = i; k < np; ++k) + indices[k] = -1; + + return DBL_MAX; + } + } + return toterr; +} + +// assign indices given a tile, shape, and quantized endpoints, return toterr for each region +static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 endpts[NREGIONS], const PatternPrec &pattern_prec, + int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS]) +{ + // build list of possibles + Vec4 palette[NREGIONS][NINDICES]; + + for (int region = 0; region < NREGIONS; ++region) + { + generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]); + toterr[region] = 0; + } + + Vec4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + double err, besterr = DBL_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + err = !AVPCL::flag_premult ? 
Utils::metric4(tile.data[y][x], palette[region][i]) : + Utils::metric4premult(tile.data[y][x], palette[region][i]) ; + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[y][x] = i; + } + } + toterr[region] += besterr; + } +} + +// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's +// this function returns either old_err or a value smaller (if it was successful in improving the error) +static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts, + double old_err, int do_b, int indices[Tile::TILE_TOTAL]) +{ + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts + + IntEndptsRGBA_2 temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i>= 1) + { + bool improved = false; + for (int sign = -1; sign <= 1; sign += 2) + { + if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } + + float err = map_colors(colors, np, temp_endpts, region_prec, min_err, temp_indices); + + if (err < min_err) + { + improved = true; + min_err = err; + beststep = sign * step; + for (int i=0; i 5000 perturb endpoints 50% of precision +// if err > 1000 25% +// if err > 200 12.5% +// if err > 40 6.25% +// for np = 16 -- adjust error thresholds as a function of np +// always ensure endpoint ordering is preserved (no need to overlap the scan) +// if orig_err returned from this is less than its input value, then indices[] will contain valid indices +static double 
exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, double orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL]) +{ + IntEndptsRGBA_2 temp_endpts; + double best_err = orig_err; + int aprec = region_prec.endpt_a_prec[ch]; + int bprec = region_prec.endpt_b_prec[ch]; + int good_indices[Tile::TILE_TOTAL]; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i 5000.0*thr_scale) { adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; } + else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } + else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } + else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } + adelta = MAX(adelta, 3); + bdelta = MAX(bdelta, 3); + +#ifdef DISABLE_EXHAUSTIVE + adelta = bdelta = 3; +#endif + + temp_endpts = opt_endpts; + + // ok figure out the range of A and B + int alow = MAX(0, opt_endpts.A[ch] - adelta); + int ahigh = MIN((1<= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? rgb1 : rgb0 += delta + initial_err = err + */ + IntEndptsRGBA_2 new_a, new_b; + IntEndptsRGBA_2 new_endpt; + int do_b; + int orig_indices[Tile::TILE_TOTAL]; + int new_indices[Tile::TILE_TOTAL]; + int temp_indices0[Tile::TILE_TOTAL]; + int temp_indices1[Tile::TILE_TOTAL]; + + // now optimize each channel separately + // for the first error improvement, we save the indices. then, for any later improvement, we compare the indices + // if they differ, we restart the loop (which then falls back to looking for a first improvement.) 
+ for (int ch = 0; ch < NCHANNELS_RGBA; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A + float err1 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B + + if (err0 < err1) + { + if (err0 >= opt_err) + continue; + + for (int i=0; i= opt_err) + continue; + + for (int i=0; i= opt_err) + break; + + for (int i=0; i> 1) & 1; + + // make sure we have a valid error for temp_in + // we use DBL_MAX here because we want an accurate temp_in_err, no shortcuts + // (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the DBL_MAX position) + double temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], DBL_MAX, temp_indices); + + // now try to optimize these endpoints + double temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); + + // if we find an improvement, update the best so far and correct the output endpoints and errors + if (temp_out_err < best_err) + { + best_err = temp_out_err; + opt_err[region] = temp_out_err; + opt_endpts[region] = temp_out; + } + } + } +} + +/* optimization algorithm + for each pattern + convert endpoints using pattern precision + assign indices and get initial error + compress indices (and possibly reorder endpoints) + transform endpoints + if transformed endpoints fit pattern + get original endpoints back + optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better + compress new indices + transform new endpoints + if new endpoints fit pattern AND if error is improved + emit compressed block with new data + else + emit compressed block with original data // to try to preserve maximum endpoint 
precision + + simplify the above given that there is no transform now and that endpoints will always fit +*/ + +static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block) +{ + double orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; + IntEndptsRGBA_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS]; + int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W]; + + for (int sp = 0; sp < NPATTERNS; ++sp) + { + quantize_endpts(endpts, pattern_precs[sp], orig_endpts); + assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err); + swap_indices(orig_endpts, orig_indices, shapeindex_best); + + optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts); + + assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err); + for (int i=0; i RGBA_MAX) v.X() = RGBA_MAX; + if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; + if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; + if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; + if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; + if (v.W() < RGBA_MIN) v.W() = RGBA_MIN; + if (v.W() > RGBA_MAX) v.W() = RGBA_MAX; +} + +static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vec4 palette[NREGIONS][NINDICES]) +{ + for (int region = 0; region < NREGIONS; ++region) + for (int i = 0; i < NINDICES; ++i) + palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, 0.0, float(DENOM)); +} + +// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined +static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) +{ + // build list of possibles + Vec4 palette[NREGIONS][NINDICES]; + + generate_palette_unquantized(endpts, palette); + + double toterr = 0; + Vec4 err; + + for (int y = 0; y < tile.size_y; y++) + 
for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + double err, besterr; + + besterr = Utils::metric4(tile.data[y][x], palette[region][0]); + + for (int i = 1; i < NINDICES && besterr > 0; ++i) + { + err = Utils::metric4(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error increased, so we're done searching. this works for most norms. + break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; +} + +static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) +{ + for (int region=0; region maxp) maxp = dp; + } + + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + minp*direction; + endpts[region].B = mean + maxp*direction; + + // clamp endpoints + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped + clamp(endpts[region].A); + clamp(endpts[region].B); + } + + return map_colors(tile, shapeindex, endpts); +} + +static void swap(double *list1, int *list2, int i, int j) +{ + double t = list1[i]; list1[i] = list1[j]; list1[j] = t; + int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; +} + +double AVPCL::compress_mode6(const Tile &t, char *block) +{ + // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES + // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out + const int NITEMS=1; + + // pick the best NITEMS shapes and refine these. 
+ struct { + FltEndpts endpts[NREGIONS]; + } all[NSHAPES]; + double roughmse[NSHAPES]; + int index[NSHAPES]; + char tempblock[AVPCL::BLOCKSIZE]; + double msebest = DBL_MAX; + + for (int i=0; i roughmse[j]) + swap(roughmse, index, i, j); + + for (int i=0; i0; ++i) + { + int shape = index[i]; + double mse = refine(t, shape, &all[shape].endpts[0], tempblock); + if (mse < msebest) + { + memcpy(block, tempblock, sizeof(tempblock)); + msebest = mse; + } + } + return msebest; +} + diff --git a/src/nvtt/bc7/avpcl_mode7.cpp b/src/nvtt/bc7/avpcl_mode7.cpp new file mode 100644 index 0000000..b2813e8 --- /dev/null +++ b/src/nvtt/bc7/avpcl_mode7.cpp @@ -0,0 +1,1098 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis + +// x10000000 5555.1x4 64p 2bi (30b) + +#include "bits.h" +#include "tile.h" +#include "avpcl.h" +#include "arvo/Vec4.h" +#include "arvo/Matrix.h" +#include "arvo/SVD.h" +#include "utils.h" +#include "endpts.h" + +#include + +#include "shapes_two.h" + +using namespace ArvoMath; + +#define NLSBMODES 4 // number of different lsb modes per region. 
since we have two .1 per region, that can have 4 values + +#define NINDICES 4 +#define INDEXBITS 2 +#define HIGH_INDEXBIT (1<<(INDEXBITS-1)) +#define DENOM (NINDICES-1) +#define BIAS (DENOM/2) + +// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like? +// i.e. can we search shapes in a particular order so we can see the global error minima easily and +// stop without having to touch all shapes? + +#define POS_TO_X(pos) ((pos)&3) +#define POS_TO_Y(pos) (((pos)>>2)&3) + +#define NBITSIZES (NREGIONS*2) +#define ABITINDEX(region) (2*(region)+0) +#define BBITINDEX(region) (2*(region)+1) + +struct ChanBits +{ + int nbitsizes[NBITSIZES]; // bitsizes for one channel +}; + +struct Pattern +{ + ChanBits chan[NCHANNELS_RGBA];// bit patterns used per channel + int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed + int mode; // associated mode value + int modebits; // number of mode bits + char *encoding; // verilog description of encoding for this mode +}; + +#define NPATTERNS 1 +#define NREGIONS 2 + +static Pattern patterns[NPATTERNS] = +{ + // red green blue alpha xfm mode mb + 5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5, 0, 0x80, 8, "", +}; + +struct RegionPrec +{ + int endpt_a_prec[NCHANNELS_RGBA]; + int endpt_b_prec[NCHANNELS_RGBA]; +}; + +struct PatternPrec +{ + RegionPrec region_precs[NREGIONS]; +}; + + +// this is the precision for each channel and region +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! +static PatternPrec pattern_precs[NPATTERNS] = +{ + 5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5, +}; + +// return # of bits needed to store n. 
handle signed or unsigned cases properly +static int nbits(int n, bool issigned) +{ + int nb; + if (n==0) + return 0; // no bits needed for 0, signed or not + else if (n > 0) + { + for (nb=0; n; ++nb, n>>=1) ; + return nb + (issigned?1:0); + } + else + { + assert (issigned); + for (nb=0; n<-1; ++nb, n>>=1) ; + return nb + 1; + } +} + +static void transform_forward(IntEndptsRGBA_2 ep[NREGIONS]) +{ + assert(0); +} + +static void transform_inverse(IntEndptsRGBA_2 ep[NREGIONS]) +{ + assert(0); +} + +/* +we're using this table to assign lsbs +abgr >=2 correct +0000 0 0 +0001 0 0 +0010 0 0 +0011 1 x1 +0100 0 0 +0101 1 x1 +0110 1 x1 +0111 1 1 +1000 0 0 +1001 1 x0 +1010 1 x0 +1011 1 1 +1100 1 x0 +1101 1 1 +1110 1 1 +1111 1 1 + +we need 8 0's and 8 1's. the x's can be either 0 or 1 as long as you get 8/8. +I choose to assign the lsbs so that the rgb channels are as good as possible. +*/ + +// 6666 ->5555.1, use the "correct" column above to assign the lsb +static void compress_one(const IntEndptsRGBA& endpts, IntEndptsRGBA_2& compr_endpts) +{ + int onescnt; + + onescnt = 0; + for (int j=0; j> 1; + assert (compr_endpts.A[j] < 32); + } + compr_endpts.a_lsb = onescnt >= 2; + + onescnt = 0; + for (int j=0; j> 1; + assert (compr_endpts.B[j] < 32); + } + compr_endpts.b_lsb = onescnt >= 2; +} + +static void uncompress_one(const IntEndptsRGBA_2& compr_endpts, IntEndptsRGBA& endpts) +{ + for (int j=0; j> 2) & 3 and x = index & 3 +static void swap_indices(IntEndptsRGBA_2 endpts[NREGIONS], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex) +{ + for (int region = 0; region < NREGIONS; ++region) + { + int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region); + + int x = POS_TO_X(position); + int y = POS_TO_Y(position); + assert(REGION(x,y,shapeindex) == region); // double check the table + if (indices[y][x] & HIGH_INDEXBIT) + { + // high bit is set, swap the endpts and indices for this region + int t; + for (int i=0; i= 0 && pat_index < NPATTERNS); + assert (in.getptr() 
== patterns[pat_index].modebits); + + shapeindex = in.read(SHAPEBITS); + p = patterns[pat_index]; + + for (int j=0; j 0; ++j) + { + err = !AVPCL::flag_premult ? Utils::metric4(colors[i], palette[j]) : + Utils::metric4premult(colors[i], palette[j]) ; + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[i] = j; + } + } + toterr += besterr; + + // check for early exit + if (toterr > current_err) + { + // fill out bogus index values so it's initialized at least + for (int k = i; k < np; ++k) + indices[k] = -1; + + return DBL_MAX; + } + } + return toterr; +} + +// assign indices given a tile, shape, and quantized endpoints, return toterr for each region +static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 endpts[NREGIONS], const PatternPrec &pattern_prec, + int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS]) +{ + // build list of possibles + Vec4 palette[NREGIONS][NINDICES]; + + for (int region = 0; region < NREGIONS; ++region) + { + generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]); + toterr[region] = 0; + } + + Vec4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + double err, besterr = DBL_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + err = !AVPCL::flag_premult ? 
Utils::metric4(tile.data[y][x], palette[region][i]) : + Utils::metric4premult(tile.data[y][x], palette[region][i]) ; + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[y][x] = i; + } + } + toterr[region] += besterr; + } +} + +// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's +// this function returns either old_err or a value smaller (if it was successful in improving the error) +static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts, + double old_err, int do_b, int indices[Tile::TILE_TOTAL]) +{ + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts + + IntEndptsRGBA_2 temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i>= 1) + { + bool improved = false; + for (int sign = -1; sign <= 1; sign += 2) + { + if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } + + float err = map_colors(colors, np, temp_endpts, region_prec, min_err, temp_indices); + + if (err < min_err) + { + improved = true; + min_err = err; + beststep = sign * step; + for (int i=0; i 5000 perturb endpoints 50% of precision +// if err > 1000 25% +// if err > 200 12.5% +// if err > 40 6.25% +// for np = 16 -- adjust error thresholds as a function of np +// always ensure endpoint ordering is preserved (no need to overlap the scan) +// if orig_err returned from this is less than its input value, then indices[] will contain valid indices +static double 
exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, double orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL]) +{ + IntEndptsRGBA_2 temp_endpts; + double best_err = orig_err; + int aprec = region_prec.endpt_a_prec[ch]; + int bprec = region_prec.endpt_b_prec[ch]; + int good_indices[Tile::TILE_TOTAL]; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i 5000.0*thr_scale) { adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; } + else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } + else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } + else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } + adelta = MAX(adelta, 3); + bdelta = MAX(bdelta, 3); + +#ifdef DISABLE_EXHAUSTIVE + adelta = bdelta = 3; +#endif + + temp_endpts = opt_endpts; + + // ok figure out the range of A and B + int alow = MAX(0, opt_endpts.A[ch] - adelta); + int ahigh = MIN((1<= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? rgb1 : rgb0 += delta + initial_err = err + */ + IntEndptsRGBA_2 new_a, new_b; + IntEndptsRGBA_2 new_endpt; + int do_b; + int orig_indices[Tile::TILE_TOTAL]; + int new_indices[Tile::TILE_TOTAL]; + int temp_indices0[Tile::TILE_TOTAL]; + int temp_indices1[Tile::TILE_TOTAL]; + + // now optimize each channel separately + // for the first error improvement, we save the indices. then, for any later improvement, we compare the indices + // if they differ, we restart the loop (which then falls back to looking for a first improvement.) 
+ for (int ch = 0; ch < NCHANNELS_RGBA; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A + float err1 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B + + if (err0 < err1) + { + if (err0 >= opt_err) + continue; + + for (int i=0; i= opt_err) + continue; + + for (int i=0; i= opt_err) + break; + + for (int i=0; i> 1) & 1; + + // make sure we have a valid error for temp_in + // we use DBL_MAX here because we want an accurate temp_in_err, no shortcuts + // (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the DBL_MAX position) + double temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], DBL_MAX, temp_indices); + + // now try to optimize these endpoints + double temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); + + // if we find an improvement, update the best so far and correct the output endpoints and errors + if (temp_out_err < best_err) + { + best_err = temp_out_err; + opt_err[region] = temp_out_err; + opt_endpts[region] = temp_out; + } + } + } +} + +/* optimization algorithm + for each pattern + convert endpoints using pattern precision + assign indices and get initial error + compress indices (and possibly reorder endpoints) + transform endpoints + if transformed endpoints fit pattern + get original endpoints back + optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better + compress new indices + transform new endpoints + if new endpoints fit pattern AND if error is improved + emit compressed block with new data + else + emit compressed block with original data // to try to preserve maximum endpoint 
precision +*/ + +static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block) +{ + double orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; + IntEndptsRGBA_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS]; + int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W]; + + for (int sp = 0; sp < NPATTERNS; ++sp) + { + quantize_endpts(endpts, pattern_precs[sp], orig_endpts); + assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err); + swap_indices(orig_endpts, orig_indices, shapeindex_best); + if (patterns[sp].transformed) + transform_forward(orig_endpts); + // apply a heuristic here -- we check if the endpoints fit before we try to optimize them. + // the assumption made is that if they don't fit now, they won't fit after optimizing. + if (endpts_fit(orig_endpts, patterns[sp])) + { + if (patterns[sp].transformed) + transform_inverse(orig_endpts); + optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts); + assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err); + for (int i=0; i RGBA_MAX) v.X() = RGBA_MAX; + if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; + if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; + if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; + if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; + if (v.W() < RGBA_MIN) v.W() = RGBA_MIN; + if (v.W() > RGBA_MAX) v.W() = RGBA_MAX; +} + +static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vec4 palette[NREGIONS][NINDICES]) +{ + for (int region = 0; region < NREGIONS; ++region) + for (int i = 0; i < NINDICES; ++i) + palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, 0.0, float(DENOM)); +} + +// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined +static double 
map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) +{ + // build list of possibles + Vec4 palette[NREGIONS][NINDICES]; + + generate_palette_unquantized(endpts, palette); + + double toterr = 0; + Vec4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + double err, besterr = DBL_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + err = Utils::metric4(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error increased, so we're done searching. this works for most norms. + break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; +} + +static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) +{ + for (int region=0; region maxp) maxp = dp; + } + + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + minp*direction; + endpts[region].B = mean + maxp*direction; + + // clamp endpoints + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped + clamp(endpts[region].A); + clamp(endpts[region].B); + } + + return map_colors(tile, shapeindex, endpts); +} + +static void swap(double *list1, int *list2, int i, int j) +{ + double t = list1[i]; list1[i] = list1[j]; list1[j] = t; + int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; +} + +double AVPCL::compress_mode7(const Tile &t, char *block) +{ + // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES + // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out + const int NITEMS=NSHAPES/4; + + // pick the best NITEMS shapes and refine these. 
+ struct { + FltEndpts endpts[NREGIONS]; + } all[NSHAPES]; + double roughmse[NSHAPES]; + int index[NSHAPES]; + char tempblock[AVPCL::BLOCKSIZE]; + double msebest = DBL_MAX; + + for (int i=0; i roughmse[j]) + swap(roughmse, index, i, j); + + for (int i=0; i0; ++i) + { + int shape = index[i]; + double mse = refine(t, shape, &all[shape].endpts[0], tempblock); + if (mse < msebest) + { + memcpy(block, tempblock, sizeof(tempblock)); + msebest = mse; + } + } + return msebest; +} + diff --git a/src/nvtt/bc7/avpclc.cpp b/src/nvtt/bc7/avpclc.cpp new file mode 100644 index 0000000..afa8903 --- /dev/null +++ b/src/nvtt/bc7/avpclc.cpp @@ -0,0 +1,348 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +// NOTE: the compressor will compress RGB tiles where the input alpha is constant at 255 +// using modes where the alpha is variable if that mode gives a smaller mean squared error. 
+ +#include +#include +#include +#include +#include + +#include "ImfArray.h" +#include "targa.h" +#include "avpcl.h" + +using namespace std; + +static void analyze(string in1, string in2) +{ + Array2D pin1, pin2; + int w1, h1, w2, h2; + + Targa::read(in1, pin1, w1, h1); + Targa::read(in2, pin2, w2, h2); + + // choose the smaller of the two dimensions (since the old compressor would truncate to multiple-of-4 sizes) + int w = MIN(w1, w2); + int h = MIN(h1, h2); + + double nsamples = 0; + double mabse_rgb = 0, mabse_a = 0, mabse_rgba = 0, mse_rgb = 0, mse_a = 0, mse_rgba = 0; + int errdist_rgb[9], errdist_a[9], errdist_rgba[9]; + int errs[4*16]; + + for (int i=0; i<9; ++i) + errdist_rgb[i] = errdist_a[i] = errdist_rgba[i] = 0; + + int psnrhist[100]; + for (int i=0; i<100; ++i) + psnrhist[i] = 0; + bool first = true; + + int worstx, worsty; + double worstpsnr = 999.0; + + bool constant_alpha = true; + + for (int y = 0; y < h; y+=4) + for (int x = 0; x < w; x+=4) + { + int xw = MIN(w-x, 4); + int yw = MIN(h-y, 4); + int np = 0; + + float a[4], b[4]; + + for (int y0=0; y0 0 ? err : -err; + int j = i & 3; + int lsb; + + for (lsb=0; (abse>>lsb)>0; ++lsb) + ; + assert (lsb <= 8); + + if (j == 3) + { + mabse_a += (double)abse; + mse_a += (double)abse * abse; + errdist_a[lsb]++; + } + else + { + mabse_rgb += (double)abse; + mse_rgb += (double)abse * abse; + errdist_rgb[lsb]++; + } + mabse_rgba += (double)abse; + mse_rgba += (double)abse * abse; + errdist_rgba[lsb]++; + + msetile += (double)abse * abse; + } + + double psnrtile, rmsetile; + + rmsetile = sqrt(msetile / double(np)); + psnrtile = (rmsetile == 0) ? 99.0 : 20.0 * log10(255.0/rmsetile); + + if (psnrtile < worstpsnr) + { + worstx = x; worsty = y; worstpsnr = psnrtile; + } +#ifdef EXTERNAL_RELEASE + int psnrquant = (int) floor (psnrtile); // 10 means [10,11) psnrs, e.g. + // clamp just in case + psnrquant = (psnrquant < 0) ? 0 : (psnrquant > 99) ? 
99 : psnrquant; + psnrhist[psnrquant]++; + if (first && psnrquant < 16) + { + first = false; + printf("Tiles with RGBA PSNR's worse than 16dB\n"); + } + if (psnrquant < 16) + printf("X %4d Y %4d RGBA PSNR %7.2f\n", x, y, psnrtile); +#endif + } + + nsamples = w * h; + + mabse_a /= nsamples; + mse_a /= nsamples; + mabse_rgb /= (nsamples*3); + mse_rgb /= (nsamples*3); + mabse_rgba /= (nsamples*4); + mse_rgba /= (nsamples*4); + + double rmse_a, psnr_a, rmse_rgb, psnr_rgb, rmse_rgba, psnr_rgba; + + rmse_a = sqrt(mse_a); + psnr_a = (rmse_a == 0) ? 999.0 : 20.0 * log10(255.0/rmse_a); + + rmse_rgb = sqrt(mse_rgb); + psnr_rgb = (rmse_rgb == 0) ? 999.0 : 20.0 * log10(255.0/rmse_rgb); + + rmse_rgba = sqrt(mse_rgba); + psnr_rgba = (rmse_rgba == 0) ? 999.0 : 20.0 * log10(255.0/rmse_rgba); + + printf("Image size compared: %dw x %dh\n", w, h); + printf("Image alpha is %s.\n", constant_alpha ? "CONSTANT" : "VARIABLE"); + if (w != w1 || w != w2 || h != h1 || h != h2) + printf("--- NOTE: only the overlap between the 2 images (%d,%d) and (%d,%d) was compared\n", w1, h1, w2, h2); + printf("Total pixels: %12d\n", w * h); + + char *which = !AVPCL::flag_premult ? 
"RGB" : "aRaGaB"; + + printf("\n%s Mean absolute error: %f\n", which, mabse_rgb); + printf("%s Root mean squared error: %f (MSE %f)\n", which, rmse_rgb, rmse_rgb*rmse_rgb); + printf("%s Peak signal to noise ratio in dB: %f\n", which, psnr_rgb); + printf("%s Histogram of number of channels with indicated LSB error\n", which); + for (int i = 0; i < 9; ++i) + if (errdist_rgb[i]) printf("%2d LSB error: %10d\n", i, errdist_rgb[i]); + + printf("\nAlpha Mean absolute error: %f\n", mabse_a); + printf("Alpha Root mean squared error: %f (MSE %f)\n", rmse_a, rmse_a*rmse_a); + printf("Alpha Peak signal to noise ratio in dB: %f\n", psnr_a); + printf("Alpha Histogram of number of channels with indicated LSB error\n"); + for (int i = 0; i < 9; ++i) + if (errdist_a[i]) printf("%2d LSB error: %10d\n", i, errdist_a[i]); + + printf("\nRGBA Mean absolute error: %f\n", mabse_rgba); + printf("RGBA Root mean squared error: %f (MSE %f)\n", rmse_rgba, rmse_rgba*rmse_rgba); + printf("RGBA Peak signal to noise ratio in dB: %f\n", psnr_rgba); + printf("RGBA Histogram of number of channels with indicated LSB error\n"); + for (int i = 0; i < 9; ++i) + if (errdist_rgba[i]) printf("%2d LSB error: %10d\n", i, errdist_rgba[i]); + + printf("\nWorst tile RGBA PSNR %f at x %d y %d\n", worstpsnr, worstx, worsty); +#if 0 + printf("Histogram of per-tile PSNR\n"); + for (int i = 0; i < 100; ++i) + if (psnrhist[i]) + printf("[%2d,%2d) %6d\n", i, i+1, psnrhist[i]); +#endif +} + +static bool ext(string inf, char *extension) +{ + size_t n = inf.rfind('.', inf.length()-1); + if (n != string::npos) + return inf.substr(n, inf.length()) == extension; + else if (*extension != '\0') + return false; + else + return true; // extension is null and we didn't find a . 
+} + +template +std::string toString(const T &thing) +{ + std::stringstream os; + os << thing; + return os.str(); +} + +static int str2int(std::string s) +{ + int thing; + std::stringstream str (stringstream::in | stringstream::out); + str << s; + str >> thing; + return thing; +} + +static void usage() +{ + cout << endl << + "Usage:" << endl << + "avpclc infile.tga outroot generates outroot-w-h.avpcl and outroot-avpcl.tga" << endl << + "avpclc foo-w-h.avpcl outroot generates outroot-avpcl.tga" << endl << + "avpclc infile.tga outfile.tga compares the two images" << endl << endl << + "Flags:" << endl << + "-p use a metric based on AR AG AB A (note: if the image has alpha constant 255 this option is overridden)" << endl << + "-n use a non-uniformly-weighed metric (weights .299 .587 .114)" << endl << + "-na use a non-uniformly-weighed metric (ATI weights .3086 .6094 .0820)" << endl << + "-e dump squared errors for each tile to outroot-errors.bin" << endl; +} + +bool AVPCL::flag_premult = false; +bool AVPCL::flag_nonuniform = false; +bool AVPCL::flag_nonuniform_ati = false; + +bool AVPCL::mode_rgb = false; + +int main(int argc, char* argv[]) +{ + bool noerrfile = true; +#ifdef EXTERNAL_RELEASE + cout << "avpcl/BC7L Targa RGBA Compressor/Decompressor version 1.41 (May 27, 2010)." << endl << + "Bug reports, questions, and suggestions to wdonovan a t nvidia d o t com." 
<< endl; +#endif + try + { + char * args[2]; + int nargs = 0; + + // process flags, copy any non flag arg to args[] + for (int i = 1; i < argc; ++i) + if ((argv[i])[0] == '-') + switch ((argv[i])[1]) { + case 'p': AVPCL::flag_premult = true; break; + case 'n': if ((argv[i])[2] == 'a') { AVPCL::flag_nonuniform_ati = true; AVPCL::flag_nonuniform = false; } + else { AVPCL::flag_nonuniform = true; AVPCL::flag_nonuniform_ati = false; } + break; + case 'e': noerrfile = false; break; + default: throw "bad flag arg"; + } + else + { + if (nargs > 1) throw "Incorrect number of args"; + args[nargs++] = argv[i]; + } + + if (nargs != 2) throw "Incorrect number of args"; + + string inf(args[0]), outroot(args[1]); + + if (ext(outroot, "")) + { + if (ext(inf, ".tga")) + { + int width, height; + + Targa::fileinfo(inf, width, height, AVPCL::mode_rgb); + + string outf, avpclf, errf; + outf = outroot + "-avpcl.tga"; + avpclf = outroot + "-" + toString(width) + "-" + toString(height) + "-" + (AVPCL::mode_rgb ? "RGB" : "RGBA") + ".avpcl"; + cout << "Compressing " << (AVPCL::mode_rgb ? 
"RGB file " : "RGBA file ") << inf << " to " << avpclf << endl; + if (!noerrfile) + { + errf = outroot + "-errors" + ".bin"; + cout << "Errors output file is " << errf << endl; + } + else + errf = ""; + AVPCL::compress(inf, avpclf, errf); + cout << "Decompressing " << avpclf << " to " << outf << endl; + AVPCL::decompress(avpclf, outf); + analyze(inf, outf); + } + else if (ext(inf, ".avpcl")) + { + string outf; + outf = outroot + "-avpcl.tga"; + cout << "Decompressing " << inf << " to " << outf << endl; + AVPCL::decompress(inf, outf); + } + else throw "Invalid file args"; + } + else if (ext(inf, ".tga") && ext(outroot, ".tga")) + { + analyze(inf, outroot); + } + else throw "Invalid file args"; + + } + catch(const exception& e) + { + // Print error message and usage instructions + cerr << e.what() << endl; + usage(); + return 1; + } + catch(char * msg) + { + cerr << msg << endl; + usage(); + return 1; + } + return 0; +} diff --git a/src/nvtt/bc7/bits.h b/src/nvtt/bc7/bits.h new file mode 100644 index 0000000..3fa4af2 --- /dev/null +++ b/src/nvtt/bc7/bits.h @@ -0,0 +1,73 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. 
+*/ + +#ifndef _BITS_H +#define _BITS_H + +// read/write a bitstream + +#include + +class Bits +{ +public: + + Bits(char *data, int maxdatabits) { assert (data && maxdatabits > 0); bptr = bend = 0; bits = data; maxbits = maxdatabits; readonly = 0;} + Bits(const char *data, int availdatabits) { assert (data && availdatabits > 0); bptr = 0; bend = availdatabits; cbits = data; maxbits = availdatabits; readonly = 1;} + + void write(int value, int nbits) { + assert (nbits >= 0 && nbits < 32); + assert (sizeof(int)>= 4); + for (int i=0; i>i); + } + int read(int nbits) { + assert (nbits >= 0 && nbits < 32); + assert (sizeof(int)>= 4); + int out = 0; + for (int i=0; i= 0 && ptr < maxbits); bptr = ptr; } + int getsize() { return bend; } + +private: + int bptr; // next bit to read or write + int bend; // last written bit + 1 + char *bits; // ptr to user bit stream + const char *cbits; // ptr to const user bit stream + int maxbits; // max size of user bit stream + char readonly; // 1 if this is a read-only stream + + int readone() { + assert (bptr < bend); + if (bptr >= bend) return 0; + int bit = (readonly ? cbits[bptr>>3] : bits[bptr>>3]) & (1 << (bptr & 7)); + ++bptr; + return bit != 0; + } + void writeone(int bit) { + if (readonly) + throw "Writing a read-only bit stream"; + assert (bptr < maxbits); + if (bptr >= maxbits) return; + if (bit&1) + bits[bptr>>3] |= 1 << (bptr & 7); + else + bits[bptr>>3] &= ~(1 << (bptr & 7)); + if (bptr++ >= bend) bend = bptr; + } +}; + +#endif \ No newline at end of file diff --git a/src/nvtt/bc7/endpts.h b/src/nvtt/bc7/endpts.h new file mode 100644 index 0000000..0b33eef --- /dev/null +++ b/src/nvtt/bc7/endpts.h @@ -0,0 +1,80 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
+ +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +#ifndef _ENDPTS_H +#define _ENDPTS_H + +// endpoint definitions and routines to search through endpoint space + +#include "arvo/Vec4.h" + +using namespace ArvoMath; + +#define NCHANNELS_RGB 3 +#define NCHANNELS_RGBA 4 +#define CHANNEL_R 0 +#define CHANNEL_G 1 +#define CHANNEL_B 2 +#define CHANNEL_A 3 + +struct FltEndpts +{ + Vec4 A; + Vec4 B; +}; + +struct IntEndptsRGB +{ + int A[NCHANNELS_RGB]; + int B[NCHANNELS_RGB]; +}; + +struct IntEndptsRGB_1 +{ + int A[NCHANNELS_RGB]; + int B[NCHANNELS_RGB]; + int lsb; // shared lsb for A and B +}; + +struct IntEndptsRGB_2 +{ + int A[NCHANNELS_RGB]; + int B[NCHANNELS_RGB]; + int a_lsb; // lsb for A + int b_lsb; // lsb for B +}; + + +struct IntEndptsRGBA +{ + int A[NCHANNELS_RGBA]; + int B[NCHANNELS_RGBA]; +}; + +struct IntEndptsRGBA_2 +{ + int A[NCHANNELS_RGBA]; + int B[NCHANNELS_RGBA]; + int a_lsb; // lsb for A + int b_lsb; // lsb for B +}; + +struct IntEndptsRGBA_2a +{ + int A[NCHANNELS_RGBA]; + int B[NCHANNELS_RGBA]; + int a_lsb; // lsb for RGB channels of A + int b_lsb; // lsb for RGB channels of B +}; + +#endif + +diff --git a/src/nvtt/bc7/rgba.h b/src/nvtt/bc7/rgba.h new file mode 100644 index 0000000..d356a10 --- /dev/null +++ b/src/nvtt/bc7/rgba.h @@ -0,0 +1,27 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
+ +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +#ifndef _RGBA_H +#define _RGBA_H + +#define RGBA_MIN 0 +#define RGBA_MAX 255 // range of RGBA + +class RGBA +{ +public: + float r, g, b, a; + RGBA(): r(0), g(0), b(0), a(0){} + RGBA(float r, float g, float b, float a): r(r), g(g), b(b), a(a){} +}; + +#endif diff --git a/src/nvtt/bc7/shapes_three.h b/src/nvtt/bc7/shapes_three.h new file mode 100644 index 0000000..c618d22 --- /dev/null +++ b/src/nvtt/bc7/shapes_three.h @@ -0,0 +1,132 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. 
+*/ + +#ifndef _SHAPES_THREE_H +#define _SHAPES_THREE_H + +// shapes for 3 regions + +#define NREGIONS 3 +#define NSHAPES 64 +#define SHAPEBITS 6 + +static int shapes[NSHAPES*16] = +{ +0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 2, 2, +0, 0, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 0, 0, 2, 2, +0, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1, 1, 0, 0, 1, 1, +2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 0, 1, 1, 1, + +0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1, +0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1, +1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, +1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, + +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, +0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 2, +1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 1, 2, +2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 1, 2, + +0, 1, 1, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1, +0, 1, 1, 2, 0, 1, 2, 2, 0, 1, 1, 2, 2, 0, 0, 1, +0, 1, 1, 2, 0, 1, 2, 2, 1, 1, 2, 2, 2, 2, 0, 0, +0, 1, 1, 2, 0, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 0, + +0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2, 2, +0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 2, 2, +0, 1, 1, 2, 2, 0, 0, 1, 1, 1, 2, 2, 0, 0, 2, 2, +1, 1, 2, 2, 2, 2, 0, 0, 1, 1, 2, 2, 1, 1, 1, 1, + +0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, +0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, +0, 2, 2, 2, 2, 2, 2, 1, 0, 1, 2, 2, 2, 2, 1, 0, +0, 2, 2, 2, 2, 2, 2, 1, 0, 1, 2, 2, 2, 2, 1, 0, + +0, 1, 2, 2, 0, 0, 1, 2, 0, 1, 1, 0, 0, 0, 0, 0, +0, 1, 2, 2, 0, 0, 1, 2, 1, 2, 2, 1, 0, 1, 1, 0, +0, 0, 1, 1, 1, 1, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1, +0, 0, 0, 0, 2, 2, 2, 2, 0, 1, 1, 0, 1, 2, 2, 1, + +0, 0, 2, 2, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, +1, 1, 0, 2, 0, 1, 1, 0, 0, 1, 2, 2, 2, 0, 0, 0, +1, 1, 0, 2, 2, 0, 0, 2, 0, 1, 2, 2, 2, 2, 1, 1, +0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 1, 1, 2, 2, 2, 1, + +0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 1, 1, 0, 1, 2, 0, +0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 1, 2, 0, +1, 1, 2, 2, 0, 0, 1, 2, 0, 0, 2, 2, 0, 1, 2, 0, +1, 2, 2, 2, 0, 0, 1, 1, 0, 2, 2, 2, 0, 1, 2, 0, + +0, 0, 0, 0, 0, 1, 2, 0, 0, 1, 2, 
0, 0, 0, 1, 1, +1, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 2, 2, 2, 0, 0, +2, 2, 2, 2, 2, 0, 1, 2, 1, 2, 0, 1, 1, 1, 2, 2, +0, 0, 0, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 0, 1, 1, + +0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 2, 2, +1, 1, 2, 2, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 2, 2, +2, 2, 0, 0, 2, 2, 2, 2, 2, 1, 2, 1, 0, 0, 2, 2, +0, 0, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 2, 2, + +0, 0, 2, 2, 0, 2, 2, 0, 0, 1, 0, 1, 0, 0, 0, 0, +0, 0, 1, 1, 1, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 1, +0, 0, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 1, 2, 1, +0, 0, 1, 1, 1, 2, 2, 1, 0, 1, 0, 1, 2, 1, 2, 1, + +0, 1, 0, 1, 0, 2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, +0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, +0, 1, 0, 1, 0, 2, 2, 2, 0, 0, 0, 2, 2, 1, 1, 2, +2, 2, 2, 2, 0, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, + +0, 2, 2, 2, 0, 0, 0, 2, 0, 1, 1, 0, 0, 0, 0, 0, +0, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 0, 0, 0, 0, 0, +0, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 0, 2, 1, 1, 2, +0, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 1, 1, 2, + +0, 1, 1, 0, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 0, 0, +0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 0, 0, +2, 2, 2, 2, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 0, 0, +2, 2, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 2, 1, 1, 2, + +0, 0, 0, 2, 0, 2, 2, 2, 0, 1, 0, 1, 0, 1, 1, 1, +0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1, +0, 0, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, +0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, +}; + +#define REGION(x,y,si) shapes[((si)&3)*4+((si)>>2)*64+(x)+(y)*16] + +static int shapeindex_to_compressed_indices[NSHAPES*3] = +{ + 0, 3,15, 0, 3, 8, 0,15, 8, 0,15, 3, + 0, 8,15, 0, 3,15, 0,15, 3, 0,15, 8, + 0, 8,15, 0, 8,15, 0, 6,15, 0, 6,15, + 0, 6,15, 0, 5,15, 0, 3,15, 0, 3, 8, + + 0, 3,15, 0, 3, 8, 0, 8,15, 0,15, 3, + 0, 3,15, 0, 3, 8, 0, 6,15, 0,10, 8, + 0, 5, 3, 0, 8,15, 0, 8, 6, 0, 6,10, + 0, 8,15, 0, 5,15, 0,15,10, 0,15, 8, + + 0, 8,15, 0,15, 3, 0, 3,15, 0, 5,10, + 0, 6,10, 0,10, 8, 0, 8, 9, 0,15,10, + 0,15, 6, 0, 3,15, 0,15, 8, 0, 5,15, + 0,15, 3, 0,15, 6, 0,15, 6, 0,15, 8, + + 0, 3,15, 0,15, 3, 0, 5,15, 0, 5,15, + 0, 5,15, 0, 
8,15, 0, 5,15, 0,10,15, + 0, 5,15, 0,10,15, 0, 8,15, 0,13,15, + 0,15, 3, 0,12,15, 0, 3,15, 0, 3, 8 + +}; +#define SHAPEINDEX_TO_COMPRESSED_INDICES(si,region) shapeindex_to_compressed_indices[(si)*3+(region)] + +#endif diff --git a/src/nvtt/bc7/shapes_two.h b/src/nvtt/bc7/shapes_two.h new file mode 100644 index 0000000..d9a52ef --- /dev/null +++ b/src/nvtt/bc7/shapes_two.h @@ -0,0 +1,133 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +#ifndef _SHAPES_TWO_H +#define _SHAPES_TWO_H + +// shapes for two regions + +#define NREGIONS 2 +#define NSHAPES 64 +#define SHAPEBITS 6 + +static int shapes[NSHAPES*16] = +{ +0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, +0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, +0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, +0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, + +0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, +0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, +0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, +0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, + +0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, +0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, +0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, + +0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + +0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, +1, 0, 0, 0, 0, 0, 0, 
1, 0, 0, 0, 0, 0, 0, 1, 1, +1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, +1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, + +0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, +0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, +0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, +0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, + +0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, +0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, +0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, +0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, + +0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, +0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, +1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, +1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, + +0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, +0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, +0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, +0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, + +0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, +1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, +0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, +1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, + +0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, +0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, +1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, +1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + +0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, +1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, +1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, +0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, + +0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, +1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, +0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, + +0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, +1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, +1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, +0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, + +0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, +1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 
0, 0, 0, +1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, +1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, + +0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, +1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, +0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, +0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, + +}; + +#define REGION(x,y,si) shapes[((si)&3)*4+((si)>>2)*64+(x)+(y)*16] + +static int shapeindex_to_compressed_indices[NSHAPES*2] = +{ + 0,15, 0,15, 0,15, 0,15, + 0,15, 0,15, 0,15, 0,15, + 0,15, 0,15, 0,15, 0,15, + 0,15, 0,15, 0,15, 0,15, + + 0,15, 0, 2, 0, 8, 0, 2, + 0, 2, 0, 8, 0, 8, 0,15, + 0, 2, 0, 8, 0, 2, 0, 2, + 0, 8, 0, 8, 0, 2, 0, 2, + + 0,15, 0,15, 0, 6, 0, 8, + 0, 2, 0, 8, 0,15, 0,15, + 0, 2, 0, 8, 0, 2, 0, 2, + 0, 2, 0,15, 0,15, 0, 6, + + 0, 6, 0, 2, 0, 6, 0, 8, + 0,15, 0,15, 0, 2, 0, 2, + 0,15, 0,15, 0,15, 0,15, + 0,15, 0, 2, 0, 2, 0,15 + +}; +#define SHAPEINDEX_TO_COMPRESSED_INDICES(si,region) shapeindex_to_compressed_indices[(si)*2+(region)] + +#endif diff --git a/src/nvtt/bc7/targa.cpp b/src/nvtt/bc7/targa.cpp new file mode 100644 index 0000000..18a2ddf --- /dev/null +++ b/src/nvtt/bc7/targa.cpp @@ -0,0 +1,179 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +// Quick and dirty Targa file I/O -- doesn't handle compressed format targa files, though. 
+ +#include +#include + +#include "ImfArray.h" +#include "targa.h" +#include "rgba.h" + +Targa::Targa() {} + +Targa::~Targa() {} + +// read either RGB or RGBA files +static int readTgaHeader(FILE *fp, int& width, int& height, int &bpp, int &origin) +{ + unsigned char hdr[18]; + + if (fread(hdr, sizeof(hdr), 1, fp ) != 1) + return 0; + + if (hdr[2] != 2) + return 0; + + bpp = hdr[16]; + if (bpp != 24 && bpp != 32) + return 0; + + int alphabpp = hdr[17] & 0xF; + origin = (hdr[17] >> 4) & 0x3; + + if (bpp == 24 && alphabpp != 0) + return 0; + if (bpp == 32 && alphabpp != 8) + return 0; + + width = (hdr[13] << 8) | hdr[12]; + height = (hdr[15] << 8) | hdr[14]; + + // skip image ID field + int idsize = hdr[0]; + for (; idsize; --idsize) + (void) getc(fp); + + return 1; +} + +static void read_file(FILE *fp, Imf::Array2D& pixels, int width, int height, int bpp, int origin) +{ + pixels.resizeErase(height, width); + + // bottom to top order + for (int y = 0; y < height; ++y) + for (int x = 0; x < width; ++x) + { + float b = float(getc(fp)); + float g = float(getc(fp)); + float r = float(getc(fp)); + float a = (bpp == 24) ? 
RGBA_MAX : float(getc(fp)); + + int xt, yt; + + // transform based on origin + switch (origin) + { + case 0: xt = x; yt = height-1-y; break; // bottom left + case 1: xt = width-1-x; yt = height-1-y; break; // bottom right + case 2: xt = x; yt = y; break; // top left + case 3: xt = width-1-x; yt = y; break; // top right + default: throw "impossible origin value"; + } + + pixels[yt][xt].a = a; + pixels[yt][xt].r = r; + pixels[yt][xt].g = g; + pixels[yt][xt].b = b; + } +} + +void Targa::fileinfo(const std::string& filename, int& width, int& height, bool& const_alpha) +{ + int bpp, origin; + + FILE *fp = fopen(filename.c_str(), "rb"); + + if (fp == (FILE *) 0) + throw "Unable to open infile"; + + if (readTgaHeader(fp, width, height, bpp, origin) == 0) + throw "Invalid or unimplemented format for infile, needs to be a 24 or 32 bit uncompressed TGA file"; + + if (bpp == 24) + const_alpha = true; + else + { + // even if file is 32bpp the alpha may still be constant. so read file and check + Imf::Array2D pixels; + + read_file(fp, pixels, width, height, bpp, origin); + + const_alpha = true; // assign the caller's out-parameter; a local declared here would shadow it + + for (int y=0; y& pixels, int& width, int& height) +{ + int bpp, origin; + + FILE *fp = fopen(filename.c_str(), "rb"); + + if (fp == (FILE *) 0) + throw "Unable to open infile"; + + if (readTgaHeader(fp, width, height, bpp, origin) == 0) + throw "Invalid or unimplemented format for infile, needs to be a 24 or 32 bit uncompressed TGA file"; + + read_file(fp, pixels, width, height, bpp, origin); + + fclose(fp); +} + +void Targa::write(const std::string& filename, const Imf::Array2D& pixels, int width, int height) +{ + FILE *fp = fopen(filename.c_str(), "wb"); + + if (fp == (FILE *) 0) + throw "Unable to open outfile"; + + unsigned char hdr[18]; + + // we're lazy, always write this as a 32bpp file, even if the alpha is constant 255 + + memset(hdr, 0, sizeof(hdr)); + hdr[2] = 2; + hdr[12] = width & 0xFF; + hdr[13] = width >> 8; + hdr[14] = height & 0xFF; + hdr[15] = height >> 8; +
hdr[16] = 32; + hdr[17] = 0x28; + + fwrite( hdr, sizeof(hdr), 1, fp ); + + // top to bottom order + for (int y = 0; y < height; ++y) + for (int x = 0; x < width; ++x) + { + int a = int((pixels[y][x]).a + 0.5f); + int r = int((pixels[y][x]).r + 0.5f); + int g = int((pixels[y][x]).g + 0.5f); + int b = int((pixels[y][x]).b + 0.5f); + + if (b < RGBA_MIN) b = RGBA_MIN; if (b > RGBA_MAX) b = RGBA_MAX; fputc(b, fp); + if (g < RGBA_MIN) g = RGBA_MIN; if (g > RGBA_MAX) g = RGBA_MAX; fputc(g, fp); + if (r < RGBA_MIN) r = RGBA_MIN; if (r > RGBA_MAX) r = RGBA_MAX; fputc(r, fp); + if (a < RGBA_MIN) a = RGBA_MIN; if (a > RGBA_MAX) a = RGBA_MAX; fputc(a, fp); + } + fclose(fp); +} diff --git a/src/nvtt/bc7/targa.h b/src/nvtt/bc7/targa.h new file mode 100644 index 0000000..995df8e --- /dev/null +++ b/src/nvtt/bc7/targa.h @@ -0,0 +1,30 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. 
+*/ + +#ifndef _targa_h_ +#define _targa_h_ + +#include "ImfArray.h" +#include "rgba.h" + +class Targa +{ +public: + Targa(); + ~Targa(); + + static void fileinfo( const std::string& filename, int& width, int& height, bool& const_alpha); + static void read( const std::string& filename, Imf::Array2D& pixels, int& width, int& height ); + static void write(const std::string& filename, const Imf::Array2D& pixels, int width, int height ); +}; + +#endif /* _targa_h_ */ diff --git a/src/nvtt/bc7/tile.h b/src/nvtt/bc7/tile.h new file mode 100644 index 0000000..620ae2b --- /dev/null +++ b/src/nvtt/bc7/tile.h @@ -0,0 +1,67 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. 
+*/ + +#ifndef _TILE_H +#define _TILE_H + +#include "ImfArray.h" +#include +#include "arvo/Vec4.h" +#include "utils.h" +#include "rgba.h" + +using namespace Imf; +using namespace ArvoMath; + +// extract a tile of pixels from an array + +class Tile +{ +public: + static const int TILE_H = 4; + static const int TILE_W = 4; + static const int TILE_TOTAL = TILE_H * TILE_W; + Vec4 data[TILE_H][TILE_W]; + int size_x, size_y; // actual size of tile + + Tile() {}; + ~Tile(){}; + Tile(int xs, int ys) {size_x = xs; size_y = ys;} + + // pixels -> tile + void inline insert(const Array2D &pixels, int x, int y) + { + for (int y0=0; y0 pixels + void inline extract(Array2D &pixels, int x, int y) + { + for (int y0=0; y0 +#include +#include "rgba.h" +#include "arvo/Vec3.h" +#include "arvo/Vec4.h" + +static int denom7_weights[] = {0, 9, 18, 27, 37, 46, 55, 64}; // divided by 64 +static int denom15_weights[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}; // divided by 64 + +int Utils::lerp(int a, int b, int i, int bias, int denom) +{ +#ifdef USE_ZOH_INTERP + assert (denom == 3 || denom == 7 || denom == 15); + assert (i >= 0 && i <= denom); + assert (bias >= 0 && bias <= denom/2); + assert (a >= 0 && b >= 0); + + int round = 0; +#ifdef USE_ZOH_INTERP_ROUNDED + round = 32; +#endif + + switch (denom) + { + case 3: denom *= 5; i *= 5; // fall through to case 15 + case 15:return (a*denom15_weights[denom-i] + b*denom15_weights[i] + round) >> 6; + case 7: return (a*denom7_weights[denom-i] + b*denom7_weights[i] + round) >> 6; + default: assert(0); return 0; + } +#else + return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom)); // simple exact interpolation +#endif +} + +Vec4 Utils::lerp(const Vec4& a, const Vec4 &b, int i, int bias, int denom) +{ +#ifdef USE_ZOH_INTERP + assert (denom == 3 || denom == 7 || denom == 15); + assert (i >= 0 && i <= denom); + assert (bias >= 0 && bias <= denom/2); +// assert (a >= 0 && b >= 0); + + // no need to bias these as this is an exact division + 
+ switch (denom) + { + case 3: denom *= 5; i *= 5; // fall through to case 15 + case 15:return (a*denom15_weights[denom-i] + b*denom15_weights[i]) / 64.0; + case 7: return (a*denom7_weights[denom-i] + b*denom7_weights[i]) / 64.0; + default: assert(0); return 0; + } +#else + return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom)); // simple exact interpolation +#endif +} + + +int Utils::unquantize(int q, int prec) +{ + int unq; + + assert (prec > 3); // we only want to do one replicate + assert (RGBA_MIN == 0); + +#ifdef USE_ZOH_QUANT + if (prec >= 8) + unq = q; + else if (q == 0) + unq = 0; + else if (q == ((1<> prec; +#else + // avpcl unquantizer -- bit replicate + unq = (q << (8-prec)) | (q >> (2*prec-8)); +#endif + + return unq; +} + +// quantize to the best value -- i.e., minimize unquantize error +int Utils::quantize(float value, int prec) +{ + int q, unq; + + assert (prec > 3); // we only want to do one replicate + assert (RGBA_MIN == 0); + + unq = (int)floor(value + 0.5); + assert (unq >= RGBA_MIN && unq <= RGBA_MAX); + +#ifdef USE_ZOH_QUANT + q = (prec >= 8) ? unq : (unq << prec) / (RGBA_MAX+1); +#else + // avpcl quantizer -- scale properly for best possible bit-replicated result + q = (unq * ((1<= 0 && q < (1 << prec)); + + return q; +} + +double Utils::metric4(const Vec4& a, const Vec4& b) +{ + Vec4 err = a - b; + + // if nonuniform, select weights and weigh away + if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) + { + double rwt, gwt, bwt; + if (AVPCL::flag_nonuniform) + { + rwt = 0.299; gwt = 0.587; bwt = 0.114; + } + else if (AVPCL::flag_nonuniform_ati) + { + rwt = 0.3086; gwt = 0.6094; bwt = 0.0820; + } + + // weigh the components + err.X() *= rwt; + err.Y() *= gwt; + err.Z() *= bwt; + } + + return err * err; +} + +// WORK -- implement rotatemode for the below -- that changes where the rwt, gwt, and bwt's go. 
+double Utils::metric3(const Vec3& a, const Vec3& b, int rotatemode) +{ + Vec3 err = a - b; + + // if nonuniform, select weights and weigh away + if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) + { + double rwt, gwt, bwt; + if (AVPCL::flag_nonuniform) + { + rwt = 0.299; gwt = 0.587; bwt = 0.114; + } + else if (AVPCL::flag_nonuniform_ati) + { + rwt = 0.3086; gwt = 0.6094; bwt = 0.0820; + } + + // adjust weights based on rotatemode + switch(rotatemode) + { + case ROTATEMODE_RGBA_RGBA: break; + case ROTATEMODE_RGBA_AGBR: rwt = 1.0; break; + case ROTATEMODE_RGBA_RABG: gwt = 1.0; break; + case ROTATEMODE_RGBA_RGAB: bwt = 1.0; break; + default: assert(0); + } + + // weigh the components + err.X() *= rwt; + err.Y() *= gwt; + err.Z() *= bwt; + } + + return err * err; +} + +double Utils::metric1(const float a, const float b, int rotatemode) +{ + float err = a - b; + + // if nonuniform, select weights and weigh away + if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) + { + double rwt, gwt, bwt, awt; + if (AVPCL::flag_nonuniform) + { + rwt = 0.299; gwt = 0.587; bwt = 0.114; + } + else if (AVPCL::flag_nonuniform_ati) + { + rwt = 0.3086; gwt = 0.6094; bwt = 0.0820; + } + + // adjust weights based on rotatemode + switch(rotatemode) + { + case ROTATEMODE_RGBA_RGBA: awt = 1.0; break; + case ROTATEMODE_RGBA_AGBR: awt = rwt; break; + case ROTATEMODE_RGBA_RABG: awt = gwt; break; + case ROTATEMODE_RGBA_RGAB: awt = bwt; break; + default: assert(0); + } + + // weigh the components + err *= awt; + } + + return err * err; +} + +float Utils::premult(float r, float a) +{ + // note that the args are really integers stored in floats + int R = r, A = a; + + assert ((R==r) && (A==a)); + + return float((R*A + RGBA_MAX/2)/RGBA_MAX); +} + +static void premult4(Vec4& rgba) +{ + rgba.X() = Utils::premult(rgba.X(), rgba.W()); + rgba.Y() = Utils::premult(rgba.Y(), rgba.W()); + rgba.Z() = Utils::premult(rgba.Z(), rgba.W()); +} + +static void premult3(Vec3& rgb, float a) +{ + rgb.X() = 
Utils::premult(rgb.X(), a); + rgb.Y() = Utils::premult(rgb.Y(), a); + rgb.Z() = Utils::premult(rgb.Z(), a); +} + +double Utils::metric4premult(const Vec4& a, const Vec4& b) +{ + Vec4 pma = a, pmb = b; + + premult4(pma); + premult4(pmb); + + Vec4 err = pma - pmb; + + // if nonuniform, select weights and weigh away + if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) + { + double rwt, gwt, bwt; + if (AVPCL::flag_nonuniform) + { + rwt = 0.299; gwt = 0.587; bwt = 0.114; + } + else if (AVPCL::flag_nonuniform_ati) + { + rwt = 0.3086; gwt = 0.6094; bwt = 0.0820; + } + + // weigh the components + err.X() *= rwt; + err.Y() *= gwt; + err.Z() *= bwt; + } + + return err * err; +} + +double Utils::metric3premult_alphaout(const Vec3& rgb0, float a0, const Vec3& rgb1, float a1) +{ + Vec3 pma = rgb0, pmb = rgb1; + + premult3(pma, a0); + premult3(pmb, a1); + + Vec3 err = pma - pmb; + + // if nonuniform, select weights and weigh away + if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) + { + double rwt, gwt, bwt; + if (AVPCL::flag_nonuniform) + { + rwt = 0.299; gwt = 0.587; bwt = 0.114; + } + else if (AVPCL::flag_nonuniform_ati) + { + rwt = 0.3086; gwt = 0.6094; bwt = 0.0820; + } + + // weigh the components + err.X() *= rwt; + err.Y() *= gwt; + err.Z() *= bwt; + } + + return err * err; +} + +double Utils::metric3premult_alphain(const Vec3& rgb0, const Vec3& rgb1, int rotatemode) +{ + Vec3 pma = rgb0, pmb = rgb1; + + switch(rotatemode) + { + case ROTATEMODE_RGBA_RGBA: + // this function isn't supposed to be called for this rotatemode + assert(0); + break; + case ROTATEMODE_RGBA_AGBR: + pma.Y() = premult(pma.Y(), pma.X()); + pma.Z() = premult(pma.Z(), pma.X()); + pmb.Y() = premult(pmb.Y(), pmb.X()); + pmb.Z() = premult(pmb.Z(), pmb.X()); + break; + case ROTATEMODE_RGBA_RABG: + pma.X() = premult(pma.X(), pma.Y()); + pma.Z() = premult(pma.Z(), pma.Y()); + pmb.X() = premult(pmb.X(), pmb.Y()); + pmb.Z() = premult(pmb.Z(), pmb.Y()); + break; + case ROTATEMODE_RGBA_RGAB: + 
pma.X() = premult(pma.X(), pma.Z()); + pma.Y() = premult(pma.Y(), pma.Z()); + pmb.X() = premult(pmb.X(), pmb.Z()); + pmb.Y() = premult(pmb.Y(), pmb.Z()); + break; + default: assert(0); + } + + Vec3 err = pma - pmb; + + // if nonuniform, select weights and weigh away + if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) + { + double rwt, gwt, bwt; + if (AVPCL::flag_nonuniform) + { + rwt = 0.299; gwt = 0.587; bwt = 0.114; + } + else if (AVPCL::flag_nonuniform_ati) + { + rwt = 0.3086; gwt = 0.6094; bwt = 0.0820; + } + + // weigh the components + err.X() *= rwt; + err.Y() *= gwt; + err.Z() *= bwt; + } + + return err * err; +} + +double Utils::metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode) +{ + float err = premult(rgb0, a0) - premult(rgb1, a1); + + // if nonuniform, select weights and weigh away + if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) + { + double rwt, gwt, bwt, awt; + if (AVPCL::flag_nonuniform) + { + rwt = 0.299; gwt = 0.587; bwt = 0.114; + } + else if (AVPCL::flag_nonuniform_ati) + { + rwt = 0.3086; gwt = 0.6094; bwt = 0.0820; + } + + // adjust weights based on rotatemode + switch(rotatemode) + { + case ROTATEMODE_RGBA_RGBA: awt = 1.0; break; + case ROTATEMODE_RGBA_AGBR: awt = rwt; break; + case ROTATEMODE_RGBA_RABG: awt = gwt; break; + case ROTATEMODE_RGBA_RGAB: awt = bwt; break; + default: assert(0); + } + + // weigh the components + err *= awt; + } + + return err * err; +} diff --git a/src/nvtt/bc7/utils.h b/src/nvtt/bc7/utils.h new file mode 100644 index 0000000..5c08ffd --- /dev/null +++ b/src/nvtt/bc7/utils.h @@ -0,0 +1,69 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
+ +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +// utility class holding common routines +#ifndef _UTILS_H +#define _UTILS_H + +#include "arvo/Vec4.h" + +using namespace ArvoMath; + +#ifndef MIN +#define MIN(x,y) ((x)<(y)?(x):(y)) +#endif + +#ifndef MAX +#define MAX(x,y) ((x)>(y)?(x):(y)) +#endif + +#define PALETTE_LERP(a, b, i, bias, denom) Utils::lerp(a, b, i, bias, denom) + +#define SIGN_EXTEND(x,nb) ((((x)&(1<<((nb)-1)))?((~0)<<(nb)):0)|(x)) + +#define INDEXMODE_BITS 1 // 2 different index modes +#define NINDEXMODES (1<<(INDEXMODE_BITS)) +#define INDEXMODE_ALPHA_IS_3BITS 0 +#define INDEXMODE_ALPHA_IS_2BITS 1 + +#define ROTATEMODE_BITS 2 // 4 different rotate modes +#define NROTATEMODES (1<<(ROTATEMODE_BITS)) +#define ROTATEMODE_RGBA_RGBA 0 +#define ROTATEMODE_RGBA_AGBR 1 +#define ROTATEMODE_RGBA_RABG 2 +#define ROTATEMODE_RGBA_RGAB 3 + +class Utils +{ +public: + // error metrics + static double metric4(const Vec4& a, const Vec4& b); + static double metric3(const Vec3& a, const Vec3& b, int rotatemode); + static double metric1(float a, float b, int rotatemode); + + static double metric4premult(const Vec4& rgba0, const Vec4& rgba1); + static double metric3premult_alphaout(const Vec3& rgb0, float a0, const Vec3& rgb1, float a1); + static double metric3premult_alphain(const Vec3& rgb0, const Vec3& rgb1, int rotatemode); + static double metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode); + + static float Utils::premult(float r, float a); + + // quantization and unquantization + static int unquantize(int q, int prec); + static int quantize(float value, int prec); + + // lerping + static int 
lerp(int a, int b, int i, int bias, int denom); + static Vec4 lerp(const Vec4& a, const Vec4 &b, int i, int bias, int denom); +}; + +#endif \ No newline at end of file