Add BC6 support to nvtt lib and utils.

- Use 3x3 eigensolver for initial fit in ZOH. Slightly better perf and RMSE than power method. - Remove use of double precision in ZOH - speeds up by 12%. - Fixed RGBM encoding that was broken for HDR images. - Use gamma-2.0 space for RGBM for HDR images (improves precision in darks). - Use UNORM instead of TYPELESS formats when saving a DX10 .dds file. The TYPELESS formats break most viewers. - Cleaned up warnings in ZOH code. - Command-line utils will warn if you give them an unrecognized parameter. - Added VS2010 profiling results.
2013-10-25 17:30:55 +00:00
parent 77188a42ac
commit 474239c784
43 changed files with 1610 additions and 1161 deletions
--- a/src/nvmath/Fitting.cpp
+++ b/src/nvmath/Fitting.cpp
@ -13,7 +13,7 @@ using namespace nv;
 // @@ Move to EigenSolver.h

 // @@ We should be able to do something cheaper...
-static Vector3 estimatePrincipleComponent(const float * __restrict matrix)
+static Vector3 estimatePrincipalComponent(const float * __restrict matrix)
 {
 	const Vector3 row0(matrix[0], matrix[1], matrix[2]);
 	const Vector3 row1(matrix[1], matrix[3], matrix[4]);
@ -36,7 +36,7 @@ static inline Vector3 firstEigenVector_PowerMethod(const float *__restrict matri
        return Vector3(0.0f);
    }

-    Vector3 v = estimatePrincipleComponent(matrix);
+    Vector3 v = estimatePrincipalComponent(matrix);

    const int NUM = 8;
    for (int i = 0; i < NUM; i++)
@ -136,7 +136,7 @@ Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, cons
    return centroid;
 }

-Vector3 nv::Fit::computePrincipalComponent(int n, const Vector3 *__restrict points)
+Vector3 nv::Fit::computePrincipalComponent_PowerMethod(int n, const Vector3 *__restrict points)
 {
    float matrix[6];
    computeCovariance(n, points, matrix);
@ -144,7 +144,7 @@ Vector3 nv::Fit::computePrincipalComponent(int n, const Vector3 *__restrict poin
    return firstEigenVector_PowerMethod(matrix);
 }

-Vector3 nv::Fit::computePrincipalComponent(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
+Vector3 nv::Fit::computePrincipalComponent_PowerMethod(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
 {
    float matrix[6];
    computeCovariance(n, points, weights, metric, matrix);
@ -153,6 +153,42 @@ Vector3 nv::Fit::computePrincipalComponent(int n, const Vector3 *__restrict poin
 }


+
+static inline Vector3 firstEigenVector_EigenSolver(const float *__restrict matrix)
+{
+    if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0)
+    {
+        return Vector3(0.0f);
+    }
+
+    float eigenValues[3];
+    Vector3 eigenVectors[3];
+	if (!nv::Fit::eigenSolveSymmetric(matrix, eigenValues, eigenVectors))
+	{
+		return Vector3(0.0f);
+	}
+
+	return eigenVectors[0];
+}
+
+Vector3 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector3 *__restrict points)
+{
+    float matrix[6];
+    computeCovariance(n, points, matrix);
+
+    return firstEigenVector_EigenSolver(matrix);
+}
+
+Vector3 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
+{
+    float matrix[6];
+    computeCovariance(n, points, weights, metric, matrix);
+
+    return firstEigenVector_EigenSolver(matrix);
+}
+
+
+
 Plane nv::Fit::bestPlane(int n, const Vector3 *__restrict points)
 {
    // compute the centroid and covariance
@ -199,7 +235,7 @@ bool nv::Fit::isPlanar(int n, const Vector3 * points, float epsilon/*=NV_EPSILON
 static void EigenSolver_Tridiagonal(double mat[3][3],double * diag,double * subd);
 static bool EigenSolver_QLAlgorithm(double mat[3][3],double * diag,double * subd);

-bool nv::Fit::eigenSolveSymmetric(float matrix[6], float eigenValues[3], Vector3 eigenVectors[3])
+bool nv::Fit::eigenSolveSymmetric(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3])
 {
    nvDebugCheck(matrix != NULL && eigenValues != NULL && eigenVectors != NULL);

--- a/src/nvmath/Fitting.h
+++ b/src/nvmath/Fitting.h
@ -17,13 +17,16 @@ namespace nv
        Vector3 computeCovariance(int n, const Vector3 * points, float * covariance);
        Vector3 computeCovariance(int n, const Vector3 * points, const float * weights, const Vector3 & metric, float * covariance);

-        Vector3 computePrincipalComponent(int n, const Vector3 * points);
-        Vector3 computePrincipalComponent(int n, const Vector3 * points, const float * weights, const Vector3 & metric);
+        Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points);
+        Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points, const float * weights, const Vector3 & metric);
+
+        Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points);
+        Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points, const float * weights, const Vector3 & metric);

        Plane bestPlane(int n, const Vector3 * points);
        bool isPlanar(int n, const Vector3 * points, float epsilon = NV_EPSILON);

-        bool eigenSolveSymmetric (float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]);
+        bool eigenSolveSymmetric (const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]);


        // Returns number of clusters [1-4].
--- a/src/nvmath/Half.h
+++ b/src/nvmath/Half.h
@ -23,7 +23,9 @@ namespace nv {
    // http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
    inline uint32 fast_half_to_float(uint16 h)
    {
-        nvDebugCheck(mantissa_table[0] == 0); // Make sure table was initialized.
+		// Initialize table if necessary.
+		if (mantissa_table[0] != 0)
+			half_init_tables();
 	    uint exp = h >> 10;
 	    return mantissa_table[offset_table[exp] + (h & 0x3ff)] + exponent_table[exp];
    }