Ported over James Arvo's SVD code from ZOH. It's a good deal slower than the eigensolver, and no better in RMSE - so didn't use it, but left the code in place for future reference.

Also replaced doubles with floats in the eigensolver code.  Speeds up BC6 another 5%.  No change to RMSE in test suite.
This commit is contained in:
nathaniel.reed@gmail.com 2013-12-05 02:13:17 +00:00
parent 474239c784
commit f2fa0517b5
2 changed files with 346 additions and 23 deletions

View File

@ -7,6 +7,7 @@
#include "nvcore/Utils.h" // max, swap #include "nvcore/Utils.h" // max, swap
#include <float.h> // FLT_MAX #include <float.h> // FLT_MAX
#include <vector>
using namespace nv; using namespace nv;
@ -187,6 +188,29 @@ Vector3 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector3 *__r
return firstEigenVector_EigenSolver(matrix); return firstEigenVector_EigenSolver(matrix);
} }
void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R);
Vector3 nv::Fit::computePrincipalComponent_SVD(int n, const Vector3 *__restrict points)
{
// Store the points in an n x n matrix
std::vector<float> Q(n*n, 0.0f);
for (int i = 0; i < n; ++i)
{
Q[i*n+0] = points[i].x;
Q[i*n+1] = points[i].y;
Q[i*n+2] = points[i].z;
}
// Alloc space for the SVD outputs
std::vector<float> diag(n, 0.0f);
std::vector<float> R(n*n, 0.0f);
ArvoSVD(n, n, &Q[0], &diag[0], &R[0]);
// Get the principal component
return Vector3(R[0], R[1], R[2]);
}
Plane nv::Fit::bestPlane(int n, const Vector3 *__restrict points) Plane nv::Fit::bestPlane(int n, const Vector3 *__restrict points)
@ -232,16 +256,16 @@ bool nv::Fit::isPlanar(int n, const Vector3 * points, float epsilon/*=NV_EPSILON
// Householder transforms followed by QL decomposition. // Householder transforms followed by QL decomposition.
// Seems to be based on the code from Numerical Recipes in C. // Seems to be based on the code from Numerical Recipes in C.
static void EigenSolver_Tridiagonal(double mat[3][3],double * diag,double * subd); static void EigenSolver_Tridiagonal(float mat[3][3],float * diag,float * subd);
static bool EigenSolver_QLAlgorithm(double mat[3][3],double * diag,double * subd); static bool EigenSolver_QLAlgorithm(float mat[3][3],float * diag,float * subd);
bool nv::Fit::eigenSolveSymmetric(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]) bool nv::Fit::eigenSolveSymmetric(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3])
{ {
nvDebugCheck(matrix != NULL && eigenValues != NULL && eigenVectors != NULL); nvDebugCheck(matrix != NULL && eigenValues != NULL && eigenVectors != NULL);
double subd[3]; float subd[3];
double diag[3]; float diag[3];
double work[3][3]; float work[3][3];
work[0][0] = matrix[0]; work[0][0] = matrix[0];
work[0][1] = work[1][0] = matrix[1]; work[0][1] = work[1][0] = matrix[1];
@ -297,7 +321,7 @@ bool nv::Fit::eigenSolveSymmetric(const float matrix[6], float eigenValues[3], V
return true; return true;
} }
static void EigenSolver_Tridiagonal(double mat[3][3],double * diag,double * subd) static void EigenSolver_Tridiagonal(float mat[3][3],float * diag,float * subd)
{ {
// Householder reduction T = Q^t M Q // Householder reduction T = Q^t M Q
// Input: // Input:
@ -306,23 +330,23 @@ static void EigenSolver_Tridiagonal(double mat[3][3],double * diag,double * subd
// mat, orthogonal matrix Q // mat, orthogonal matrix Q
// diag, diagonal entries of T // diag, diagonal entries of T
// subd, subdiagonal entries of T (T is symmetric) // subd, subdiagonal entries of T (T is symmetric)
const double epsilon = 1e-08f; const float epsilon = 1e-08f;
double a = mat[0][0]; float a = mat[0][0];
double b = mat[0][1]; float b = mat[0][1];
double c = mat[0][2]; float c = mat[0][2];
double d = mat[1][1]; float d = mat[1][1];
double e = mat[1][2]; float e = mat[1][2];
double f = mat[2][2]; float f = mat[2][2];
diag[0] = a; diag[0] = a;
subd[2] = 0.f; subd[2] = 0.f;
if ( fabs(c) >= epsilon ) if ( fabs(c) >= epsilon )
{ {
const double ell = sqrt(b*b+c*c); const float ell = sqrtf(b*b+c*c);
b /= ell; b /= ell;
c /= ell; c /= ell;
const double q = 2*b*e+c*(f-d); const float q = 2*b*e+c*(f-d);
diag[1] = d+c*q; diag[1] = d+c*q;
diag[2] = f-c*q; diag[2] = f-c*q;
subd[0] = ell; subd[0] = ell;
@ -343,7 +367,7 @@ static void EigenSolver_Tridiagonal(double mat[3][3],double * diag,double * subd
} }
} }
static bool EigenSolver_QLAlgorithm(double mat[3][3],double * diag,double * subd) static bool EigenSolver_QLAlgorithm(float mat[3][3],float * diag,float * subd)
{ {
// QL iteration with implicit shifting to reduce matrix from tridiagonal // QL iteration with implicit shifting to reduce matrix from tridiagonal
// to diagonal // to diagonal
@ -357,34 +381,34 @@ static bool EigenSolver_QLAlgorithm(double mat[3][3],double * diag,double * subd
int m; int m;
for (m = ell; m <= 1; m++) for (m = ell; m <= 1; m++)
{ {
double dd = fabs(diag[m]) + fabs(diag[m+1]); float dd = fabs(diag[m]) + fabs(diag[m+1]);
if ( fabs(subd[m]) + dd == dd ) if ( fabs(subd[m]) + dd == dd )
break; break;
} }
if ( m == ell ) if ( m == ell )
break; break;
double g = (diag[ell+1]-diag[ell])/(2*subd[ell]); float g = (diag[ell+1]-diag[ell])/(2*subd[ell]);
double r = sqrt(g*g+1); float r = sqrtf(g*g+1);
if ( g < 0 ) if ( g < 0 )
g = diag[m]-diag[ell]+subd[ell]/(g-r); g = diag[m]-diag[ell]+subd[ell]/(g-r);
else else
g = diag[m]-diag[ell]+subd[ell]/(g+r); g = diag[m]-diag[ell]+subd[ell]/(g+r);
double s = 1, c = 1, p = 0; float s = 1, c = 1, p = 0;
for (int i = m-1; i >= ell; i--) for (int i = m-1; i >= ell; i--)
{ {
double f = s*subd[i], b = c*subd[i]; float f = s*subd[i], b = c*subd[i];
if ( fabs(f) >= fabs(g) ) if ( fabs(f) >= fabs(g) )
{ {
c = g/f; c = g/f;
r = sqrt(c*c+1); r = sqrtf(c*c+1);
subd[i+1] = f*r; subd[i+1] = f*r;
c *= (s = 1/r); c *= (s = 1/r);
} }
else else
{ {
s = f/g; s = f/g;
r = sqrt(s*s+1); r = sqrtf(s*s+1);
subd[i+1] = g*r; subd[i+1] = g*r;
s *= (c = 1/r); s *= (c = 1/r);
} }
@ -504,3 +528,300 @@ int nv::Fit::compute4Means(int n, const Vector3 *__restrict points, const float
} }
} }
// Adaptation of James Arvo's SVD code, as found in ZOH.
inline float Sqr(float x) { return x*x; }
inline float svd_pythag( float a, float b )
{
float at = fabsf(a);
float bt = fabsf(b);
if( at > bt )
return at * sqrtf( 1.0f + Sqr( bt / at ) );
else if( bt > 0.0f )
return bt * sqrtf( 1.0f + Sqr( at / bt ) );
else return 0.0f;
}
inline float SameSign( float a, float b )
{
float t;
if( b >= 0.0f ) t = fabsf( a );
else t = -fabsf( a );
return t;
}
void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R)
{
static const int MaxIterations = 30;
int i, j, k, l, p, q, iter;
float c, f, h, s, x, y, z;
float norm = 0.0f;
float g = 0.0f;
float scale = 0.0f;
std::vector<float> temp(cols, 0.0f);
for( i = 0; i < cols; i++ )
{
temp[i] = scale * g;
scale = 0.0f;
g = 0.0f;
s = 0.0f;
l = i + 1;
if( i < rows )
{
for( k = i; k < rows; k++ ) scale += fabsf( Q[k*cols+i] );
if( scale != 0.0f )
{
for( k = i; k < rows; k++ )
{
Q[k*cols+i] /= scale;
s += Sqr( Q[k*cols+i] );
}
f = Q[i*cols+i];
g = -SameSign( sqrtf(s), f );
h = f * g - s;
Q[i*cols+i] = f - g;
if( i != cols - 1 )
{
for( j = l; j < cols; j++ )
{
s = 0.0f;
for( k = i; k < rows; k++ ) s += Q[k*cols+i] * Q[k*cols+j];
f = s / h;
for( k = i; k < rows; k++ ) Q[k*cols+j] += f * Q[k*cols+i];
}
}
for( k = i; k < rows; k++ ) Q[k*cols+i] *= scale;
}
}
diag[i] = scale * g;
g = 0.0f;
s = 0.0f;
scale = 0.0f;
if( i < rows && i != cols - 1 )
{
for( k = l; k < cols; k++ ) scale += fabsf( Q[i*cols+k] );
if( scale != 0.0f )
{
for( k = l; k < cols; k++ )
{
Q[i*cols+k] /= scale;
s += Sqr( Q[i*cols+k] );
}
f = Q[i*cols+l];
g = -SameSign( sqrtf(s), f );
h = f * g - s;
Q[i*cols+l] = f - g;
for( k = l; k < cols; k++ ) temp[k] = Q[i*cols+k] / h;
if( i != rows - 1 )
{
for( j = l; j < rows; j++ )
{
s = 0.0f;
for( k = l; k < cols; k++ ) s += Q[j*cols+k] * Q[i*cols+k];
for( k = l; k < cols; k++ ) Q[j*cols+k] += s * temp[k];
}
}
for( k = l; k < cols; k++ ) Q[i*cols+k] *= scale;
}
}
norm = max( norm, fabsf( diag[i] ) + fabsf( temp[i] ) );
}
for( i = cols - 1; i >= 0; i-- )
{
if( i < cols - 1 )
{
if( g != 0.0f )
{
for( j = l; j < cols; j++ ) R[i*cols+j] = ( Q[i*cols+j] / Q[i*cols+l] ) / g;
for( j = l; j < cols; j++ )
{
s = 0.0f;
for( k = l; k < cols; k++ ) s += Q[i*cols+k] * R[j*cols+k];
for( k = l; k < cols; k++ ) R[j*cols+k] += s * R[i*cols+k];
}
}
for( j = l; j < cols; j++ )
{
R[i*cols+j] = 0.0f;
R[j*cols+i] = 0.0f;
}
}
R[i*cols+i] = 1.0f;
g = temp[i];
l = i;
}
for( i = cols - 1; i >= 0; i-- )
{
l = i + 1;
g = diag[i];
if( i < cols - 1 ) for( j = l; j < cols; j++ ) Q[i*cols+j] = 0.0f;
if( g != 0.0f )
{
g = 1.0f / g;
if( i != cols - 1 )
{
for( j = l; j < cols; j++ )
{
s = 0.0f;
for( k = l; k < rows; k++ ) s += Q[k*cols+i] * Q[k*cols+j];
f = ( s / Q[i*cols+i] ) * g;
for( k = i; k < rows; k++ ) Q[k*cols+j] += f * Q[k*cols+i];
}
}
for( j = i; j < rows; j++ ) Q[j*cols+i] *= g;
}
else
{
for( j = i; j < rows; j++ ) Q[j*cols+i] = 0.0f;
}
Q[i*cols+i] += 1.0f;
}
for( k = cols - 1; k >= 0; k-- )
{
for( iter = 1; iter <= MaxIterations; iter++ )
{
int jump;
for( l = k; l >= 0; l-- )
{
q = l - 1;
if( fabsf( temp[l] ) + norm == norm ) { jump = 1; break; }
if( fabsf( diag[q] ) + norm == norm ) { jump = 0; break; }
}
if( !jump )
{
c = 0.0f;
s = 1.0f;
for( i = l; i <= k; i++ )
{
f = s * temp[i];
temp[i] *= c;
if( fabsf( f ) + norm == norm ) break;
g = diag[i];
h = svd_pythag( f, g );
diag[i] = h;
h = 1.0f / h;
c = g * h;
s = -f * h;
for( j = 0; j < rows; j++ )
{
y = Q[j*cols+q];
z = Q[j*cols+i];
Q[j*cols+q] = y * c + z * s;
Q[j*cols+i] = z * c - y * s;
}
}
}
z = diag[k];
if( l == k )
{
if( z < 0.0f )
{
diag[k] = -z;
for( j = 0; j < cols; j++ ) R[k*cols+j] *= -1.0f;
}
break;
}
if( iter >= MaxIterations ) return;
x = diag[l];
q = k - 1;
y = diag[q];
g = temp[q];
h = temp[k];
f = ( ( y - z ) * ( y + z ) + ( g - h ) * ( g + h ) ) / ( 2.0f * h * y );
g = svd_pythag( f, 1.0f );
f = ( ( x - z ) * ( x + z ) + h * ( ( y / ( f + SameSign( g, f ) ) ) - h ) ) / x;
c = 1.0f;
s = 1.0f;
for( j = l; j <= q; j++ )
{
i = j + 1;
g = temp[i];
y = diag[i];
h = s * g;
g = c * g;
z = svd_pythag( f, h );
temp[j] = z;
c = f / z;
s = h / z;
f = x * c + g * s;
g = g * c - x * s;
h = y * s;
y = y * c;
for( p = 0; p < cols; p++ )
{
x = R[j*cols+p];
z = R[i*cols+p];
R[j*cols+p] = x * c + z * s;
R[i*cols+p] = z * c - x * s;
}
z = svd_pythag( f, h );
diag[j] = z;
if( z != 0.0f )
{
z = 1.0f / z;
c = f * z;
s = h * z;
}
f = c * g + s * y;
x = c * y - s * g;
for( p = 0; p < rows; p++ )
{
y = Q[p*cols+j];
z = Q[p*cols+i];
Q[p*cols+j] = y * c + z * s;
Q[p*cols+i] = z * c - y * s;
}
}
temp[l] = 0.0f;
temp[k] = f;
diag[k] = x;
}
}
// Sort the singular values into descending order.
for( i = 0; i < cols - 1; i++ )
{
float biggest = diag[i]; // Biggest singular value so far.
int bindex = i; // The row/col it occurred in.
for( j = i + 1; j < cols; j++ )
{
if( diag[j] > biggest )
{
biggest = diag[j];
bindex = j;
}
}
if( bindex != i ) // Need to swap rows and columns.
{
// Swap columns in Q.
for (int j = 0; j < rows; ++j)
swap(Q[j*cols+i], Q[j*cols+bindex]);
// Swap rows in R.
for (int j = 0; j < rows; ++j)
swap(R[i*cols+j], R[bindex*cols+j]);
// Swap elements in diag.
swap(diag[i], diag[bindex]);
}
}
}

View File

@ -23,6 +23,8 @@ namespace nv
Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points); Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points);
Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points, const float * weights, const Vector3 & metric); Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points, const float * weights, const Vector3 & metric);
Vector3 computePrincipalComponent_SVD(int n, const Vector3 * points);
Plane bestPlane(int n, const Vector3 * points); Plane bestPlane(int n, const Vector3 * points);
bool isPlanar(int n, const Vector3 * points, float epsilon = NV_EPSILON); bool isPlanar(int n, const Vector3 * points, float epsilon = NV_EPSILON);