Merge private branch.

This commit is contained in:
castano 2008-04-17 06:58:43 +00:00
parent cb91740591
commit 17a4f765fb
20 changed files with 2440 additions and 43 deletions

View File

@ -49,7 +49,7 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
return;
}
}
normal = ::normalize(normal, epsilon);
normal = nv::normalize(normal, epsilon);
tangent -= normal * dot(normal, tangent);
bitangent -= normal * dot(normal, bitangent);
@ -68,7 +68,8 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
}
else
{
tangent = ::normalize(tangent, epsilon);
#if 0
tangent = nv::normalize(tangent, epsilon);
bitangent -= tangent * dot(tangent, bitangent);
if (length(bitangent) < epsilon)
@ -78,11 +79,47 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
}
else
{
tangent = ::normalize(tangent, epsilon);
bitangent = nv::normalize(bitangent, epsilon);
}
#else
if (length(bitangent) < epsilon)
{
bitangent = cross(tangent, normal);
nvCheck(isNormalized(bitangent));
}
else
{
tangent = nv::normalize(tangent);
bitangent = nv::normalize(bitangent);
Vector3 bisector = nv::normalize(tangent + bitangent);
Vector3 axis = cross(bisector, normal);
nvDebugCheck(isNormalized(axis, epsilon));
nvDebugCheck(equal(dot(axis, tangent), -dot(axis, bitangent), epsilon));
if (dot(axis, tangent) > 0)
{
tangent = nv::normalize(bisector + axis);
bitangent = nv::normalize(bisector - axis);
}
else
{
tangent = nv::normalize(bisector - axis);
bitangent = nv::normalize(bisector + axis);
}
}
#endif
}
// Check vector lengths.
/*// Check vector lengths.
if (!isNormalized(normal, epsilon))
{
nvDebug("%f %f %f\n", normal.x(), normal.y(), normal.z());
nvDebug("%f %f %f\n", tangent.x(), tangent.y(), tangent.z());
nvDebug("%f %f %f\n", bitangent.x(), bitangent.y(), bitangent.z());
}*/
nvCheck(isNormalized(normal, epsilon));
nvCheck(isNormalized(tangent, epsilon));
nvCheck(isNormalized(bitangent, epsilon));
@ -125,9 +162,18 @@ void Basis::buildFrameForDirection(Vector3::Arg d)
bitangent = cross(normal, tangent);
}
bool Basis::isValid() const
{
if (equal(normal, Vector3(zero))) return false;
if (equal(tangent, Vector3(zero))) return false;
if (equal(bitangent, Vector3(zero))) return false;
if (equal(determinant(), 0.0f)) return false;
return true;
}
/*
/// Transform by this basis. (From this basis to object space).
Vector3 Basis::transform(Vector3::Arg v) const
{
@ -144,30 +190,31 @@ Vector3 Basis::transformT(Vector3::Arg v)
}
/// Transform by the inverse. (From object space to this basis).
/// @note Uses Kramer's rule so the inverse is not accurate if the basis is ill-conditioned.
/// @note Uses Cramer's rule so the inverse is not accurate if the basis is ill-conditioned.
Vector3 Basis::transformI(Vector3::Arg v) const
{
const float det = determinant();
nvCheck(!equalf(det, 0.0f));
nvDebugCheck(!equal(det, 0.0f, 0.0f));
const float idet = 1.0f / det;
// Rows of the inverse matrix.
Vector3 r0, r1, r2;
r0.x = (bitangent.y() * normal.z() - bitangent.z() * normal.y()) * idet;
r0.y = -(bitangent.x() * normal.z() - bitangent.z() * normal.x()) * idet;
r0.z = (bitangent.x() * normal.y() - bitangent.y() * normal.x()) * idet;
Vector3 r0(
(bitangent.y() * normal.z() - bitangent.z() * normal.y()),
-(bitangent.x() * normal.z() - bitangent.z() * normal.x()),
(bitangent.x() * normal.y() - bitangent.y() * normal.x()));
r1.x = -(tangent.y() * normal.z() - tangent.z() * normal.y()) * idet;
r1.y = (tangent.x() * normal.z() - tangent.z() * normal.x()) * idet;
r1.z = -(tangent.x() * normal.y() - tangent.y() * normal.x()) * idet;
Vector3 r1(
-(tangent.y() * normal.z() - tangent.z() * normal.y()),
(tangent.x() * normal.z() - tangent.z() * normal.x()),
-(tangent.x() * normal.y() - tangent.y() * normal.x()));
r2.x = (tangent.y() * bitangent.z() - tangent.z() * bitangent.y()) * idet;
r2.y = -(tangent.x() * bitangent.z() - tangent.z() * bitangent.x()) * idet;
r2.z = (tangent.x() * bitangent.y() - tangent.y() * bitangent.x()) * idet;
Vector3 r2(
(tangent.y() * bitangent.z() - tangent.z() * bitangent.y()),
-(tangent.x() * bitangent.z() - tangent.z() * bitangent.x()),
(tangent.x() * bitangent.y() - tangent.y() * bitangent.x()));
return Vector3(dot(v, r0), dot(v, r1), dot(v, r2));
return Vector3(dot(v, r0), dot(v, r1), dot(v, r2)) * idet;
}
*/

View File

@ -54,7 +54,8 @@ namespace nv
tangent.z() * bitangent.x() * normal.y() - tangent.x() * bitangent.z() * normal.y();
}
/*
bool isValid() const;
// Get transform matrix for this basis.
NVMATH_API Matrix matrix() const;
@ -66,7 +67,7 @@ namespace nv
// Transform by the inverse. (From object space to this basis).
NVMATH_API Vector3 transformI(Vector3::Arg v) const;
*/
Vector3 tangent;
Vector3 bitangent;

View File

@ -9,6 +9,7 @@
namespace nv
{
class Stream;
/// Axis Aligned Bounding Box.
class Box
@ -27,11 +28,13 @@ public:
// Cast operators.
operator const float * () const { return reinterpret_cast<const float *>(this); }
/// Min corner of the box.
Vector3 mins() const { return m_mins; }
// Min corner of the box.
Vector3 minCorner() const { return m_mins; }
Vector3 & minCorner() { return m_mins; }
/// Max corner of the box.
Vector3 maxs() const { return m_maxs; }
// Max corner of the box.
Vector3 maxCorner() const { return m_maxs; }
Vector3 & maxCorner() { return m_maxs; }
/// Clear the bounds.
void clearBounds()
@ -108,7 +111,7 @@ public:
float area() const
{
const Vector3 d = extents();
return 4.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z());
return 8.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z());
}
/// Get the volume of the box.
@ -118,6 +121,16 @@ public:
return 8.0f * (d.x() * d.y() * d.z());
}
/// Return true if the box contains the given point.
bool contains(Vector3::Arg p) const
{
return
m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() &&
m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z();
}
friend Stream & operator<< (Stream & s, Box & box);
private:
Vector3 m_mins;
@ -125,15 +138,6 @@ private:
};
/*
/// Point inside box test.
inline bool pointInsideBox(const Box & b, Vector3::Arg p) const
{
return (m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() &&
m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z());
}
*/
} // nv namespace

View File

@ -5,13 +5,19 @@ SET(MATH_SRCS
Vector.h
Matrix.h
Quaternion.h
Plane.h Plane.cpp
Box.h
Color.h
Montecarlo.h Montecarlo.cpp
Random.h Random.cpp
SphericalHarmonic.h SphericalHarmonic.cpp
Basis.h Basis.cpp
Triangle.h Triangle.cpp TriBox.cpp)
Triangle.h Triangle.cpp TriBox.cpp
Polygon.h Polygon.cpp
TypeSerialization.h TypeSerialization.cpp
Sparse.h Sparse.cpp
Solver.h Solver.cpp
KahanSum.h)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

38
src/nvmath/KahanSum.h Normal file
View File

@ -0,0 +1,38 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_KAHANSUM_H
#define NV_MATH_KAHANSUM_H
#include <nvmath/nvmath.h>
namespace nv
{
class KahanSum
{
public:
KahanSum() : accum(0.0f), err(0) {};
void add(float f)
{
float compensated = f + err;
float tmp = accum + compensated;
err = accum - tmp;
err += compensated;
accum = tmp;
}
float sum() const
{
return accum;
}
private:
float accum;
float err;
};
} // nv namespace
#endif // NV_MATH_KAHANSUM_H

17
src/nvmath/Plane.cpp Normal file
View File

@ -0,0 +1,17 @@
// This code is in the public domain -- castanyo@yahoo.es
#include "Plane.h"
#include "Matrix.h"
namespace nv
{
Plane transformPlane(const Matrix& m, Plane::Arg p)
{
Vector3 newVec = transformVector(m, p.vector());
Vector3 ptInPlane = p.offset() * p.vector();
ptInPlane = transformPoint(m, ptInPlane);
return Plane(newVec, ptInPlane);
}
}

77
src/nvmath/Plane.h Normal file
View File

@ -0,0 +1,77 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_PLANE_H
#define NV_MATH_PLANE_H
#include <nvmath/nvmath.h>
#include <nvmath/Vector.h>
namespace nv
{
class Matrix;
class NVMATH_CLASS Plane
{
public:
typedef Plane const & Arg;
Plane();
Plane(float x, float y, float z, float w);
Plane(Vector4::Arg v);
Plane(Vector3::Arg v, float d);
Plane(Vector3::Arg normal, Vector3::Arg point);
const Plane & operator=(Plane::Arg v);
Vector3 vector() const;
scalar offset() const;
const Vector4 & asVector() const;
Vector4 & asVector();
void operator*=(scalar s);
private:
Vector4 p;
};
inline Plane::Plane() {}
inline Plane::Plane(float x, float y, float z, float w) : p(x, y, z, w) {}
inline Plane::Plane(Vector4::Arg v) : p(v) {}
inline Plane::Plane(Vector3::Arg v, float d) : p(v, d) {}
inline Plane::Plane(Vector3::Arg normal, Vector3::Arg point) : p(normal, dot(normal, point)) {}
inline const Plane & Plane::operator=(Plane::Arg v) { p = v.p; return *this; }
inline Vector3 Plane::vector() const { return p.xyz(); }
inline scalar Plane::offset() const { return p.w(); }
inline const Vector4 & Plane::asVector() const { return p; }
inline Vector4 & Plane::asVector() { return p; }
// Normalize plane.
inline Plane normalize(Plane::Arg plane, float epsilon = NV_EPSILON)
{
const float len = length(plane.vector());
nvDebugCheck(!isZero(len, epsilon));
const float inv = 1.0f / len;
return Plane(plane.asVector() * inv);
}
// Get the distance from the given point to this plane.
inline float distance(Plane::Arg plane, Vector3::Arg point)
{
return dot(plane.vector(), point) - plane.offset();
}
inline void Plane::operator*=(scalar s)
{
scale(p, s);
}
Plane transformPlane(const Matrix&, Plane::Arg);
} // nv namespace
#endif // NV_MATH_PLANE_H

168
src/nvmath/Polygon.cpp Normal file
View File

@ -0,0 +1,168 @@
// This code is in the public domain -- Ignacio Castaño <castanyo@yahoo.es>
#include <nvmath/Polygon.h>
#include <nvmath/Triangle.h>
#include <nvmath/Plane.h>
using namespace nv;
Polygon::Polygon()
{
}
Polygon::Polygon(const Triangle & t)
{
pointArray.resize(3);
pointArray[0] = t.v[0];
pointArray[1] = t.v[1];
pointArray[2] = t.v[2];
}
Polygon::Polygon(const Vector3 * points, uint vertexCount)
{
pointArray.resize(vertexCount);
for (uint i = 0; i < vertexCount; i++)
{
pointArray[i] = points[i];
}
}
/// Compute polygon area.
float Polygon::area() const
{
float total = 0;
const uint pointCount = pointArray.count();
for (uint i = 2; i < pointCount; i++)
{
Vector3 v1 = pointArray[i-1] - pointArray[0];
Vector3 v2 = pointArray[i] - pointArray[0];
total += 0.5f * length(cross(v1, v2));
}
return total;
}
/// Get the bounds of the polygon.
Box Polygon::bounds() const
{
Box bounds;
bounds.clearBounds();
foreach(p, pointArray)
{
bounds.addPointToBounds(pointArray[p]);
}
return bounds;
}
/// Get the plane of the polygon.
Plane Polygon::plane() const
{
// @@ Do something better than this?
Vector3 n = cross(pointArray[1] - pointArray[0], pointArray[2] - pointArray[0]);
return Vector4(n, dot(n, pointArray[0]));
}
/// Clip polygon to box.
uint Polygon::clipTo(const Box & box)
{
const Plane posX( 1, 0, 0, box.maxCorner().x());
const Plane negX(-1, 0, 0,-box.minCorner().x());
const Plane posY( 0, 1, 0, box.maxCorner().y());
const Plane negY( 0,-1, 0,-box.minCorner().y());
const Plane posZ( 0, 0, 1, box.maxCorner().z());
const Plane negZ( 0, 0,-1,-box.minCorner().z());
if (clipTo(posX) == 0) return 0;
if (clipTo(negX) == 0) return 0;
if (clipTo(posY) == 0) return 0;
if (clipTo(negY) == 0) return 0;
if (clipTo(posZ) == 0) return 0;
if (clipTo(negZ) == 0) return 0;
return pointArray.count();
}
/// Clip polygon to plane.
uint Polygon::clipTo(const Plane & plane)
{
int count = 0;
const uint pointCount = pointArray.count();
Array<Vector3> newPointArray(pointCount + 1); // @@ Do not create copy every time.
Vector3 prevPoint = pointArray[pointCount - 1];
float prevDist = dot(plane.vector(), prevPoint) - plane.offset();
for (uint i = 0; i < pointCount; i++)
{
const Vector3 point = pointArray[i];
float dist = dot(plane.vector(), point) - plane.offset();
// @@ Handle points on plane better.
if (dist <= 0) // interior.
{
if (prevDist > 0) // exterior
{
// Add segment intersection point.
Vector3 dp = point - prevPoint;
float t = dist / prevDist;
newPointArray.append(point - dp * t);
}
// Add interior point.
newPointArray.append(point);
}
else if (dist > 0 && prevDist < 0)
{
// Add segment intersection point.
Vector3 dp = point - prevPoint;
float t = dist / prevDist;
newPointArray.append(point - dp * t);
}
prevPoint = point;
prevDist = dist;
}
swap(pointArray, newPointArray);
return count;
}
void Polygon::removeColinearPoints()
{
const uint pointCount = pointArray.count();
Array<Vector3> newPointArray(pointCount);
for (uint i = 0 ; i < pointCount; i++)
{
int j = (i + 1) % pointCount;
int k = (i + pointCount - 1) % pointCount;
Vector3 v1 = normalize(pointArray[j] - pointArray[i]);
Vector3 v2 = normalize(pointArray[i] - pointArray[k]);
if (dot(v1, v2) < 0.999)
{
newPointArray.append(pointArray[i]);
}
}
swap(pointArray, newPointArray);
}

45
src/nvmath/Polygon.h Normal file
View File

@ -0,0 +1,45 @@
// This code is in the public domain -- Ignacio Castaño <castanyo@yahoo.es>
#ifndef NV_MATH_POLYGON_H
#define NV_MATH_POLYGON_H
#include <nvcore/Containers.h>
#include <nvmath/nvmath.h>
#include <nvmath/Vector.h>
#include <nvmath/Box.h>
namespace nv
{
class Box;
class Plane;
class Triangle;
class Polygon
{
NV_FORBID_COPY(Polygon);
public:
Polygon();
Polygon(const Triangle & t);
Polygon(const Vector3 * points, uint vertexCount);
float area() const;
Box bounds() const;
Plane plane() const;
uint clipTo(const Box & box);
uint clipTo(const Plane & plane);
void removeColinearPoints();
private:
Array<Vector3> pointArray;
};
} // nv namespace
#endif // NV_MATH_POLYGON_H

View File

@ -51,7 +51,6 @@ namespace nv
inline Quaternion mul(Quaternion::Arg a, Quaternion::Arg b)
{
// @@ Efficient SIMD implementation?
return Quaternion(
+ a.x() * b.w() + a.y()*b.z() - a.z()*b.y() + a.w()*b.x(),
- a.x() * b.z() + a.y()*b.w() + a.z()*b.x() + a.w()*b.y(),
@ -59,6 +58,40 @@ namespace nv
- a.x() * b.x() - a.y()*b.y() - a.z()*b.z() + a.w()*b.w());
}
inline Quaternion mul(Quaternion::Arg a, Vector3::Arg b)
{
return Quaternion(
+ a.y()*b.z() - a.z()*b.y() + a.w()*b.x(),
- a.x() * b.z() + a.z()*b.x() + a.w()*b.y(),
+ a.x() * b.y() - a.y()*b.x() + a.w()*b.z(),
- a.x() * b.x() - a.y()*b.y() - a.z()*b.z() );
}
inline Quaternion mul(Vector3::Arg a, Quaternion::Arg b)
{
return Quaternion(
+ a.x() * b.w() + a.y()*b.z() - a.z()*b.y(),
- a.x() * b.z() + a.y()*b.w() + a.z()*b.x(),
+ a.x() * b.y() - a.y()*b.x() + a.z()*b.w(),
- a.x() * b.x() - a.y()*b.y() - a.z()*b.z());
}
inline Quaternion operator *(Quaternion::Arg a, Quaternion::Arg b)
{
return mul(a, b);
}
inline Quaternion operator *(Quaternion::Arg a, Vector3::Arg b)
{
return mul(a, b);
}
inline Quaternion operator *(Vector3::Arg a, Quaternion::Arg b)
{
return mul(a, b);
}
inline Quaternion scale(Quaternion::Arg q, float s)
{
return scale(q.asVector(), s);
@ -122,6 +155,24 @@ namespace nv
return Quaternion(Vector4(v * s, c));
}
inline Vector3 imag(Quaternion::Arg q)
{
return q.asVector().xyz();
}
inline float real(Quaternion::Arg q)
{
return q.w();
}
/// Transform vector.
inline Vector3 transform(Quaternion::Arg q, Vector3::Arg v)
{
Quaternion t = q * v * conjugate(q);
return imag(t);
}
} // nv namespace

726
src/nvmath/Solver.cpp Normal file
View File

@ -0,0 +1,726 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvmath/Solver.h>
using namespace nv;
namespace
{
class Preconditioner
{
public:
// Virtual dtor.
virtual ~Preconditioner() { }
// Apply preconditioning step.
virtual void apply(const FullVector & x, FullVector & y) const = 0;
};
// Jacobi preconditioner.
class JacobiPreconditioner : public Preconditioner
{
public:
JacobiPreconditioner(const SparseMatrix & M, bool symmetric) : m_inverseDiagonal(M.width())
{
nvCheck(M.isSquare());
for(uint x = 0; x < M.width(); x++)
{
float elem = M.getCoefficient(x, x);
nvDebugCheck( elem != 0.0f );
if (symmetric)
{
m_inverseDiagonal[x] = 1.0f / sqrt(fabs(elem));
}
else
{
m_inverseDiagonal[x] = 1.0f / elem;
}
}
}
void apply(const FullVector & x, FullVector & y) const
{
y *= x;
}
private:
FullVector m_inverseDiagonal;
};
} // namespace
static int ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
static int ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
// Solve the symmetric system: At·A·x = At·b
void nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
{
nvDebugCheck(A.width() == x.dimension());
nvDebugCheck(A.height() == b.dimension());
nvDebugCheck(A.height() >= A.width()); // @@ If height == width we could solve it directly...
const uint D = A.width();
FullVector Atb(D);
mult(Transposed, A, b, Atb);
SparseMatrix AtA(D);
mult(Transposed, A, NoTransposed, A, AtA);
SymmetricSolver(AtA, Atb, x, epsilon);
}
// See section 10.4.3 in: Mesh Parameterization: Theory and Practice, Siggraph Course Notes, August 2007
void nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon/*= 1e-5f*/)
{
nvDebugCheck(A.width() == x.dimension());
nvDebugCheck(A.height() == b.dimension());
nvDebugCheck(A.height() >= A.width() - lockedCount);
// @@ This is not the most efficient way of building a system with reduced degrees of freedom. It would be faster to do it on the fly.
const uint D = A.width() - lockedCount;
nvDebugCheck(D > 0);
// Compute: b - Al * xl
FullVector b_Alxl(b);
for (uint y = 0; y < A.height(); y++)
{
const uint count = A.getRow(y).count();
for (uint e = 0; e < count; e++)
{
uint column = A.getRow(y)[e].x;
bool isFree = true;
for (uint i = 0; i < lockedCount; i++)
{
isFree &= (lockedParameters[i] != column);
}
if (!isFree)
{
b_Alxl[y] -= x[column] * A.getRow(y)[e].v;
}
}
}
// Remove locked columns from A.
SparseMatrix Af(D, A.height());
for (uint y = 0; y < A.height(); y++)
{
const uint count = A.getRow(y).count();
for (uint e = 0; e < count; e++)
{
uint column = A.getRow(y)[e].x;
uint ix = column;
bool isFree = true;
for (uint i = 0; i < lockedCount; i++)
{
isFree &= (lockedParameters[i] != column);
if (column > lockedParameters[i]) ix--; // shift columns
}
if (isFree)
{
Af.setCoefficient(ix, y, A.getRow(y)[e].v);
}
}
}
// Remove elements from x
FullVector xf(D);
for (uint i = 0, j = 0; i < A.width(); i++)
{
bool isFree = true;
for (uint l = 0; l < lockedCount; l++)
{
isFree &= (lockedParameters[l] != i);
}
if (isFree)
{
xf[j++] = x[i];
}
}
// Solve reduced system.
LeastSquaresSolver(Af, b_Alxl, xf, epsilon);
// Copy results back to x.
for (uint i = 0, j = 0; i < A.width(); i++)
{
bool isFree = true;
for (uint l = 0; l < lockedCount; l++)
{
isFree &= (lockedParameters[l] != i);
}
if (isFree)
{
x[i] = xf[j++];
}
}
}
void nv::SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
{
nvDebugCheck(A.height() == A.width());
nvDebugCheck(A.height() == b.dimension());
nvDebugCheck(b.dimension() == x.dimension());
// JacobiPreconditioner jacobi(A, true);
// ConjugateGradientSolver(jacobi, A, b, x, epsilon);
ConjugateGradientSolver(A, b, x, epsilon);
}
/**
* Compute the solution of the sparse linear system Ab=x using the Conjugate
* Gradient method.
*
* Solving sparse linear systems:
* (1) A·x = b
*
* The conjugate gradient algorithm solves (1) only in the case that A is
* symmetric and positive definite. It is based on the idea of minimizing the
* function
*
* (2) f(x) = 1/2·x·A·x - b·x
*
* This function is minimized when its gradient
*
* (3) df = A·x - b
*
* is zero, which is equivalent to (1). The minimization is carried out by
* generating a succession of search directions p.k and improved minimizers x.k.
* At each stage a quantity alfa.k is found that minimizes f(x.k + alfa.k·p.k),
* and x.k+1 is set equal to the new point x.k + alfa.k·p.k. The p.k and x.k are
* built up in such a way that x.k+1 is also the minimizer of f over the whole
* vector space of directions already taken, {p.1, p.2, . . . , p.k}. After N
* iterations you arrive at the minimizer over the entire vector space, i.e., the
* solution to (1).
*
* For a really good explanation of the method see:
*
* "An Introduction to the Conjugate Gradient Method Without the Agonizing Pain",
* Jonhathan Richard Shewchuk.
*
**/
/*static*/ int ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
{
nvDebugCheck( A.isSquare() );
nvDebugCheck( A.width() == b.dimension() );
nvDebugCheck( A.width() == x.dimension() );
int i = 0;
const int D = A.width();
const int i_max = 4 * D; // Convergence should be linear, but in some cases, it's not.
FullVector r(D); // residual
FullVector p(D); // search direction
FullVector q(D); //
float delta_0;
float delta_old;
float delta_new;
float alpha;
float beta;
// r = b - A·x;
copy(b, r);
sgemv(-1, A, x, 1, r);
// p = r;
copy(r, p);
delta_new = dot( r, r );
delta_0 = delta_new;
while (i < i_max && delta_new > epsilon*epsilon*delta_0)
{
i++;
// q = A·p
mult(A, p, q);
// alpha = delta_new / p·q
alpha = delta_new / dot( p, q );
// x = alfa·p + x
saxpy(alpha, p, x);
if ((i & 31) == 0) // recompute r after 32 steps
{
// r = b - A·x
copy(b, r);
sgemv(-1, A, x, 1, r);
}
else
{
// r = r - alpha·q
saxpy(-alpha, q, r);
}
delta_old = delta_new;
delta_new = dot( r, r );
beta = delta_new / delta_old;
// p = r + beta·p
copy(r, p);
saxpy(beta, p, r);
}
return i;
}
// Conjugate gradient with preconditioner.
/*static*/ int ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
{
nvDebugCheck( A.isSquare() );
nvDebugCheck( A.width() == b.dimension() );
nvDebugCheck( A.width() == x.dimension() );
int i = 0;
const int D = A.width();
const int i_max = 4 * D; // Convergence should be linear, but in some cases, it's not.
FullVector r(D); // residual
FullVector p(D); // search direction
FullVector q(D); //
FullVector s(D); // preconditioned
float delta_0;
float delta_old;
float delta_new;
float alpha;
float beta;
// r = b - A·x
copy(b, r);
sgemv(-1, A, x, 1, r);
// p = M^-1 · r
preconditioner.apply(r, p);
//copy(r, p);
delta_new = dot(r, p);
delta_0 = delta_new;
while (i < i_max && delta_new > epsilon*epsilon*delta_0)
{
i++;
// q = A·p
mult(A, p, q);
// alpha = delta_new / p·q
alpha = delta_new / dot(p, q);
// x = alfa·p + x
saxpy(alpha, p, x);
if ((i & 31) == 0) // recompute r after 32 steps
{
// r = b - A·x
copy(b, r);
sgemv(-1, A, x, 1, r);
}
else
{
// r = r - alfa·q
saxpy(-alpha, q, r);
}
// s = M^-1 · r
preconditioner.apply(r, s);
//copy(r, s);
delta_old = delta_new;
delta_new = dot( r, s );
beta = delta_new / delta_old;
// p = s + beta·p
copy(s, p);
saxpy(beta, p, s);
}
return i;
}
#if 0 // Nonsymmetric solvers
/** Bi-conjugate gradient method. */
MATHLIB_API int BiConjugateGradientSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
piDebugCheck( A.IsSquare() );
piDebugCheck( A.Width() == b.Dim() );
piDebugCheck( A.Width() == x.Dim() );
int i = 0;
const int D = A.Width();
const int i_max = 4 * D;
float resid;
float rho_1 = 0;
float rho_2 = 0;
float alpha;
float beta;
DenseVector r(D);
DenseVector rtilde(D);
DenseVector p(D);
DenseVector ptilde(D);
DenseVector q(D);
DenseVector qtilde(D);
DenseVector tmp(D); // temporal vector.
// r = b - A·x;
A.Product( x, tmp );
r.Sub( b, tmp );
// rtilde = r
rtilde.Set( r );
// p = r;
p.Set( r );
// ptilde = rtilde
ptilde.Set( rtilde );
float normb = b.Norm();
if( normb == 0.0 ) normb = 1;
// test convergence
resid = r.Norm() / normb;
if( resid < epsilon ) {
// method converges?
return 0;
}
while( i < i_max ) {
i++;
rho_1 = DenseVectorDotProduct( r, rtilde );
if( rho_1 == 0 ) {
// method fails.
return -i;
}
if (i == 1) {
p.Set( r );
ptilde.Set( rtilde );
}
else {
beta = rho_1 / rho_2;
// p = r + beta * p;
p.Mad( r, p, beta );
// ptilde = ztilde + beta * ptilde;
ptilde.Mad( rtilde, ptilde, beta );
}
// q = A * p;
A.Product( p, q );
// qtilde = A^t * ptilde;
A.TransProduct( ptilde, qtilde );
alpha = rho_1 / DenseVectorDotProduct( ptilde, q );
// x += alpha * p;
x.Mad( x, p, alpha );
// r -= alpha * q;
r.Mad( r, q, -alpha );
// rtilde -= alpha * qtilde;
rtilde.Mad( rtilde, qtilde, -alpha );
rho_2 = rho_1;
// test convergence
resid = r.Norm() / normb;
if( resid < epsilon ) {
// method converges
return i;
}
}
return i;
}
/** Bi-conjugate gradient stabilized method. */
int BiCGSTABSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
piDebugCheck( A.IsSquare() );
piDebugCheck( A.Width() == b.Dim() );
piDebugCheck( A.Width() == x.Dim() );
int i = 0;
const int D = A.Width();
const int i_max = 2 * D;
float resid;
float rho_1 = 0;
float rho_2 = 0;
float alpha = 0;
float beta = 0;
float omega = 0;
DenseVector p(D);
DenseVector phat(D);
DenseVector s(D);
DenseVector shat(D);
DenseVector t(D);
DenseVector v(D);
DenseVector r(D);
DenseVector rtilde(D);
DenseVector tmp(D);
// r = b - A·x;
A.Product( x, tmp );
r.Sub( b, tmp );
// rtilde = r
rtilde.Set( r );
float normb = b.Norm();
if( normb == 0.0 ) normb = 1;
// test convergence
resid = r.Norm() / normb;
if( resid < epsilon ) {
// method converges?
return 0;
}
while( i<i_max ) {
i++;
rho_1 = DenseVectorDotProduct( rtilde, r );
if( rho_1 == 0 ) {
// method fails
return -i;
}
if( i == 1 ) {
p.Set( r );
}
else {
beta = (rho_1 / rho_2) * (alpha / omega);
// p = r + beta * (p - omega * v);
p.Mad( p, v, -omega );
p.Mad( r, p, beta );
}
//phat = M.solve(p);
phat.Set( p );
//Precond( &phat, p );
//v = A * phat;
A.Product( phat, v );
alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
// s = r - alpha * v;
s.Mad( r, v, -alpha );
resid = s.Norm() / normb;
if( resid < epsilon ) {
// x += alpha * phat;
x.Mad( x, phat, alpha );
return i;
}
//shat = M.solve(s);
shat.Set( s );
//Precond( &shat, s );
//t = A * shat;
A.Product( shat, t );
omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
// x += alpha * phat + omega * shat;
x.Mad( x, shat, omega );
x.Mad( x, phat, alpha );
//r = s - omega * t;
r.Mad( s, t, -omega );
rho_2 = rho_1;
resid = r.Norm() / normb;
if( resid < epsilon ) {
return i;
}
if( omega == 0 ) {
return -i; // ???
}
}
return i;
}
/** Bi-conjugate gradient stabilized method. */
int BiCGSTABPrecondSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, const IPreconditioner &M, float epsilon ) {
piDebugCheck( A.IsSquare() );
piDebugCheck( A.Width() == b.Dim() );
piDebugCheck( A.Width() == x.Dim() );
int i = 0;
const int D = A.Width();
const int i_max = D;
// const int i_max = 1000;
float resid;
float rho_1 = 0;
float rho_2 = 0;
float alpha = 0;
float beta = 0;
float omega = 0;
DenseVector p(D);
DenseVector phat(D);
DenseVector s(D);
DenseVector shat(D);
DenseVector t(D);
DenseVector v(D);
DenseVector r(D);
DenseVector rtilde(D);
DenseVector tmp(D);
// r = b - A·x;
A.Product( x, tmp );
r.Sub( b, tmp );
// rtilde = r
rtilde.Set( r );
float normb = b.Norm();
if( normb == 0.0 ) normb = 1;
// test convergence
resid = r.Norm() / normb;
if( resid < epsilon ) {
// method converges?
return 0;
}
while( i<i_max ) {
i++;
rho_1 = DenseVectorDotProduct( rtilde, r );
if( rho_1 == 0 ) {
// method fails
return -i;
}
if( i == 1 ) {
p.Set( r );
}
else {
beta = (rho_1 / rho_2) * (alpha / omega);
// p = r + beta * (p - omega * v);
p.Mad( p, v, -omega );
p.Mad( r, p, beta );
}
//phat = M.solve(p);
//phat.Set( p );
M.Precond( &phat, p );
//v = A * phat;
A.Product( phat, v );
alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
// s = r - alpha * v;
s.Mad( r, v, -alpha );
resid = s.Norm() / normb;
//printf( "--- Iteration %d: residual = %f\n", i, resid );
if( resid < epsilon ) {
// x += alpha * phat;
x.Mad( x, phat, alpha );
return i;
}
//shat = M.solve(s);
//shat.Set( s );
M.Precond( &shat, s );
//t = A * shat;
A.Product( shat, t );
omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
// x += alpha * phat + omega * shat;
x.Mad( x, shat, omega );
x.Mad( x, phat, alpha );
//r = s - omega * t;
r.Mad( s, t, -omega );
rho_2 = rho_1;
resid = r.Norm() / normb;
if( resid < epsilon ) {
return i;
}
if( omega == 0 ) {
return -i; // ???
}
}
return i;
}
#endif

20
src/nvmath/Solver.h Normal file
View File

@ -0,0 +1,20 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_SOLVER_H
#define NV_MATH_SOLVER_H
#include <nvmath/Sparse.h>
namespace nv
{
// Linear solvers.
NVMATH_API void LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
NVMATH_API void LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon = 1e-5f);
NVMATH_API void SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
// NVMATH_API void NonSymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
} // nv namespace
#endif // NV_MATH_SOLVER_H

831
src/nvmath/Sparse.cpp Normal file
View File

@ -0,0 +1,831 @@
// This code is in the public domain -- Ignacio Castaño <castanyo@yahoo.es>
#include <nvmath/Sparse.h>
#include <nvmath/KahanSum.h>
using namespace nv;
/// Ctor.
FullVector::FullVector(uint dim)
{
m_array.resize(dim);
}
/// Copy ctor.
FullVector::FullVector(const FullVector & v) : m_array(v.m_array)
{
}
/// Copy operator
const FullVector & FullVector::operator=(const FullVector & v)
{
nvCheck(dimension() == v.dimension());
m_array = v.m_array;
return *this;
}
void FullVector::fill(float f)
{
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] = f;
}
}
void FullVector::operator+= (const FullVector & v)
{
nvDebugCheck(dimension() == v.dimension());
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] += v.m_array[i];
}
}
void FullVector::operator-= (const FullVector & v)
{
nvDebugCheck(dimension() == v.dimension());
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] -= v.m_array[i];
}
}
void FullVector::operator*= (const FullVector & v)
{
nvDebugCheck(dimension() == v.dimension());
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] *= v.m_array[i];
}
}
void FullVector::operator+= (float f)
{
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] += f;
}
}
void FullVector::operator-= (float f)
{
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] -= f;
}
}
void FullVector::operator*= (float f)
{
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] *= f;
}
}
void nv::saxpy(float a, const FullVector & x, FullVector & y)
{
nvDebugCheck(x.dimension() == y.dimension());
const uint dim = x.dimension();
for (uint i = 0; i < dim; i++)
{
y[i] += a * x[i];
}
}
void nv::copy(const FullVector & x, FullVector & y)
{
nvDebugCheck(x.dimension() == y.dimension());
const uint dim = x.dimension();
for (uint i = 0; i < dim; i++)
{
y[i] = x[i];
}
}
void nv::scal(float a, FullVector & x)
{
const uint dim = x.dimension();
for (uint i = 0; i < dim; i++)
{
x[i] *= a;
}
}
float nv::dot(const FullVector & x, const FullVector & y)
{
nvDebugCheck(x.dimension() == y.dimension());
const uint dim = x.dimension();
/*float sum = 0;
for (uint i = 0; i < dim; i++)
{
sum += x[i] * y[i];
}
return sum;*/
KahanSum kahan;
for (uint i = 0; i < dim; i++)
{
kahan.add(x[i] * y[i]);
}
return kahan.sum();
}
FullMatrix::FullMatrix(uint d) : m_width(d), m_height(d)
{
m_array.resize(d*d, 0.0f);
}
FullMatrix::FullMatrix(uint w, uint h) : m_width(w), m_height(h)
{
m_array.resize(w*h, 0.0f);
}
FullMatrix::FullMatrix(const FullMatrix & m) : m_width(m.m_width), m_height(m.m_height)
{
m_array = m.m_array;
}
const FullMatrix & FullMatrix::operator=(const FullMatrix & m)
{
nvCheck(width() == m.width());
nvCheck(height() == m.height());
m_array = m.m_array;
return *this;
}
float FullMatrix::getCoefficient(uint x, uint y) const
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
return m_array[y * width() + x];
}
void FullMatrix::setCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
m_array[y * width() + x] = f;
}
void FullMatrix::addCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
m_array[y * width() + x] += f;
}
void FullMatrix::mulCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
m_array[y * width() + x] *= f;
}
float FullMatrix::dotRow(uint y, const FullVector & v) const
{
nvDebugCheck( v.dimension() == width() );
nvDebugCheck( y < height() );
float sum = 0;
const uint count = v.dimension();
for (uint i = 0; i < count; i++)
{
sum += m_array[y * count + i] * v[i];
}
return sum;
}
void FullMatrix::madRow(uint y, float alpha, FullVector & v) const
{
nvDebugCheck( v.dimension() == width() );
nvDebugCheck( y < height() );
const uint count = v.dimension();
for (uint i = 0; i < count; i++)
{
v[i] += m_array[y * count + i];
}
}
// y = M * x
void nv::mult(const FullMatrix & M, const FullVector & x, FullVector & y)
{
mult(NoTransposed, M, x, y);
}
void nv::mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y)
{
const uint w = M.width();
const uint h = M.height();
if (TM == Transposed)
{
nvDebugCheck( h == x.dimension() );
nvDebugCheck( w == y.dimension() );
y.fill(0.0f);
for (uint i = 0; i < h; i++)
{
M.madRow(i, x[i], y);
}
}
else
{
nvDebugCheck( w == x.dimension() );
nvDebugCheck( h == y.dimension() );
for (uint i = 0; i < h; i++)
{
y[i] = M.dotRow(i, x);
}
}
}
// y = alpha*A*x + beta*y
void nv::sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
{
sgemv(alpha, NoTransposed, A, x, beta, y);
}
void nv::sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
{
const uint w = A.width();
const uint h = A.height();
if (TA == Transposed)
{
nvDebugCheck( h == x.dimension() );
nvDebugCheck( w == y.dimension() );
for (uint i = 0; i < h; i++)
{
A.madRow(i, alpha * x[i], y);
}
}
else
{
nvDebugCheck( w == x.dimension() );
nvDebugCheck( h == y.dimension() );
for (uint i = 0; i < h; i++)
{
y[i] = alpha * A.dotRow(i, x) + beta * y[i];
}
}
}
// Multiply a row of A by a column of B.
static float dot(uint j, Transpose TA, const FullMatrix & A, uint i, Transpose TB, const FullMatrix & B)
{
const uint w = (TA == NoTransposed) ? A.width() : A.height();
nvDebugCheck(w == (TB == NoTransposed) ? B.height() : A.width());
float sum = 0.0f;
for (uint k = 0; k < w; k++)
{
const float a = (TA == NoTransposed) ? A.getCoefficient(k, j) : A.getCoefficient(j, k); // @@ Move branches out of the loop?
const float b = (TB == NoTransposed) ? B.getCoefficient(i, k) : A.getCoefficient(k, i);
sum += a * b;
}
return sum;
}
// C = A * B
void nv::mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C)
{
mult(NoTransposed, A, NoTransposed, B, C);
}
void nv::mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C)
{
sgemm(1.0f, TA, A, TB, B, 0.0f, C);
}
// C = alpha*A*B + beta*C
void nv::sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C)
{
sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
}
void nv::sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C)
{
const uint w = C.width();
const uint h = C.height();
uint aw = (TA == NoTransposed) ? A.width() : A.height();
uint ah = (TA == NoTransposed) ? A.height() : A.width();
uint bw = (TB == NoTransposed) ? B.width() : B.height();
uint bh = (TB == NoTransposed) ? B.height() : B.width();
nvDebugCheck(aw == bh);
nvDebugCheck(bw == ah);
nvDebugCheck(w == bw);
nvDebugCheck(h == ah);
for (uint y = 0; y < h; y++)
{
for (uint x = 0; x < w; x++)
{
float c = alpha * ::dot(x, TA, A, y, TB, B) + beta * C.getCoefficient(x, y);
C.setCoefficient(x, y, c);
}
}
}
/// Ctor. Init the size of the sparse matrix.
SparseMatrix::SparseMatrix(uint d) : m_width(d)
{
m_array.resize(d);
}
/// Ctor. Init the size of the sparse matrix.
SparseMatrix::SparseMatrix(uint w, uint h) : m_width(w)
{
m_array.resize(h);
}
SparseMatrix::SparseMatrix(const SparseMatrix & m) : m_width(m.m_width)
{
m_array = m.m_array;
}
const SparseMatrix & SparseMatrix::operator=(const SparseMatrix & m)
{
nvCheck(width() == m.width());
nvCheck(height() == m.height());
m_array = m.m_array;
return *this;
}
// x is column, y is row
float SparseMatrix::getCoefficient(uint x, uint y) const
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
if (m_array[y][i].x == x) return m_array[y][i].v;
}
return 0.0f;
}
void SparseMatrix::setCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
if (m_array[y][i].x == x)
{
m_array[y][i].v = f;
return;
}
}
Coefficient c = { x, f };
m_array[y].append( c );
}
void SparseMatrix::addCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
if (m_array[y][i].x == x)
{
m_array[y][i].v += f;
return;
}
}
Coefficient c = { x, f };
m_array[y].append( c );
}
void SparseMatrix::mulCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
if (m_array[y][i].x == x)
{
m_array[y][i].v *= f;
return;
}
}
Coefficient c = { x, f };
m_array[y].append( c );
}
float SparseMatrix::sumRow(uint y) const
{
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
/*float sum = 0;
for (uint i = 0; i < count; i++)
{
sum += m_array[y][i].v;
}
return sum;*/
KahanSum kahan;
for (uint i = 0; i < count; i++)
{
kahan.add(m_array[y][i].v);
}
return kahan.sum();
}
float SparseMatrix::dotRow(uint y, const FullVector & v) const
{
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
/*float sum = 0;
for (uint i = 0; i < count; i++)
{
sum += m_array[y][i].v * v[m_array[y][i].x];
}
return sum;*/
KahanSum kahan;
for (uint i = 0; i < count; i++)
{
kahan.add(m_array[y][i].v * v[m_array[y][i].x]);
}
return kahan.sum();
}
void SparseMatrix::madRow(uint y, float alpha, FullVector & v) const
{
nvDebugCheck(y < height());
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
v[m_array[y][i].x] += alpha * m_array[y][i].v;
}
}
void SparseMatrix::clearRow(uint y)
{
nvDebugCheck( y < height() );
m_array[y].clear();
}
void SparseMatrix::scaleRow(uint y, float f)
{
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
m_array[y][i].v *= f;
}
}
void SparseMatrix::normalizeRow(uint y)
{
nvDebugCheck( y < height() );
float norm = 0.0f;
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
float f = m_array[y][i].v;
norm += f * f;
}
scaleRow(y, 1.0f / sqrtf(norm));
}
void SparseMatrix::clearColumn(uint x)
{
nvDebugCheck(x < width());
for (uint y = 0; y < height(); y++)
{
const uint count = m_array[y].count();
for (uint e = 0; e < count; e++)
{
if (m_array[y][e].x == x)
{
m_array[y][e].v = 0.0f;
break;
}
}
}
}
void SparseMatrix::scaleColumn(uint x, float f)
{
nvDebugCheck(x < width());
for (uint y = 0; y < height(); y++)
{
const uint count = m_array[y].count();
for (uint e = 0; e < count; e++)
{
if (m_array[y][e].x == x)
{
m_array[y][e].v *= f;
break;
}
}
}
}
const Array<SparseMatrix::Coefficient> & SparseMatrix::getRow(uint y) const
{
return m_array[y];
}
// y = M * x
void nv::mult(const SparseMatrix & M, const FullVector & x, FullVector & y)
{
mult(NoTransposed, M, x, y);
}
void nv::mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y)
{
const uint w = M.width();
const uint h = M.height();
if (TM == Transposed)
{
nvDebugCheck( h == x.dimension() );
nvDebugCheck( w == y.dimension() );
y.fill(0.0f);
for (uint i = 0; i < h; i++)
{
M.madRow(i, x[i], y);
}
}
else
{
nvDebugCheck( w == x.dimension() );
nvDebugCheck( h == y.dimension() );
for (uint i = 0; i < h; i++)
{
y[i] = M.dotRow(i, x);
}
}
}
// y = alpha*A*x + beta*y
void nv::sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
{
sgemv(alpha, NoTransposed, A, x, beta, y);
}
void nv::sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
{
const uint w = A.width();
const uint h = A.height();
if (TA == Transposed)
{
nvDebugCheck( h == x.dimension() );
nvDebugCheck( w == y.dimension() );
for (uint i = 0; i < h; i++)
{
A.madRow(i, alpha * x[i], y);
}
}
else
{
nvDebugCheck( w == x.dimension() );
nvDebugCheck( h == y.dimension() );
for (uint i = 0; i < h; i++)
{
y[i] = alpha * A.dotRow(i, x) + beta * y[i];
}
}
}
// dot y-row of A by x-column of B
static float dotRowColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
{
const Array<SparseMatrix::Coefficient> & row = A.getRow(y);
const uint count = row.count();
/*float sum = 0.0f;
for (uint i = 0; i < count; i++)
{
const SparseMatrix::Coefficient & c = row[i];
sum += c.v * B.getCoefficient(x, c.x);
}
return sum;*/
KahanSum kahan;
for (uint i = 0; i < count; i++)
{
const SparseMatrix::Coefficient & c = row[i];
kahan.add(c.v * B.getCoefficient(x, c.x));
}
return kahan.sum();
}
// dot y-row of A by x-row of B
static float dotRowRow(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
{
const Array<SparseMatrix::Coefficient> & row = A.getRow(y);
const uint count = row.count();
/*float sum = 0.0f;
for (uint i = 0; i < count; i++)
{
const SparseMatrix::Coefficient & c = row[i];
sum += c.v * B.getCoefficient(c.x, x);
}
//return sum;*/
KahanSum kahan;
for (uint i = 0; i < count; i++)
{
const SparseMatrix::Coefficient & c = row[i];
kahan.add(c.v * B.getCoefficient(c.x, x));
}
return kahan.sum();
}
// dot y-column of A by x-column of B
static float dotColumnColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
{
nvDebugCheck(A.height() == B.height());
const uint h = A.height();
/*float sum = 0.0f;
for (uint i = 0; i < h; i++)
{
sum += A.getCoefficient(y, i) * B.getCoefficient(x, i);
}
//return sum;*/
KahanSum kahan;
for (uint i = 0; i < h; i++)
{
kahan.add(A.getCoefficient(y, i) * B.getCoefficient(x, i));
}
return kahan.sum();
}
// C = A * B
void nv::mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C)
{
mult(NoTransposed, A, NoTransposed, B, C);
}
void nv::mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C)
{
sgemm(1.0f, TA, A, TB, B, 0.0f, C);
}
// C = alpha*A*B + beta*C
void nv::sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C)
{
sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
}
void nv::sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C)
{
const uint w = C.width();
const uint h = C.height();
uint aw = (TA == NoTransposed) ? A.width() : A.height();
uint ah = (TA == NoTransposed) ? A.height() : A.width();
uint bw = (TB == NoTransposed) ? B.width() : B.height();
uint bh = (TB == NoTransposed) ? B.height() : B.width();
nvDebugCheck(aw == bh);
nvDebugCheck(bw == ah);
nvDebugCheck(w == bw);
nvDebugCheck(h == ah);
for (uint y = 0; y < h; y++)
{
for (uint x = 0; x < w; x++)
{
float c = beta * C.getCoefficient(x, y);
if (TA == NoTransposed && TB == NoTransposed)
{
// dot y-row of A by x-column of B.
c += alpha * dotRowColumn(y, A, x, B);
}
else if (TA == Transposed && TB == Transposed)
{
// dot y-column of A by x-row of B.
c += alpha * dotRowColumn(x, B, y, A);
}
else if (TA == Transposed && TB == NoTransposed)
{
// dot y-column of A by x-column of B.
c += alpha * dotColumnColumn(y, A, x, B);
}
else if (TA == NoTransposed && TB == Transposed)
{
// dot y-row of A by x-row of B.
c += alpha * dotRowRow(y, A, x, B);
}
if (c != 0.0f)
{
C.setCoefficient(x, y, c);
}
}
}
}
// C = At * A
void nv::sqm(const SparseMatrix & A, SparseMatrix & C)
{
// This is quite expensive...
mult(Transposed, A, NoTransposed, A, C);
}

198
src/nvmath/Sparse.h Normal file
View File

@ -0,0 +1,198 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_SPARSE_H
#define NV_MATH_SPARSE_H
#include <nvcore/Containers.h>
#include <nvmath/nvmath.h>
// Full and sparse vector and matrix classes. BLAS subset.
namespace nv
{
class FullVector;
class FullMatrix;
class SparseMatrix;
/// Fixed size vector class.
class FullVector
{
public:
FullVector(uint dim);
FullVector(const FullVector & v);
const FullVector & operator=(const FullVector & v);
uint dimension() const { return m_array.count(); }
const float & operator[]( uint index ) const { return m_array[index]; }
float & operator[] ( uint index ) { return m_array[index]; }
void fill(float f);
void operator+= (const FullVector & v);
void operator-= (const FullVector & v);
void operator*= (const FullVector & v);
void operator+= (float f);
void operator-= (float f);
void operator*= (float f);
private:
Array<float> m_array;
};
// Pseudo-BLAS interface.
NVMATH_API void saxpy(float a, const FullVector & x, FullVector & y); // y = a * x + y
NVMATH_API void copy(const FullVector & x, FullVector & y);
NVMATH_API void scal(float a, FullVector & x);
NVMATH_API float dot(const FullVector & x, const FullVector & y);
enum Transpose
{
NoTransposed = 0,
Transposed = 1
};
/// Full matrix class.
class FullMatrix
{
public:
FullMatrix(uint d);
FullMatrix(uint w, uint h);
FullMatrix(const FullMatrix & m);
const FullMatrix & operator=(const FullMatrix & m);
uint width() const { return m_width; }
uint height() const { return m_height; }
bool isSquare() const { return m_width == m_height; }
float getCoefficient(uint x, uint y) const;
void setCoefficient(uint x, uint y, float f);
void addCoefficient(uint x, uint y, float f);
void mulCoefficient(uint x, uint y, float f);
float dotRow(uint y, const FullVector & v) const;
void madRow(uint y, float alpha, FullVector & v) const;
protected:
bool isValid() const {
return m_array.size() == (m_width * m_height);
}
private:
const uint m_width;
const uint m_height;
Array<float> m_array;
};
NVMATH_API void mult(const FullMatrix & M, const FullVector & x, FullVector & y);
NVMATH_API void mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y);
// y = alpha*A*x + beta*y
NVMATH_API void sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y);
NVMATH_API void sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y);
NVMATH_API void mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C);
NVMATH_API void mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C);
// C = alpha*A*B + beta*C
NVMATH_API void sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C);
NVMATH_API void sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C);
/**
* Sparse matrix class. The matrix is assumed to be sparse and to have
* very few non-zero elements, for this reason it's stored in indexed
* format. To multiply column vectors efficiently, the matrix stores
* the elements in indexed-column order, there is a list of indexed
* elements for each row of the matrix. As with the FullVector the
* dimension of the matrix is constant.
**/
class SparseMatrix
{
friend class FullMatrix;
public:
// An element of the sparse array.
struct Coefficient {
uint x; // column
float v; // value
};
public:
SparseMatrix(uint d);
SparseMatrix(uint w, uint h);
SparseMatrix(const SparseMatrix & m);
const SparseMatrix & operator=(const SparseMatrix & m);
uint width() const { return m_width; }
uint height() const { return m_array.count(); }
bool isSquare() const { return width() == height(); }
float getCoefficient(uint x, uint y) const; // x is column, y is row
void setCoefficient(uint x, uint y, float f);
void addCoefficient(uint x, uint y, float f);
void mulCoefficient(uint x, uint y, float f);
float sumRow(uint y) const;
float dotRow(uint y, const FullVector & v) const;
void madRow(uint y, float alpha, FullVector & v) const;
void clearRow(uint y);
void scaleRow(uint y, float f);
void normalizeRow(uint y);
void clearColumn(uint x);
void scaleColumn(uint x, float f);
const Array<Coefficient> & getRow(uint y) const;
private:
/// Number of columns.
const uint m_width;
/// Array of matrix elements.
Array< Array<Coefficient> > m_array;
};
NVMATH_API void mult(const SparseMatrix & M, const FullVector & x, FullVector & y);
NVMATH_API void mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y);
// y = alpha*A*x + beta*y
NVMATH_API void sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y);
NVMATH_API void sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y);
NVMATH_API void mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C);
NVMATH_API void mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C);
// C = alpha*A*B + beta*C
NVMATH_API void sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C);
NVMATH_API void sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C);
// C = At * A
NVMATH_API void sqm(const SparseMatrix & A, SparseMatrix & C);
} // nv namespace
#endif // NV_MATH_SPARSE_H

View File

@ -3,6 +3,7 @@
#ifndef NV_MATH_SPHERICALHARMONIC_H
#define NV_MATH_SPHERICALHARMONIC_H
#include <string.h> // memcpy
#include <nvmath/Vector.h>
namespace nv

View File

@ -96,7 +96,7 @@ static bool planeBoxOverlap(Vector3::Arg normal, Vector3::Arg vert, Vector3::Arg
if(min>rad || max<-rad) return false;
bool triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
bool nv::triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
{
// use separating axis theorem to test overlap between triangle and box
// need to test for overlap in these directions:
@ -170,7 +170,7 @@ bool triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Trian
}
bool triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
bool nv::triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
{
// use separating axis theorem to test overlap between triangle and box
// need to test for overlap in these directions:

View File

@ -0,0 +1,82 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/Stream.h>
#include <nvmath/Vector.h>
#include <nvmath/Matrix.h>
#include <nvmath/Quaternion.h>
#include <nvmath/Basis.h>
#include <nvmath/Box.h>
#include <nvmath/TypeSerialization.h>
using namespace nv;
Stream & nv::operator<< (Stream & s, Vector2 & v)
{
float x = v.x();
float y = v.y();
s << x << y;
if (s.isLoading())
{
v.set(x, y);
}
return s;
}
Stream & nv::operator<< (Stream & s, Vector3 & v)
{
float x = v.x();
float y = v.y();
float z = v.z();
s << x << y << z;
if (s.isLoading())
{
v.set(x, y, z);
}
return s;
}
Stream & nv::operator<< (Stream & s, Vector4 & v)
{
float x = v.x();
float y = v.y();
float z = v.z();
float w = v.w();
s << x << y << z << w;
if (s.isLoading())
{
v.set(x, y, z, w);
}
return s;
}
Stream & nv::operator<< (Stream & s, Matrix & m)
{
return s;
}
Stream & nv::operator<< (Stream & s, Quaternion & q)
{
return s << q.asVector();
}
Stream & nv::operator<< (Stream & s, Basis & basis)
{
return s << basis.tangent << basis.bitangent << basis.normal;
}
Stream & nv::operator<< (Stream & s, Box & box)
{
return s << box.m_mins << box.m_maxs;
}

View File

@ -0,0 +1,32 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_TYPESERIALIZATION_H
#define NV_MATH_TYPESERIALIZATION_H
#include <nvmath/nvmath.h>
namespace nv
{
class Stream;
class Vector2;
class Vector3;
class Vector4;
class Matrix;
class Quaternion;
struct Basis;
class Box;
NVMATH_API Stream & operator<< (Stream & s, Vector2 & obj);
NVMATH_API Stream & operator<< (Stream & s, Vector3 & obj);
NVMATH_API Stream & operator<< (Stream & s, Vector4 & obj);
NVMATH_API Stream & operator<< (Stream & s, Matrix & obj);
NVMATH_API Stream & operator<< (Stream & s, Quaternion & obj);
NVMATH_API Stream & operator<< (Stream & s, Basis & obj);
NVMATH_API Stream & operator<< (Stream & s, Box & obj);
} // nv namespace
#endif // NV_MATH_TYPESERIALIZATION_H

View File

@ -4,7 +4,7 @@
#define NV_MATH_VECTOR_H
#include <nvmath/nvmath.h>
#include <nvcore/Containers.h> // min, max
#include <nvcore/Algorithms.h> // min, max
namespace nv
{
@ -71,6 +71,7 @@ public:
const Vector2 & xy() const;
scalar component(uint idx) const;
void setComponent(uint idx, scalar f);
const scalar * ptr() const;
@ -239,13 +240,21 @@ inline const Vector2 & Vector3::xy() const
inline scalar Vector3::component(uint idx) const
{
nvDebugCheck(idx < 3);
if (idx == 0) return x();
if (idx == 1) return y();
if (idx == 2) return z();
if (idx == 0) return m_x;
if (idx == 1) return m_y;
if (idx == 2) return m_z;
nvAssume(false);
return 0.0f;
}
inline void Vector3::setComponent(uint idx, float f)
{
nvDebugCheck(idx < 3);
if (idx == 0) m_x = f;
else if (idx == 1) m_y = f;
else if (idx == 2) m_z = f;
}
inline const scalar * Vector3::ptr() const
{
return &m_x;
@ -477,6 +486,35 @@ inline scalar length(Vector2::Arg v)
return sqrtf(length_squared(v));
}
inline scalar inverse_length(Vector2::Arg v)
{
return 1.0f / sqrtf(length_squared(v));
}
inline bool isNormalized(Vector2::Arg v, float epsilon = NV_NORMAL_EPSILON)
{
return equal(length(v), 1, epsilon);
}
inline Vector2 normalize(Vector2::Arg v, float epsilon = NV_EPSILON)
{
float l = length(v);
nvDebugCheck(!isZero(l, epsilon));
Vector2 n = scale(v, 1.0f / l);
nvDebugCheck(isNormalized(n));
return n;
}
inline Vector2 normalizeSafe(Vector2::Arg v, Vector2::Arg fallback, float epsilon = NV_EPSILON)
{
float l = length(v);
if (isZero(l, epsilon)) {
return fallback;
}
return scale(v, 1.0f / l);
}
inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON)
{
return equal(v1.x(), v2.x(), epsilon) && equal(v1.y(), v2.y(), epsilon);
@ -595,6 +633,11 @@ inline scalar length(Vector3::Arg v)
return sqrtf(length_squared(v));
}
inline scalar inverse_length(Vector3::Arg v)
{
return 1.0f / sqrtf(length_squared(v));
}
inline bool isNormalized(Vector3::Arg v, float epsilon = NV_NORMAL_EPSILON)
{
return equal(length(v), 1, epsilon);
@ -716,6 +759,11 @@ inline scalar length(Vector4::Arg v)
return sqrtf(length_squared(v));
}
inline scalar inverse_length(Vector4::Arg v)
{
return 1.0f / sqrtf(length_squared(v));
}
inline bool isNormalized(Vector4::Arg v, float epsilon = NV_NORMAL_EPSILON)
{
return equal(length(v), 1, epsilon);

View File

@ -136,6 +136,11 @@ inline float lerp(float f0, float f1, float t)
return f0 * s + f1 * t;
}
inline float square(float f)
{
return f * f;
}
} // nv
#endif // NV_MATH_H